In [9]:
# One-off conversion: restore the objects stored in the .RData file into the
# workspace (presumably this defines `Mumbai2013`), then export that data
# frame as a CSV file in the current working directory.
# NOTE(review): the input path is an absolute, machine-specific location.
# write.csv() writes row names by default; they come back as the `X` column
# when the CSV is read again.
load(file = '/home/duyetdev/Downloads/Mumbai2013.RData')
write.csv(x = Mumbai2013, file = 'mumbai2013.csv')

In [13]:
# Load the dataset from the CSV export located in the current working directory.
Mumbai2013 <- read.csv(file = './mumbai2013.csv')

In [14]:
# Preview the first 6 rows (6 is also head()'s default).
head(Mumbai2013, n = 6L)


XTimeTemperature
1 2013-01-01 00:10:0075.2
2 2013-01-01 00:40:0075.2
3 2013-01-01 01:10:0075.2
4 2013-01-01 01:40:0073.4
5 2013-01-01 02:10:0073.4
6 2013-01-01 02:30:0071.0

In [15]:
# Preview the last 10 rows.
tail(x = Mumbai2013, n = 10L)


XTimeTemperature
1831918319 2013-12-31 20:10:0077.0
1832018320 2013-12-31 20:30:0076.0
1832118321 2013-12-31 20:40:0077.0
1832218322 2013-12-31 21:10:0077.0
1832318323 2013-12-31 21:40:0077.0
1832418324 2013-12-31 22:10:0075.2
1832518325 2013-12-31 22:40:0075.2
1832618326 2013-12-31 23:10:0075.2
1832718327 2013-12-31 23:30:0072.0
1832818328 2013-12-31 23:40:0075.2

In [16]:
# Compact structural description of the data frame: one line per column
# with its type and the first few values.
str(object = Mumbai2013)


'data.frame':	18328 obs. of  3 variables:
 $ X          : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Time       : Factor w/ 18328 levels "2013-01-01 00:10:00",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ Temperature: num  75.2 75.2 75.2 73.4 73.4 71 73.4 73.4 75.2 73.4 ...

In [17]:
# Dimensions of the data frame: number of rows, then number of columns.
c(nrow(Mumbai2013), ncol(Mumbai2013))


  1. 18328
  2. 3

In [18]:
# List the column names of the data frame.
colnames(Mumbai2013)


  1. 'X'
  2. 'Time'
  3. 'Temperature'

In [19]:
# Class of the loaded object (read.csv() returns a data.frame).
class(x = Mumbai2013)


'data.frame'

Thư viện library(dplyr)


In [20]:
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


In [21]:
# Per-column summary: quartiles and mean for numeric columns,
# level counts for factor columns.
summary(object = Mumbai2013)


       X                          Time        Temperature    
 Min.   :    1   2013-01-01 00:10:00:    1   Min.   : 53.00  
 1st Qu.: 4583   2013-01-01 00:40:00:    1   1st Qu.: 78.80  
 Median : 9164   2013-01-01 01:10:00:    1   Median : 82.40  
 Mean   : 9164   2013-01-01 01:40:00:    1   Mean   : 81.79  
 3rd Qu.:13746   2013-01-01 02:10:00:    1   3rd Qu.: 86.00  
 Max.   :18328   2013-01-01 02:30:00:    1   Max.   :102.20  
                 (Other)            :18322                   

In [22]:
# dplyr's transposed overview: one line per column showing its type and
# as many leading values as fit on the line.
dplyr::glimpse(Mumbai2013)


Observations: 18,328
Variables: 3
$ X           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...
$ Time        <fctr> 2013-01-01 00:10:00, 2013-01-01 00:40:00, 2013-01-01 0...
$ Temperature <dbl> 75.2, 75.2, 75.2, 73.4, 73.4, 71.0, 73.4, 73.4, 75.2, 7...

In [ ]:

Visualizing data


In [23]:
# Distribution of all temperature readings in the dataset.
# Explicit title and axis labels replace the defaults, which would otherwise
# show the raw expression `Mumbai2013$Temperature`.
# NOTE(review): summary() reports values between 53 and 102.2, which suggests
# degrees Fahrenheit; confirm with the data source before adding a unit.
hist(
  Mumbai2013$Temperature,
  main = "Mumbai 2013: distribution of temperature readings",
  xlab = "Temperature",
  ylab = "Number of readings"
)



In [24]:
# Temperature over time as a line plot.
# Calling plot() on the whole data frame would draw a pairs (scatter-plot
# matrix) of every column, including the row index `X` and the `Time` factor
# with one level per row, which is slow and not informative.
# `Time` was read from CSV as text/factor, so it is parsed into POSIXct here
# to get a real time axis.
# NOTE(review): tz = "UTC" is used only so that parsing never hits a
# daylight-saving gap; the actual time zone of the readings is not stated in
# this notebook, so confirm it.
with(
  Mumbai2013,
  plot(
    x = as.POSIXct(as.character(Time), format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    y = Temperature,
    type = "l",
    main = "Mumbai 2013: temperature over time",
    xlab = "Time",
    ylab = "Temperature"
  )
)