jsonlite

JSON cũng là một dạng dữ liệu khá phổ biến trên Internet. R có thể đọc dữ liệu JSON (từ chuỗi, file hoặc URL) bằng package jsonlite.


In [1]:
# Install jsonlite only when it is not already available, so that re-running
# the notebook does not download and reinstall the package every time.
if (!requireNamespace("jsonlite", quietly = TRUE)) {
  install.packages("jsonlite")
}


Installing package into ‘/home/duyetdev/R/x86_64-pc-linux-gnu-library/3.3’
(as ‘lib’ is unspecified)

In [2]:
library(jsonlite)

In [ ]:


In [3]:
# One wine record encoded as a JSON object. Single quotes are used for the R
# string because the JSON text itself contains double quotes; the literal is
# split in two only to keep the source lines short.
wine_json <- paste0(
  '{"name":"Chateau Migraine", "year":1997, ',
  '"alcohol_pct":12.4, "color":"red", "awarded":false}'
)

In [6]:
# Parse the JSON text into an R object, then show its structure.
wine <- fromJSON(txt = wine_json)
str(object = wine)


List of 5
 $ name       : chr "Chateau Migraine"
 $ year       : int 1997
 $ alcohol_pct: num 12.4
 $ color      : chr "red"
 $ awarded    : logi FALSE

In [9]:
# Display the parsed object (a bare expression is auto-printed by the notebook).
wine


$name
'Chateau Migraine'
$year
1997
$alcohol_pct
12.4
$color
'red'
$awarded
FALSE

In [10]:
# Extract a single element by its exact name.
wine[["name"]]


'Chateau Migraine'

from URL

fromJSON() cũng có thể đọc JSON từ URL


In [11]:
# Download a Quandl dataset (IWS/INTERNET_INDIA) as JSON and parse it.
#
# The API token is read from the QUANDL_AUTH_TOKEN environment variable rather
# than being hard-coded, so the credential is not stored in the notebook, and
# the request goes over HTTPS so the token is not sent in clear text.
quandl_token <- Sys.getenv("QUANDL_AUTH_TOKEN")
if (!nzchar(quandl_token)) {
  stop(
    "Set the QUANDL_AUTH_TOKEN environment variable before running this cell.",
    call. = FALSE
  )
}
quandl_url <- paste0(
  "https://www.quandl.com/api/v1/datasets/IWS/INTERNET_INDIA.json",
  "?auth_token=", utils::URLencode(quandl_token, reserved = TRUE)
)
# fromJSON() accepts a URL: it fetches the document and parses the response.
quandl_data <- fromJSON(quandl_url)

In [12]:
# Display the parsed response (a bare expression is auto-printed by the notebook).
quandl_data


$errors
named list()

$id
[1] 2351831

$source_name
[1] "Internet World Stats"

$source_code
[1] "IWS"

$code
[1] "INTERNET_INDIA"

$name
[1] "India Internet Usage"

$urlize_name
[1] "India-Internet-Usage"

$display_url
[1] "http://www.internetworldstats.com/asia/in.htm"

$description
[1] "Internet Usage and Population Statistics"

$updated_at
[1] "2016-01-01T04:23:55.235Z"

$frequency
[1] "annual"

$from_date
[1] "1998-12-31"

$to_date
[1] "2012-12-31"

$column_names
[1] "YEAR"       "Users"      "Population" "% Pen."    

$private
[1] FALSE

$type
NULL

$premium
[1] FALSE

$data
      [,1]         [,2]       [,3]         [,4]  
 [1,] "2012-12-31" "1.37e+08" "1205073612" "11.4"
 [2,] "2010-12-31" "1e+08"    "1173108018" "8.5" 
 [3,] "2009-12-31" "8.1e+07"  "1156897766" "7"   
 [4,] "2007-12-31" "4.2e+07"  "1129667528" "3.7" 
 [5,] "2006-12-31" "4e+07"    "1112225812" "3.6" 
 [6,] "2005-12-31" "50600000" "1112225812" "4.5" 
 [7,] "2004-12-31" "39200000" "1094870677" "3.6" 
 [8,] "2003-12-31" "22500000" "1094870677" "2.1" 
 [9,] "2002-12-31" "16500000" "1094870677" "1.6" 
[10,] "2001-12-31" "7e+06"    "1094870677" "0.7" 
[11,] "2000-12-31" "5500000"  "1094870677" "0.5" 
[12,] "1999-12-31" "2800000"  "1094870677" "0.3" 
[13,] "1998-12-31" "1400000"  "1094870677" "0.1" 

toJSON()

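Chuyển data.frame sang chuỗi JSON bằng toJSON(). Lưu ý: như có thể thấy ở output bên dưới, theo mặc định toJSON() bỏ qua các giá trị NA và làm tròn số đến 4 chữ số thập phân.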


In [18]:
# Read the water CSV straight from its URL, keep only the first six rows so
# the example stays small, and display the result.
url_csv <- "http://s3.amazonaws.com/assets.datacamp.com/production/course_1478/datasets/water.csv"
water <- head(read.csv(file = url_csv), n = 6L)

water


water                X1992        X1997        X2002        X2007
Algeria              0.064000000  NA           0.017000000  NA
American Samoa       NA           NA           NA           NA
Angola               0.000138000  NA           0.000138000  NA
Antigua and Barbuda  0.003300000  NA           NA           NA
Argentina            0.000749345  0.000749345  0.000749345  NA
Australia            0.029755895  NA           0.029755895  NA

In [22]:
# Serialise the data frame to a JSON string (one JSON object per row), then
# display it. NOTE(review): with these default settings the printed result
# omits NA fields and shows numbers rounded to four decimals; confirm that is
# acceptable before reusing the output.
water_json <- toJSON(x = water)
water_json


[{"water":"Algeria","X1992":0.064,"X2002":0.017},{"water":"American Samoa"},{"water":"Angola","X1992":0.0001,"X2002":0.0001},{"water":"Antigua and Barbuda","X1992":0.0033},{"water":"Argentina","X1992":0.0007,"X1997":0.0007,"X2002":0.0007},{"water":"Australia","X1992":0.0298,"X2002":0.0298}] 

prettify() or minify()


In [23]:
# Re-format the JSON string with line breaks and 4-space indentation.
prettify(txt = water_json, indent = 4)


[
    {
        "water": "Algeria",
        "X1992": 0.064,
        "X2002": 0.017
    },
    {
        "water": "American Samoa"
    },
    {
        "water": "Angola",
        "X1992": 0.0001,
        "X2002": 0.0001
    },
    {
        "water": "Antigua and Barbuda",
        "X1992": 0.0033
    },
    {
        "water": "Argentina",
        "X1992": 0.0007,
        "X1997": 0.0007,
        "X2002": 0.0007
    },
    {
        "water": "Australia",
        "X1992": 0.0298,
        "X2002": 0.0298
    }
]
 

In [24]:
# Collapse the JSON string back to its compact, whitespace-free form.
minify(txt = water_json)


[{"water":"Algeria","X1992":0.064,"X2002":0.017},{"water":"American Samoa"},{"water":"Angola","X1992":0.0001,"X2002":0.0001},{"water":"Antigua and Barbuda","X1992":0.0033},{"water":"Argentina","X1992":0.0007,"X1997":0.0007,"X2002":0.0007},{"water":"Australia","X1992":0.0298,"X2002":0.0298}] 

In [ ]: