Labo 2 - Statistiques univariées


In [1]:
# Load (or reload) the rpy2 IPython extension; it provides the %%R cell magic
# used by the R cells of this notebook.
%reload_ext rpy2.ipython

# pandas reads the Excel file; simpledbf's Dbf5 reads the dBase (.dbf) file.
import pandas as pd
from simpledbf import Dbf5

Aide


In [ ]:
%%R
# help()
# help(function)
# help(package='package-name')

Paquets


In [ ]:
%%R
# Installing a package (generic form):
# install.packages('package-name')

# Already installed with conda:
#install.packages("foreign")

# New installs (uncomment the lines needed):
#install.packages("Rcmdr", dependencies = TRUE, repos="https://cran.rstudio.com/") # TODO: check whether conda already provides it
#install.packages("nortest", repos="https://cran.rstudio.com/")
#install.packages("sas7bdat", repos="https://cran.rstudio.com/")
#install.packages("Hmisc", repos="https://cran.rstudio.com/")
#install.packages("pastecs", repos="https://cran.rstudio.com/")

In [ ]:
%%R
# Attach the packages used by this lab (generic form: library('package-name')).
# The order is kept as listed: a package attached later masks same-named
# functions from packages attached earlier.
invisible(lapply(
  c("foreign", "nortest", "sas7bdat", "Hmisc", "pastecs"),
  library,
  character.only = TRUE
))

Espace de travail


In [3]:
# IPython line magic ("magic function", shell-style integration): displays the
# current working directory of the notebook.
%pwd


Out[3]:
'/home/inrs/EUR8217/labo'

Import fichier externe


In [ ]:
%%R
# Import the data exported from Excel as tab-separated text.
# sep = "\t" makes the tab the only field delimiter. The read.table default
# (sep = "") splits on any run of whitespace, so a value containing a space or
# an empty cell would break or shift the columns.
fichierTexte <- read.table("data/labo2/SR_Data.txt", header = TRUE, sep = "\t")
head(fichierTexte)

In [ ]:
# Python counterpart of the Excel import: pandas reads the workbook directly.
df = pd.read_excel(io='data/labo2/SR_Data.xls')

# Preview the first five rows.
df.head(n=5)

In [ ]:
%%R
# Read the dBase (.dbf) table with read.dbf() and preview its first six rows.
fichierDBF <- read.dbf(file = "data/labo2/SR_Data.dbf")
head(fichierDBF, n = 6L)

In [ ]:
# Python counterpart of the DBF (dBase) import, using simpledbf.
# NOTE: this rebinds `df`, which previously held the Excel import.

dbf = Dbf5('data/labo2/SR_Data.dbf')
df = dbf.to_dataframe()
df.head()

In [ ]:
%%R
# Read the SPSS (.sav) file; to.data.frame = TRUE requests a data frame
# as the result. Then preview the first six rows.
fichierSPSS <- read.spss(file = "data/labo2/Data_SPSS.sav", to.data.frame = TRUE)
head(fichierSPSS, n = 6L)

In [ ]:
# Python
# import SPSS

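# savReaderWriter: pip install failed, so no Python SPSS import is shown here.
# TODO: pandas.read_spss (requires the pyreadstat package) may be an alternative.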

In [ ]:
%%R
# Read the SAS (.sas7bdat) dataset with debugging output disabled,
# then preview the first six rows.
fichierSAS <- read.sas7bdat(file = "data/labo2/tableau1.sas7bdat", debug = FALSE)
head(fichierSAS, n = 6L)

In [ ]:
# Python
# import SAS

In [ ]:
%%R
# data visualisation via GUI
#library(Rcmdr)

Structuration d'un tableau


In [17]:
%%R

# List the variable (column) names of the table.
# Reminder: R indexes start at 1.
colnames(fichierDBF)


 [1] "POPTOT_FR"  "FAIBLEREV"  "MONOPCT"    "MENAGE1PCT" "IMMREC_PCT"
 [6] "TX_CHOM"    "NOECOLEPCT" "SCO_M9PCT"  "SCO_M13PCT" "PARTIELPCT"
[11] "FAIBREVPCT" "INDICE_PAU" "Dist_Min"   "N_1000"     "Dist_Moy_3"
[16] "Shape_Leng" "Shape_Area"

In [18]:
%%R

# Drop a variable: assigning NULL to a column removes it from the data frame.
fichierDBF[["Shape_Leng"]] <- NULL
colnames(fichierDBF)


 [1] "POPTOT_FR"  "FAIBLEREV"  "MONOPCT"    "MENAGE1PCT" "IMMREC_PCT"
 [6] "TX_CHOM"    "NOECOLEPCT" "SCO_M9PCT"  "SCO_M13PCT" "PARTIELPCT"
[11] "FAIBREVPCT" "INDICE_PAU" "Dist_Min"   "N_1000"     "Dist_Moy_3"
[16] "Shape_Area"

In [19]:
%%R

# Rename a variable. The column is selected by its current name rather than by
# position, so the rename stays correct if the column order changes and is a
# no-op when the cell is run again.
names(fichierDBF)[names(fichierDBF) == "POPTOT_FR"] <- "POPTOT"
names(fichierDBF)


 [1] "POPTOT"     "FAIBLEREV"  "MONOPCT"    "MENAGE1PCT" "IMMREC_PCT"
 [6] "TX_CHOM"    "NOECOLEPCT" "SCO_M9PCT"  "SCO_M13PCT" "PARTIELPCT"
[11] "FAIBREVPCT" "INDICE_PAU" "Dist_Min"   "N_1000"     "Dist_Moy_3"
[16] "Shape_Area"

In [ ]:
%%R

#

Variable centrée réduite

Fonctions statistiques