| Table des matières | Traitement de données >
Wilson G, Aruliah DA, Brown CT, Chue Hong NP, Davis M, Guy RT, et al. (2014) Best Practices for Scientific Computing. PLoS Biol 12(1): e1001745. https://doi.org/10.1371/journal.pbio.1001745
.py
#
In [1]:
# esprit de Python
import this
In [2]:
# Print a literal directly, then print the same text through a variable.
print("Hello World")
s = "Hello World"
print(s)
# input() shows the prompt, waits for a line of text and returns it as a string.
reponse = input('Bonjour, comment vous appelez-vous ? ')
# The greeting is built by concatenating two strings with +.
print('Bonjour '+reponse)
# Disabled IPython %run magic for ./00-script1.py (kept for reference).
#%run ./00-script1.py
In [3]:
# type() reports the class of a value; here the value is a string literal.
type('Hello')
Out[3]:
In [4]:
# Type of an integer literal.
type(4)
Out[4]:
In [5]:
# Type of a floating-point literal.
type(4.5)
Out[5]:
In [6]:
# Type of a boolean constant.
type(True)
Out[6]:
In [7]:
# Type of an empty list.
type([])
Out[7]:
In [8]:
# A non-empty list has the same type as an empty one.
type([2,3])
Out[8]:
In [9]:
# Empty braces create a dictionary (not a set).
type({})
Out[9]:
In [10]:
# A dictionary may mix key types (int, str) and value types (str, list).
type({1: 'sdfasd', 'g': [1,2]})
Out[10]:
In [11]:
# Empty record
class A:
    """Empty class used as an ad-hoc record: attributes are attached after creation."""
    pass


a = A()
# Attributes can be added to an instance after it has been constructed.
a.x = 10
a.y = -2.3
# The type of the instance is the class A itself.
type(a)
Out[11]:
In [12]:
# The same name is rebound successively to an int, a float and a str.
a = 2
a = 2.3
a = 'hello' # variables have no type
# This line raises TypeError (str + int); presumably intentional, to show that
# the value, not the variable, carries the type.
b = a + 3 # values have a type
In [13]:
# Type conversions: int() parses the string '3' into an integer.
int('3')
Out[13]:
In [14]:
# str() converts the integer 3 into its string representation.
str(3)
Out[14]:
In [15]:
# Operations: = assigns, == compares (this comparison is True).
i=1
i == 1
Out[15]:
In [16]:
# In-place increment; if i was 1 before, it is now 2 and the comparison is False.
i+=1
i == 1
Out[16]:
In [17]:
# True when the cells above have been run once, in order (i went from 1 to 2).
i == 2
Out[17]:
In [18]:
# Skeleton of a braking computation: the question is printed but no answer is
# read, and the three quantities are left as None placeholders.
# NOTE(review): presumably an exercise to be completed by the reader — confirm.
print('À quelle vitesse roule le véhicule (km/h) ?')
vitesse = None
temps = None
distance = None
# As written, this prints None for both the braking time and the distance.
print('Le temps de freinage est', temps, 'et la distance de freinage est', distance)
In [19]:
# Lists
a = list(range(10))
print(a)
print(a[0]) # indexing starts at 0
print(a[-1]) # negative indices count from the end: last element
print(a[-2]) # second-to-last element
a[2] = -10 # lists are mutable: replace the element at index 2
# Methods
print(a)
a.sort() # sorts the list in place
print(a)
a.append(-100) # adds an element at the end
print(a)
del a[0] # removes the first element
print(a)
a[3] = 'hello' # a list may hold values of different types
print(a)
In [20]:
# Membership: the in operator tests whether a value is an element of the list.
1 in a
Out[20]:
In [21]:
# Same membership test with another value.
0 in a
Out[21]:
In [22]:
# References: assignment does not copy a list.
b = list(range(10))
c = b # c and b now name the same list object
c[3] = 'bug' # a modification made through c ...
print(b) # ... is visible through b as well
In [23]:
# "List comprehensions": build a new list by applying an expression to each element.
a = list(range(10))
doubles = [2*x for x in a] # each element multiplied by 2
print(doubles)
carres = [x*x for x in a] # each element squared
print(carres)
In [24]:
# Loops: three ways of printing every element of a list.
a = list(range(5))
# 1. Iterate directly over the elements.
for x in a:
    print(x)
# 2. Iterate over the indices.
for i in range(len(a)):
    print(a[i])
# 3. Explicit counter with while; the increment must be inside the loop body,
#    otherwise the loop never terminates.
i = 0
while i<len(a):
    print(a[i])
    i += 1
# Conditional test
from numpy.random import random_sample
b = random_sample(10)  # array of 10 random floats
print(b)
for x in b:
    if x > 0.5:
        print(x)
    else:
        print('Nombre plus petit que 0.5', x)
# List comprehension with a filter: keep only the values above 0.5.
c = [x for x in b if x>0.5]
print(c)
c
.
In [25]:
import urllib.request
import zipfile
import io
import matplotlib.mlab as pylab
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
In [26]:
# numpy examples
a = np.arange(10) # similar to range(10), returns an array
b = np.zeros((4,5)) # 4x5 array filled with zeros
c = np.ones((4,5)) # 4x5 array filled with ones
a = np.random.random_sample(10) # rebinds a to an array of 10 random floats
# avoid loops, extract sub-vectors (as in matlab)
b = a>0.5 # element-wise comparison yields a boolean array (rebinds b)
print(b)
c = a[b] # boolean mask: keeps the elements of a where b is True (rebinds c)
print(a)
print(c)
In [27]:
# Load a matrix from a text file
data = np.loadtxt('./donnees/vitesse-debit.txt')
# Plot the second column against the first, as unconnected circle markers.
plt.plot(data[:,0], data[:,1], 'o')
# Mean along axis 0, i.e. one mean per column.
data.mean(0)
Out[27]:
In [28]:
# jeu de données de voitures http://lib.stat.cmu.edu/DASL/Datafiles/Cars.html
text = '''Country Car MPG Weight Drive_Ratio Horsepower Displacement Cylinders
U.S. Buick Estate Wagon 16.9 4.360 2.73 155 350 8
U.S. Ford Country Squire Wagon 15.5 4.054 2.26 142 351 8
U.S. Chevy Malibu Wagon 19.2 3.605 2.56 125 267 8
U.S. Chrysler LeBaron Wagon 18.5 3.940 2.45 150 360 8
U.S. Chevette 30.0 2.155 3.70 68 98 4
Japan Toyota Corona 27.5 2.560 3.05 95 134 4
Japan Datsun 510 27.2 2.300 3.54 97 119 4
U.S. Dodge Omni 30.9 2.230 3.37 75 105 4
Germany Audi 5000 20.3 2.830 3.90 103 131 5
Sweden Volvo 240 GL 17.0 3.140 3.50 125 163 6
Sweden Saab 99 GLE 21.6 2.795 3.77 115 121 4
France Peugeot 694 SL 16.2 3.410 3.58 133 163 6
U.S. Buick Century Special 20.6 3.380 2.73 105 231 6
U.S. Mercury Zephyr 20.8 3.070 3.08 85 200 6
U.S. Dodge Aspen 18.6 3.620 2.71 110 225 6
U.S. AMC Concord D/L 18.1 3.410 2.73 120 258 6
U.S. Chevy Caprice Classic 17.0 3.840 2.41 130 305 8
U.S. Ford LTD 17.6 3.725 2.26 129 302 8
U.S. Mercury Grand Marquis 16.5 3.955 2.26 138 351 8
U.S. Dodge St Regis 18.2 3.830 2.45 135 318 8
U.S. Ford Mustang 4 26.5 2.585 3.08 88 140 4
U.S. Ford Mustang Ghia 21.9 2.910 3.08 109 171 6
Japan Mazda GLC 34.1 1.975 3.73 65 86 4
Japan Dodge Colt 35.1 1.915 2.97 80 98 4
U.S. AMC Spirit 27.4 2.670 3.08 80 121 4
Germany VW Scirocco 31.5 1.990 3.78 71 89 4
Japan Honda Accord LX 29.5 2.135 3.05 68 98 4
U.S. Buick Skylark 28.4 2.670 2.53 90 151 4
U.S. Chevy Citation 28.8 2.595 2.69 115 173 6
U.S. Olds Omega 26.8 2.700 2.84 115 173 6
U.S. Pontiac Phoenix 33.5 2.556 2.69 90 151 4
U.S. Plymouth Horizon 34.2 2.200 3.37 70 105 4
Japan Datsun 210 31.8 2.020 3.70 65 85 4
Italy Fiat Strada 37.3 2.130 3.10 69 91 4
Germany VW Dasher 30.5 2.190 3.70 78 97 4
Japan Datsun 810 22.0 2.815 3.70 97 146 6
Germany BMW 320i 21.5 2.600 3.64 110 121 4
Germany VW Rabbit 31.9 1.925 3.78 71 89 4
'''
# Wrap the string in a file-like object so that pandas can read it like a file.
s = io.StringIO(text)
# assumes the columns of `text` are separated by tab characters — TODO confirm
data = pd.read_csv(s, delimiter = '\t')
#data.to_csv('cars.txt', index=False)
# NOTE(review): DataFrame.info() prints its summary itself and returns None,
# so wrapping it in print() also outputs a trailing "None".
print(data.info())
data
Out[28]:
In [29]:
#data.describe(include = 'all')
# Summary statistics; without include='all' only the numeric columns are summarised.
data.describe()
Out[29]:
In [30]:
# Bar chart of the number of rows per value of the Country column.
data['Country'].value_counts().plot(kind='bar')
# describe() restricted to the two selected columns.
data[['Car', 'Country']].describe()
Out[30]:
In [31]:
# Example of a vector or record: the row whose index label is 0.
data.loc[0]
Out[31]:
In [32]:
# Scatter plot with the Weight column on the x axis and MPG on the y axis.
plt.scatter(data.Weight, data.MPG)
Out[32]:
In [33]:
# Bicycle counts
# urlretrieve downloads the URL to a local temporary file and returns
# (local file name, response headers).
filename, message = urllib.request.urlretrieve('http://donnees.ville.montreal.qc.ca/dataset/f170fecc-18db-44bc-b4fe-5b0b6d2c7297/resource/6caecdd0-e5ac-48c1-a0cc-5b537936d5f6/download/comptagevelo20162.csv')
data = pd.read_csv(filename)
# NOTE(review): DataFrame.info() prints its summary itself and returns None,
# so wrapping it in print() also outputs a trailing "None".
print(data.info())
# Plot the 'CSC (Côte Sainte-Catherine)' column against the row index.
plt.plot(data['CSC (Côte Sainte-Catherine)'])
Out[33]:
In [34]:
# 2016-01-01 was a Friday, so the first Monday of the year is 2016-01-04.
# With 0-based row indexing that Monday is row 3; starting the slice at row 4
# would begin each week on a Tuesday and shift every curve by one day relative
# to the Monday-to-Sunday axis labels.
# assumes one row per day with row 0 = 2016-01-01 — TODO confirm against the CSV
first_monday = 3
n_weeks = 51
weekdays = ['lundi', 'mardi', 'mercredi', 'jeudi', 'vendredi', 'samedi', 'dimanche']
daily_counts = data['CSC (Côte Sainte-Catherine)'].tolist()
# One row per week, one column per weekday.
cscComptage = np.array(daily_counts[first_monday:first_monday + n_weeks*7]).reshape(n_weeks, 7)
# One curve per week.
for r in cscComptage:
    plt.plot(r)
# Axis decorations are set once, after all the curves have been drawn.
plt.xticks(range(7), weekdays)
plt.ylabel('Nombre de cyclistes')
Out[34]:
In [35]:
# Show the week-by-day array as an image: no pixel interpolation, and the aspect
# ratio is adapted to the axes instead of forcing square cells.
plt.imshow(cscComptage, interpolation = 'none', aspect = 'auto')
# Colour scale relating colours to values.
plt.colorbar()
Out[35]:
In [36]:
# BIXI data
# Download the 2018 rentals archive to a local temporary file.
filename, message = urllib.request.urlretrieve('https://montreal.bixi.com/c/bixi/file_db/data_all.file/BixiMontrealRentals2018.zip')
# The archive is opened in a with-block so that it is closed once the CSV has
# been loaded, under a name that does not shadow the builtin zip().
with zipfile.ZipFile(filename) as archive:
    # Only the first member listed in the archive is read.
    with archive.open(archive.namelist()[0]) as member:
        data = pd.read_csv(member)
In [37]:
# NOTE(review): DataFrame.info() prints its summary itself and returns None,
# so wrapping it in print() also outputs a trailing "None".
print(data.info())
print(data.describe())
# think about the types: does averaging station codes make sense?
In [38]:
# Weather data from Environment Canada
# Alternative request (Month=1, timeframe=2), kept for reference:
#filename, message = urllib.request.urlretrieve('http://climate.weather.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=2017&Month=1&Day=1&timeframe=2&submit=Download+Data')
filename, message = urllib.request.urlretrieve('http://climate.weather.gc.ca/climate_data/bulk_data_f.html?format=csv&stationID=10761&Year=2017&Month=7&Day=1&timeframe=1&submit=Download+Data')
# skiprows=15 ignores the first 15 lines of the file before parsing;
# presumably a metadata preamble ahead of the column header — verify against the file.
data = pd.read_csv(filename, skiprows = 15, delimiter = ',')
In [39]:
# NOTE(review): DataFrame.info() prints its summary itself and returns None,
# so wrapping it in print() also outputs a trailing "None".
print(data.info())
# Plot the 'Hum. rel (%)' column against the row index.
plt.plot(data['Hum. rel (%)'])
plt.show()
#data.describe()
# plt.plot