Este es un ejemplo de clase sobre la manipulación de archivos csv usando python
In [4]:
# iImporta pandas. Se hace una sola vez. Pandas sirve para importar diferentes tipos de archivos
import pandas as pd
In [7]:
x=pd.read_csv ('AportesDiario_2004.csv', sep=';', decimal=',', thousands='.', skiprows=3)
In [8]:
x.head()
Out[8]:
Fecha
Region Hidrologica
Nombre Rio
Aportes Caudal m3/s
Aportes Energia kWh
Aportes %
0
2004-01-01
ANTIOQUIA
A. SAN LORENZO
17.56
3910000.0
72,27%
1
2004-01-01
ANTIOQUIA
CONCEPCION
6.15
1385300.0
123,69%
2
2004-01-01
ANTIOQUIA
DESV. EEPPM (NEC,PAJ,DOL)
11.43
2574700.0
147,13%
3
2004-01-01
ANTIOQUIA
GRANDE
18.65
4563500.0
79,23%
4
2004-01-01
ANTIOQUIA
GUADALUPE
11.28
2540900.0
80,15%
In [11]:
x["Fecha"]
Out[11]:
0 2004-01-01
1 2004-01-01
2 2004-01-01
3 2004-01-01
4 2004-01-01
5 2004-01-01
6 2004-01-01
7 2004-01-01
8 2004-01-01
9 2004-01-01
10 2004-01-01
11 2004-01-01
12 2004-01-01
13 2004-01-01
14 2004-01-01
15 2004-01-01
16 2004-01-01
17 2004-01-01
18 2004-01-01
19 2004-01-01
20 2004-01-01
21 2004-01-01
22 2004-01-01
23 2004-01-01
24 2004-01-02
25 2004-01-02
26 2004-01-02
27 2004-01-02
28 2004-01-02
29 2004-01-02
...
8756 2004-12-30
8757 2004-12-30
8758 2004-12-30
8759 2004-12-30
8760 2004-12-31
8761 2004-12-31
8762 2004-12-31
8763 2004-12-31
8764 2004-12-31
8765 2004-12-31
8766 2004-12-31
8767 2004-12-31
8768 2004-12-31
8769 2004-12-31
8770 2004-12-31
8771 2004-12-31
8772 2004-12-31
8773 2004-12-31
8774 2004-12-31
8775 2004-12-31
8776 2004-12-31
8777 2004-12-31
8778 2004-12-31
8779 2004-12-31
8780 2004-12-31
8781 2004-12-31
8782 2004-12-31
8783 2004-12-31
8784 NaN
8785 NaN
Name: Fecha, dtype: object
In [13]:
x["Region Hidrologica"]== "ANTIOQUIA"
Out[13]:
0 True
1 True
2 True
3 True
4 True
5 True
6 True
7 True
8 True
9 True
10 True
11 False
12 False
13 False
14 False
15 False
16 False
17 False
18 False
19 False
20 False
21 False
22 False
23 False
24 True
25 True
26 True
27 True
28 True
29 True
...
8756 False
8757 False
8758 False
8759 False
8760 True
8761 True
8762 True
8763 True
8764 True
8765 True
8766 True
8767 True
8768 True
8769 True
8770 True
8771 False
8772 False
8773 False
8774 False
8775 False
8776 False
8777 False
8778 False
8779 False
8780 False
8781 False
8782 False
8783 False
8784 False
8785 False
Name: Region Hidrologica, dtype: bool
In [16]:
x[x["Region Hidrologica"]== "ANTIOQUIA"] # registros que son de Antioquia
Out[16]:
Fecha
Region Hidrologica
Nombre Rio
Aportes Caudal m3/s
Aportes Energia kWh
Aportes %
0
2004-01-01
ANTIOQUIA
A. SAN LORENZO
17.56
3910000.0
72,27%
1
2004-01-01
ANTIOQUIA
CONCEPCION
6.15
1385300.0
123,69%
2
2004-01-01
ANTIOQUIA
DESV. EEPPM (NEC,PAJ,DOL)
11.43
2574700.0
147,13%
3
2004-01-01
ANTIOQUIA
GRANDE
18.65
4563500.0
79,23%
4
2004-01-01
ANTIOQUIA
GUADALUPE
11.28
2540900.0
80,15%
5
2004-01-01
ANTIOQUIA
GUATAPE
32.71
5405100.0
141,49%
6
2004-01-01
ANTIOQUIA
MIEL I
50.63
2214100.0
63,99%
7
2004-01-01
ANTIOQUIA
NARE
20.16
7057000.0
56,73%
8
2004-01-01
ANTIOQUIA
PORCE II
64.61
3242100.0
120,97%
9
2004-01-01
ANTIOQUIA
SAN CARLOS
12.80
1588700.0
70,92%
10
2004-01-01
ANTIOQUIA
TENCHE
1.81
407700.0
66,84%
24
2004-01-02
ANTIOQUIA
A. SAN LORENZO
17.18
3825300.0
70,71%
25
2004-01-02
ANTIOQUIA
CONCEPCION
6.13
1380800.0
123,29%
26
2004-01-02
ANTIOQUIA
DESV. EEPPM (NEC,PAJ,DOL)
11.39
2565700.0
146,61%
27
2004-01-02
ANTIOQUIA
GRANDE
17.73
4338400.0
75,32%
28
2004-01-02
ANTIOQUIA
GUADALUPE
20.30
4572800.0
144,25%
29
2004-01-02
ANTIOQUIA
GUATAPE
18.00
2974400.0
77,86%
30
2004-01-02
ANTIOQUIA
MIEL I
70.57
3086100.0
89,19%
31
2004-01-02
ANTIOQUIA
NARE
17.93
6276300.0
50,45%
32
2004-01-02
ANTIOQUIA
PORCE II
64.20
3221500.0
120,21%
33
2004-01-02
ANTIOQUIA
SAN CARLOS
11.84
1469500.0
65,60%
34
2004-01-02
ANTIOQUIA
TENCHE
3.52
792900.0
129,98%
48
2004-01-03
ANTIOQUIA
A. SAN LORENZO
17.00
3785300.0
69,97%
49
2004-01-03
ANTIOQUIA
CONCEPCION
6.17
1389800.0
124,09%
50
2004-01-03
ANTIOQUIA
DESV. EEPPM (NEC,PAJ,DOL)
11.46
2581500.0
147,51%
51
2004-01-03
ANTIOQUIA
GRANDE
17.48
4277200.0
74,26%
52
2004-01-03
ANTIOQUIA
GUADALUPE
6.10
1374100.0
43,35%
53
2004-01-03
ANTIOQUIA
GUATAPE
12.87
2126700.0
55,67%
54
2004-01-03
ANTIOQUIA
MIEL I
46.76
2044800.0
59,10%
55
2004-01-03
ANTIOQUIA
NARE
20.89
7312500.0
58,78%
...
...
...
...
...
...
...
8715
2004-12-29
ANTIOQUIA
GRANDE
26.05
6353300.0
79,42%
8716
2004-12-29
ANTIOQUIA
GUADALUPE
10.66
2400400.0
56,61%
8717
2004-12-29
ANTIOQUIA
GUATAPE
26.80
4332900.0
85,80%
8718
2004-12-29
ANTIOQUIA
MIEL I
82.77
3623700.0
78,44%
8719
2004-12-29
ANTIOQUIA
NARE
32.64
11311200.0
66,03%
8720
2004-12-29
ANTIOQUIA
PORCE II
85.86
4245700.0
120,62%
8721
2004-12-29
ANTIOQUIA
SAN CARLOS
14.65
1764800.0
52,68%
8722
2004-12-29
ANTIOQUIA
TENCHE
0.71
159900.0
18,81%
8736
2004-12-30
ANTIOQUIA
A. SAN LORENZO
19.09
4171600.0
60,99%
8737
2004-12-30
ANTIOQUIA
CONCEPCION
5.22
1175400.0
78,36%
8738
2004-12-30
ANTIOQUIA
DESV. EEPPM (NEC,PAJ,DOL)
9.69
2181900.0
95,28%
8739
2004-12-30
ANTIOQUIA
GRANDE
22.12
5394800.0
67,44%
8740
2004-12-30
ANTIOQUIA
GUADALUPE
10.72
2413900.0
56,93%
8741
2004-12-30
ANTIOQUIA
GUATAPE
26.67
4311900.0
85,38%
8742
2004-12-30
ANTIOQUIA
MIEL I
71.47
3129000.0
67,73%
8743
2004-12-30
ANTIOQUIA
NARE
24.47
8480000.0
49,50%
8744
2004-12-30
ANTIOQUIA
PORCE II
73.77
3647900.0
103,63%
8745
2004-12-30
ANTIOQUIA
SAN CARLOS
14.36
1729900.0
51,64%
8746
2004-12-30
ANTIOQUIA
TENCHE
0.72
162100.0
19,07%
8760
2004-12-31
ANTIOQUIA
A. SAN LORENZO
18.82
4112600.0
60,13%
8761
2004-12-31
ANTIOQUIA
CONCEPCION
5.16
1161900.0
77,46%
8762
2004-12-31
ANTIOQUIA
DESV. EEPPM (NEC,PAJ,DOL)
9.57
2154900.0
94,10%
8763
2004-12-31
ANTIOQUIA
GRANDE
23.37
5699700.0
71,25%
8764
2004-12-31
ANTIOQUIA
GUADALUPE
9.92
2233700.0
52,68%
8765
2004-12-31
ANTIOQUIA
GUATAPE
18.18
2939300.0
58,20%
8766
2004-12-31
ANTIOQUIA
MIEL I
76.67
3356700.0
72,66%
8767
2004-12-31
ANTIOQUIA
NARE
29.37
10178000.0
59,42%
8768
2004-12-31
ANTIOQUIA
PORCE II
74.85
3701300.0
105,15%
8769
2004-12-31
ANTIOQUIA
SAN CARLOS
14.26
1717800.0
51,28%
8770
2004-12-31
ANTIOQUIA
TENCHE
2.34
526900.0
61,99%
4026 rows × 6 columns
In [17]:
len(x[x["Region Hidrologica"]== "ANTIOQUIA"]) # número de registros
Out[17]:
4026
In [18]:
set(x["Nombre Rio"]) #por definicion un set es un conjunto que no puede tener datos repetidos
Out[18]:
{nan,
'FLORIDA II',
'GUAVIO',
'SINU URRA',
'GUADALUPE',
'GRANDE',
'MAGDALENA BETANIA',
'OTROS RIOS (ESTIMADOS)',
'CALIMA',
'CHUZA',
'SAN CARLOS',
'CONCEPCION',
'TENCHE',
'A. SAN LORENZO',
'BATA',
'PRADO',
'CAUCA SALVAJINA',
'PORCE II',
'NARE',
'BOGOTA N.R.',
'DIGUA',
'GUATAPE',
'ALTOANCHICAYA',
'MIEL I',
'DESV. EEPPM (NEC,PAJ,DOL)'}
In [20]:
x.groupby("Nombre Rio").mean() # agrupa por nombre de río y saca promedio de todas las columna numéricas
Out[20]:
Aportes Caudal m3/s
Aportes Energia kWh
Nombre Rio
A. SAN LORENZO
36.466612
8.105400e+06
ALTOANCHICAYA
43.468716
4.620596e+06
BATA
95.478989
1.571243e+07
BOGOTA N.R.
28.051803
1.277674e+07
CALIMA
11.922077
5.560117e+05
CAUCA SALVAJINA
113.523306
2.519614e+06
CHUZA
9.742486
4.337967e+06
CONCEPCION
6.606667
1.488170e+06
DESV. EEPPM (NEC,PAJ,DOL)
8.312240
2.291866e+06
DIGUA
27.529536
4.299954e+05
FLORIDA II
10.712486
2.148367e+05
GRANDE
27.511967
6.729973e+06
GUADALUPE
20.167978
4.542911e+06
GUATAPE
34.103033
5.624479e+06
GUAVIO
85.884563
2.062469e+07
MAGDALENA BETANIA
374.490765
5.580630e+06
MIEL I
78.433251
3.430428e+06
NARE
49.711612
1.738808e+07
OTROS RIOS (ESTIMADOS)
NaN
3.700457e+06
PORCE II
99.292650
4.976887e+06
PRADO
45.597268
4.984191e+05
SAN CARLOS
24.910492
3.084509e+06
SINU URRA
284.444208
3.165074e+06
TENCHE
3.900874
8.786683e+05
In [21]:
n=set (x["Nombre Rio"])
n
Out[21]:
{nan,
'FLORIDA II',
'GUAVIO',
'SINU URRA',
'GUADALUPE',
'GRANDE',
'MAGDALENA BETANIA',
'OTROS RIOS (ESTIMADOS)',
'CALIMA',
'CHUZA',
'SAN CARLOS',
'CONCEPCION',
'TENCHE',
'A. SAN LORENZO',
'BATA',
'PRADO',
'CAUCA SALVAJINA',
'PORCE II',
'NARE',
'BOGOTA N.R.',
'DIGUA',
'GUATAPE',
'ALTOANCHICAYA',
'MIEL I',
'DESV. EEPPM (NEC,PAJ,DOL)'}
In [30]:
import statistics # importo el paquete estadísticas
for y in n:
z = x[x["Nombre Rio"] == y]["Aportes Energia kWh"]
if len (z) > 0: # si numero de registros es mayor a cero
print(y, statistics.mean(z.values[:])) # imprima y
FLORIDA II nan
GUAVIO 20624689.3443
SINU URRA 3165074.04372
GUADALUPE 4542910.92896
GRANDE 6729973.22404
MAGDALENA BETANIA 5580630.32787
OTROS RIOS (ESTIMADOS) 3700456.8306
CALIMA 556011.748634
CHUZA nan
SAN CARLOS 3084508.74317
CONCEPCION 1488170.4918
TENCHE 878668.306011
A. SAN LORENZO 8105399.72678
BATA 15712428.4153
PRADO 498419.125683
CAUCA SALVAJINA 2519614.48087
PORCE II 4976887.15847
NARE 17388083.6066
BOGOTA N.R. nan
DIGUA 429995.355191
GUATAPE 5624479.23497
ALTOANCHICAYA 4620595.90164
MIEL I 3430428.4153
DESV. EEPPM (NEC,PAJ,DOL) nan
Content source: eacadavid/diplomado2017
Similar notebooks: