In [1]:
import pandas as pd
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
diamonds = pd.read_csv('diamonds.csv',index_col=0)
In [12]:
diamonds_2 = diamonds[diamonds['carat'] > 2]
diamonds_2
Out[12]:
carat
cut
color
clarity
depth
table
price
x
y
z
12247
2.06
Premium
J
I1
61.2
58.0
5203
8.10
8.07
4.95
13003
2.14
Fair
J
I1
69.4
57.0
5405
7.74
7.70
5.36
13119
2.15
Fair
J
I1
65.5
57.0
5430
8.01
7.95
5.23
13758
2.22
Fair
J
I1
66.7
56.0
5607
8.04
8.02
5.36
13992
2.01
Fair
I
I1
67.4
58.0
5696
7.71
7.64
5.17
13993
2.01
Fair
I
I1
55.9
64.0
5696
8.48
8.39
4.71
14139
2.27
Fair
J
I1
67.6
55.0
5733
8.05
8.00
5.43
14913
2.03
Fair
H
I1
64.4
59.0
6002
7.91
7.85
5.07
14914
2.03
Fair
H
I1
66.6
57.0
6002
7.81
7.75
5.19
15152
2.06
Good
H
I1
64.3
58.0
6091
8.03
7.99
5.15
15320
2.08
Premium
H
I1
61.7
57.0
6150
8.23
8.18
5.06
15685
2.49
Fair
J
I1
66.3
58.0
6289
8.26
8.18
5.45
15782
2.01
Fair
G
I1
70.2
57.0
6315
7.53
7.50
5.27
15816
2.14
Fair
H
I1
66.4
56.0
6328
8.00
7.92
5.29
15866
2.02
Fair
G
I1
68.0
55.0
6346
7.77
7.72
5.27
15867
2.02
Fair
G
I1
65.6
57.0
6346
7.87
7.80
5.15
15906
2.15
Premium
H
I1
62.9
57.0
6357
8.25
8.20
5.18
15941
2.03
Fair
G
I1
66.3
56.0
6377
7.81
7.75
5.16
15942
2.03
Premium
G
I1
61.1
58.0
6377
8.11
8.06
4.94
16257
2.07
Fair
G
I1
67.7
56.0
6503
7.76
7.73
5.25
16284
3.00
Very Good
H
I1
63.1
55.0
6512
9.23
9.10
5.77
16342
2.21
Premium
H
I1
62.2
58.0
6535
8.31
8.27
5.16
16440
2.22
Fair
H
I1
70.1
55.0
6564
7.77
7.74
5.44
16497
2.01
Very Good
H
I1
58.1
63.0
6592
8.30
8.19
4.79
16505
2.10
Fair
G
I1
67.4
59.0
6597
7.82
7.76
5.24
16506
2.10
Fair
G
I1
64.6
58.0
6597
8.05
8.01
5.19
16638
2.25
Fair
H
I1
67.7
58.0
6653
8.01
7.97
5.41
16786
2.01
Good
F
I1
64.0
56.0
6686
7.93
7.91
5.07
16916
2.03
Fair
F
I1
65.6
56.0
6753
7.89
7.86
5.16
17098
2.17
Fair
G
I1
55.6
62.0
6817
8.75
8.69
4.80
...
...
...
...
...
...
...
...
...
...
...
27676
2.48
Very Good
F
SI2
63.4
56.0
18692
8.64
8.55
5.45
27677
2.19
Ideal
D
SI2
61.8
57.0
18693
8.23
8.49
5.17
27679
2.02
Ideal
G
VS2
62.0
57.0
18700
8.10
8.05
5.01
27680
3.51
Premium
J
VS2
62.5
59.0
18701
9.66
9.63
6.03
27681
2.01
Premium
G
SI2
61.2
57.2
18705
8.08
8.14
4.97
27682
2.22
Premium
J
VS1
60.0
60.0
18706
8.49
8.43
5.08
27683
2.07
Good
I
VS2
61.8
61.0
18707
8.12
8.16
5.03
27685
3.01
Premium
J
SI2
60.7
59.0
18710
9.35
9.22
5.64
27686
3.01
Premium
J
SI2
59.7
58.0
18710
9.41
9.32
5.59
27687
2.18
Premium
F
SI1
61.2
60.0
18717
8.38
8.30
5.10
27721
2.02
Very Good
E
SI1
59.8
59.0
18731
8.11
8.20
4.88
27723
2.01
Ideal
G
SI1
62.1
57.0
18736
8.07
8.09
5.02
27724
2.01
Ideal
G
SI1
62.2
56.0
18741
8.02
7.97
4.97
27725
2.01
Very Good
G
SI1
63.1
59.0
18741
8.03
7.98
5.05
27726
2.01
Premium
G
SI1
60.3
59.0
18741
8.23
8.15
4.94
27727
2.36
Premium
H
SI2
59.4
58.0
18745
8.69
8.75
5.18
27728
2.61
Ideal
I
SI2
62.1
56.0
18756
8.85
8.73
5.46
27729
2.03
Ideal
G
SI1
60.0
55.8
18757
8.17
8.30
4.95
27731
2.08
Ideal
H
SI1
58.7
60.0
18760
8.36
8.40
4.92
27732
2.55
Premium
I
VS1
61.8
62.0
18766
8.70
8.65
5.36
27735
2.06
Ideal
I
VS2
62.2
55.0
18779
8.15
8.19
5.08
27737
2.03
Very Good
H
SI1
63.0
60.0
18781
8.00
7.93
5.02
27738
2.05
Premium
F
SI2
60.2
59.0
18784
8.28
8.33
5.00
27739
2.05
Ideal
G
SI1
61.9
57.0
18787
8.10
8.16
5.03
27740
2.80
Good
G
SI2
63.8
58.0
18788
8.90
8.85
0.00
27742
2.15
Ideal
G
SI2
62.6
54.0
18791
8.29
8.35
5.21
27743
2.04
Premium
H
SI1
58.1
60.0
18795
8.37
8.28
4.84
27745
2.29
Premium
I
SI1
61.8
59.0
18797
8.52
8.45
5.24
27747
2.07
Ideal
G
SI2
62.5
55.0
18804
8.20
8.13
5.11
27750
2.29
Premium
I
VS2
60.8
60.0
18823
8.50
8.47
5.16
1889 rows × 10 columns
In [13]:
diamonds_2["price"].plot(kind="hist")
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc17e4290>
In [15]:
diamonds.groupby("cut")["x"].std().plot()
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc168bf90>
In [19]:
kernmerkmale = pd.read_excel('demo_excel.xls', sheetname=2, header=3)
kernmerkmale.head()
Out[19]:
SATZART
AGS
NAME
EWZ
EW_M
EW_W
EW_D
EW_A
ALTER_1
ALTER_2
ALTER_3
ALTER_4
ALTER_5
0
0
0
Deutschland
80219695
39153540
41066140
74050320
6169360
13138580
11391700
22838880
16333080
16517450
1
10
1
Schleswig-Holstein
2800119
1360530
1439590
2683670
116450
477010
362390
790060
563490
607170
2
20
10
Schleswig-Holstein (fiktiver Reg.-Bez.)
2800119
1360530
1439590
2683670
116450
477010
362390
790060
563490
607170
3
40
1001
Flensburg, Stadt
82258
40660
41590
77140
5120
12150
15640
22130
15560
16770
4
50
10010000
Flensburg, Stadt
82258
40660
41590
77140
5120
12150
15640
22130
15560
16770
In [24]:
bundeslaender = kernmerkmale[kernmerkmale['SATZART']==10]
bundeslaender
Out[24]:
SATZART
AGS
NAME
EWZ
EW_M
EW_W
EW_D
EW_A
ALTER_1
ALTER_2
ALTER_3
ALTER_4
ALTER_5
1
10
1
Schleswig-Holstein
2800119
1360530
1439590
2683670
116450
477010
362390
790060
563490
607170
1304
10
2
Hamburg
1706696
826140
880560
1495810
210890
267800
275420
537600
301700
324180
1309
10
3
Niedersachsen
7777992
3804410
3973590
7351250
426750
1356370
1049040
2188170
1569480
1614940
2807
10
4
Bremen
650863
316110
334750
580340
70520
98850
105260
181880
126940
137940
2815
10
5
Nordrhein-Westfalen
17538251
8521230
9017020
15931170
1607080
2984650
2481470
4993250
3525620
3553260
3666
10
6
Hessen
5971816
2914740
3057080
5311720
660090
1005660
834310
1746430
1205160
1180260
4548
10
7
Rheinland-Pfalz
3989808
1950420
2039380
3718250
271560
664260
562230
1109500
841540
812270
7103
10
8
Baden-Württemberg
10486660
5133480
5353190
9353030
1133630
1853540
1547500
2997710
2053400
2034520
8713
10
9
Bayern
12397614
6062910
6334700
11383180
1014430
2112190
1797760
3613220
2459360
2415070
12255
10
10
Saarland
999623
485380
514240
933360
66270
148980
135930
269470
224680
220560
12367
10
11
Berlin
3292365
1599840
1692530
2920090
372280
494150
535310
1004770
624090
634030
12372
10
12
Brandenburg
2455780
1207850
1247920
2413580
42190
337040
309010
691800
562490
555430
13012
10
13
Mecklenburg-Vorpommern
1609982
793310
816670
1582250
27730
215300
228240
434590
375550
356300
13959
10
14
Sachsen
4056799
1978040
2078760
3979760
77030
543160
558590
1077560
870980
1006500
14772
10
15
Sachsen-Anhalt
2287040
1116850
1170190
2247810
39230
290700
304600
610810
524660
556270
15129
10
16
Thüringen
2188589
1076410
1112180
2155360
33230
286960
303570
587720
501650
508700
In [31]:
bundeslaender.plot(x=("NAME"),y=[("ALTER_1"),("ALTER_2"),("ALTER_3"),("ALTER_4"),("ALTER_5")],kind="barh")
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc0aaead0>
In [ ]:
In [41]:
relative_age=bundeslaender[["ALTER_1","ALTER_2","ALTER_3","ALTER_4","ALTER_5"]].div(bundeslaender["EWZ"],axis=0)
relative_age.plot(x=bundeslaender["NAME"],kind="barh")
Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc0aba7d0>
In [64]:
import numpy as np
berlin = bundeslaender[bundeslaender['NAME']=='Berlin']
print berlin
print 'x'
relative_age_ber=berlin[["ALTER_1","ALTER_2","ALTER_3","ALTER_4","ALTER_5"]].div(berlin["EWZ"],axis=0)
print relative_age_ber
relative_age_ber2=relative_age_ber[["ALTER_1","ALTER_2","ALTER_3","ALTER_4","ALTER_5"]].transpose()
print relative_age_ber2
relative_age_ber2.plot(kind='pie',subplots=True, figsize=(6, 6))
SATZART AGS NAME EWZ EW_M EW_W EW_D EW_A \
12367 10 11 Berlin 3292365 1599840 1692530 2920090 372280
ALTER_1 ALTER_2 ALTER_3 ALTER_4 ALTER_5
12367 494150 535310 1004770 624090 634030
x
ALTER_1 ALTER_2 ALTER_3 ALTER_4 ALTER_5
12367 0.15009 0.162591 0.305182 0.189557 0.192576
12367
ALTER_1 0.150090
ALTER_2 0.162591
ALTER_3 0.305182
ALTER_4 0.189557
ALTER_5 0.192576
Out[64]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f5db7b28190>], dtype=object)
In [65]:
keime = pd.read_excel('multiresistente_keime.xlsx', sheetname=0, header=0)
keime.head()
Out[65]:
Kreisschlussel
Bundesland
Landkreis/Kreisfreie Stadt
Krankenhaus- Patienten 2013
MRE-Fälle 2013
MRE-Fälle pro 1000 Krankenhauspatienten 2013
ESBL-Diagnosen 2013
ESBL-Diagnosen pro 1000 Krankenhauspatienten 2013
VRE-Diagnosen 2013
VRE Diagnosen pro 1000 Krankenhauspatienten 2013
MRSA-Diagnosen 2013
MRSA Diagnosen pro 1000 Krankenhauspatienten 2013
MRE Veränderung 2010-2013 in %
ESBL Veränderung 2010-2013 in %
VRE Veränderung 2010-2013 in %
MRSA Veränderung 2010-2013 in %
MRSA Meldungen an Robert Koch-Institut 2013
0
1057
Schleswig-Holstein
Plön
26634
97
3.641961
126
4.730795
33
1.239018
184
6.908463
-13
26
57
-2
-
1
6431
Hessen
Bergstraße
54563
315
5.773143
289
5.296630
113
2.071000
315
5.773143
-8
46
92
-17
-
2
6635
Hessen
Waldeck-Frankenberg
44049
385
8.740267
450
10.215896
114
2.588027
478
10.851552
107
217
268
90
-
3
7111
Rheinland-Pfalz
Koblenz, kreisfreie Stadt
24518
167
6.811322
155
6.321886
40
1.631454
180
7.341545
24
52
400
14
-
4
7313
Rheinland-Pfalz
Landau in der Pfalz, kreisfreie Stadt
11227
51
4.542620
135
12.024584
24
2.137704
37
3.295627
19
146
-4
-5
-
In [ ]:
mix = pd.merge(keime, df2, left_on="E", right_on="E2")
Content source: fugufisch/hu_bp_python_course
Similar notebooks: