In [1]:
import pandas as pd
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
diamonds = pd.read_csv('diamonds.csv',index_col=0)

In [12]:
# Keep only the stones strictly heavier than 2 carat.
diamonds_2 = diamonds.loc[diamonds['carat'] > 2]
diamonds_2


Out[12]:
carat cut color clarity depth table price x y z
12247 2.06 Premium J I1 61.2 58.0 5203 8.10 8.07 4.95
13003 2.14 Fair J I1 69.4 57.0 5405 7.74 7.70 5.36
13119 2.15 Fair J I1 65.5 57.0 5430 8.01 7.95 5.23
13758 2.22 Fair J I1 66.7 56.0 5607 8.04 8.02 5.36
13992 2.01 Fair I I1 67.4 58.0 5696 7.71 7.64 5.17
13993 2.01 Fair I I1 55.9 64.0 5696 8.48 8.39 4.71
14139 2.27 Fair J I1 67.6 55.0 5733 8.05 8.00 5.43
14913 2.03 Fair H I1 64.4 59.0 6002 7.91 7.85 5.07
14914 2.03 Fair H I1 66.6 57.0 6002 7.81 7.75 5.19
15152 2.06 Good H I1 64.3 58.0 6091 8.03 7.99 5.15
15320 2.08 Premium H I1 61.7 57.0 6150 8.23 8.18 5.06
15685 2.49 Fair J I1 66.3 58.0 6289 8.26 8.18 5.45
15782 2.01 Fair G I1 70.2 57.0 6315 7.53 7.50 5.27
15816 2.14 Fair H I1 66.4 56.0 6328 8.00 7.92 5.29
15866 2.02 Fair G I1 68.0 55.0 6346 7.77 7.72 5.27
15867 2.02 Fair G I1 65.6 57.0 6346 7.87 7.80 5.15
15906 2.15 Premium H I1 62.9 57.0 6357 8.25 8.20 5.18
15941 2.03 Fair G I1 66.3 56.0 6377 7.81 7.75 5.16
15942 2.03 Premium G I1 61.1 58.0 6377 8.11 8.06 4.94
16257 2.07 Fair G I1 67.7 56.0 6503 7.76 7.73 5.25
16284 3.00 Very Good H I1 63.1 55.0 6512 9.23 9.10 5.77
16342 2.21 Premium H I1 62.2 58.0 6535 8.31 8.27 5.16
16440 2.22 Fair H I1 70.1 55.0 6564 7.77 7.74 5.44
16497 2.01 Very Good H I1 58.1 63.0 6592 8.30 8.19 4.79
16505 2.10 Fair G I1 67.4 59.0 6597 7.82 7.76 5.24
16506 2.10 Fair G I1 64.6 58.0 6597 8.05 8.01 5.19
16638 2.25 Fair H I1 67.7 58.0 6653 8.01 7.97 5.41
16786 2.01 Good F I1 64.0 56.0 6686 7.93 7.91 5.07
16916 2.03 Fair F I1 65.6 56.0 6753 7.89 7.86 5.16
17098 2.17 Fair G I1 55.6 62.0 6817 8.75 8.69 4.80
... ... ... ... ... ... ... ... ... ... ...
27676 2.48 Very Good F SI2 63.4 56.0 18692 8.64 8.55 5.45
27677 2.19 Ideal D SI2 61.8 57.0 18693 8.23 8.49 5.17
27679 2.02 Ideal G VS2 62.0 57.0 18700 8.10 8.05 5.01
27680 3.51 Premium J VS2 62.5 59.0 18701 9.66 9.63 6.03
27681 2.01 Premium G SI2 61.2 57.2 18705 8.08 8.14 4.97
27682 2.22 Premium J VS1 60.0 60.0 18706 8.49 8.43 5.08
27683 2.07 Good I VS2 61.8 61.0 18707 8.12 8.16 5.03
27685 3.01 Premium J SI2 60.7 59.0 18710 9.35 9.22 5.64
27686 3.01 Premium J SI2 59.7 58.0 18710 9.41 9.32 5.59
27687 2.18 Premium F SI1 61.2 60.0 18717 8.38 8.30 5.10
27721 2.02 Very Good E SI1 59.8 59.0 18731 8.11 8.20 4.88
27723 2.01 Ideal G SI1 62.1 57.0 18736 8.07 8.09 5.02
27724 2.01 Ideal G SI1 62.2 56.0 18741 8.02 7.97 4.97
27725 2.01 Very Good G SI1 63.1 59.0 18741 8.03 7.98 5.05
27726 2.01 Premium G SI1 60.3 59.0 18741 8.23 8.15 4.94
27727 2.36 Premium H SI2 59.4 58.0 18745 8.69 8.75 5.18
27728 2.61 Ideal I SI2 62.1 56.0 18756 8.85 8.73 5.46
27729 2.03 Ideal G SI1 60.0 55.8 18757 8.17 8.30 4.95
27731 2.08 Ideal H SI1 58.7 60.0 18760 8.36 8.40 4.92
27732 2.55 Premium I VS1 61.8 62.0 18766 8.70 8.65 5.36
27735 2.06 Ideal I VS2 62.2 55.0 18779 8.15 8.19 5.08
27737 2.03 Very Good H SI1 63.0 60.0 18781 8.00 7.93 5.02
27738 2.05 Premium F SI2 60.2 59.0 18784 8.28 8.33 5.00
27739 2.05 Ideal G SI1 61.9 57.0 18787 8.10 8.16 5.03
27740 2.80 Good G SI2 63.8 58.0 18788 8.90 8.85 0.00
27742 2.15 Ideal G SI2 62.6 54.0 18791 8.29 8.35 5.21
27743 2.04 Premium H SI1 58.1 60.0 18795 8.37 8.28 4.84
27745 2.29 Premium I SI1 61.8 59.0 18797 8.52 8.45 5.24
27747 2.07 Ideal G SI2 62.5 55.0 18804 8.20 8.13 5.11
27750 2.29 Premium I VS2 60.8 60.0 18823 8.50 8.47 5.16

1889 rows × 10 columns


In [13]:
# Histogram of the prices of the > 2 carat diamonds.
price_of_large = diamonds_2["price"]
price_of_large.plot(kind="hist")


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc17e4290>

In [15]:
# Standard deviation of the `x` column within each cut, drawn as a line plot.
x_spread_by_cut = diamonds.groupby("cut")["x"].std()
x_spread_by_cut.plot()


Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc168bf90>

In [19]:
# Load the third worksheet (index 2); the column names are on row index 3.
# The sheet is passed positionally on purpose: the keyword was renamed from
# `sheetname` to `sheet_name` in pandas, so either spelling fails on some
# versions, while the positional form works on all of them.
kernmerkmale = pd.read_excel('demo_excel.xls', 2, header=3)
kernmerkmale.head()


Out[19]:
SATZART AGS NAME EWZ EW_M EW_W EW_D EW_A ALTER_1 ALTER_2 ALTER_3 ALTER_4 ALTER_5
0 0 0 Deutschland 80219695 39153540 41066140 74050320 6169360 13138580 11391700 22838880 16333080 16517450
1 10 1 Schleswig-Holstein 2800119 1360530 1439590 2683670 116450 477010 362390 790060 563490 607170
2 20 10 Schleswig-Holstein (fiktiver Reg.-Bez.) 2800119 1360530 1439590 2683670 116450 477010 362390 790060 563490 607170
3 40 1001 Flensburg, Stadt 82258 40660 41590 77140 5120 12150 15640 22130 15560 16770
4 50 10010000 Flensburg, Stadt 82258 40660 41590 77140 5120 12150 15640 22130 15560 16770

In [24]:
# Rows with SATZART == 10 are the records for the individual federal states.
bundeslaender = kernmerkmale.loc[kernmerkmale['SATZART'] == 10]
bundeslaender


Out[24]:
SATZART AGS NAME EWZ EW_M EW_W EW_D EW_A ALTER_1 ALTER_2 ALTER_3 ALTER_4 ALTER_5
1 10 1 Schleswig-Holstein 2800119 1360530 1439590 2683670 116450 477010 362390 790060 563490 607170
1304 10 2 Hamburg 1706696 826140 880560 1495810 210890 267800 275420 537600 301700 324180
1309 10 3 Niedersachsen 7777992 3804410 3973590 7351250 426750 1356370 1049040 2188170 1569480 1614940
2807 10 4 Bremen 650863 316110 334750 580340 70520 98850 105260 181880 126940 137940
2815 10 5 Nordrhein-Westfalen 17538251 8521230 9017020 15931170 1607080 2984650 2481470 4993250 3525620 3553260
3666 10 6 Hessen 5971816 2914740 3057080 5311720 660090 1005660 834310 1746430 1205160 1180260
4548 10 7 Rheinland-Pfalz 3989808 1950420 2039380 3718250 271560 664260 562230 1109500 841540 812270
7103 10 8 Baden-Württemberg 10486660 5133480 5353190 9353030 1133630 1853540 1547500 2997710 2053400 2034520
8713 10 9 Bayern 12397614 6062910 6334700 11383180 1014430 2112190 1797760 3613220 2459360 2415070
12255 10 10 Saarland 999623 485380 514240 933360 66270 148980 135930 269470 224680 220560
12367 10 11 Berlin 3292365 1599840 1692530 2920090 372280 494150 535310 1004770 624090 634030
12372 10 12 Brandenburg 2455780 1207850 1247920 2413580 42190 337040 309010 691800 562490 555430
13012 10 13 Mecklenburg-Vorpommern 1609982 793310 816670 1582250 27730 215300 228240 434590 375550 356300
13959 10 14 Sachsen 4056799 1978040 2078760 3979760 77030 543160 558590 1077560 870980 1006500
14772 10 15 Sachsen-Anhalt 2287040 1116850 1170190 2247810 39230 290700 304600 610810 524660 556270
15129 10 16 Thüringen 2188589 1076410 1112180 2155360 33230 286960 303570 587720 501650 508700

In [31]:
# Horizontal bars of the five ALTER_* columns (absolute counts), one group per
# state name.
age_columns = ["ALTER_1", "ALTER_2", "ALTER_3", "ALTER_4", "ALTER_5"]
bundeslaender.plot(x="NAME", y=age_columns, kind="barh")


Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc0aaead0>

In [ ]:


In [41]:
# Express each ALTER_* column as a fraction of the state's EWZ value so the
# states can be compared independently of their size.
age_columns = ["ALTER_1", "ALTER_2", "ALTER_3", "ALTER_4", "ALTER_5"]
relative_age = bundeslaender[age_columns].div(bundeslaender["EWZ"], axis=0)

# Label the bars with the state names by setting them as the index.
# `DataFrame.plot(x=...)` expects a column label or position, and
# `relative_age` has no NAME column, so handing it a Series is not portable
# across pandas versions.
relative_age.set_index(bundeslaender["NAME"]).plot(kind="barh")


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5dc0aba7d0>

In [64]:
import numpy as np  # not used in this cell; kept in case later cells rely on `np`

# Age structure of Berlin as a pie chart.
# `print(...)` with a single argument produces the same output on Python 2
# and Python 3, whereas the bare `print x` statement is Python 2 only.
age_columns = ["ALTER_1", "ALTER_2", "ALTER_3", "ALTER_4", "ALTER_5"]

berlin = bundeslaender[bundeslaender['NAME'] == 'Berlin']
print(berlin)
print('x')  # visual separator between the printed tables

# Fraction of Berlin's EWZ value that falls into each ALTER_* column.
relative_age_ber = berlin[age_columns].div(berlin["EWZ"], axis=0)
print(relative_age_ber)

# Transpose so the ALTER_* labels become the index: the pie plot draws one
# wedge per index entry of the single remaining column.
relative_age_ber2 = relative_age_ber.transpose()
print(relative_age_ber2)
relative_age_ber2.plot(kind='pie', subplots=True, figsize=(6, 6))


       SATZART  AGS    NAME      EWZ     EW_M     EW_W     EW_D    EW_A  \
12367       10   11  Berlin  3292365  1599840  1692530  2920090  372280   

       ALTER_1  ALTER_2  ALTER_3  ALTER_4  ALTER_5  
12367   494150   535310  1004770   624090   634030  
x
       ALTER_1   ALTER_2   ALTER_3   ALTER_4   ALTER_5
12367  0.15009  0.162591  0.305182  0.189557  0.192576
            12367
ALTER_1  0.150090
ALTER_2  0.162591
ALTER_3  0.305182
ALTER_4  0.189557
ALTER_5  0.192576
Out[64]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f5db7b28190>], dtype=object)

In [65]:
# Load the first worksheet (index 0) with the column names on the first row.
# The sheet is passed positionally on purpose: the keyword was renamed from
# `sheetname` to `sheet_name` in pandas, so either spelling fails on some
# versions, while the positional form works on all of them.
keime = pd.read_excel('multiresistente_keime.xlsx', 0, header=0)
keime.head()


Out[65]:
Kreisschlussel Bundesland Landkreis/Kreisfreie Stadt Krankenhaus- Patienten 2013 MRE-Fälle 2013 MRE-Fälle pro 1000 Krankenhauspatienten 2013 ESBL-Diagnosen 2013 ESBL-Diagnosen pro 1000 Krankenhauspatienten 2013 VRE-Diagnosen 2013 VRE Diagnosen pro 1000 Krankenhauspatienten 2013 MRSA-Diagnosen 2013 MRSA Diagnosen pro 1000 Krankenhauspatienten 2013 MRE Veränderung 2010-2013 in % ESBL Veränderung 2010-2013 in % VRE Veränderung 2010-2013 in % MRSA Veränderung 2010-2013 in % MRSA Meldungen an Robert Koch-Institut 2013
0 1057 Schleswig-Holstein Plön 26634 97 3.641961 126 4.730795 33 1.239018 184 6.908463 -13 26 57 -2 -
1 6431 Hessen Bergstraße 54563 315 5.773143 289 5.296630 113 2.071000 315 5.773143 -8 46 92 -17 -
2 6635 Hessen Waldeck-Frankenberg 44049 385 8.740267 450 10.215896 114 2.588027 478 10.851552 107 217 268 90 -
3 7111 Rheinland-Pfalz Koblenz, kreisfreie Stadt 24518 167 6.811322 155 6.321886 40 1.631454 180 7.341545 24 52 400 14 -
4 7313 Rheinland-Pfalz Landau in der Pfalz, kreisfreie Stadt 11227 51 4.542620 135 12.024584 24 2.137704 37 3.295627 19 146 -4 -5 -

In [ ]:
# NOTE(review): the previous version of this cell merged with an undefined
# frame `df2` on columns "E"/"E2", which exist in neither table, so it could
# not run. The join below is an assumption to confirm:
#   * `keime` is keyed by `Kreisschlussel` (e.g. 1057 for Plön),
#   * `kernmerkmale` carries a matching-looking `AGS` code (e.g. 1001 for
#     Flensburg) on rows with SATZART == 40, which appear to be the
#     district-level records.
# AGS values repeat across SATZART levels (10 is both a state and a region
# code in the displayed rows), so the census table is restricted to one level
# before merging to avoid spurious matches.
kreise = kernmerkmale[kernmerkmale["SATZART"] == 40]
mix = pd.merge(keime, kreise, left_on="Kreisschlussel", right_on="AGS")