Pandas

Arbeiten mit Series und Dataframes in Pandas

Arbeiten mit Series in Pandas



In [6]:

    
import pandas as pd #Import der Bibliothek pandas
from pandas import Series



In [7]:

    
# Build a Series from a plain list; pandas assigns a default integer index (0..4).
objekt = Series([1200, 1500, 1600, 1400, 1800])
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print objekt` statement is a SyntaxError on Python 3.
print(objekt)









    



0    1200
1    1500
2    1600
3    1400
4    1800
dtype: int64



In [10]:

    
# Show the two building blocks of the Series: the underlying value array and the index.
# Written as print() calls so the cell runs on both Python 2 and Python 3.
print(objekt.values)
print(objekt.index)









    



[1200 1500 1600 1400 1800]
RangeIndex(start=0, stop=5, step=1)



In [11]:

    
# Same values as before, but labelled with weekday names instead of the default integer index.
objekt2 = Series(
    [1200, 1500, 1600, 1400, 1800],
    index=['Montag', 'Dienstag', 'Mittwoch', 'Donnerstag', 'Freitag'],
)
# print() call instead of the Python 2 print statement (valid on both interpreters).
print(objekt2)









    



Montag        1200
Dienstag      1500
Mittwoch      1600
Donnerstag    1400
Freitag       1800
dtype: int64



In [12]:

    
# Look up a single value by its index label.
objekt2.loc['Montag']









    Out[12]:





1200



In [13]:

    
# Overwrite the value stored under an existing label, then display the Series.
objekt2.loc['Donnerstag'] = 1600
objekt2









    Out[13]:





Montag        1200
Dienstag      1500
Mittwoch      1600
Donnerstag    1600
Freitag       1800
dtype: int64



In [14]:

    
# Boolean-mask selection: keep only the entries whose value is greater than 1500.
objekt2.loc[objekt2.gt(1500)]









    Out[14]:





Mittwoch      1600
Donnerstag    1600
Freitag       1800
dtype: int64



In [17]:

    
# Element-wise multiplication; the index labels are kept.
objekt2.mul(3)









    Out[17]:





Montag        3600
Dienstag      4500
Mittwoch      4800
Donnerstag    4800
Freitag       5400
dtype: int64



In [21]:

    
# Dictionary of population figures, keyed by city name.
staedte = {
    'Stuttgart': 700000,
    'Muenchen': 1000000,
    'Hamburg': 800000,
    'Koeln': 300000,
}



In [22]:

    
# Build a Series straight from the dict: the keys become the index labels,
# the values become the data.
objekt3 = Series(data=staedte)
objekt3









    Out[22]:





Hamburg       800000
Koeln         300000
Muenchen     1000000
Stuttgart     700000
dtype: int64



In [23]:

    
# Give both the index axis and the Series itself a name; both show up in the repr.
objekt3.index.name = 'Stadt'
objekt3.name = 'Einwohnerzahl'
objekt3









    Out[23]:





Stadt
Hamburg       800000
Koeln         300000
Muenchen     1000000
Stuttgart     700000
Name: Einwohnerzahl, dtype: int64



In [24]:

    
# Replace the index by assignment.
# NOTE: this only relabels the existing values position by position; nothing is
# reordered. In the recorded output 'Koeln' now carries 800000 (previously listed
# under 'Hamburg') and the index name 'Stadt' is no longer shown.
objekt3.index = [
    'Koeln',
    'Hamburg',
    'Muenchen',
    'Stuttgart',
]
objekt3









    Out[24]:





Koeln         800000
Hamburg       300000
Muenchen     1000000
Stuttgart     700000
Name: Einwohnerzahl, dtype: int64



In [25]:

    
# Arithmetic mean over all values of the Series.
objekt3.mean()









    Out[25]:





700000.0



In [1]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2014', periods=1000))
ts = ts.cumsum()
ts.plot()









    Out[1]:





<matplotlib.axes._subplots.AxesSubplot at 0x86a2d68>

Arbeiten mit Dataframes in Pandas



In [33]:

    
import numpy as np
import pandas as pd
from pandas import DataFrame  # mind the capitalisation: DataFrame, not Dataframe

# Build a DataFrame from a dict of equally long lists; each key becomes a column.
daten = {
    'stadt': ['Stuttgart', 'Muenchen', 'Hamburg', 'Koeln', 'Stuttgart'],
    'jahr': [2014, 2015, 2016, 2017, 2015],
    'pop': [0.7, 0.8, 0.9, 0.85, 0.75],
}
frame = DataFrame(daten)  # create the DataFrame
# print() call instead of the Python 2 print statement (valid on both interpreters).
print(frame)









    



   jahr   pop      stadt
0  2014  0.70  Stuttgart
1  2015  0.80   Muenchen
2  2016  0.90    Hamburg
3  2017  0.85      Koeln
4  2015  0.75  Stuttgart



In [34]:

    
# Rebuild the frame with an explicit column order instead of the default one.
frame = DataFrame(daten, columns=['stadt', 'jahr', 'pop'])
# print() call instead of the Python 2 print statement (valid on both interpreters).
print(frame)









    



       stadt  jahr   pop
0  Stuttgart  2014  0.70
1   Muenchen  2015  0.80
2    Hamburg  2016  0.90
3      Koeln  2017  0.85
4  Stuttgart  2015  0.75



In [38]:

    
# Output individual columns: once via key lookup, once via attribute access.
# print() calls replace the Python 2 print statements (valid on both interpreters).
print(frame['jahr'])
print('################')
print(frame.stadt)









    



0    2014
1    2015
2    2016
3    2017
4    2015
Name: jahr, dtype: int64
################
0    Stuttgart
1     Muenchen
2      Hamburg
3        Koeln
4    Stuttgart
Name: stadt, dtype: object



In [40]:

    
# Output a single row, selected by its integer index label.
# `.ix` was deprecated in pandas 0.20 and removed in 1.0; on this integer index it
# looked rows up by label, so `.loc` is the equivalent replacement.
frame.loc[2]









    Out[40]:





stadt    Hamburg
jahr        2016
pop          0.9
Name: 2, dtype: object



In [41]:

    
# Transpose the frame: columns become rows and vice versa.
frame.transpose()



In [1]:

    
import numpy as np
import pandas as pd
from pandas import DataFrame

# Read the semicolon-separated sudoku grid from CSV; the first line of the file
# supplies the column labels.
url = "sudoku-10.csv"
df = pd.read_csv(url, sep=';', header=0)
#df.describe()
print(df)









    



   1  2  3  4  5  6  7  8  9
0   6  0  0  5  0  0  0  0  0
1   0  9  0  0  4  0  7  0  0
2   0  8  0  0  0  3  0  0  0
3   0  0  0  7  0  0  1  0  2
4   9  0  2  0  8  0  3  0  6
5   7  0  8  0  0  2  0  0  0
6   0  0  0  2  0  0  0  9  0
7   0  0  5  0  9  0  0  4  0
8   0  0  0  0  0  1  0  0  8



In [ ]:



In [14]:

    
# Four ways to read a single cell. The originals chained `df.iloc[0][0]` and
# `df.loc[0][1]`: the second `[...]` indexed the row Series with an integer that
# pandas treated as a position, a fallback deprecated in pandas 2.1. Each access
# below is a single explicit indexer call that returns the same cell.

# Using `iloc[]`: row position 0, column position 0
print(df.iloc[0, 0])

# Using `loc[]`: row label 0, second column (its label is looked up by position)
print(df.loc[0, df.columns[1]])

# Using `at[]`
#print(df.at[0,'1'])

# Using `iat[]`: row position 0, column position 0
print(df.iat[0, 0])

# Using `get_value(index, column)`
# NOTE: `DataFrame.get_value` was removed in pandas 1.0; use `at[]` / `iat[]` instead.
#print(df.get_value(0, '1'))



In [ ]:

    
# Reference: https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python#gs.J_F2fGU



In [9]:

    
# Use `iloc[]` to select a row
# Position 0 is the first row; the result is a Series indexed by the column labels.
print(df.iloc[0])

# Use `loc[]` to select a column
# NOTE(review): left disabled; presumably the integer label 1 does not match the
# column labels read from the CSV header. Confirm before re-enabling.
#print(df.loc[:,1])









    



1    6
2     0
3     0
4     5
5     0
6     0
7     0
8     0
9     0
Name: 0, dtype: int64



In [ ]:

Ausgabe eines DataFrames als CSV-Datei



In [2]:

    
import pandas as pd

# Write df to a CSV file in the working directory using to_csv's default settings.
df.to_csv(path_or_buf='myDataFrame10.csv')



In [3]:

    
import pandas as pd

# Write df again, this time semicolon-separated and explicitly UTF-8 encoded.
df.to_csv(
    'myDataFrame20.csv',
    encoding='utf-8',
    sep=';',
)



In [ ]:



In [ ]:

	0	1	2	3	4
stadt	Stuttgart	Muenchen	Hamburg	Koeln	Stuttgart
jahr	2014	2015	2016	2017	2015
pop	0.7	0.8	0.9	0.85	0.75