In [190]:
import pandas as pd
from pandas import Series, DataFrame
A Series is a one-dimensional, array-like object. It pairs an array of data values with a corresponding array of labels, called the index.
In [191]:
# Build a Series from a plain list; with no index given, pandas assigns the
# default integer labels. The series name is set at construction time and the
# index is given its own name afterwards.
series1 = Series([1, 2, 3, 4, 5], name='MyFirstSeries')
series1.index.name = 'Indx'
series1
Out[191]:
In [192]:
# The index holds the labels that correspond to the data values.
series1.index
Out[192]:
In [193]:
# The data held by the series, without its index labels.
series1.values
Out[193]:
In [194]:
# Build a Series with explicit string labels instead of the default integer
# index: 'a' -> 10, 'b' -> 20, 'c' -> 30.
series2 = Series(data=[10, 20, 30], index=list('abc'))
series2
Out[194]:
In [195]:
# The first element fetched by position (.iloc) equals the one fetched by its
# label 'a'. Plain series2[0] is avoided: on a string-labelled index an integer
# key is ambiguous, and its positional fallback is deprecated in pandas.
series2.iloc[0] == series2['a']
Out[195]:
In [196]:
# Boolean-mask selection: keep only the entries whose value is greater than 3.
greater_than_three = series1 > 3
series1.loc[greater_than_three]
Out[196]:
In [197]:
# Divide every element of the series by the scalar 2 (true division).
series2.div(2)
Out[197]:
In [198]:
# Two equivalent ways to flag missing values element-wise: the Series method
# and the top-level pandas function. Only the last expression is rendered as
# the cell output.
series2.isnull()
pd.isnull(series2)
Out[198]:
In [199]:
# Select several entries at once by passing a list of labels; the result is
# returned in the order given by the list.
List = ['c', 'a', 'b']
series2.loc[List]
Out[199]:
In [200]:
# Element-wise addition aligns the two series on their index labels; a label
# present in only one operand yields NaN. series1 carries the default integer
# labels and series2 the labels 'a', 'b', 'c', so no labels overlap here.
series1.add(series2)
Out[200]:
In [201]:
# Load the CSV file. header=None tells pandas the file has no header row, so
# the first line is read as data and the columns get default integer labels.
df = pd.read_csv("data1.csv", header=None) #import csv file
In [202]:
# Replace the column labels with descriptive names (modifies df in place).
df.columns = ["ID","Name", "Birthday"]
In [203]:
# Preview the first rows of the frame; head() defaults to 5 rows.
df.head() #view top 5 lines
Out[203]:
In [204]:
# Show only the final row of the frame.
df.tail(1) #view last line
Out[204]:
In [205]:
# Two ways to select the 'Name' column: bracket lookup and attribute access.
# Only the last expression is rendered as the cell output.
df['Name'] #index based on column name (multiple)
df.Name
Out[205]:
In [206]:
# Select the first row by its position. `.iloc` is the positional indexer; the
# older `.ix` indexer has been removed from pandas and raises AttributeError.
df.iloc[0]
Out[206]:
In [207]:
# Boolean-mask selection: keep only the rows whose ID is below 60.
id_below_60 = df['ID'] < 60
df.loc[id_below_60]
Out[207]:
In [208]:
# Derive a new column holding ID + 1. Column arithmetic is applied to the
# whole column at once, so no row-by-row apply/lambda is needed, and it also
# behaves correctly when the frame has no rows.
df['ID+1'] = df['ID'] + 1
df
Out[208]: