In [2]:
import pandas
# Load the food table from the CSV file in the working directory.
food_info = pandas.read_csv('food_info.csv')

# Report the container type and the dtype pandas inferred for each column.
print(type(food_info))
print(food_info.dtypes)
In [5]:
# Keep the default head() preview around under its own name.
first_rows = food_info.head()

# Quick orientation: first three rows, the column labels, then the shape.
for summary in (food_info.head(3), food_info.columns, food_info.shape):
    print(summary)
In [15]:
# Select several columns at once by indexing with a list of labels.
# (A single row can be inspected with food_info.loc[<row label>] and a single
# column with food_info['NDB_No'].)
col_name = ['NDB_No', 'Shrt_Desc']
print(food_info[col_name])
In [17]:
# Gather every column whose label ends with '(g)' into its own frame.
col_name = food_info.columns.tolist()
print(col_name)
gram_name = [label for label in col_name if label.endswith('(g)')]
gram_df = food_info[gram_name]
print(gram_df.head(3))
In [21]:
# Convert copper from milligrams to grams and attach it as a new column.
# The source column has to be 'Copper_(mg)': the previous version divided
# 'Sodium_(mg)' by 1000, which stored sodium values under the 'Copper_(g)'
# label.
div_1000 = food_info['Copper_(mg)'] / 1000
print(food_info.shape)  # shape before the column is attached
food_info['Copper_(g)'] = div_1000
print(food_info.shape)  # shape after the column is attached
In [30]:
# Reorder food_info itself (inplace=True) from highest to lowest 'Water_(g)',
# then show that column in its new order.
food_info.sort_values(by='Water_(g)', ascending=False, inplace=True)
print(food_info['Water_(g)'])
In [34]:
import numpy as np
import pandas as pd
# Load the Titanic passenger table from the working directory.
titanic_survival = pd.read_csv('titanic_train.csv')

# Trailing expression: rendered as the cell's output.
titanic_survival.head()
Out[34]:
In [45]:
# Count the rows whose 'Age' entry is missing.
age = titanic_survival['Age']
age_is_null = age.isnull()            # boolean mask, True where Age is missing
age_is_true = age.loc[age_is_null]    # only the entries selected by the mask
age_is_count = age_is_true.shape[0]   # how many entries were selected
In [39]:
# Naive mean: built-in sum() over every 'Age' entry divided by the row count.
# NOTE: sum() does not skip NaN, so the result is NaN if any entry is missing.
age_values = titanic_survival['Age']
mean_age = sum(age_values) / len(age_values)
print(mean_age)
In [42]:
# Mean age restricted to rows where 'Age' is present; the mask age_is_null
# comes from the earlier missing-value cell and is inverted here.
good_age = titanic_survival['Age'][~age_is_null]
correct_mean_age = sum(good_age) / len(good_age)
print(correct_mean_age)
In [44]:
# The same statistic obtained through the Series.mean() method.
age_series = titanic_survival['Age']
correct_mean_age_test = age_series.mean()
print(correct_mean_age_test)
In [46]:
# Mean 'Fare' for each passenger class, keyed by the class number.
passenger_class = [1, 2, 3]
meanfare_by_class = {
    pclass: titanic_survival.loc[titanic_survival['Pclass'] == pclass, 'Fare'].mean()
    for pclass in passenger_class
}
print(meanfare_by_class)
In [48]:
# Mean of 'Survived' for each 'Pclass' value.
passenger_mean_survival = titanic_survival.pivot_table(
    index='Pclass',
    values='Survived',
    aggfunc=np.mean,
)
print(passenger_mean_survival)
In [52]:
# 'Age' aggregated per 'Pclass' with pivot_table's default aggregation
# (no aggfunc is passed).
passenger_age = titanic_survival.pivot_table(
    index='Pclass',
    values='Age',
)
print(passenger_age)
In [53]:
# Sum of 'Fare' and of 'Survived' for each 'Embarked' value.
port_status = titanic_survival.pivot_table(
    index='Embarked',
    values=['Fare', 'Survived'],
    aggfunc=np.sum,
)
print(port_status)
In [55]:
# New frame without the rows that are missing 'Age' or 'Sex'
# (axis=0 drops rows; titanic_survival itself is not modified).
new_titanic_survival = titanic_survival.dropna(subset=['Age', 'Sex'], axis=0)
In [57]:
# Read individual cells with .loc[row label, column label].
row_index_83_age = titanic_survival.loc[83, 'Age']
row_index_766_pcalss = titanic_survival.loc[766, 'Pclass']

print(row_index_83_age)
print(row_index_766_pcalss)
In [62]:
# Order by 'Age', largest first; each row keeps its original label.
sort_titanic_survival = titanic_survival.sort_values(by='Age', ascending=False)
print(sort_titanic_survival.iloc[:10])  # positional slice: first ten rows
print('-----------------------------')

# Renumber the rows from 0 and discard the old labels (drop=True).
sort_titanic_survival_reindex = sort_titanic_survival.reset_index(drop=True)
# .loc slices by label and includes the end label, so this shows labels 0-10.
print(sort_titanic_survival_reindex.loc[:10])
In [63]:
def _hundredth_item(column):
    """Return the entry of ``column`` stored under row label 99."""
    return column.loc[99]


# The helper has its own name on purpose: previously the function and its
# result were both called ``hundredth_row``, so the assignment below replaced
# the function and a second run of the cell handed the result to apply().
# ``hundredth_row`` still ends up holding the per-column result.
hundredth_row = titanic_survival.apply(_hundredth_item)
print(hundredth_row)
In [64]:
def isnull_count(column):
    """Return how many entries of ``column`` are missing.

    Parameters
    ----------
    column : pandas.Series or array-like
        Values to inspect; anything ``pd.isnull`` accepts.

    Returns
    -------
    int
        Number of entries for which ``pd.isnull`` is True.
    """
    # Summing the boolean mask counts the True entries directly, without
    # first building a filtered copy of the column.
    return int(pd.isnull(column).sum())
# Run isnull_count over the Titanic frame (apply's default axis) and show
# the resulting counts.
column_null_count = titanic_survival.apply(func=isnull_count)
print(column_null_count)
In [71]:
import pandas as pd
# Load the Fandango comparison table and pull out two of its columns.
findango = pd.read_csv('fandango_score_comparison.csv')

series_film = findango['FILM']
print(type(series_film))
print(type(findango))
print(series_film[:5])

series_rt = findango['RottenTomatoes']
print(series_rt[:5])
In [86]:
from pandas import Series
# Build a Series of the 'RottenTomatoes' values labelled by the 'FILM' values.
film_name = series_film.values
print(type(film_name))
film_sorce = series_rt.values
series_custom = Series(data=film_sorce, index=film_name)

# Label lookup with a list of titles; not the last expression, so its result
# is not displayed.
series_custom[['Avengers: Age of Ultron (2015)', 'Cinderella (2015)']]

# Positional slice; as the final expression it becomes the cell's output.
series_custom[5:10]
Out[86]:
In [114]:
# Rebuild series_custom with its labels in sorted order.
original_index = series_custom.index.tolist()
print(type(series_custom))
print(type(original_index))

sort_index = sorted(original_index)
# sort_index is a plain list, so ``sort_index.index`` is the bound list.index
# method and printing it shows only the method's repr. Show the first few
# sorted labels instead.
print(sort_index[:5])

sort_by_index = series_custom.reindex(sort_index)
print(sort_by_index.index)
In [105]:
# NumPy functions accept a Series directly (np.add and np.sin can be tried
# the same way); here the largest value of the label-sorted Series is shown.
print(np.max(sort_by_index))
In [108]:
# Boolean-mask filtering: keep the entries greater than 50.
series_greater_than_50 = series_custom[series_custom.gt(50)]

# Two masks combined with & keep the entries strictly between 90 and 100.
where_1 = series_custom.gt(90)
where_2 = series_custom.lt(100)
both_series = series_custom[where_1 & where_2]
print(both_series)
In [111]:
# Two Series that share the 'FILM' column as their index, then their
# element-wise average.
film_index = findango['FILM']
rt_critics = Series(findango['RottenTomatoes'].values, index=film_index)
rt_user = Series(findango['RottenTomatoes_User'].values, index=film_index)
rt_mean = (rt_critics + rt_user) / 2
print(rt_mean)
In [113]:
# Read the table again under the name ``fandango``.
fandango = pd.read_csv('fandango_score_comparison.csv')
print(type(fandango))

# Use 'FILM' as the row index; drop=False keeps it as a regular column too.
fandango_films = fandango.set_index(keys='FILM', drop=False)
print(fandango_films.index)
In [122]:
# Pick the columns whose dtype compares equal to 'float64'.
types = fandango_films.dtypes
is_float64 = types.values == 'float64'
float_types = types[is_float64].index
float_column = fandango_films[float_types]

# np.std is called through a lambda, exactly as before; passing np.std itself
# to apply() could take a different pandas code path, so the wrapper stays.
deviations = float_column.apply(lambda col: np.std(col))
print(deviations)
In [124]:
# Take two of the float columns and compute np.std across each row (axis=1).
# The trailing expression is the cell's output.
selected_columns = ['RT_user_norm', 'Metacritic_user_nom']
rt_mt_user = float_column[selected_columns]
rt_mt_user.apply(lambda row: np.std(row), axis=1)
Out[124]:
In [ ]: