In [102]:
import pandas as pd
import numpy as np
import yaml
import glob
import os
import os.path
import csv
import json
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")
%matplotlib inline
In [2]:
def load_from_yaml(t):
    """Parse the YAML file at path ``t`` and return the loaded object.

    Parameters
    ----------
    t : str
        Path of the YAML file to read.

    Returns
    -------
    object
        Whatever ``yaml.load`` builds from the document.
    """
    # ``file()`` only exists on Python 2 and the handle was never closed;
    # ``open`` inside ``with`` works on both major versions and releases it.
    with open(t, 'r') as stream:
        # The loader is named explicitly because recent PyYAML releases no
        # longer accept ``yaml.load`` without one.
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # prefer yaml.safe_load if these files are not fully trusted.
        dic = yaml.load(stream, Loader=yaml.Loader)
    return dic
def load_from_csv(t):
    """Collect the third field of every ``info`` row in the CSV file ``t``.

    The file is read with ``,`` as delimiter and ``|`` as quote character.

    Parameters
    ----------
    t : str
        Path of the CSV file to read.

    Returns
    -------
    list of str
        ``row[2]`` for each row whose first field equals ``'info'``, in file
        order.

    Raises
    ------
    IndexError
        If an ``info`` row has fewer than three fields.
    """
    temp = []
    # Text mode with newline='' is what the csv module expects on Python 3;
    # the previous binary mode ('rb') makes csv.reader fail there.
    with open(t, 'r', newline='') as csvfile:
        rdata = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in rdata:
            # Blank lines come back as empty lists; skip them instead of
            # crashing on row[0].
            if row and row[0] == 'info':
                temp.append(row[2])
    return temp
def change_winner(x):
    """Translate a run-together team name into its spaced display form.

    Any value without an entry in the table below (for example
    ``'result:noresult'``) is handed back unchanged.
    """
    display_names = (
        ('SunrisersHyderabad', 'Sunrisers Hyderabad'),
        ('RisingPuneSupergiants', 'Rising Pune Supergiant'),
        ('KolkataKnightRiders', 'Kolkata Knight Riders'),
        ('KingsXIPunjab', 'Kings XI Punjab'),
        ('RoyalChallengersBangalore', 'Royal Challengers Bangalore'),
        ('MumbaiIndians', 'Mumbai Indians'),
        ('DelhiDaredevils', 'Delhi Daredevils'),
        ('GujaratLions', 'Gujarat Lions'),
        ('ChennaiSuperKings', 'Chennai Super Kings'),
        ('RajasthanRoyals', 'Rajasthan Royals'),
        ('DeccanChargers', 'Deccan Chargers'),
        ('PuneWarriors', 'Pune Warriors'),
        ('KochiTuskersKerala', 'Kochi Tuskers Kerala'),
    )
    # Compare with == (not a hash lookup) so non-string inputs behave exactly
    # as they did with the chained comparisons.
    for compact, spaced in display_names:
        if x == compact:
            return spaced
    return x
def won_by_side(x, y, z):
    """Label a result as won by the side batting first or second.

    Parameters
    ----------
    x, y :
        Two values compared through their ``str()`` form.
        NOTE(review): presumably the match winner and the toss winner;
        confirm against the caller.
    z :
        Value whose ``str()`` form is compared with ``'field'``.
        NOTE(review): presumably the toss decision; confirm.

    Returns
    -------
    str
        ``'Batting 2nd Side'`` when ``z`` is ``'field'`` and ``x`` equals
        ``y``; ``'Batting 1st Side'`` in every other case.
    """
    # Logical ``and`` is required here: ``&`` binds tighter than ``==``, so
    # the old expression evaluated ``'field' & str(x)`` and raised TypeError.
    if str(z) == 'field' and str(x) == str(y):
        return 'Batting 2nd Side'
    return 'Batting 1st Side'
In [3]:
#iplmatches_info2={}
#for i in glob.glob("./ipl_csv/*.csv"):
# key=i.split('/')[-1].replace('.csv','')
# temp=load_from_csv(i)
# iplmatches_info2[key]=temp
#index_df=pd.Series(['team1','team2','gender','season','date','competition','match_number','venue','city','toss_winner','toss_decision','player_of_match','umpire1','umpire2','reserve_umpire','tv_umpire','match_referee','winner','winner_runs','dummy','dummy1'])
#df=pd.DataFrame.from_dict(iplmatches_info2, orient='index')
#df.columns=index_df
# NOTE: the CSV-based loader above is disabled; the notebook reads the cleaned
# Excel export instead. (A stray undefined name used to sit here and stopped
# "Run All" with a NameError.)
In [4]:
#iplmatches_info={}
#for i in glob.glob("./ipl/*.yaml"):
# key=i.split('/')[-1].replace('.yaml','')
# temp = load_from_yaml(i)
# iplmatches_info[key]=temp['info']
In [5]:
#df=pd.DataFrame(iplmatches_info).T
In [6]:
# Exporting to excel for cleaning
#df.to_excel('ipl_data1.xlsx')
In [7]:
# Read the cleaned match data back in from the Excel workbook.
ipl_data = pd.read_excel(
    'ipl_data.xlsx',
)
In [9]:
#Convert to date
#ipl_data['dates'] = pd.to_datetime(ipl_data['dates'])
#Extracting Year
#ipl_data['year']=ipl_data['dates'].apply(lambda x: str(x)[:4])
# Convert winner team names to their proper display form
#ipl_data['winner']=ipl_data['winner'].apply(change_winner)
#removing eliminator
#ipl_data['winner']=ipl_data['winner'].apply(lambda x: str(x).replace('eliminator: ', '').replace('RisingPuneSupergiantss','RisingPuneSupergiants'))
In [11]:
#bk_by_city=ipl_data.groupby([ 'wickets/runs','year'])['year'].count()
#bk_by_city.unstack().T
In [ ]:
In [ ]:
In [425]:
# Fetch seaborn's bundled "titanic" example dataset.
titanic = sns.load_dataset(
    'titanic',
)
In [427]:
# Preview the first five rows.
titanic.head(n=5)
Out[427]:
In [439]:
# Bar chart of 'survived' by 'pclass', with one bar per 'sex' in each class.
g = sns.barplot(
    data=titanic,
    x='pclass',
    y='survived',
    hue='sex',
)
g.set_ylabel("survival probability ")
Out[439]:
In [440]:
# Box plot of 'fare' by 'pclass', split by 'sex', then trim the axis spines.
sns.boxplot(
    data=titanic,
    x="pclass",
    y="fare",
    hue="sex",
    palette="PRGn",
)
sns.despine(trim=True, offset=10)
In [296]:
# Load the "Final" sheet of the finished workbook.
# ``sheet_name`` is the supported keyword; the older ``sheetname`` spelling
# was deprecated and later removed from pandas, where it raises TypeError.
ipl_final = pd.read_excel("ipl_data_final.xlsx", sheet_name="Final")
In [297]:
# Move the current index into a regular column, modifying ipl_final in place.
# NOTE(review): not idempotent - re-running this cell adds another index column.
ipl_final.reset_index(drop=False, inplace=True)
In [308]:
# Write the frame to disk as a JSON array with one object per row.
ipl_final.to_json(
    'ipl_final.json',
    orient='records',
)
In [310]:
#import datetime
#ipl_final['year']=ipl_final['date'].dt.year
In [206]:
# Count non-null 'Id' entries per team for each 'is_won_or_lost' value.
x = pd.pivot_table(
    ipl_final,
    values='Id',
    index=['team'],
    columns=['is_won_or_lost'],
    aggfunc='count',
)
# Team/result combinations that never occur come back as NaN; store them as
# numeric zeros. The previous ``x.fillna('0')`` discarded its result, so it
# had no effect, and a string fill value would not have been plottable.
x = x.fillna(0)
# ``sort_columns='is_won_or_lost'`` was dropped: the keyword expected a
# boolean and has been removed from DataFrame.plot in current pandas; the
# pivot output already has its columns in sorted order.
x.plot(kind='bar', figsize=[10, 6], title='Split by result')
Out[206]:
In [204]:
# Show the result table with missing counts displayed as 0.0 (x itself is not modified).
x.fillna(value=0.0)
Out[204]:
In [304]:
# Disabled snippet kept as a bare string literal, so none of it executes.
# If run, it would read ipl_final.json, add to every record a 'teams' list
# built from that record's 'host' and 'visiting' values, and write the result
# to xyz.json. Because the literal is the cell's only expression, the cell
# simply echoes the string as its output.
"""def form_list(a,b):
l=[]
l.append(a)
l.append(b)
return l
with open('ipl_final.json') as json_file:
data = json.load(json_file)
for i in data:
t=i['host']
o=i['visiting']
i['teams']=form_list(t, o)
df=pd.DataFrame(data)
df.to_json('xyz.json',orient='records')
"""
Out[304]:
In [ ]:
In [309]:
# Preview the first five rows of the final dataset.
ipl_final.head(n=5)
Out[309]:
In [293]:
In [294]:
In [295]:
In [ ]: