In [1]:
import pyspark
from pyspark import SparkContext
from pyspark import SQLContext
# Reuse the SparkContext that is already running when this cell is executed
# again; a second bare SparkContext() call in the same kernel fails with
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate()
# SQL entry point used by the cells below to read CSV files into DataFrames.
sql = SQLContext(sc)

READING THE .CSV FILE OF THE DATASET


In [2]:
# Directories (relative to the notebook) for the input and output CSV files.
data_path = "../input/csv/"
data_opath = "../output/csv/"

# Read matches.csv through the spark-csv data source, treating the first line
# as the header row. No schema-inference option is set on the reader.
data_matches = (
    sql.read
    .format("com.databricks.spark.csv")
    .option("header", "true")
    .load(data_path + "matches.csv")
)
data_matches.show()


+---+------+----------+----------+--------------------+--------------------+--------------------+-------------+------+----------+--------------------+-----------+--------------+---------------+--------------------+-----------+--------------+-------+
| id|season|      city|      date|               team1|               team2|         toss_winner|toss_decision|result|dl_applied|              winner|win_by_runs|win_by_wickets|player_of_match|               venue|    umpire1|       umpire2|umpire3|
+---+------+----------+----------+--------------------+--------------------+--------------------+-------------+------+----------+--------------------+-----------+--------------+---------------+--------------------+-----------+--------------+-------+
|  1|  2008| Bangalore|2008-04-18|Kolkata Knight Ri...|Royal Challengers...|Royal Challengers...|        field|normal|         0|Kolkata Knight Ri...|        140|             0|    BB McCullum|M Chinnaswamy Sta...|  Asad Rauf|   RE Koertzen|   null|
|  2|  2008|Chandigarh|2008-04-19| Chennai Super Kings|     Kings XI Punjab| Chennai Super Kings|          bat|normal|         0| Chennai Super Kings|         33|             0|     MEK Hussey|Punjab Cricket As...|  MR Benson|    SL Shastri|   null|
|  3|  2008|     Delhi|2008-04-19|    Rajasthan Royals|    Delhi Daredevils|    Rajasthan Royals|          bat|normal|         0|    Delhi Daredevils|          0|             9|    MF Maharoof|    Feroz Shah Kotla|  Aleem Dar|GA Pratapkumar|   null|
|  4|  2008|    Mumbai|2008-04-20|      Mumbai Indians|Royal Challengers...|      Mumbai Indians|          bat|normal|         0|Royal Challengers...|          0|             5|     MV Boucher|    Wankhede Stadium|   SJ Davis|     DJ Harper|   null|
|  5|  2008|   Kolkata|2008-04-20|     Deccan Chargers|Kolkata Knight Ri...|     Deccan Chargers|          bat|normal|         0|Kolkata Knight Ri...|          0|             5|      DJ Hussey|        Eden Gardens|  BF Bowden|   K Hariharan|   null|
|  6|  2008|    Jaipur|2008-04-21|     Kings XI Punjab|    Rajasthan Royals|     Kings XI Punjab|          bat|normal|         0|    Rajasthan Royals|          0|             6|      SR Watson|Sawai Mansingh St...|  Aleem Dar|     RB Tiffin|   null|
|  7|  2008| Hyderabad|2008-04-22|     Deccan Chargers|    Delhi Daredevils|     Deccan Chargers|          bat|normal|         0|    Delhi Daredevils|          0|             9|       V Sehwag|Rajiv Gandhi Inte...|  IL Howell|     AM Saheba|   null|
|  8|  2008|   Chennai|2008-04-23| Chennai Super Kings|      Mumbai Indians|      Mumbai Indians|        field|normal|         0| Chennai Super Kings|          6|             0|      ML Hayden|MA Chidambaram St...|  DJ Harper|GA Pratapkumar|   null|
|  9|  2008| Hyderabad|2008-04-24|     Deccan Chargers|    Rajasthan Royals|    Rajasthan Royals|        field|normal|         0|    Rajasthan Royals|          0|             3|      YK Pathan|Rajiv Gandhi Inte...|  Asad Rauf|     MR Benson|   null|
| 10|  2008|Chandigarh|2008-04-25|     Kings XI Punjab|      Mumbai Indians|      Mumbai Indians|        field|normal|         0|     Kings XI Punjab|         66|             0|  KC Sangakkara|Punjab Cricket As...|  Aleem Dar|     AM Saheba|   null|
| 11|  2008| Bangalore|2008-04-26|Royal Challengers...|    Rajasthan Royals|    Rajasthan Royals|        field|normal|         0|    Rajasthan Royals|          0|             7|      SR Watson|M Chinnaswamy Sta...|  MR Benson|     IL Howell|   null|
| 12|  2008|   Chennai|2008-04-26|Kolkata Knight Ri...| Chennai Super Kings|Kolkata Knight Ri...|          bat|normal|         0| Chennai Super Kings|          0|             9|       JDP Oram|MA Chidambaram St...|  BF Bowden|AV Jayaprakash|   null|
| 13|  2008|    Mumbai|2008-04-27|      Mumbai Indians|     Deccan Chargers|     Deccan Chargers|        field|normal|         0|     Deccan Chargers|          0|            10|   AC Gilchrist|Dr DY Patil Sport...|  Asad Rauf|    SL Shastri|   null|
| 14|  2008|Chandigarh|2008-04-27|    Delhi Daredevils|     Kings XI Punjab|    Delhi Daredevils|          bat|normal|         0|     Kings XI Punjab|          0|             4|      SM Katich|Punjab Cricket As...|RE Koertzen|     I Shivram|   null|
| 15|  2008| Bangalore|2008-04-28| Chennai Super Kings|Royal Challengers...| Chennai Super Kings|          bat|normal|         0| Chennai Super Kings|         13|             0|       MS Dhoni|M Chinnaswamy Sta...|BR Doctrove|     RB Tiffin|   null|
| 16|  2008|   Kolkata|2008-04-29|Kolkata Knight Ri...|      Mumbai Indians|Kolkata Knight Ri...|          bat|normal|         0|      Mumbai Indians|          0|             7|  ST Jayasuriya|        Eden Gardens|  BF Bowden|AV Jayaprakash|   null|
| 17|  2008|     Delhi|2008-04-30|    Delhi Daredevils|Royal Challengers...|Royal Challengers...|        field|normal|         0|    Delhi Daredevils|         10|             0|     GD McGrath|    Feroz Shah Kotla|  Aleem Dar|     I Shivram|   null|
| 18|  2008| Hyderabad|2008-05-01|     Deccan Chargers|     Kings XI Punjab|     Kings XI Punjab|        field|normal|         0|     Kings XI Punjab|          0|             7|       SE Marsh|Rajiv Gandhi Inte...|BR Doctrove|     RB Tiffin|   null|
| 19|  2008|    Jaipur|2008-05-01|    Rajasthan Royals|Kolkata Knight Ri...|    Rajasthan Royals|          bat|normal|         0|    Rajasthan Royals|         45|             0|    SA Asnodkar|Sawai Mansingh St...|RE Koertzen|GA Pratapkumar|   null|
| 20|  2008|   Chennai|2008-05-02| Chennai Super Kings|    Delhi Daredevils| Chennai Super Kings|          bat|normal|         0|    Delhi Daredevils|          0|             8|       V Sehwag|MA Chidambaram St...|  BF Bowden|   K Hariharan|   null|
+---+------+----------+----------+--------------------+--------------------+--------------------+-------------+------+----------+--------------------+-----------+--------------+---------------+--------------------+-----------+--------------+-------+
only showing top 20 rows

TAKING THE TWO TEAMS AND THE WINNER FROM THE DATASET


In [3]:
# Project the match table down to the two sides and the winner; these are the
# only columns the head-to-head analysis below reads.
team_winner = data_matches.select("team1", "team2", "winner")
team_winner.show()


+--------------------+--------------------+--------------------+
|               team1|               team2|              winner|
+--------------------+--------------------+--------------------+
|Kolkata Knight Ri...|Royal Challengers...|Kolkata Knight Ri...|
| Chennai Super Kings|     Kings XI Punjab| Chennai Super Kings|
|    Rajasthan Royals|    Delhi Daredevils|    Delhi Daredevils|
|      Mumbai Indians|Royal Challengers...|Royal Challengers...|
|     Deccan Chargers|Kolkata Knight Ri...|Kolkata Knight Ri...|
|     Kings XI Punjab|    Rajasthan Royals|    Rajasthan Royals|
|     Deccan Chargers|    Delhi Daredevils|    Delhi Daredevils|
| Chennai Super Kings|      Mumbai Indians| Chennai Super Kings|
|     Deccan Chargers|    Rajasthan Royals|    Rajasthan Royals|
|     Kings XI Punjab|      Mumbai Indians|     Kings XI Punjab|
|Royal Challengers...|    Rajasthan Royals|    Rajasthan Royals|
|Kolkata Knight Ri...| Chennai Super Kings| Chennai Super Kings|
|      Mumbai Indians|     Deccan Chargers|     Deccan Chargers|
|    Delhi Daredevils|     Kings XI Punjab|     Kings XI Punjab|
| Chennai Super Kings|Royal Challengers...| Chennai Super Kings|
|Kolkata Knight Ri...|      Mumbai Indians|      Mumbai Indians|
|    Delhi Daredevils|Royal Challengers...|    Delhi Daredevils|
|     Deccan Chargers|     Kings XI Punjab|     Kings XI Punjab|
|    Rajasthan Royals|Kolkata Knight Ri...|    Rajasthan Royals|
| Chennai Super Kings|    Delhi Daredevils|    Delhi Daredevils|
+--------------------+--------------------+--------------------+
only showing top 20 rows

CHECKING FOR THE MATCHES WHERE 'KOLKATA KNIGHT RIDERS' IS LISTED AS TEAM1


In [4]:
# Rows in which Kolkata Knight Riders appear in the team1 column. Fixtures
# where they are listed as team2 are not part of this selection.
team1_ = team_winner.where("team1 == 'Kolkata Knight Riders'")
team1_.show()


+--------------------+--------------------+--------------------+
|               team1|               team2|              winner|
+--------------------+--------------------+--------------------+
|Kolkata Knight Ri...|Royal Challengers...|Kolkata Knight Ri...|
|Kolkata Knight Ri...| Chennai Super Kings| Chennai Super Kings|
|Kolkata Knight Ri...|      Mumbai Indians|      Mumbai Indians|
|Kolkata Knight Ri...|Royal Challengers...|Kolkata Knight Ri...|
|Kolkata Knight Ri...|     Deccan Chargers|Kolkata Knight Ri...|
|Kolkata Knight Ri...|    Delhi Daredevils|Kolkata Knight Ri...|
|Kolkata Knight Ri...|      Mumbai Indians|      Mumbai Indians|
|Kolkata Knight Ri...| Chennai Super Kings| Chennai Super Kings|
|Kolkata Knight Ri...|    Rajasthan Royals|    Rajasthan Royals|
|Kolkata Knight Ri...|     Deccan Chargers|     Deccan Chargers|
|Kolkata Knight Ri...|Royal Challengers...|Royal Challengers...|
|Kolkata Knight Ri...|     Kings XI Punjab|     Kings XI Punjab|
|Kolkata Knight Ri...|    Delhi Daredevils|    Delhi Daredevils|
|Kolkata Knight Ri...|    Delhi Daredevils|    Delhi Daredevils|
|Kolkata Knight Ri...|Royal Challengers...|Royal Challengers...|
|Kolkata Knight Ri...|     Deccan Chargers|     Deccan Chargers|
|Kolkata Knight Ri...|     Deccan Chargers|Kolkata Knight Ri...|
|Kolkata Knight Ri...|      Mumbai Indians|      Mumbai Indians|
|Kolkata Knight Ri...|     Kings XI Punjab|Kolkata Knight Ri...|
|Kolkata Knight Ri...|     Deccan Chargers|Kolkata Knight Ri...|
+--------------------+--------------------+--------------------+
only showing top 20 rows


In [5]:
import matplotlib.pyplot as plt  #importing the required library for the visualization

from ipywidgets import widgets, interactive  #using the widgets for the dropdown menu and interactive for interactive visuals


# Single list of selectable teams shared by both dropdowns, so the two menus
# cannot drift apart when a team is added or renamed.
TEAM_OPTIONS = [
    'Rajasthan Royals',
    'Chennai Super Kings',
    'Deccan Chargers',
    'Gujarat Lions',
    'Delhi Daredevils',
    'Mumbai Indians',
    'Kochi Tuskers Kerala',
    'Royal Challengers Bangalore',
    'Pune Warriors',
    'Rising Pune Supergiants',
    'Sunrisers Hyderabad',
    'Kolkata Knight Riders',
    'Kings XI Punjab',
]

# Dropdown for the first team; initially set to 'Rajasthan Royals'.
first_team = widgets.Dropdown(
    options=TEAM_OPTIONS,
    value='Rajasthan Royals',
    description='Team1:',
)

# Dropdown for the second team; initially set to 'Delhi Daredevils'.
second_team = widgets.Dropdown(
    options=TEAM_OPTIONS,
    value='Delhi Daredevils',
    description='Team2:',
)

#define a function plotit to plot the team vs team win percentage
def plotit(first_team,second_team):
    """Show a pie chart of the head-to-head win share between two teams.

    Reads the module-level ``team_winner`` DataFrame (columns ``team1``,
    ``team2`` and ``winner``) and looks at every fixture between the two
    teams, whichever of them is listed as ``team1``. Only fixtures whose
    ``winner`` equals one of the two teams count towards the percentages, so
    matches without a winner do not dilute the shares.

    Parameters
    ----------
    first_team : str
        Name of the first team, as it appears in ``team_winner``.
    second_team : str
        Name of the second team, as it appears in ``team_winner``.

    Returns
    -------
    None
        Prints ``SAME TEAM`` when both names are equal, prints
        ``NO MATCHES PLAYED BEFORE`` when no fixture between the two teams
        was won by either of them, and otherwise displays the pie chart.
    """
    if first_team == second_team:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("SAME TEAM")
        return

    # A fixture between the two sides can list them in either order.
    listed_first_vs_second = (
        (team_winner.team1 == first_team) & (team_winner.team2 == second_team)
    )
    listed_second_vs_first = (
        (team_winner.team1 == second_team) & (team_winner.team2 == first_team)
    )
    head_to_head = team_winner.filter(listed_first_vs_second | listed_second_vs_first)

    # Two Spark jobs in total: one win count per team.
    first_team_wins = head_to_head.filter(head_to_head["winner"] == first_team).count()
    second_team_wins = head_to_head.filter(head_to_head["winner"] == second_team).count()

    # Only matches with a decided result form the denominator.
    decided_matches = first_team_wins + second_team_wins

    if decided_matches == 0:
        # Also reached when the teams met but none of the fixtures had a winner.
        print('NO MATCHES PLAYED BEFORE')
        return

    # float() keeps true division on Python 2.
    first_team_percent = (first_team_wins * 100) / float(decided_matches)
    second_team_percent = (second_team_wins * 100) / float(decided_matches)

    labels = (first_team, second_team)
    sizes = (first_team_percent, second_team_percent)
    colors = ['lightgreen', 'orange']

    plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=90)
    plt.title("Team win percentage")
    plt.show()
    
# Wire the two dropdowns to plotit's arguments so the function is called again
# whenever a selection changes; as the cell's last expression, the resulting
# widget is what the notebook displays.
interactive(
    plotit,
    first_team=first_team,
    second_team=second_team
)