In [2]:
# Dependencies
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
#import plotly.plotly as py
In [3]:
# Read the two input CSV files (city table first, then ride table) into DataFrames.
city_data, ride_data = (
    pd.read_csv(csv_name) for csv_name in ("city_data.csv", "ride_data.csv")
)
In [4]:
#Understanding the data
#city_data.head()
In [5]:
#Identifying the columns
#ride_data.shape
#ride_data.columns
#Index(['city', 'date', 'fare', 'ride_id'], dtype='object')
#city_data.columns
#Index(['city', 'driver_count', 'type'], dtype='object')
In [6]:
# Per-city mean of the numeric ride columns; the result is indexed by city.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops
# non-numeric columns silently: ride_data also carries a `date` column
# (read without parse_dates, so presumably text), and averaging it raises
# TypeError on those versions.
data_grouped_mean = ride_data.groupby("city").mean(numeric_only=True)
# data_grouped_mean.head()
# len(data_grouped_mean)  # 125 when this cell was last run
In [7]:
# Count the non-null values of every ride column, city by city.
data_grouped_total_rides = ride_data.groupby(by="city").count()
# data_grouped_total_rides.head()
In [8]:
# Sum of the numeric city columns (driver_count) per city name.
# city_data also has the text column `type`; numeric_only=True keeps it out
# of the aggregation so the result does not depend on the pandas version's
# default for that flag (it changed to False in pandas 2.0).
total_drivers = city_data.groupby("city").sum(numeric_only=True)
# total_drivers.head()
In [9]:
# Recomputes the same per-city non-null counts as the earlier In [7] cell.
data_grouped_total_rides = ride_data.groupby(by="city").count()
In [17]:
# One row per city: the mean fare and the number of rides.
# The aggregation is named with the string "mean" instead of np.mean:
# pandas >= 2.1 emits a FutureWarning when a NumPy reducer is passed as
# aggfunc and recommends the string form to keep the current behaviour.
grouping_ride = ride_data.pivot_table(
    index=["city"],
    values=["fare", "ride_id"],
    aggfunc={
        "fare": "mean",
        "ride_id": len,  # number of ride rows recorded for the city
    },
    fill_value=0,
)
# grouping_ride
In [13]:
# One row per (city, type) pair holding the mean driver_count.
# "mean" is used instead of np.mean: pandas >= 2.1 emits a FutureWarning
# when a NumPy reducer is passed as aggfunc and recommends the string form.
grouping_city = city_data.pivot_table(
    index=["city", "type"],
    values=["driver_count"],
    aggfunc={"driver_count": "mean"},
    fill_value=0,
)
# grouping_city
# Result: `city` and `type` form the index; `driver_count` is the only column.
In [18]:
# Left-join the per-city ride summary with the per-city driver/type summary.
# reset_index() returns new frames, so grouping_ride and grouping_city are
# not modified and this cell gives the same result every time it is run.
# (With inplace=True each re-run moved the index into the columns again,
# adding a stray "index" column on the second run and raising on the third.)
Ride_Sharing = pd.merge(
    grouping_ride.reset_index(),
    grouping_city.reset_index(),
    how="left",
    on="city",
)
Ride_Sharing
# Columns: city, fare, ride_id, type, driver_count
Out[18]:
In [ ]:
#help(pd.set_option)
In [ ]:
#help(plt.plot)
In [19]:
# Bubble plot: one marker per row of Ride_Sharing (one per city).
fig, ax = plt.subplots()
ax.scatter(
    Ride_Sharing["driver_count"],
    Ride_Sharing["fare"],
    s=Ride_Sharing["ride_id"],  # third variable: ride count sets the bubble size
)
# Label the figure so it can be read on its own.
ax.set_xlabel("Driver count per city")
ax.set_ylabel("Average fare per city")
ax.set_title("Average fare vs. driver count (bubble size = number of rides)")
plt.show()
In [20]:
# Scatter of fare against driver_count through the pandas plotting accessor;
# the marker size follows the per-city ride count.
Ride_Sharing.plot.scatter(
    x="driver_count",
    y="fare",
    s=Ride_Sharing["ride_id"],
)
Out[20]:
In [21]:
# Dependencies
import seaborn as sns; sns.set(color_codes=True)
%pylab notebook
In [ ]:
#Ride_Sharing["ride_id"]
#Name: ride_id, Length: 125, dtype: int64
In [ ]:
#size = 100 * len((Ride_Sharing["ride_id"]) - Ride_Sharing["ride_id"].min()) / (Ride_Sharing["ride_id"].max() - Ride_Sharing["ride_id"].min())
In [22]:
# Scatter of fare against driver_count, coloured by city type.
# fit_reg=False turns the regression line off, leaving only the points.
# `height` replaces the former `size` keyword, which seaborn renamed in
# v0.9 and no longer accepts in current releases.
g = sns.lmplot(
    x="driver_count",
    y="fare",
    hue="type",
    fit_reg=False,
    data=Ride_Sharing,
    height=5,
)
In [23]:
# % of Total Fares by City Type
# Ride_Sharing["fare"] holds each city's *mean* fare, so summing it directly
# would add up averages instead of the fares actually charged. Multiplying
# the mean by the city's ride count first recovers the city's fare total,
# and those totals are then summed per city type.
# NOTE(review): mean x count equals the true total only when no fare value
# is missing in ride_data — confirm.
# The aggregation is named "sum" because passing the builtin `sum` as
# aggfunc triggers a FutureWarning on pandas >= 2.1.
faresBycity = (
    Ride_Sharing
    .assign(fare=lambda df: df["fare"] * df["ride_id"])
    .pivot_table(
        index=["type"],
        values=["fare"],
        aggfunc={"fare": "sum"},
        fill_value=0,
    )
)
faresBycity
Out[23]:
In [30]:
# Pie chart of each city type's share of the `fare` column of faresBycity.
# No `explode` argument is passed: the former all-zero tuple had no visual
# effect but had to match the number of slices exactly (three).
fares_ax = faresBycity.plot(
    kind="pie",
    y="fare",
    autopct="%1.1f%%",
    startangle=90,
    fontsize=17,
)
fares_ax.set_title("% of Total Fares by City Type")
# Equal axis scaling so the pie is drawn as a circle.
fares_ax.axis("equal")
# Save through the Axes' own figure rather than pyplot's "current figure".
fares_ax.figure.savefig("PyPiesTotal.png")
Out[30]:
In [33]:
# % Total Number of Drivers Per City
# Sum of the per-city driver_count values for each city type.
# The aggregation is named "sum" because passing the builtin `sum` as
# aggfunc triggers a FutureWarning on pandas >= 2.1.
DriversbyCity = Ride_Sharing.pivot_table(
    index=["type"],
    values=["driver_count"],
    aggfunc={"driver_count": "sum"},
    fill_value=0,
)
DriversbyCity
Out[33]:
In [36]:
# Pie chart of each city type's share of the summed driver_count.
# No `explode` argument is passed: the former all-zero tuple had no visual
# effect but had to match the number of slices exactly (three).
drivers_ax = DriversbyCity.plot(
    kind="pie",
    y="driver_count",
    autopct="%1.1f%%",
    startangle=90,
    fontsize=17,
)
drivers_ax.set_title("% of Total Drivers by City Type")
# Equal axis scaling so the pie is drawn as a circle.
drivers_ax.axis("equal")
# Save through the Axes' own figure rather than pyplot's "current figure".
drivers_ax.figure.savefig("PyPiesdriversTotal.png")
% of Total Rides by City Type
In [38]:
# Sum of the per-city ride counts (`ride_id` column of Ride_Sharing) for
# each city type.
# The aggregation is named "sum" because passing the builtin `sum` as
# aggfunc triggers a FutureWarning on pandas >= 2.1.
RidesbyCity = Ride_Sharing.pivot_table(
    index=["type"],
    values=["ride_id"],
    aggfunc={"ride_id": "sum"},
    fill_value=0,
)
RidesbyCity
Out[38]:
In [39]:
# Pie chart of each city type's share of the summed ride counts.
# No `explode` argument is passed: the former all-zero tuple had no visual
# effect but had to match the number of slices exactly (three).
rides_ax = RidesbyCity.plot(
    kind="pie",
    y="ride_id",
    autopct="%1.1f%%",
    startangle=90,
    fontsize=17,
)
rides_ax.set_title("% of Total Rides by City Type")
# Equal axis scaling so the pie is drawn as a circle.
rides_ax.axis("equal")
# Save through the Axes' own figure rather than pyplot's "current figure".
rides_ax.figure.savefig("PyPiesRidesTotal.png")
In [26]:
In [ ]: