In [264]:
import os
# Keep the notebook output tidy: install a filter that ignores every warning.
import warnings
warnings.filterwarnings(action='ignore')
In [265]:
# Show the entries of the current working directory.
os.listdir('.')
Out[265]:
In [267]:
import pandas as pd
import numpy as np
# NOTE(review): this binds `plt` to the top-level matplotlib package, not to
# matplotlib.pyplot (the module conventionally aliased as `plt`) - confirm
# this is intended before calling pyplot functions through `plt`.
import matplotlib as plt
import seaborn as sns
# Show up to 50 columns when a DataFrame is displayed. The option is addressed
# by its full name: the abbreviated pattern 'max_columns' also matches other
# registered options (e.g. 'styler.render.max_columns') in recent pandas
# releases, which makes set_option reject it as ambiguous.
pd.set_option('display.max_columns', 50)
# Render matplotlib figures inline, directly below the cell that creates them
%matplotlib inline
In [268]:
# Read the pipe-separated PR contributions file and preview its first rows.
df = pd.read_csv(
    'positive_negative_pr_contributions.csv',
    sep='|',
)
df.head()
Out[268]:
In [269]:
# Use the 'number' column as the row index.
# inplace=True keeps the same DataFrame object rather than creating a new one.
# The guard makes the cell safe to re-run: after the first run 'number' lives
# in the index and is no longer a column, so calling set_index('number') a
# second time would raise KeyError. If 'number' is neither the index nor a
# column, set_index still raises, so a wrong input file is not silently accepted.
if df.index.name != 'number':
    df.set_index('number', inplace=True)
df.head()
Out[269]:
In [270]:
# Keep only the 'net_positive' column, as a one-column DataFrame.
new_df = df[['net_positive']]
# Print the rows whose net_positive value is greater than 50.
print(new_df.loc[new_df['net_positive'] > 50])
# Gather those same values (> 50) into a list with a comprehension.
nums = [value for value in new_df['net_positive'] if value > 50]
In [341]:
# Vertical box plot of the rows whose net_positive value is greater than 50.
# NOTE(review): the original comment spoke of "greater than 100 lines of code",
# but the filter applied here is > 50 - confirm which threshold is intended.
x_1 = new_df.loc[new_df['net_positive'] > 50]
ax1 = sns.boxplot(data=x_1, notch=False, orient="v")
# Disabled tweak kept for reference: extend the axis limit to the maximum value
# plus 10 so that any outliers stay visible.
# NOTE(review): it refers to `ax`, whereas this cell binds the axes to `ax1`.
#ax.set(xlim=(0, max(nums) + 10))
In [289]:
# Joint plot of net_positive against the DataFrame index, with a linear
# regression fit drawn on the joint axes (kind="reg").
g = sns.jointplot(
    x=df.index,
    y=new_df['net_positive'],
    kind="reg",
)
In [293]:
# Read the pipe-separated issue assignees file and index it by 'assigned_name'.
df1 = pd.read_csv('issue_assignees.csv', sep="|")
df1 = df1.set_index('assigned_name')
In [363]:
# Hexbin joint plot of the first 25 rows: net_positive against the index,
# with the y axis limited to 0-100.
# `height` sets the figure size. It replaces the `size` keyword, which seaborn
# renamed to `height` in version 0.9; current releases no longer accept `size`
# here and would forward it to the underlying hexbin call instead.
# jointplot returns a JointGrid; the figure is rendered when the cell runs, so
# no trailing expression is needed.
ax2 = sns.jointplot(
    x=new_df.index[:25],
    y=new_df['net_positive'][:25],
    kind="hex",
    ylim=(0, 100),
    height=10,
)
Out[363]: