In [1]:
import os
import sys

import numpy as np
import pandas as pd
import scipy.stats as st
In [5]:
# Location of the local sci_analysis checkout. The SCI_ANALYSIS_PATH environment
# variable overrides it; the default is the original hardcoded path.
SCI_ANALYSIS_PATH = os.environ.get('SCI_ANALYSIS_PATH', '/Users/chrismorrow/repos/sci_analysis')
# Only append when missing, so re-running this cell does not pile up duplicate
# sys.path entries.
if SCI_ANALYSIS_PATH not in sys.path:
    sys.path.append(SCI_ANALYSIS_PATH)
In [6]:
import sci_analysis as a
In [6]:
# Fixed seed for the global NumPy RNG so the sample is the same on every run.
np.random.seed(seed=987654321)
# 200 draws from the standard normal distribution (loc/scale spelled out at
# their default values).
input_array = st.norm.rvs(loc=0, scale=1, size=200)
a.analyze(
    input_array,
)
In [7]:
# Same seed and sample as the previous cell; only the analyze options differ.
np.random.seed(seed=987654321)
input_array = st.norm.rvs(loc=0, scale=1, size=200)
a.analyze(
    input_array,
    cdf=True,
    fit=True,
)
In [8]:
# Reseed, then draw 500 values from a Weibull-minimum distribution with
# shape parameter c = 1.7.
np.random.seed(seed=987654321)
input_array = st.weibull_min.rvs(c=1.7, size=500)
a.analyze(
    input_array,
    cdf=True,
    fit=True,
    distribution='weibull_min',
)
In [9]:
# Directory holding the CSV inputs; a later cell builds another file path from it.
source_path = "/Users/chrismorrow/Dropbox/Data/"
# os.path.join copes with the trailing slash on source_path, so the resulting
# path has a single separator instead of ".../Data//BodyFat.csv".
df = pd.read_csv(os.path.join(source_path, "BodyFat.csv"))
In [10]:
# Pairwise correlation matrix of the columns of df, as loaded from BodyFat.csv
# (the derived columns are only added in a later cell).
df.corr()
Out[10]:
In [11]:
# BMI from weight and height. The 703 factor is the usual imperial-unit
# conversion -- assumes WEIGHT is in pounds and HEIGHT in inches; TODO confirm
# against the data dictionary.
df['BMI'] = (df['WEIGHT'] / df['HEIGHT'] ** 2) * 703

# Bucket ages using the given bin edges (pd.cut bins are right-closed by default).
df['Age_Range'] = pd.cut(df['AGE'].values, [20, 26, 33, 39, 45, 51, 57, 63, 69, 75, 81])

# Label each BMI bin with a weight class; there is one label per bin, in order.
# Fixed the misspelled first label ('Very Severly' -> 'Very Severely').
df['Class'] = pd.cut(
    df['BMI'].values,
    [0, 15, 16, 18.5, 25, 30, 35, 40, 200],
    labels=[
        'Very Severely Underweight',
        'Severely Underweight',
        'Underweight',
        'Normal',
        'Overweight',
        'Moderately Obese',
        'Severely Obese',
        'Very Severely Obese',
    ],
)
In [12]:
# Abdomen vs. body fat, passed as an x/y pair with display names for each.
a.analyze(
    df['ABDOMEN'],
    df['BODYFAT'],
    xname='Abdomen',
    yname='Bodyfat',
)
In [13]:
# Same x/y pair as the previous cell, with the boxplot_borders option enabled.
a.analyze(
    df['ABDOMEN'],
    df['BODYFAT'],
    xname='Abdomen',
    yname='Bodyfat',
    boxplot_borders=True,
)
In [14]:
# Same x/y pair again: boxplot borders and contours on, fit off.
a.analyze(
    df['ABDOMEN'],
    df['BODYFAT'],
    xname='Abdomen',
    yname='Bodyfat',
    boxplot_borders=True,
    fit=False,
    contours=True,
)
In [15]:
# Single-column analysis of BODYFAT with the fit option enabled.
a.analyze(
    df['BODYFAT'],
    name='Bodyfat',
    fit=True,
)
In [16]:
# Single-column analysis of AGE, displayed under the name 'Age'.
a.analyze(
    df['AGE'],
    name='Age',
)
In [17]:
# Mean body fat per age bucket. Select BODYFAT *before* aggregating: only that
# column is averaged, and the non-numeric 'Class' column is never passed to
# mean() (which raises on recent pandas versions when numeric_only is not set).
df.groupby('Age_Range')['BODYFAT'].mean()
Out[17]:
In [18]:
# Keep only rows with HEIGHT above 60.
df2 = df[df['HEIGHT'] > 60]

# Exclude the oldest age bucket. Depending on the pandas version, pd.cut
# categories are either strings or Interval objects; comparing Intervals to a
# string never matches, so compare on the string form, which works for both.
keep_rows = df2['Age_Range'].astype(str) != '(75, 81]'

# Map each remaining age bucket to its array of heights. Grouping by a
# categorical column can still yield empty groups for categories with no rows
# left, so those are skipped.
height = {
    name: group['HEIGHT'].values
    for name, group in df2[keep_rows].groupby('Age_Range')
    if len(group) > 0
}
a.analyze(height)
In [19]:
# Keep only rows with HEIGHT above 60.
df2 = df[df['HEIGHT'] > 60]

# Exclude the oldest age bucket. Compare on the string form of the category so
# the filter works whether pd.cut produced string or Interval categories
# (an Interval never equals the string '(75, 81]').
keep_rows = df2['Age_Range'].astype(str) != '(75, 81]'

# Parallel lists: groups[i] is the age bucket whose heights are in height[i].
groups = []
height = []
for name, group in df2[keep_rows].groupby('Age_Range'):
    # Grouping by a categorical column can yield empty groups for categories
    # that no longer have rows; skip them.
    if len(group) == 0:
        continue
    groups.append(name)
    height.append(group['HEIGHT'].values)

a.analyze(height, groups=groups, categories='Age Group', name='Height', title='Height by Age Group')
In [20]:
# Body fat for the 'Normal' and 'Overweight' weight classes, passed as two
# groups in that order.
a.analyze(
    [
        df['BODYFAT'].groupby(df['Class']).get_group(label)
        for label in ('Normal', 'Overweight')
    ],
    groups=['Normal', 'Overweight'],
)
In [21]:
# Height for the 'Normal' and 'Overweight' weight classes, passed as two
# groups in that order.
a.analyze(
    [
        df['HEIGHT'].groupby(df['Class']).get_group(label)
        for label in ('Normal', 'Overweight')
    ],
    groups=['Normal', 'Overweight'],
)
In [22]:
# Load the player statistics CSV from under source_path. os.path.join copes
# with the trailing slash on source_path, so the path has no doubled separator.
bb = pd.read_csv(os.path.join(source_path, "ncaa_2016", "player_stats.csv"))
In [23]:
# Show the column labels of the player-stats frame loaded above.
bb.columns
Out[23]:
In [26]:
# The 'fgmpg' and 'astpg' columns, passed as an x/y pair.
a.analyze(
    bb['fgmpg'],
    bb['astpg'],
)
In [27]:
# Single-column analysis of 'points_avg'.
a.analyze(
    bb['points_avg'],
)
In [33]:
# One 'fgmpg' series per value of 'Year', keyed by that value.
a.analyze(
    {
        year: rows['fgmpg']
        for year, rows in bb.groupby('Year')
    }
)
In [ ]: