In [19]:
# Kernel: Python 3.5 with pandas 0.17.
%matplotlib inline
In [20]:
import numpy as np

# Seed NumPy's global generator so the draw below is repeatable on re-run.
np.random.seed(100)
# Three uniform samples from [0, 1); random_sample is the function rand wraps.
ar = np.random.random_sample(3)
ar.shape
Out[20]:
In [3]:
# Build a 1-D integer array from explicit values and inspect its shape.
ar2 = np.array((1, 2, 4))
ar2.shape
Out[3]:
In [21]:
import pandas as pd

# Structured array holding four zero-initialised member records.
# 'U15' stores Name as a unicode string of up to 15 characters. The former
# 'a15' code is a bytes type, so on Python 3 the names would come back as
# b'...' values (and the 'a' alias no longer exists in NumPy 2.0).
memberData = np.zeros((4,),
                      dtype=[('Name', 'U15'),
                             ('Age', 'i4'),
                             ('Weight', 'f4')])
In [5]:
# Fill every record of the structured array with (Name, Age, Weight) tuples.
memberData[:] = [
    ('Sanjeev', 37, 162.4),
    ('YingLuck', 45, 137.8),
    ('Emeka', 28, 153.3),
    ('Amy', 61, 107.3),
]
# Wrap the structured array in a DataFrame and display it.
memberDF = pd.DataFrame(memberData)
memberDF
Out[5]:
In [6]:
# Same records, but with explicit row labels 'a'..'d' passed as the index.
pd.DataFrame(memberData,index=['a','b','c','d'])
Out[6]:
In [7]:
# Select the 'Weight' column of the member frame.
memberDF['Weight']
Out[7]:
In [8]:
# Show the Python type of the object built from the structured array.
type(memberDF)
Out[8]:
In [23]:
# Load the CSV at ./data/E1.csv (path is relative to the notebook's working directory).
# Later cells read its 'FTHG' column.
football_df=pd.read_csv('./data/E1.csv')
In [33]:
# Plain-text preview of the first rows of the loaded frame.
print(football_df.head())
# Disabled: summary statistics for the same frame.
#print(football_df.describe())
In [32]:
from scipy import stats
%precision 2
print(np.mean(football_df['FTHG']))
print(np.median(football_df['FTHG']))
print(stats.mode(football_df['FTHG']))
plt.hist(football_df['FTHG'])
Out[32]:
In [11]:
#print(football_df.set_index(["Date","Div"]))
In [12]:
#football_df.ix['Date']
In [3]:
#chapter 6
import numpy as np
import pandas as pd
import matplotlib as plt
%matplotlib inline
# Trading days as ISO date strings (2014-05-03 and 2014-05-04 are absent).
date_stngs = [
    '2014-05-01',
    '2014-05-02',
    '2014-05-05',
    '2014-05-06',
    '2014-05-07',
]
# Parse the strings into a datetime Series.
tradeDates = pd.to_datetime(pd.Series(date_stngs))
# One closing price per entry of date_stngs, in the same order.
closingprices = [531.35, 527.93, 527.81, 515.14, 509.36]
In [14]:
# One-column frame of closing prices, indexed by the parsed trade dates.
googClosingPrices = pd.DataFrame(
    data=closingprices,
    columns=['closing price'],
    index=tradeDates
)
googClosingPrices
Out[14]:
In [15]:
# NOTE(review): pandas.io.data is a legacy module (this notebook states
# pandas 0.17); newer pandas releases presumably need the separate
# pandas-datareader package instead -- confirm before upgrading.
import pandas.io.data as web
import datetime
# Request GOOG quotes from Yahoo for 2014-05-01 through 2014-05-07 (needs network access).
# NOTE(review): start is a datetime.datetime while end is a datetime.date -- confirm the mix is intended.
googPrices = web.get_data_yahoo("GOOG",start=datetime.datetime(2014,5,1),end=datetime.date(2014,5,7))
In [16]:
# Build a frame from the downloaded 'Close' column, passing tradeDates as the index.
googFinalPrices=pd.DataFrame(googPrices['Close'],index=tradeDates)
googFinalPrices
Out[16]:
In [17]:
# Convert the closing-price frame to calendar-day ('D') frequency.
# The following cells check the result for null entries.
googClosingPricesCDays=googClosingPrices.asfreq('D')
googClosingPricesCDays
Out[17]:
In [18]:
# Boolean mask: True where the daily-frequency frame has a missing value.
googClosingPricesCDays.isnull()
Out[18]:
In [19]:
# Complementary mask: True where a value is present.
googClosingPricesCDays.notnull()
Out[19]:
In [20]:
# Work on a copy so the assignments below do not touch tradeDates.
tDates = tradeDates.copy()
# Blank out two entries. pd.NaT is the missing-value marker for datetimes;
# the np.NaN spelling used previously was removed in NumPy 2.0.
tDates[1] = pd.NaT
tDates[4] = pd.NaT
tDates
Out[20]:
In [21]:
# Display option: precision 4 (applies to every later cell as well).
pd.set_option('display.precision', 4)

# Per-day volume figures for the two tickers, five entries each.
FBVolume = [82.34, 54.11, 45.99, 55.86, 78.5]
TWTRVolume = [15.74, 12.71, 10.39, 134.62, 68.84]

# Place the two volume series and the trade dates side by side as columns
# labelled 'FB', 'TWTR' and 'TradeDate'.
socialTradingVolume = pd.concat(
    [pd.Series(FBVolume), pd.Series(TWTRVolume), tradeDates],
    axis=1,
    keys=['FB', 'TWTR', 'TradeDate']
)
socialTradingVolume
Out[21]:
In [22]:
import datetime

# pd.datetime was only an alias of datetime.datetime and has been removed from
# newer pandas; the standard-library class gives the same object everywhere.
xmasDay = datetime.datetime(2014, 12, 25)
xmasDay
Out[22]:
In [23]:
# Boxing Day is the day after Christmas, so shift forward by one day
# (the previous days=-1 produced 24 December instead).
boxingDay = xmasDay + pd.DateOffset(days=1)
boxingDay
Out[23]:
In [24]:
import datetime

# Current local time. Uses the standard-library class directly because the
# pd.datetime alias has been removed from newer pandas.
today = datetime.datetime.now()
today
Out[24]:
In [27]:
import datetime
from pandas.tseries.offsets import QuarterBegin

# Standard-library datetime replaces the pd.datetime alias, which newer pandas
# no longer provides.
lastDay = datetime.datetime(2016, 1, 1)
# QuarterBegin with its default arguments; adding it moves lastDay forward to
# the next quarter-start date under that offset's convention.
dtoffset = QuarterBegin()
lastDay + dtoffset
Out[27]:
In [7]:
# Load the tick-data CSV (path is relative to the notebook's working directory) and preview it.
# A later cell reads its 'Timestamp' column.
googTickData=pd.read_csv('./data/GOOG_tickdata_20140527.csv')
googTickData.head()
Out[7]:
In [14]:
# Parse 'Timestamp' as seconds since the epoch (unit='s') into UTC datetimes,
# stored in a new 'tstamp' column. This mutates googTickData in place.
googTickData['tstamp']=pd.to_datetime(googTickData['Timestamp'],unit='s',utc=True)
googTickData.head()
Out[14]:
In [24]:
# Index the ticks by the parsed 'tstamp' column and drop the original
# 'Timestamp' column, in a single chained expression.
googTickTS = googTickData.set_index('tstamp').drop('Timestamp', axis=1)
# Preview the first rows, then report the number of ticks.
print(googTickTS.head())
print(len(googTickTS))
In [29]:
pd.set_option('display.precision',5)
# Resample the ticks into 10-minute bins, taking the minimum within each bin; show the first 6 bins.
# NOTE(review): `how=` is the resample API of the pandas 0.17 era this notebook targets;
# later releases presumably expect .resample('10Min').min() -- confirm before upgrading.
googTickTS.resample('10Min',how=np.min).head(6)
Out[29]:
In [30]:
pd.set_option('display.precision',5)
# Same 10-minute bins, but with each bin closed on its right edge.
# NOTE(review): no aggregation is named, so this relies on the default of the
# installed pandas (presumably the mean in 0.17 -- TODO confirm).
googTickTS.resample('10Min',closed='right').head(6)
Out[30]:
In [40]:
# Resample the first three ticks to 30-second frequency, back-filling the new slots.
# NOTE(review): `fill_method=` is the pandas 0.17-era API; later releases
# presumably expect .resample('30s').bfill() -- confirm before upgrading.
googTickTS[:3].resample('30s',fill_method='bfill')
Out[40]:
In [45]:
# Resample with a compound frequency string: 7 minutes ('7T') plus 30 seconds ('30S').
# NOTE(review): no aggregation is named; the result depends on the default of the installed pandas.
googTickTS.resample('7T30S').head(5)
Out[45]:
In [2]:
import pandas as pd
# A Period for '2014' with the 'A-MAY' frequency string (annual, anchored on May).
pd.Period('2014',freq='A-MAY')
Out[2]:
In [3]:
# Integer arithmetic on a Period: shift forward by 4 units of its frequency
# (frequency is inferred from the string -- presumably daily; TODO confirm).
pd.Period('11/11/2013')+4
Out[3]:
In [5]:
# Difference between two Periods built from date strings.
pd.Period('10/10/2014') - pd.Period('11/11/2013')
Out[5]:
In [10]:
# Daily periods from 2014-02-01 through 2014-02-16.
perRng = pd.period_range('2/1/2014', '2/16/2014', freq='D')
# A bare expression in the middle of a cell is evaluated and thrown away,
# so print the type explicitly to make it visible.
print(type(perRng))
print(perRng[:4])
In [11]:
import matplotlib.pyplot as plt
import numpy as np
# 256 evenly spaced sample points covering [-pi, pi], both ends included.
X = np.linspace(-np.pi, np.pi, num=256, endpoint=True)
# The two curves to plot: cos(x)+sin(x) and sin(x)-cos(x).
f = np.cos(X) + np.sin(X)
g = np.sin(X) - np.cos(X)
# Wrap each curve in a Series (default integer index).
f_ser, g_ser = pd.Series(f), pd.Series(g)
In [12]:
# Put the two curves side by side, labelling the columns at concat time.
plotDF = pd.concat(
    [f_ser, g_ser],
    axis=1,
    keys=['sin(x)+cos(x)', 'sin(x)-cos(x)']
)
# Use the sample points as the row index so they become the x-axis when plotted.
plotDF.index = X
plotDF.head()
Out[12]:
In [14]:
%matplotlib inline
# Draw both columns of plotDF as lines on one set of axes, then render the figure.
plotDF.plot()
plt.show()
In [16]:
# Same data, one subplot per column, in a 6x6 figure.
plotDF.plot(subplots=True,figsize=(6,6))
Out[16]:
In [37]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import colors
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
In [53]:
# Discrete uniform distribution: probability 1/6 at x = 1..6 and 0 elsewhere,
# evaluated over x = 0..10.
X = range(0, 11)
Y = [1/6.0 if 1 <= x <= 6 else 0.0 for x in X]

# Green circular markers only (line width is 0), plus a legend entry.
plt.plot(X, Y, 'go-', linewidth=0, drawstyle='steps-pre', label='p(x)=1/6')
plt.legend(loc="upper left")
# Vertical stems from 0 up to the probability value at each of x = 1..6.
plt.vlines(range(1, 7), 0, max(Y), linestyle='-')

# Title, axis labels and axis limits.
plt.title('Discrete uniform probability distribution with p=1/6')
plt.xlabel('x')
plt.ylabel('p(X)')
plt.xlim(0, 10)
plt.ylim(0, 0.5)
plt.show()
In [51]:
import datetime as dt
Out[51]:
In [ ]: