In [1]:
import pandas as pd
import numpy as np
import random
import csv
In [2]:
df = pd.read_csv("USstockdata.csv")
In [ ]:
In [3]:
#removes rows with empty data and data containing strings and errors
df.dropna()
df = df[df.RET != "C"]
df = df[df.RET != "B"]
In [4]:
# converts data to proper formats, high precision floats and datetime
df["RET"] = df["RET"].astype(np.float64)
df["date"] = pd.to_datetime(df["date"])
df.set_index(df["date"])
df = df.set_index(pd.DatetimeIndex(df["date"]))
In [5]:
# df.set_index(df["Names Date"])
# df_m = df.groupby(["PERMNO", pd.TimeGrouper(freq='M')])["Returns"].sum()
#filters by count greater than 223
df =df.groupby( "PERMNO").filter( lambda x: len(x) >223)
In [28]:
df.head()
Out[28]:
In [22]:
def sampling_function(number_in_sample, number_of_repeats):
"""
Takes a random sample of stocks and calculates average monthly return for a group of them
Data is stored in a csv file for further analysis
Args:
number_in_sample: number of different stocks
number_of_repeats: times the function was run
"""
lista = []
for n in xrange(1, number_of_repeats):
dfList = df["PERMNO"].tolist()
rand_smpl = [ dfList[i] for i in sorted(random.sample(xrange(len(dfList)), number_in_sample)) ]
# with open('filename.csv', 'a') as myfile:
# wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
# wr.writerow(rand_smpl)
random_sample_df = df[df["PERMNO"].isin(rand_smpl)]
series_sum_monthly = random_sample_df.groupby( pd.TimeGrouper(freq='M'))["RET"].sum()
frame_sum_monthly = series_sum_monthly.to_frame()
frame_sum_monthly["Ratio"] = frame_sum_monthly.RET/number_in_sample
frame_sum_monthly["Variance"] = frame_sum_monthly.RET.var()
pp = frame_sum_monthly["Variance"].iloc[0]
with open('variance.csv', 'a') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow([pp])
frame_sum_monthly.to_csv('filename.csv',mode = 'a',header ='column_names')
lista.append(float(pp))
rezultat = sum(lista)/float(number_in_sample)
return rezultat
In [33]:
#example of a function call
listoo = list(range(df["PERMNO"].nunique()))
listoo.pop(0)
Out[33]:
In [34]:
for n in listoo:
sampling_function(n, 1000)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: