notebook.community

Edit and run



In [1]:

    
import pandas as pd

import numpy as np

import random

import csv



In [2]:

    
df = pd.read_csv("USstockdata.csv")



In [ ]:



In [3]:

    
#removes rows with empty data and data containing strings and errors

df.dropna()


df = df[df.RET != "C"]
df = df[df.RET != "B"]



In [4]:

    
# converts data to proper formats, high precision floats and datetime

df["RET"] = df["RET"].astype(np.float64)
df["date"] = pd.to_datetime(df["date"])
df.set_index(df["date"])
df = df.set_index(pd.DatetimeIndex(df["date"]))



In [5]:

    
# df.set_index(df["Names Date"])
# df_m = df.groupby(["PERMNO", pd.TimeGrouper(freq='M')])["Returns"].sum()

#filters by count greater than 223

df =df.groupby( "PERMNO").filter( lambda x: len(x) >223)



In [28]:

    
df.head()









    Out[28]:






  
    
      
      PERMNO
      date
      RET
      number_of_values
    
  
  
    
      1994-12-30
      10001
      1994-12-30
      -0.033433
      NaN
    
    
      1995-01-31
      10001
      1995-01-31
      -0.031250
      NaN
    
    
      1995-02-28
      10001
      1995-02-28
      -0.026210
      NaN
    
    
      1995-03-31
      10001
      1995-03-31
      0.006377
      NaN
    
    
      1995-04-28
      10001
      1995-04-28
      0.000000
      NaN



In [22]:

    
def sampling_function(number_in_sample, number_of_repeats):
    """
    Takes a random sample of stocks and calculates average monthly return for a group of them
    Data is stored in a csv file for further analysis
    Args:
    number_in_sample: number of different stocks
    number_of_repeats: times the function was run
    
    """
    lista = []
    for n in xrange(1, number_of_repeats):
        dfList = df["PERMNO"].tolist()
        rand_smpl = [ dfList[i] for i in sorted(random.sample(xrange(len(dfList)), number_in_sample)) ]
#         with open('filename.csv', 'a') as myfile:
#             wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
#             wr.writerow(rand_smpl)

        random_sample_df = df[df["PERMNO"].isin(rand_smpl)]
        series_sum_monthly = random_sample_df.groupby( pd.TimeGrouper(freq='M'))["RET"].sum()
        frame_sum_monthly = series_sum_monthly.to_frame()
        frame_sum_monthly["Ratio"] = frame_sum_monthly.RET/number_in_sample
        frame_sum_monthly["Variance"] = frame_sum_monthly.RET.var()
        
        pp = frame_sum_monthly["Variance"].iloc[0]
        


        with open('variance.csv', 'a') as myfile:
            wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            wr.writerow([pp])
        frame_sum_monthly.to_csv('filename.csv',mode = 'a',header ='column_names')
        
        lista.append(float(pp))
        
    rezultat = sum(lista)/float(number_in_sample)
        
    return rezultat



In [33]:

    
#example of a function call

listoo = list(range(df["PERMNO"].nunique()))
listoo.pop(0)









    Out[33]:





0



In [34]:

    
for n in listoo:
    sampling_function(n, 1000)









    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-34-3628ba65e7ce> in <module>()
      1 
      2 for n in listoo:
----> 3     sampling_function(n, 1000)

<ipython-input-22-5e90c4c217d2> in sampling_function(number_in_sample, number_of_repeats)
     12     lista = []
     13     for n in xrange(1, number_of_repeats):
---> 14         dfList = df["PERMNO"].tolist()
     15         rand_smpl = [ dfList[i] for i in sorted(random.sample(xrange(len(dfList)), number_in_sample)) ]
     16 #         with open('filename.csv', 'a') as myfile:

/home/tric/anaconda2/lib/python2.7/site-packages/pandas/core/series.pyc in tolist(self)
   1052     def tolist(self):
   1053         """ Convert Series to a nested list """
-> 1054         return list(self)
   1055 
   1056     def to_dict(self):

KeyboardInterrupt:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	PERMNO	date	RET	number_of_values
1994-12-30	10001	1994-12-30	-0.033433	NaN
1995-01-31	10001	1995-01-31	-0.031250	NaN
1995-02-28	10001	1995-02-28	-0.026210	NaN
1995-03-31	10001	1995-03-31	0.006377	NaN
1995-04-28	10001	1995-04-28	0.000000	NaN