In [1]:
#
# A quick notebook that converts historical stock data from CSV to JSON
#

In [2]:
# grab pandas 
import pandas as pd 
# grab datetime for formatting dates
from datetime import datetime

In [3]:
# Download the historical quotes CSV for ticker "dva" and parse it
# straight into a data frame (requires network access)
csv_url = "https://www.google.com/finance/historical?output=csv&q=dva"
df = pd.read_csv(csv_url)
# sanity check: (number of rows, number of columns)
print(df.shape)


(251, 6)

In [4]:
# preview the first five rows to check the column layout
first_rows = df.head(5)
print(first_rows)


        Date   Open   High    Low  Close   Volume
0  30-Jun-17  64.21  65.29  64.11  64.76  1629150
1  29-Jun-17  64.81  64.93  63.60  64.06   880785
2  28-Jun-17  64.50  65.15  64.12  64.72   932358
3  27-Jun-17  64.46  64.88  64.06  64.12  1005212
4  26-Jun-17  63.86  64.89  63.77  64.45  1149985

In [5]:
# keep just the two columns that end up in the JSON output
wanted_columns = ["Date", "Volume"]
df = df[wanted_columns]
# confirm the selection: first rows, then (rows, columns)
print(df.head())
print(df.shape)


        Date   Volume
0  30-Jun-17  1629150
1  29-Jun-17   880785
2  28-Jun-17   932358
3  27-Jun-17  1005212
4  26-Jun-17  1149985
(251, 2)

In [6]:
# Build a new data frame whose "Date" column holds real datetimes
# (the source strings look like "30-Jun-17") so it can be reformatted later;
# "Volume" is carried over unchanged.
parsed_dates = pd.to_datetime(df["Date"], format='%d-%b-%y')
df_date_volume = pd.concat([parsed_dates, df["Volume"]], axis=1)
print(df_date_volume.head())


        Date   Volume
0 2017-06-30  1629150
1 2017-06-29   880785
2 2017-06-28   932358
3 2017-06-27  1005212
4 2017-06-26  1149985

In [7]:
# Hand-rolled serialisation: pandas' .to_json would probably be the better
# tool, but it was more complicated than this quick job warranted.
#
# Each row becomes one '{ "date":"YYYY-MM-DD","volume":N},' fragment.
# Every fragment ends with a comma, so the assembled text finishes with ",]";
# that trailing comma has to be stripped before the string is valid JSON.

pieces = ["["]
for _, record in df_date_volume.iterrows():
    # str() of the timestamp starts with the date; keep only those 10 chars
    day = str(record["Date"])[0:10]
    # the volume is written as a bare (unquoted) number
    volume = str(record["Volume"])
    pieces.append('{ "date":' + '"' + day + '",' + '"volume":' + volume + '},')
pieces.append("]")
json = "".join(pieces)

In [8]:
# show the tail of the string before the fix-up
print(json[-2:])
# Remove the last comma, i.e. turn a trailing ",]" into "]".
# The check makes this cell safe to re-run: without it a second run would
# slice off the final "}" of the last record, and an empty result ("[]")
# would be reduced to "]".
if json.endswith(',]'):
    json = json[:-2] + ']'
# show the tail again to confirm the string now closes cleanly
print(json[-2:])


,]
}]

In [9]:
# Write the JSON text to dva.json in the current working directory.
# The with-block guarantees the file handle is closed even if write() raises.
with open('dva.json', 'w') as file:
    file.write(json)