In [1]:
#
# A quick notebook that converts the Date/Volume columns of a CSV download to JSON
#
In [2]:
# grab pandas
import pandas as pd
# grab datetime for formatting dates
from datetime import datetime
In [3]:
# Read the remote CSV straight into a data frame.
source_url = "https://www.google.com/finance/historical?output=csv&q=dva"
df = pd.read_csv(source_url)
# (rows, columns) of what was downloaded
print(df.shape)
In [4]:
# Preview the first five rows to check what columns came back.
print(df.head(n=5))
In [5]:
# Narrow the frame down to the Date and Volume columns only;
# `df` is rebound to the two-column frame from here on.
wanted_columns = ["Date", "Volume"]
df = df[wanted_columns]
print(df.head())
print(df.shape)
In [6]:
# Parse the Date strings (day-abbreviated month-two digit year) into real
# datetimes so they can be re-formatted later, then pair them with Volume
# in a new two-column data frame.
parsed_dates = pd.to_datetime(df["Date"], format='%d-%b-%y')
df_date_volume = pd.concat([parsed_dates, df["Volume"]], axis=1)
print(df_date_volume.head())
In [7]:
# Build the JSON text by hand: one {"date": ..., "volume": ...} object per row.
# (pandas' DataFrame.to_json could do this as well; the manual route was
# quicker to write at the time.)
#
# NOTE: every object, including the last one, is followed by a comma, so when
# there is at least one row the text built here ends in ",]". That trailing
# comma is removed in a later step before the text is written to disk.
# The result is stored in `json` because the following cells read that name.
records = []
for _, row in df_date_volume.iterrows():
    # the first 10 characters of the date's string form are the part we keep
    date_text = str(row["Date"])[0:10]
    # the volume is emitted unquoted, i.e. as a bare JSON number
    volume_text = str(row["Volume"])
    records.append('{ "date":"' + date_text + '","volume":' + volume_text + '},')
# join once instead of growing the string with += on every row
json = "[" + "".join(records) + "]"
In [8]:
# Show the tail of the text before and after the clean-up.
print(json[-2:])
# Remove the last comma, but only when it is really there. The guard makes
# this cell safe to re-run (a second run is a no-op instead of chopping off
# the final "}") and leaves an empty "[]" untouched.
if json.endswith(',]'):
    json = json[:-2] + ']'
print(json[-2:])
In [9]:
# Write the JSON text to dva.json. The with-block closes the file even if
# the write raises, so the handle is never leaked.
with open('dva.json', 'w') as file:
    file.write(json)