In [1]:
# You may need to install html5lib, lxml, and BeautifulSoup4. In your terminal/command prompt run:
# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
from datetime import datetime, timedelta
# Refresh the cached catalyst calendar when it is missing or more than an hour old.
one_hour_ago = datetime.now() - timedelta(hours=1)
cache_is_stale = (
    not os.path.exists("history.csv")
    # getmtime (last write) instead of getctime: on Windows ctime is the creation
    # time, which does not move when the file is overwritten, so the cache would
    # be re-downloaded on every run once it is an hour old.
    or datetime.fromtimestamp(os.path.getmtime("history.csv")) < one_hour_ago
)
if cache_is_stale:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    # Only the first table found on the page is cached.
    histdata[0].to_csv('history.csv', index=False)

# Load the cached table, indexed by ticker symbol.
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name = None
# Split the leading dd/dd/dddd date off the Catalyst text into its own column.
# Raw string: the pattern is unchanged, but "\/" is no longer an invalid string escape.
df[["Date", "Catalyst"]] = df.Catalyst.str.extract(
    r'(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)', expand=True
)
df.loc["OCUL"]
Out[1]:
In [469]:
# You may need to install html5lib, lxml, and BeautifulSoup4. In your terminal/command prompt run:
# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import *
from pylab import text
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
import quandl
# Quandl API key: read from the environment rather than committing it to the notebook.
# Export QUANDL_API_KEY before starting Jupyter. When it is unset the key stays None
# (presumably anonymous, rate-limited access -- confirm against the Quandl docs).
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY")

# Some formatting
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_seq_items', 2)

# Window sizes; not referenced again in this cell.
three_weeks_ago = relativedelta(weeks=3)
one_week_ahead = relativedelta(weeks=1)

# Only pull fresh PDUFA data: re-download when the cached CSV is missing or was
# last written more than an hour ago.
one_hour_ago = datetime.now() - timedelta(hours=1)
cache_is_stale = (
    not os.path.exists("history.csv")
    # getmtime (last write) instead of getctime: on Windows ctime is the creation
    # time, which does not move when the file is overwritten.
    or datetime.fromtimestamp(os.path.getmtime("history.csv")) < one_hour_ago
)
if cache_is_stale:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    histdata[0].to_csv('history.csv', index=False)

# Create dataframe
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name = None
# Split the leading dd/dd/dddd date off the Catalyst text into its own column.
# Raw string: the pattern is unchanged, but "\/" is no longer an invalid string escape.
df[["Date", "Catalyst"]] = df.Catalyst.str.extract(
    r'(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)', expand=True
)
df['Date'] = pd.to_datetime(df['Date'])
# Price window fetched around each catalyst date: 3 weeks before to 1 week after.
df["Past"] = df["Date"] - DateOffset(weeks=3)
df["Future"] = df["Date"] + DateOffset(weeks=1)
# Set stock ticker
stockpick = "HALO"
dataset = f"WIKI/{stockpick}"

# Set variables for plot creation.
# A list selector always yields a DataFrame, so a ticker with a single catalyst
# row counts as one event (df.loc[stockpick] would return a Series there and
# len(...index) would be the number of columns).
events = df.loc[[stockpick]]
length = len(events.index)
# squeeze=False keeps `axes` an array even when there is only one panel.
fig, axes = plt.subplots(nrows=length, ncols=1, figsize=(16, length * 3), squeeze=False)
axes = axes[:, 0]
fig.subplots_adjust(hspace=0, wspace=0)
allplots = []

# Fetch the price window around each catalyst date and draw one stacked panel per event.
for count, ax in enumerate(axes):
    event = events.iloc[count]
    pasttime = event["Past"]
    futuretime = event["Future"]
    pdufa = event["Date"]
    annotate = pdufa + timedelta(days=1)  # only used by the commented-out arrow below
    stage = event["Stage"]
    catalyst = event["Catalyst"]
    drug = event["Drug"]

    # Annotation box anchored at loc=2 (upper left) describing the event.
    tooltip = f"stage:{stage} -{catalyst}\n{drug}"
    at = AnchoredText(tooltip, prop=dict(size=10), frameon=True, loc=2)
    at.patch.set_boxstyle("round,pad=0.2,rounding_size=0.2")
    ax.add_artist(at)
    ax.margins(0.0, 0.5)

    # Get quandl data
    mydata = quandl.get(dataset, start_date=pasttime, end_date=futuretime)
    allplots.append(mydata)
    if mydata.empty:
        # No prices for this window: min()/max() would be NaN and unusable as
        # axis limits, so leave the panel with only its annotation.
        ax.axes.get_xaxis().set_visible(False)
        continue
    close = mydata["Close"]
    low, high = close.min(), close.max()

    # ax.annotate('local max', xy=(pdufa, high), xytext=(annotate, (high - low) * 0.9 + low),
    #             arrowprops=dict(facecolor='black', shrink=0.05, width=1, headwidth=5),)

    # Set y limit for notes: leave half the price range free above the high.
    ax.set_ylim(low * .99, (high - low) * 0.5 + high)
    # Separator line just above the highest close, spanning the whole window.
    ax.hlines(high * 1.01, mydata.index[0], mydata.index[-1], linestyle="-", lw=1, color='black')
    if count % 2 == 0:
        # Shade every other panel so the stacked rows are easy to tell apart.
        ax.set_facecolor((0.91, 0.91, 0.91))
    # Dashed marker on the catalyst date.
    ax.vlines(pdufa, low * .99, high * 1.01, linestyle="--", color='black')
    ax.plot(mydata.index, close, c=np.random.rand(3,), lw=2, label=pdufa)
    ax.axes.get_xaxis().set_visible(False)  # remove x axis
plt.show()
In [54]:
# You may need to install html5lib, lxml, and BeautifulSoup4. In your terminal/command prompt run:
# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import *
from pylab import text
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
import quandl
# Quandl API key: read from the environment rather than committing it to the notebook.
# Export QUANDL_API_KEY before starting Jupyter. When it is unset the key stays None
# (presumably anonymous, rate-limited access -- confirm against the Quandl docs).
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY")

# Some formatting
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_seq_items', 2)

# Window sizes; not referenced again in this cell.
three_weeks_ago = relativedelta(weeks=3)
one_week_ahead = relativedelta(weeks=1)

# Only pull fresh PDUFA data: re-download when the cached CSV is missing or was
# last written more than an hour ago.
one_hour_ago = datetime.now() - timedelta(hours=1)
cache_is_stale = (
    not os.path.exists("history.csv")
    # getmtime (last write) instead of getctime: on Windows ctime is the creation
    # time, which does not move when the file is overwritten.
    or datetime.fromtimestamp(os.path.getmtime("history.csv")) < one_hour_ago
)
if cache_is_stale:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    histdata[0].to_csv('history.csv', index=False)

# Create dataframe
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name = None
# Split the leading dd/dd/dddd date off the Catalyst text into its own column.
# Raw string: the pattern is unchanged, but "\/" is no longer an invalid string escape.
df[["Date", "Catalyst"]] = df.Catalyst.str.extract(
    r'(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)', expand=True
)
df['Date'] = pd.to_datetime(df['Date'])
# Price window fetched around each catalyst date: 3 weeks before to 1 week after.
df["Past"] = df["Date"] - DateOffset(weeks=3)
df["Future"] = df["Date"] + DateOffset(weeks=1)
# Set stock ticker
stockpick = "HALO"
dataset = f"WIKI/{stockpick}"

# A list selector always yields a DataFrame, so a ticker with a single catalyst
# row counts as one event (df.loc[stockpick] would return a Series there and
# len(...index) would be the number of columns).
events = df.loc[[stockpick]]
length = len(events.index)

# One price frame per catalyst event, covering that event's Past..Future window.
allplots = [
    quandl.get(dataset, start_date=pasttime, end_date=futuretime)
    for pasttime, futuretime in zip(events["Past"], events["Future"])
]
# Print, for the fourth event window (allplots[3]), each closing price followed by
# its difference to every later close in the window.
# len(allplots[3].index) # 21
# NOTE(review): the slice stops one row short, so the final row of the window is
# never printed or compared -- confirm whether that is intended.
window = allplots[3]["Close"].iloc[:len(allplots[3].index) - 1]
for i, base_close in enumerate(window):
    print(base_close)
    # see 14 - 8 = 1.0 !!!
    for n, later_close in enumerate(window.iloc[i + 1:], start=i + 1):
        rangevalue = later_close - base_close
        print(f"{n} - {i} = {rangevalue}")
In [8]:
# You may need to install html5lib, lxml, and BeautifulSoup4. In your terminal/command prompt run:
# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import *
from pylab import text
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
import quandl
# Quandl API key: read from the environment rather than committing it to the notebook.
# Export QUANDL_API_KEY before starting Jupyter. When it is unset the key stays None
# (presumably anonymous, rate-limited access -- confirm against the Quandl docs).
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY")

# Some formatting
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_seq_items', 2)

# Window sizes; not referenced again in this cell.
three_weeks_ago = relativedelta(weeks=3)
one_week_ahead = relativedelta(weeks=1)

# Only pull fresh PDUFA data: re-download when the cached CSV is missing or was
# last written more than an hour ago.
one_hour_ago = datetime.now() - timedelta(hours=1)
cache_is_stale = (
    not os.path.exists("history.csv")
    # getmtime (last write) instead of getctime: on Windows ctime is the creation
    # time, which does not move when the file is overwritten.
    or datetime.fromtimestamp(os.path.getmtime("history.csv")) < one_hour_ago
)
if cache_is_stale:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    histdata[0].to_csv('history.csv', index=False)

# Create dataframe
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name = None
# Split the leading dd/dd/dddd date off the Catalyst text into its own column.
# Raw string: the pattern is unchanged, but "\/" is no longer an invalid string escape.
df[["Date", "Catalyst"]] = df.Catalyst.str.extract(
    r'(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)', expand=True
)
df['Date'] = pd.to_datetime(df['Date'])
# Price window fetched around each catalyst date: 5 weeks before to 2 weeks after.
df["Past"] = df["Date"] - DateOffset(weeks=5)
df["Future"] = df["Date"] + DateOffset(weeks=2)
# Set stock ticker
stockpick = "HALO"
dataset = f"WIKI/{stockpick}"

# Set variables for plot creation.
# A list selector always yields a DataFrame, so a ticker with a single catalyst
# row counts as one event (df.loc[stockpick] would return a Series there and
# len(...index) would be the number of columns).
events = df.loc[[stockpick]]
length = len(events.index)
# squeeze=False keeps `axes` an array even when there is only one panel.
fig, axes = plt.subplots(nrows=length, ncols=1, figsize=(16, length * 3), squeeze=False)
axes = axes[:, 0]
fig.subplots_adjust(hspace=0, wspace=0)
allplots = []

# Fetch the price window around each catalyst date and draw one stacked panel per event.
for count, ax in enumerate(axes):
    event = events.iloc[count]
    pasttime = event["Past"]
    futuretime = event["Future"]
    pdufa = event["Date"]
    annotate = pdufa + timedelta(days=1)  # only used by the commented-out arrow below
    stage = event["Stage"]
    catalyst = event["Catalyst"]
    drug = event["Drug"]

    # Annotation box anchored at loc=2 (upper left) describing the event.
    tooltip = f"stage:{stage} -{catalyst}\n{drug}"
    at = AnchoredText(tooltip, prop=dict(size=10), frameon=True, loc=2)
    at.patch.set_boxstyle("round,pad=0.2,rounding_size=0.2")
    ax.add_artist(at)
    ax.margins(0.0, 0.5)

    # Get quandl data
    mydata = quandl.get(dataset, start_date=pasttime, end_date=futuretime)
    allplots.append(mydata)
    if mydata.empty:
        # No prices for this window: min()/max() would be NaN and unusable as
        # axis limits, so leave the panel with only its annotation.
        ax.axes.get_xaxis().set_visible(False)
        continue
    close = mydata["Close"]
    low, high = close.min(), close.max()

    # ax.annotate('local max', xy=(pdufa, high), xytext=(annotate, (high - low) * 0.9 + low),
    #             arrowprops=dict(facecolor='black', shrink=0.05, width=1, headwidth=5),)

    # Set y limit for notes: leave half the price range free above the high.
    ax.set_ylim(low * .99, (high - low) * 0.5 + high)
    # Separator line just above the highest close, spanning the whole window.
    ax.hlines(high * 1.01, mydata.index[0], mydata.index[-1], linestyle="-", lw=1, color='black')
    if count == 3:
        # Only the fourth panel is shaded in this cell.
        ax.set_facecolor((0.91, 0.91, 0.91))
    # Dashed marker on the catalyst date.
    ax.vlines(pdufa, low * .99, high * 1.01, linestyle="--", color='black')
    ax.plot(mydata.index, close, c=np.random.rand(3,), lw=2, label=pdufa)
    ax.axes.get_xaxis().set_visible(False)  # remove x axis
plt.show()
In [3]:
# You may need to install html5lib, lxml, and BeautifulSoup4. In your terminal/command prompt run:
# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import *
from pylab import text
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
import quandl
# Quandl API key: read from the environment rather than committing it to the notebook.
# Export QUANDL_API_KEY before starting Jupyter. When it is unset the key stays None
# (presumably anonymous, rate-limited access -- confirm against the Quandl docs).
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY")

# Some formatting
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_seq_items', 2)

# Window sizes; not referenced again in this cell.
three_weeks_ago = relativedelta(weeks=3)
one_week_ahead = relativedelta(weeks=1)

# Only pull fresh PDUFA data: re-download when the cached CSV is missing or was
# last written more than an hour ago.
one_hour_ago = datetime.now() - timedelta(hours=1)
cache_is_stale = (
    not os.path.exists("history.csv")
    # getmtime (last write) instead of getctime: on Windows ctime is the creation
    # time, which does not move when the file is overwritten.
    or datetime.fromtimestamp(os.path.getmtime("history.csv")) < one_hour_ago
)
if cache_is_stale:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    histdata[0].to_csv('history.csv', index=False)

# Create dataframe
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name = None
# Split the leading dd/dd/dddd date off the Catalyst text into its own column.
# Raw string: the pattern is unchanged, but "\/" is no longer an invalid string escape.
df[["Date", "Catalyst"]] = df.Catalyst.str.extract(
    r'(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)', expand=True
)
df['Date'] = pd.to_datetime(df['Date'])
# Price window fetched around each catalyst date: 5 weeks before to 2 weeks after.
df["Past"] = df["Date"] - DateOffset(weeks=5)
df["Future"] = df["Date"] + DateOffset(weeks=2)
# Set stock ticker
stockpick = "HALO"
dataset = f"WIKI/{stockpick}"

# A list selector always yields a DataFrame, so a ticker with a single catalyst
# row counts as one event (df.loc[stockpick] would return a Series there and
# len(...index) would be the number of columns).
events = df.loc[[stockpick]]
length = len(events.index)
allplots = []
e = []  # one list of pairwise close differences per event

# For every event window, collect close[n] - close[i] for each pair i < n.
for pasttime, futuretime in zip(events["Past"], events["Future"]):
    # Get quandl data
    mydata = quandl.get(dataset, start_date=pasttime, end_date=futuretime)
    allplots.append(mydata)

    # Pull the closes out once instead of materialising a row per lookup.
    closes = mydata["Close"].tolist() if "Close" in mydata.columns else []
    # NOTE(review): as before, the last row of the window is left out of the
    # comparison (bounds were len - 1 for both loops) -- confirm that is intended.
    window = closes[:-1]
    # The pair label used to be assigned to a variable named `range`, which
    # shadowed the builtin for the whole kernel and was never read; it is gone.
    d = [
        later - earlier
        for i, earlier in enumerate(window)  # see 14 - 8 = 1.0 !!!
        for later in window[i + 1:]
    ]
    e.append(d)

# need to add row names to see what the ranges are specifically.
# Columns are events; row k holds the k-th (i, n) pair in generation order.
finaldf = pd.DataFrame(e).transpose()
finaldf
Out[3]:
In [45]:
# You may need to install html5lib, lxml, and BeautifulSoup4. In your terminal/command prompt run:
# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import *
from pandas.tseries.holiday import USFederalHolidayCalendar
from pylab import text
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
import quandl
# Quandl API key: read from the environment rather than committing it to the notebook.
# Export QUANDL_API_KEY before starting Jupyter. When it is unset the key stays None
# (presumably anonymous, rate-limited access -- confirm against the Quandl docs).
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY")

# Some formatting
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_seq_items', 2)

# Window sizes; not referenced again in this cell.
three_weeks_ago = relativedelta(weeks=3)
one_week_ahead = relativedelta(weeks=1)

# Only pull fresh PDUFA data: re-download when the cached CSV is missing or was
# last written more than an hour ago.
one_hour_ago = datetime.now() - timedelta(hours=1)
cache_is_stale = (
    not os.path.exists("history.csv")
    # getmtime (last write) instead of getctime: on Windows ctime is the creation
    # time, which does not move when the file is overwritten.
    or datetime.fromtimestamp(os.path.getmtime("history.csv")) < one_hour_ago
)
if cache_is_stale:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    histdata[0].to_csv('history.csv', index=False)

# Create dataframe
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name = None
# Split the leading dd/dd/dddd date off the Catalyst text into its own column.
# Raw string: the pattern is unchanged, but "\/" is no longer an invalid string escape.
df[["Date", "Catalyst"]] = df.Catalyst.str.extract(
    r'(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)', expand=True
)
df['Date'] = pd.to_datetime(df['Date'])
df['day_of_week'] = df['Date'].dt.day_name()
# Keep the published date before it is adjusted below.
df["Original_PDUFA"] = df["Date"]
# Adding a zero-length business-day offset: presumably leaves business days
# untouched and moves other dates forward to the next one -- confirm against
# the pandas offset docs.
df["Date"] = df["Date"].map(lambda x : x + 0*us_bd)
# Price window fetched around each catalyst date: 3 weeks before to 1 week after.
df["Past"] = df["Date"] - DateOffset(weeks=3)
df["Future"] = df["Date"] + DateOffset(weeks=1)
# Set stock ticker
stockpick = "HALO"
dataset = f"WIKI/{stockpick}"

# Set variables for plot creation.
# A list selector always yields a DataFrame, so a ticker with a single catalyst
# row counts as one event (df.loc[stockpick] would return a Series there and
# len(...index) would be the number of columns).
events = df.loc[[stockpick]]
length = len(events.index)
# squeeze=False keeps `axes` an array even when there is only one panel.
fig, axes = plt.subplots(nrows=length, ncols=1, figsize=(24, length * 4), squeeze=False)
axes = axes[:, 0]
# set hspace and wspace to 0 for stacked "sparklines" of sorts
fig.subplots_adjust(hspace=.2, wspace=0)
allplots = []

# Fetch the price window around each catalyst date and prepare one panel per event.
for count, ax in enumerate(axes):
    event = events.iloc[count]
    pasttime = event["Past"]
    futuretime = event["Future"]
    pdufa = event["Date"]
    annotate = pdufa + timedelta(days=1)  # only used by the commented-out arrow below
    stage = event["Stage"]
    catalyst = event["Catalyst"]
    drug = event["Drug"]

    # Annotation box anchored at loc=2 (upper left) describing the event.
    tooltip = f"stage:{stage}\n{drug}"
    at = AnchoredText(tooltip, prop=dict(size=10), frameon=True, loc=2)
    at.patch.set_boxstyle("round,pad=0.2,rounding_size=0.2")
    ax.add_artist(at)
    ax.margins(0.0, 0.5)

    # Get quandl data
    mydata = quandl.get(dataset, start_date=pasttime, end_date=futuretime)
    if mydata.empty:
        # No prices for this window: min()/max() would be NaN and unusable as
        # axis limits. Keep the list aligned with the events and move on.
        allplots.append(mydata)
        continue
    # Move the date index into a "Date" column so rows are numbered 0..n-1,
    # then flag the row that falls on the catalyst date.
    mydata = mydata.reset_index()
    mydata["PDUFA"] = mydata["Date"] == pdufa
    allplots.append(mydata)
    close = mydata["Close"]
    low, high = close.min(), close.max()

    # ax.annotate('local max', xy=(pdufa, high), xytext=(annotate, (high - low) * 0.9 + low),
    #             arrowprops=dict(facecolor='black', shrink=0.05, width=1, headwidth=5),)

    # Set y limit for notes: leave half the price range free above the high.
    ax.set_ylim(low * .99, (high - low) * 0.5 + high)
    # Separator line just above the highest close, spanning the whole window.
    ax.hlines(high * 1.01, mydata.index[0], mydata.index[-1], linestyle="-", lw=1, color='black')
    if count % 2 == 0:
        # Shade every other panel so the stacked rows are easy to tell apart.
        ax.set_facecolor((0.91, 0.91, 0.91))
    # ax.vlines(15, low * .99, high * 1.01, linestyle="--", color='black')
    # ax.plot(mydata.index, close, c=np.random.rand(3,), lw=2, label=pdufa)
    # ax.axes.get_xaxis().set_visible(False) # remove x axis

# plt.show() # need to show x axis temporarily to ensure null times aren't being added
allplots[0]
Out[45]:
In [30]:
"""
=====================================
Custom tick formatter for time series
=====================================
When plotting time series, e.g., financial time series, one often wants
to leave out days on which there is no data, i.e. weekends. The example
below shows how to use an 'index formatter' to achieve the desired plot
"""
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.cbook as cbook
import matplotlib.ticker as ticker
# Price history used by both panels.
r = quandl.get("wiki/aapl", start_date="2018-01-01", end_date="2018-02-14")

# Left panel: plot against the real dates, which leaves gaps on weekends.
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
ax = axes[0]
ax.plot(r.index, r['Adj. Close'], 'o-')
ax.set_title("Default")
fig.autofmt_xdate()

# Right panel preparation: one evenly spaced integer position per row of `r`.
N = len(r)
ind = np.arange(N)
def format_date(x, pos=None):
    """Return the date label for tick position ``x``.

    Reads the module-level frame ``r`` and its row count ``N``.

    Parameters
    ----------
    x : float
        Tick position in evenly spaced row-index units.
    pos : optional
        Unused; present because matplotlib's FuncFormatter passes it.

    Returns
    -------
    str
        The index label of the nearest row of ``r`` formatted as YYYY-MM-DD.
        Positions outside the data are clamped to the first or last row.
    """
    thisind = np.clip(int(x + 0.5), 0, N - 1)
    # Format the single index entry for this position; previously the whole
    # index was formatted and returned, so `thisind` was never used.
    return r.index[thisind].strftime('%Y-%m-%d')
# Right panel: plot against the evenly spaced positions and let format_date
# turn each tick position back into a date label.
date_tick_formatter = ticker.FuncFormatter(format_date)
ax = axes[1]
ax.plot(ind, r['Adj. Close'], 'o-')
ax.xaxis.set_major_formatter(date_tick_formatter)
ax.set_title("Custom tick formatter")
fig.autofmt_xdate()
plt.show()
In [ ]: