In [6]:
# You may need to install html5lib, lxml, and BeautifulSoup4. In your terminal/command prompt run:
# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import *
from pandas.tseries.holiday import USFederalHolidayCalendar
from pylab import text
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText
# Stock of interest
ticker = "AERI"
# The Tiingo API token is read from the TIINGOTOKEN environment variable.
# Preferably set it in your OS; otherwise uncomment the line below and insert your token.
#%env TIINGOTOKEN inserttiingotokenhere
# Some formatting: do not truncate cell contents, and abbreviate long sequences.
# pandas >= 1.0 spells "no column-width limit" as None (pandas 2.x rejects -1 outright);
# older releases only accept an integer, so fall back to the legacy -1 there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_seq_items', 2)
# Only pull fresh PDUFA data: re-download the catalyst calendar when there is no
# local copy, or when the local copy was last written more than an hour ago.
one_hour_ago = datetime.now() - timedelta(hours=1)
# Use the modification time. getctime() is the creation time on Windows (it does not
# move when the file is overwritten) and the metadata-change time on Unix, so it is
# not a reliable "last downloaded" timestamp.
cache_is_fresh = (
    os.path.exists("history.csv")
    and datetime.fromtimestamp(os.path.getmtime("history.csv")) >= one_hour_ago
)
if not cache_is_fresh:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    # The first table on the page is the one persisted as the local cache.
    histdata[0].to_csv('history.csv', index=False)
# Create master PDUFA dataframe
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name = None
# The "Catalyst" cell starts with a NN/NN/NNNN date followed by free text; split the two.
# Raw string: the pattern contains "\/", which is an invalid escape sequence in a
# regular string literal (SyntaxWarning on recent Python versions).
df[["Date", "Catalyst"]] = df["Catalyst"].str.extract(
    r'(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)',
    expand=True,
)
df['Date'] = pd.to_datetime(df['Date'])
df['day_of_week'] = df['Date'].dt.day_name()
# Keep the date exactly as published before it is adjusted below.
df["Original_PDUFA"] = df["Date"]
# Adding a zero-length business-day offset leaves business days untouched and rolls
# any other date forward onto the next business day of the US federal calendar.
df["Date"] = df["Date"].map(lambda x: x + 0 * us_bd)
df["Past"] = df["Date"] - DateOffset(weeks=3)
df["Future"] = df["Date"] + DateOffset(weeks=1)
# Create local PDUFA dataframe for comparison
# A list selector always yields a DataFrame; df.loc[ticker] collapses to a Series
# (and len() then counts columns) when the ticker has exactly one event.
pdufadf = df.loc[[ticker]]
length = len(pdufadf.index)
count = 0
tiingotoken = os.environ['TIINGOTOKEN']
# Fetch prices from 45 calendar days before the earliest event to 15 after the latest.
# The plotting loop slices 20 trading rows before each event; 30 calendar days can
# contain fewer than 20 trading days once weekends and holidays are removed.
oldestpdufa = pdufadf["Date"].min().date() - timedelta(days=45)
latestpdufa = pdufadf["Date"].max().date() + timedelta(days=15)
stockdf = pd.read_json(f"https://api.tiingo.com/tiingo/daily/{ticker}/prices?startDate={oldestpdufa}&endDate={latestpdufa}&token={tiingotoken}")
# squeeze=False keeps `axes` an array even for a single event, so axes[count] keeps
# working; the single column is then flattened to the usual 1-D array of Axes.
fig, axes = plt.subplots(nrows=length, ncols=1, figsize=(14, length * 2), squeeze=False)
axes = axes[:, 0]
# Set hspace and wspace to 0 for stacked "sparklines" of sorts
fig.subplots_adjust(hspace=0, wspace=0)
# Create plot data
def _pairwise_pct_changes(closes):
    """Return (labels, changes) for every start < end pair of window positions.

    `closes` is a sequence of closing prices. The final position is excluded from
    both ends of every pair (positions run over range(len(closes) - 1)). Each change
    is the percentage move from closes[start] to closes[end]; each label is the
    string "start - end" for the same pair, in the same order.
    """
    last = len(closes) - 1
    labels = []
    changes = []
    for start in range(last):
        for end in range(start + 1, last):
            changes.append((closes[end] - closes[start]) / closes[start] * 100)
            labels.append(f"{start} - {end}")
    return labels, changes


rows_before = 20  # trading rows shown before the PDUFA row
rows_after = 5    # rows from the PDUFA row onwards (the PDUFA row plus 4 more)
allplots = []     # one price window (DataFrame) per event
e = []            # one list of pairwise percentage changes per event
for count in range(length):
    # Initialize variables for the nth PDUFA
    event = pdufadf.iloc[count]
    stage = event["Stage"]
    drug = event["Drug"]
    pdufa = event["Date"]
    ax = axes[count]

    # Annotation
    tooltip = f"stage:{stage} \n{drug}"
    at = AnchoredText(tooltip, prop=dict(size=10), frameon=True, loc=2)
    at.patch.set_boxstyle("round,pad=0.2,rounding_size=0.2")
    ax.add_artist(at)
    ax.margins(0.0, 0.0)
    ax.minorticks_on()
    ax.grid(which="major", axis="x")
    ax.grid(which="minor", axis="x")

    # Locate the PDUFA row by position (iloc below is positional, so do not rely on
    # the index labels) and fail with a clear message when it cannot be sliced.
    matches = np.flatnonzero((stockdf["date"] == pdufa).values)
    if len(matches) == 0:
        raise ValueError(f"No {ticker} price row found for PDUFA date {pdufa}")
    indexvalue = int(matches[0])
    if indexvalue < rows_before:
        # A negative slice start would wrap around and silently select the wrong rows.
        raise ValueError(
            f"Only {indexvalue} price rows before PDUFA date {pdufa}; "
            f"{rows_before} are required"
        )
    mydata = stockdf.iloc[indexvalue - rows_before:indexvalue + rows_after].copy()
    mydata["pdufa"] = mydata["date"] == pdufa
    # Renumber rows 0..n-1 so every event shares the same x axis.
    mydata = mydata.reset_index()
    allplots.append(mydata)

    closes = mydata["close"]
    close_min = closes.min()
    close_max = closes.max()
    # Leave headroom above the price line for the annotation box.
    ax.set_ylim(close_min * .99, (close_max - close_min) * 0.5 + close_max)
    ax.hlines(close_max * 1.01, mydata.index[0], mydata.index[-1], linestyle="-", lw=1, color='black')
    if count % 2 == 0:
        # Shade every other panel.
        ax.set_facecolor((0.91, 0.91, 0.91))
    # Dashed vertical marker on the PDUFA row.
    ax.vlines(mydata.index[mydata["pdufa"].values], close_min * .99, close_max * 1.01, linestyle="--", color='black')
    ax.plot(mydata.index, closes, c=np.random.uniform(low=.25, high=.7, size=(3,)), lw=2, label=pdufa)
    # ax.axes.get_xaxis().set_visible(False) # remove x axis

    # Percentage change for every (start, end) pair of rows in this window.
    ecolumns, d = _pairwise_pct_changes(closes.tolist())
    e.append(d)
# Row position of the PDUFA date inside each window; keep the earliest one.
pdufaint = min(window.index[window["pdufa"].values][0] for window in allplots)

# One row per "start - end" range, one column per event.
testdf = pd.DataFrame(e)
testdf.columns = ecolumns
calcdf = testdf.transpose()

# Snapshot the per-event return columns BEFORE helper columns are added, so the
# statistics below are computed over returns only. Taking mean/std/var of the whole
# frame would mix in StartRange, EndRange, the counts and the earlier statistics.
returns = calcdf.copy()

calcdf[["StartRange", "EndRange"]] = calcdf.index.to_series().str.split(pat=" - ", expand=True)
calcdf = calcdf.astype({"StartRange": int, "EndRange": int})
calcdf["TotalRange"] = calcdf["EndRange"] - calcdf["StartRange"]
calcdf['Total_POS'] = (returns > 0).sum(axis=1)
calcdf['Total_NEG'] = (returns <= 0).sum(axis=1)
calcdf['Mean'] = returns.mean(axis=1)
calcdf['Stddev'] = returns.std(axis=1)
calcdf['Variance'] = returns.var(axis=1)

# Keep ranges that were positive for (nearly) the maximum number of events, that end
# before the PDUFA row, and whose returns vary little between events; tightest first.
selected = (
    (calcdf['Total_POS'] >= calcdf["Total_POS"].max() - 1)
    & (calcdf["EndRange"] < pdufaint)
    & (calcdf["Stddev"] < 6)
)
calcdffinal = calcdf[selected].sort_values(by=['Stddev'], ascending=True)
# TODO: calculate a best-fit derivative pre-PDUFA to determine the consistency of the
#       curve and compare it to today.
# TODO: figure out the calculation for the "winner". Is it stddev * mean? What about
#       ranges to short?

# The ten best ranges as (start, end) window positions.
top_ranges = [
    (int(row["StartRange"]), int(row["EndRange"]))
    for _, row in calcdffinal[:10].iterrows()
]

# Bar chart: how many of the top ranges cover each window position.
plt.figure(num=None, figsize=(12, 3))
plt.xticks(np.arange(20))
plt.xlim(-1, 20)
if top_ranges:
    b = np.concatenate([np.arange(start, end + 1) for start, end in top_ranges])
    bcount = np.bincount(b)
    plt.bar(np.arange(len(bcount)), bcount)
    # Shade the same ranges on every per-event price panel. atleast_1d also copes
    # with `axes` being a single Axes object rather than an array.
    for ax in np.atleast_1d(axes):
        for start, end in top_ranges:
            ax.axvspan(start, end, color='blue', alpha=0.2)
else:
    # np.concatenate raises on an empty list, so skip the charts instead of failing.
    print("No ranges passed the filter; nothing to highlight.")
calcdffinal
Out[6]:
In [7]:
# Date range calculator for new days: translate a window range (start/end row
# positions) into calendar dates relative to an upcoming PDUFA date.
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
date = '2019-03-14'
rangestart = 12
rangeend = 14
# Row position treated as the PDUFA date; ranges are counted back from it in
# business days. NOTE(review): presumably matches the 20-row lookback used when the
# price windows are sliced - keep the two in sync.
pdufa_row = 20
# pd.date_range replaces the DatetimeIndex(start=..., end=..., freq=...) constructor
# form, which newer pandas releases no longer accept.
df_buy = pd.DataFrame(pd.date_range(start=date, end=date, freq="D"), columns=["Date"])
df_buy["PDUFA"] = df_buy["Date"].map(lambda x: x - 0 * us_bd)
df_buy["BUY at EOD"] = df_buy["PDUFA"].map(lambda x: x - (pdufa_row - rangestart) * us_bd)
df_buy["SELL at EOD"] = df_buy["PDUFA"].map(lambda x: x - (pdufa_row - rangeend) * us_bd)
df_buy
Out[7]:
In [ ]: