In [6]:
# You may need to install htmllib5,lxml, and BeautifulSoup4. In your terminal/command prompt run:

# conda install lxml
# conda install html5lib
# conda install BeautifulSoup4
# Then restart Jupyter Notebook. (or use pip install if you aren't using the Anaconda Distribution)
import numpy as np
import pandas as pd
import os.path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import *
from pandas.tseries.holiday import USFederalHolidayCalendar
from pylab import text
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText

#stock of interest
ticker="AERI"

#You'll need to first set your API token here and uncomment or set externally in your OS preferably.
#%env TIINGOTOKEN inserttiingotokenhere

# Some formatting
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_seq_items', 2)

# Only pull fresh PDUFA data
one_hour_ago = datetime.now() - timedelta(hours=1)
if os.path.exists("history.csv"):
    filetime = datetime.fromtimestamp(os.path.getctime("history.csv"))
    if filetime < one_hour_ago:
        histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
        histdata[0].to_csv('history.csv',index=False)
else:
    histdata = pd.read_html("https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar")
    histdata[0].to_csv('history.csv',index=False)

# Create master PDUFA dataframe
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
df = pd.read_csv('history.csv').set_index('Ticker')
df.index.name=None
df[["Date","Catalyst"]] = df.Catalyst.str.extract('(?P<Date>[0-9]{2}\/[0-9]{2}\/[0-9]{4})(?P<Catalyst>.*)', expand=True)
df['Date'] = pd.to_datetime(df['Date'])
df['day_of_week'] = df['Date'].dt.day_name()
df["Original_PDUFA"] = df["Date"]
df["Date"] = df["Date"].map(lambda x : x + 0*us_bd)
df["Past"] = df["Date"] - DateOffset(weeks=3)
df["Future"] = df["Date"] + DateOffset(weeks=1)    

# Create local PDUFA dataframe for comparison
pdufadf = df.loc[ticker]
length = len(pdufadf.index)
count = 0
tiingotoken = os.environ['TIINGOTOKEN']
oldestpdufa = df.loc[ticker]["Date"].min().date() - timedelta(days=30)
latestpdufa = df.loc[ticker]["Date"].max().date() + timedelta(days=15)
stockdf = pd.DataFrame(pd.read_json(f"https://api.tiingo.com/tiingo/daily/{ticker}/prices?startDate={oldestpdufa}&endDate={latestpdufa}&token={tiingotoken}"))

fig, axes = plt.subplots(nrows=length, ncols=1,figsize=(14,length * 2))
#set hspace and wspace to 0 for stacked "sparklines" of sorts
fig.subplots_adjust(hspace=0, wspace=0)

# Create plot data
allplots=[]
e=[]
while (count < length):
    #Initialize variable for nth PDUFA
    stage = pdufadf.iloc[count]["Stage"]
    catalyst = pdufadf.iloc[count]["Catalyst"]
    drug = pdufadf.iloc[count]["Drug"]
    
    #Annotation
    tooltip = f"stage:{stage} \n{drug}"
    at = AnchoredText(tooltip,
                      prop=dict(size=10), frameon=True,
                      loc=2, 
                      )
    at.patch.set_boxstyle("round,pad=0.2,rounding_size=0.2")
    axes[count].add_artist(at)
    axes[count].margins(0.0, 0.0)
    axes[count].minorticks_on()
    axes[count].grid(which="major", axis="x")
    axes[count].grid(which="minor", axis="x")

    pdufa=pdufadf.iloc[count]["Date"]
    indexvalue = stockdf.index[stockdf["date"] == pdufadf.iloc[count]["Date"]][0]
    mydata = stockdf.iloc[indexvalue - 20:indexvalue + 5,].copy()
    allplots.append(mydata)
    allplots[count]["pdufa"] = allplots[count]["date"] == pdufa
    allplots[count].reset_index(inplace=True)
    axes[count].set_ylim(allplots[count]["close"].min() * .99 , (allplots[count]["close"].max()-allplots[count]["close"].min()) * 0.5 + allplots[count]["close"].max())
    axes[count].hlines(allplots[count]["close"].max() * 1.01, allplots[count].head(1).index, allplots[count].tail(1).index, linestyle="-", lw=1, color='black')
    if count % 2 == 0:
        axes[count].set_facecolor((0.91, 0.91, 0.91))  
    axes[count].vlines(allplots[count][allplots[count]["pdufa"] == True].index, allplots[count]["close"].min() * .99, allplots[count]["close"].max() * 1.01, linestyle="--", color='black')
    axes[count].plot(allplots[count].index,allplots[count]["close"],c=np.random.uniform(low=.25, high=.7, size=(3,)), lw=2, label=pdufa) 
    plt.minorticks_on()
#     axes[count].axes.get_xaxis().set_visible(False) # remove x axis
    i = 0
    total = len(allplots[count].index) - 1
    d=[]
    ecolumns=[]
    while (i < total):
        n = i + 1
        while (n < total):  
            rangevalue=((allplots[count].iloc[n]["close"] - allplots[count].iloc[i]["close"])/allplots[count].iloc[i]["close"] * 100)
            d.append(rangevalue)
            
            rangetest = f"{i} - {n}"
            ecolumns.append(rangetest)
            n=n + 1
        i = i + 1

    e.append(d)
    count = count + 1
#     calcdf = pd.DataFrame(e).transpose()
pdufaint=[]
for x in allplots[:]:
    pdufaint.append((x[x["pdufa"] == True].index.values[0]))
pdufaint=min(pdufaint)
testdf=pd.DataFrame(e)
testdf.columns=ecolumns
calcdf=testdf.transpose()
calcdf[["StartRange","EndRange"]] = calcdf.index.to_series().str.split(pat = " - ",expand=True)
calcdf = calcdf.astype({"StartRange": int, "EndRange": int})
calcdf["TotalRange"] = calcdf["EndRange"] - calcdf["StartRange"]
calcdf['Total_POS'] = (calcdf.iloc[:, : len(pdufadf.index)] > 0).sum(axis=1)
calcdf['Total_NEG'] = (calcdf.iloc[:, : len(pdufadf.index)] <= 0).sum(axis=1)
calcdf['Mean'] = calcdf.mean(axis=1)
calcdf['Stddev'] = calcdf.std(axis=1)
calcdf['Variance'] = calcdf.var(axis=1)
avx=0

calcdffinal = calcdf[(calcdf['Total_POS'] >= calcdf["Total_POS"].max() - 1)&(calcdf["EndRange"] < pdufaint)&(calcdf["Stddev"] < 6)].sort_values(by=['Stddev'],ascending=True)
calcdffinal
#CALCULATE BEST FIT DERIVATIVE PRE-PDUFA TO DETERMINE CONSISTENCY OF CURVE AND COMPARE TO TODAY.
#Figure out calculation for "winner". Is it stddev * mean? What about ranges to short?
# plt.show()
# %matplotlib inline
plt.figure(num=None, figsize=(12, 3))

a=[]
for index, row in calcdffinal[:10].iterrows():
    a.append(list(range(int(row['StartRange']), int(row['EndRange'])+1)))
b=np.concatenate(a)
bcount = np.bincount(b)
plt.xticks(np.arange(20))
plt.xlim(-1,20)
plt.bar(list(range(0,max(b)+1)),bcount)
avx=0
while (avx < length):
#     axes[avx].axvspan(calcdf[(calcdf['Total_POS'] >= calcdf["Total_POS"].max() - 1)].sort_values(by=['Stddev'],ascending=True).iloc[0]["StartRange"]
# , calcdf[(calcdf['Total_POS'] >= calcdf["Total_POS"].max() - 1)].sort_values(by=['Stddev'],ascending=True).iloc[0]["EndRange"]
# , color='green',alpha=1.0)
    for index, row in calcdffinal[:10].iterrows():
#     axes[avx].axvspan(7,10,color='yellow',alpha=0.2)
        highlight1=int(row["StartRange"])
        highlight2=int(row["EndRange"])
        axes[avx].axvspan(highlight1,highlight2,color='blue',alpha=0.2)
    avx = avx + 1
calcdffinal


Out[6]:
0 1 2 3 StartRange EndRange TotalRange Total_POS Total_NEG Mean Stddev Variance
0 - 1 -0.634921 4.829211 3.087479 1.703578 0 1 1 3 1 1.665038 1.605820 2.321111
0 - 2 0.899471 3.769140 0.514580 4.599659 0 2 2 4 0 1.975872 1.676653 2.538188
5 - 6 -3.497212 1.139241 2.845927 1.312551 5 6 1 3 1 1.977834 2.585086 6.047924
5 - 8 -4.865687 1.645570 1.570167 5.414274 5 8 3 3 1 2.640480 3.389745 10.392372
0 - 3 1.322751 3.297998 -8.404803 4.855196 0 3 3 3 1 1.230127 3.670016 12.663306
12 - 14 1.874024 2.816901 5.517909 4.616725 12 14 2 4 0 5.202840 4.466946 18.007479
12 - 13 2.030193 3.329065 5.227493 0.348432 12 13 1 4 0 4.548354 4.551737 18.646477
11 - 14 1.084711 1.517067 7.177974 1.952462 11 14 3 4 0 4.859135 4.574237 18.838658
6 - 14 2.783613 0.500626 4.007634 -2.753036 6 14 8 3 1 4.059871 4.586169 18.954832
12 - 17 4.633004 5.121639 7.163601 4.878049 12 17 5 4 0 6.644033 4.708384 20.292606
11 - 13 1.239669 2.022756 6.882989 -2.207131 11 13 2 3 1 4.215365 4.736989 20.219894
11 - 17 3.822314 3.792668 8.849558 2.207131 11 17 6 4 0 6.296852 4.914009 21.906579
11 - 16 9.969008 5.562579 3.638151 0.084890 11 16 5 4 0 6.139403 4.970315 22.357884
6 - 13 2.941176 1.001252 3.721374 -6.720648 6 13 7 3 1 3.438128 5.014227 22.854048
12 - 16 10.827694 6.914213 2.032914 2.700348 12 16 4 4 0 6.497241 5.039209 23.047525
12 - 15 5.830297 -1.920615 1.936108 3.135889 12 15 3 3 1 4.775742 5.098954 23.408899
11 - 15 5.010331 -3.160556 3.539823 0.509338 11 15 4 3 1 4.433215 5.196171 24.353093
5 - 16 7.906741 5.696203 3.434740 -3.281378 5 16 11 3 1 5.528478 5.317179 25.449209
13 - 17 2.551020 1.734820 1.839926 4.513889 13 17 4 4 0 5.404406 5.379068 26.040995
6 - 17 5.567227 2.753442 5.629771 -2.510121 6 17 11 3 1 5.493369 5.384923 26.098728
7 - 13 3.594080 0.248447 6.988189 -7.618284 7 13 6 3 1 3.690270 5.388149 26.391004
10 - 14 0.772400 0.124688 12.139918 -2.198697 10 14 4 3 1 4.759812 5.486567 27.140188
5 - 17 1.875317 3.924051 8.635918 -1.230517 5 17 12 3 1 5.689419 5.494268 27.171749
10 - 16 9.629248 4.114713 8.436214 -3.990228 10 16 6 3 1 6.021105 5.502136 27.270638
7 - 17 6.236786 1.987578 8.956693 -3.448276 7 17 10 3 1 5.748087 5.633199 28.560841
13 - 16 8.622449 3.469641 -3.035879 2.343750 13 16 3 3 1 5.266662 5.729603 29.565000
14 - 17 2.708227 2.241594 1.559633 0.249792 14 17 3 4 0 4.973250 5.795163 30.286932
10 - 13 0.926880 0.623441 11.831276 -6.188925 10 13 3 3 1 4.132519 5.905085 31.668659
10 - 17 3.501545 2.369077 13.888889 -1.954397 10 17 7 3 1 6.200568 5.930813 31.663699
15 - 17 -1.131333 7.180157 5.128205 1.689189 15 17 2 3 1 5.651802 5.986101 32.260222
8 - 13 4.421950 0.498132 5.024155 -10.350195 8 13 5 3 1 3.288227 5.989421 32.949165
6 - 16 11.817227 4.505632 0.572519 -4.534413 6 16 10 3 1 5.373441 5.990469 32.331759
9 - 14 3.054239 0.752823 10.772358 -7.473035 9 14 5 3 1 4.345154 5.998720 32.634752

In [7]:
#Date Range Calculator for new days
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())

date='2019-03-14'
rangestart=12
rangeend=14

df_buy = pd.DataFrame(pd.DatetimeIndex(start=date,end=date, freq="D"), columns=["Date"])
df_buy["PDUFA"] = df_buy["Date"].map(lambda x : x - 0*us_bd)
df_buy["BUY at EOD"] = df_buy["PDUFA"].map(lambda x : x - (20 - rangestart)*us_bd)
df_buy["SELL at EOD"] = df_buy["PDUFA"].map(lambda x : x - (20 - rangeend)*us_bd)
df_buy


Out[7]:
Date PDUFA BUY at EOD SELL at EOD
0 2019-03-14 2019-03-14 2019-03-04 2019-03-06

In [ ]: