In [6]:
import requests
from bs4 import BeautifulSoup as BSoup
import pandas as pd
In [1]:
import pandas as pd
from sqlalchemy import create_engine
engine = create_engine('sqlite:///edgar_idx.db')
conn = engine.connect()
conn.begin()
Out[1]:
In [11]:
import sqlite3
conn3 = sqlite3.connect('edgar_idx.db')
cursor = conn3.cursor()
In [12]:
ticker = "DQ"
#cursor.execute('''SELECT * FROM idx WHERE Symbol=?;''', ("ABBV",))
cursor.execute('''SELECT * FROM cik_ticker_name WHERE ticker=?;''', (ticker,))
res = cursor.fetchall()
print(res)
cursor.execute('''SELECT * FROM idx WHERE cik=?;''', (res[0][0],))
recs = cursor.fetchall()
names = list(map(lambda x: x[0], cursor.description))
print(names)
df = pd.DataFrame(data=recs, columns=names)
df['date'] = pd.to_datetime(df['date'])
df.columns
df.size
df.dtypes
#print(type(recs))
print(recs)
#conn3.close()
#df[df.type == "10-Q"].to_csv("TestData\\"+ticker.lower()+"_all_10qs.csv", index=None)
#df[df.type == "10-K"].to_csv("TestData\\"+ticker.lower()+"_all_10ks.csv", index=None)
df[df.type == "20-F"].to_csv("TestData\\"+ticker.lower()+"_all_20fs.csv", index=None)
In [10]:
ticker = "APTV"
req = requests.get("https://www.sec.gov/cgi-bin/browse-edgar?CIK={:s}&owner=exclude&action=getcompany&Find=Search".format(ticker.lower()))
soup = BSoup(req.content, "lxml")
conmTag = soup.find("span", {"class": "companyName"})
if conmTag:
    print(conmTag.text)
atags = soup.findAll("a")
for t in atags:
    if "see all company filings" in t.text:
        print(t.text)
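EDGAR may reject automated requests that lack an identifying User-Agent header; a hedged variant of the request above (the header value is a placeholder, not from the original):
In [ ]:
hdrs = {"User-Agent": "your-name your-email@example.com"}  # placeholder contact info
req = requests.get("https://www.sec.gov/cgi-bin/browse-edgar?CIK={:s}"
                   "&owner=exclude&action=getcompany&Find=Search".format(ticker.lower()),
                   headers=hdrs)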
In [9]:
class ATest:
    def __init__(self):
        self.d = {}

    def getValFromDict(self, n):
        # populate the dict lazily on first access
        if not self.d:
            for i in range(20):
                self.d["Val-{:d}".format(i)] = i
        print(self.d)
        return self.d.get("Val-{:d}".format(n))
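A quick usage sketch for the class above (the argument value is just illustrative):
In [ ]:
a = ATest()
val = a.getValFromDict(3)  # first call fills the dict, then looks up "Val-3"
print(val)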
In [12]:
all_10Qs.to_csv("TestData\\appl_all_10qs.csv", index=None)
all_10Ks.to_csv("TestData\\appl_all_10ks.csv", index=None)
In [13]:
all_10Ks = pd.read_csv("TestData\\nvda_all_10ks.csv", parse_dates=['date'], dtype={'cik': str, 'conm': str, 'type': str, 'path': str})
all_10Qs = pd.read_csv("TestData\\nvda_all_10qs.csv", parse_dates=['date'], dtype={'cik': str, 'conm': str, 'type': str, 'path': str})
In [9]:
import numpy as np
import matplotlib.pyplot as plt
## NVDA
#y= np.array([3123000000, 3207000000, 2911000000, 2636000000, 2230000000]) #sales
#y= np.array([1.81, 2.05, 1.86, 1.39, 0.98]) #eps
#x=np.array([0.0, -91, -182, -273, -364])
## AAPL
#se = np.array([114949, 126878, 140199]) * 1000.0
#ni = np.array([11519, 13822, 20065 ]) * 1000000.0
#sales = np.array([53265, 61137, 88293, 52579, 45408, 52896, 78351]) * 1000000.0
#eps = np.array([2.36, 2.75, 3.92, 2.18, 1.68, 2.11, 3.38])
## ACLS
se = np.array([397074, 385614, 371527]) * 1000.0
ni = np.array([8838, 14669, 13915]) * 1000.0
sales = np.array([95374, 119333, 122185]) * 1000.0
eps = np.array([0.27, 0.46, 0.43])
days = np.array([0.0, -91, -182, -273, -364, -455, -546])
y = sales[:3]
x = days[:3]
In [10]:
# A degree-2 fit through 3 points interpolates them exactly, so the residuals are ~0.
p = np.polyfit(x, y, 2)
yfit = np.polyval(p, x)
sigma = (y - yfit) / y  # relative residuals
error = sigma * sigma
res = error.sum()
plt.plot(x,y)
plt.plot(x,yfit)
plt.show()
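Since the quadratic passes exactly through the three points, the fit is only interesting for extrapolation; a sketch evaluating the polynomial one quarter ahead (+91 days, matching the spacing in days, is an assumption):
In [ ]:
# Extrapolate the fitted polynomial ~one quarter into the future (assumed +91 days).
next_q = np.polyval(p, 91.0)
print("extrapolated next-quarter sales: {:.0f}".format(next_q))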
In [11]:
print(p)
print(error)
print(res)
print((sales[2] - sales[1])/(days[2] - days[1]))
print((eps[2] - eps[1])/(days[2] - days[1]))
print((y[2] - y[0])/(x[2] - x[0]))
In [21]:
import datetime as dt
(dt.datetime(2016,12, 31) - dt.datetime(2018,6, 30)).days
Out[21]:
-546
In [32]:
9.35-1.68- 2.11- 3.38
Out[32]:
In [13]:
229234 - 45408- 52896- 78351
Out[13]:
52579
In [44]:
import pandas as pd
all_10Qs = pd.read_csv("TestData\\nvda_all_10qs.csv", parse_dates=['date'], dtype={'cik': str, 'conm': str, 'type': str, 'path': str})
In [50]:
all_10Qs[all_10Qs.date == "2017-05-23"].index[0]
Out[50]:
In [ ]:
import requests
from bs4 import BeautifulSoup as Bsoup
import pandas as pd
In [ ]:
ticker = "NVDA"
## Read in the file
f = open ("1045810_NVIDIA-CORP_10-Q_2017-08-23", "r")
lines = f.readlines ()
f.close ()
ser = pd.Series (lines)
#print (ser)
#this denotes where the actual 10-Q starts. I think the 10-Q part is enclosed by <document> tags
html_idxs = (ser[ser == '<TYPE>10-Q\n'])
#get the line number (== index number):
print ("html_idxs: {}".format(html_idxs.index.values))
#and the corresponding line
print (lines[html_idxs.index.values[0]])
#html_idxs_end = (ser['<\TYPE>' in ser])
#print ("html_idxs_end: {}".format (html_idxs_end.index.values))
#print (lines[html_idxs_end.index.values[0]])
#now I can parse the 10-Q with beautifulsoup:
## Find a way to determine which lines to parse automatically. Really, I only need the html-tag, i.e. lines 55-63
## (see NVDA_finstat_test.hmtl)
soup = Bsoup ("".join (lines[55:63]), "lxml") #find the end of the section by searching for /Document?
#collect all div-tags for future use
all_divs = soup.find_all("div")
#find the div-tag that contains 'ITEM 1.  FINANCIAL STATEMENTS', which indicates the start of the
#income statement's table
#will this work on all filings???
print("found {} occurrences".format(len(all_divs)))
tables = soup.find_all("table")
print("found {} tables".format(len(tables)))
#iterate over the div-tags; not sure if there is a better way. I think everything is in div tags.
flag = False
count = 0
for f in all_divs:
    #find the beginning of the financial statements
    if f.get_text().startswith("ITEM 1.") and "FINANCIAL STATEMENTS" in f.get_text():
        flag = True
        #print(count)
    if flag:
        #print(f.get_text())
        if "three months ended" in f.text.lower():
            ## At this point, we found the div-tag with the income-statement table.
            ## Try to use pandas.
            #print(f.find_all('div', text='$'))
            for ff in f.find_all('div', text='$'):
                ff.decompose()
            ## Almost! Just need to get rid of the '$' signs, which offset the affected rows by several columns.
            ## This removes the '$' (use re.compile for other currency symbols?); now what about the parens denoting negative numbers?
            tableDf = pd.read_html(str(f))
            print(tableDf)
            ## Manual method
            trs = f.find_all("tr")
            #print(len(trs))
            #print(trs[2].get_text())
            for t in trs:
                tlist = [item.get_text() for item in t.find_all("td")]
                print(" || ".join(tlist))
            #print(trs)
            break
    #print("\n")
    count += 1
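A sketch for finding the slice to parse automatically instead of hard-coding lines[55:63], assuming every document in the filing is wrapped in <DOCUMENT>...</DOCUMENT> with a <TYPE> tag, as seen above:
In [ ]:
# Locate the 10-Q document by its <TYPE> tag, then the closing </DOCUMENT> line.
start = next(i for i, l in enumerate(lines) if l.strip() == '<TYPE>10-Q')
end = next(i for i, l in enumerate(lines[start:], start) if l.strip() == '</DOCUMENT>')
soup = Bsoup("".join(lines[start:end]), "lxml")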
<description> XBRL INSTANCE DOCUMENT. Inside the description-tag is the <XBRL>, which contains the XBRL-data for the instance document. Note that there are several XBRL- tags containing the various XBRL documents (schema, extension, linkbases, ...). <xbrli:context id="FI2017Q4">
<xbrli:entity>
<xbrli:identifier scheme="http://www.sec.gov/CIK">0001045810</xbrli:identifier>
</xbrli:entity>
<xbrli:period>
<xbrli:instant>2017-01-29</xbrli:instant>
</xbrli:period>
</xbrli:context>
### Interesting classifiers: