In [2]:
# Banner announcing what this notebook produces.
print('Make the "Get the Data" widget code.')
In [1]:
import bs4
import requests
def makeCode(folder):
    """Fetch a GitHub folder page and return its file-link tags.

    ``folder`` is the URL that gets downloaded. The return value is the list
    of elements matching the ``.js-directory-link`` CSS class, as produced by
    BeautifulSoup's ``select``.

    An HTTP error is raised through ``raise_for_status`` when the request
    comes back with an error status.
    """
    page = requests.get(folder)
    page.raise_for_status()
    # Each file listed on the page is a tag carrying this CSS class.
    return bs4.BeautifulSoup(page.text).select('.js-directory-link')
# Prompt for the GitHub folder URL and run makeCode on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
makeCode(myFolder)
Out[1]:
In [3]:
import bs4
import requests
def makeCode(folder):
    """Print the href of every file link found on a GitHub folder page.

    Downloads ``folder``, selects the elements with the
    ``.js-directory-link`` CSS class and prints the list of their ``href``
    attributes. Nothing is returned.

    An HTTP error is raised through ``raise_for_status`` when the request
    comes back with an error status.
    """
    page = requests.get(folder)
    page.raise_for_status()
    soup = bs4.BeautifulSoup(page.text)
    # One tag per listed file; keep only the link target of each.
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    print(hrefs)
# Prompt for the GitHub folder URL and run makeCode on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
makeCode(myFolder)
In [13]:
import bs4
import requests
def makeCode(folder):
    """Return the file links on a GitHub folder page, minus the README.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class and leaves out any link containing
    ``README.md``. The links are returned as found on the page; the
    repository prefix is not stripped.

    An HTTP error is raised through ``raise_for_status`` when the request
    comes back with an error status.
    """
    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    return [href for href in hrefs if "README.md" not in href]
# Prompt for the GitHub folder URL and run makeCode on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
makeCode(myFolder)
Out[13]:
In [29]:
import bs4
import requests
def stripUrls(folder):
    """Return repository-relative paths of the data files in a GitHub folder.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, leaves out links containing
    ``README.md`` and removes the
    ``/InsideEnergy/Data-for-stories/blob/master`` prefix from the rest.
    Links that do not contain that prefix are not included in the result.

    An HTTP error is raised through ``raise_for_status`` when the request
    comes back with an error status.
    """
    prefix = "/InsideEnergy/Data-for-stories/blob/master"
    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    urls = [href for href in hrefs if "README.md" not in href]
    return [url.replace(prefix, "") for url in urls if prefix in url]
# Prompt for the GitHub folder URL and run stripUrls on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
stripUrls(myFolder)
Out[29]:
In [55]:
import bs4
import requests
def makeCode(folder):
    """Print rawgit download URLs for the first two data files in a folder.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, leaves out links containing
    ``README.md`` and strips the
    ``/InsideEnergy/Data-for-stories/blob/master`` prefix. The first
    remaining path is printed as the CSV URL and the second as the XLS URL.

    Raises IndexError when fewer than two data files are found, and an HTTP
    error through ``raise_for_status`` when the request comes back with an
    error status.
    """
    prefix = "/InsideEnergy/Data-for-stories/blob/master"
    rawgit = "http://rawgit.com/insideenergy/Data-for-stories/master%s"

    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    urls = [href for href in hrefs if "README.md" not in href]
    halfUrls = [url.replace(prefix, "") for url in urls if prefix in url]

    if len(halfUrls) < 2:
        raise IndexError(
            "expected a CSV and an XLS file in %s, found %d data file(s)"
            % (folder, len(halfUrls)))

    # Position decides which file is which: first is treated as the CSV,
    # second as the XLS.
    codeHasCsv = rawgit % halfUrls[0]
    print(codeHasCsv)
    codeHasXls = rawgit % halfUrls[1]
    print(codeHasXls)
# Prompt for the GitHub folder URL and run makeCode on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
makeCode(myFolder)
In [58]:
import bs4
import requests
def makeCode(folder):
    """Print the "Get the data" widget HTML for a GitHub data folder.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, leaves out links containing
    ``README.md`` and strips the
    ``/InsideEnergy/Data-for-stories/blob/master`` prefix. The first
    remaining path becomes the CSV link and the second the XLS link. The
    Google Sheets link is emitted as a placeholder to be filled in by hand.

    Raises IndexError when fewer than two data files are found, and an HTTP
    error through ``raise_for_status`` when the request comes back with an
    error status.
    """
    prefix = "/InsideEnergy/Data-for-stories/blob/master"
    rawgit = "http://rawgit.com/insideenergy/Data-for-stories/master%s"

    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    urls = [href for href in hrefs if "README.md" not in href]
    halfUrls = [url.replace(prefix, "") for url in urls if prefix in url]

    if len(halfUrls) < 2:
        raise IndexError(
            "expected a CSV and an XLS file in %s, found %d data file(s)"
            % (folder, len(halfUrls)))

    # Position decides which file is which: first is treated as the CSV,
    # second as the XLS.
    codeHasCsv = rawgit % halfUrls[0]
    codeHasXls = rawgit % halfUrls[1]

    widgetCode = (
        "<small><strong> Get the data: <a href='" + codeHasCsv
        + "'>CSV</a> | <a href='" + codeHasXls
        + "'>XLS</a> | <a href='GOOGLE SHEETS LINK YOU JUST MADE' target='_blank'>Google Sheets</a> | Source and notes: <a href='"
        + folder + "'>Github</a> </strong></small>")
    print(widgetCode)
# Prompt for the GitHub folder URL and run makeCode on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
makeCode(myFolder)
In [59]:
import bs4
import requests
def makeCode(folder, sheet):
    """Print the "Get the data" widget HTML for a GitHub data folder.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, leaves out links containing
    ``README.md`` and strips the
    ``/InsideEnergy/Data-for-stories/blob/master`` prefix. The first
    remaining path becomes the CSV link and the second the XLS link;
    ``sheet`` is used verbatim as the Google Sheets link and ``folder`` as
    the Github link.

    Raises IndexError when fewer than two data files are found, and an HTTP
    error through ``raise_for_status`` when the request comes back with an
    error status.
    """
    prefix = "/InsideEnergy/Data-for-stories/blob/master"
    rawgit = "http://rawgit.com/insideenergy/Data-for-stories/master%s"

    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    urls = [href for href in hrefs if "README.md" not in href]
    halfUrls = [url.replace(prefix, "") for url in urls if prefix in url]

    if len(halfUrls) < 2:
        raise IndexError(
            "expected a CSV and an XLS file in %s, found %d data file(s)"
            % (folder, len(halfUrls)))

    # Position decides which file is which: first is treated as the CSV,
    # second as the XLS.
    codeHasCsv = rawgit % halfUrls[0]
    codeHasXls = rawgit % halfUrls[1]

    # Now concatenate the pieces into the widget markup.
    widgetCode = (
        "<small><strong> Get the data: <a href='" + codeHasCsv
        + "'>CSV</a> | <a href='" + codeHasXls
        + "'>XLS</a> | <a href='" + sheet
        + "' target='_blank'>Google Sheets</a> | Source and notes: <a href='"
        + folder + "'>Github</a> </strong></small>")
    print(widgetCode)
# Prompt for the GitHub folder URL and the Google Sheets URL, then run
# makeCode on them.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
print("Enter Google Sheets URL for public viewing:")
mySheet = raw_input().strip()
makeCode(myFolder, mySheet)
In [61]:
import bs4
import requests
def makeCode(folder, sheet):
    """Print the "Get the data" widget HTML for a GitHub data folder.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, leaves out links containing
    ``README.md`` and strips the
    ``/InsideEnergy/Data-for-stories/blob/master`` prefix. The first
    remaining path becomes the CSV link and the second the XLS link;
    ``sheet`` is used verbatim as the Google Sheets link and ``folder`` as
    the Github link.

    Raises IndexError when fewer than two data files are found, and an HTTP
    error through ``raise_for_status`` when the request comes back with an
    error status.
    """
    prefix = "/InsideEnergy/Data-for-stories/blob/master"
    rawgit = "http://rawgit.com/insideenergy/Data-for-stories/master%s"

    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    urls = [href for href in hrefs if "README.md" not in href]
    halfUrls = [url.replace(prefix, "") for url in urls if prefix in url]

    if len(halfUrls) < 2:
        raise IndexError(
            "expected a CSV and an XLS file in %s, found %d data file(s)"
            % (folder, len(halfUrls)))

    # Position decides which file is which: first is treated as the CSV,
    # second as the XLS.
    codeHasCsv = rawgit % halfUrls[0]
    codeHasXls = rawgit % halfUrls[1]

    # Now concatenate the pieces into the widget markup.
    widgetCode = (
        "<small><strong> Get the data: <a href='" + codeHasCsv
        + "'>CSV</a> | <a href='" + codeHasXls
        + "'>XLS</a> | <a href='" + sheet
        + "' target='_blank'>Google Sheets</a> | Source and notes: <a href='"
        + folder + "'>Github</a> </strong></small>")
    print(widgetCode)
# Prompt for the GitHub folder URL and the Google Sheets URL, then print the
# widget markup under a banner.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
print("Enter Google Sheets URL for public viewing:")
mySheet = raw_input().strip()
print("~~~~~~~~~~Widget Code - Paste this below your chart~~~~~~~~~~")
makeCode(myFolder, mySheet)
In [1]:
#new function needs to strip off .csv and .xlsx
#needs to say, if two items match, get rid of duplicate
#then add each into its own widget code, enter new sheets input for each one
In [9]:
import bs4
import requests
def stripUrls(folder):
    """Return repository-relative paths of the data files in a GitHub folder.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, leaves out links containing
    ``README.md`` and removes the
    ``/InsideEnergy/Data-for-stories/blob/master`` prefix from the rest.
    Links that do not contain that prefix are not included in the result.

    An HTTP error is raised through ``raise_for_status`` when the request
    comes back with an error status.
    """
    prefix = "/InsideEnergy/Data-for-stories/blob/master"
    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    urls = [href for href in hrefs if "README.md" not in href]
    # Strip the repository prefix off the front of each remaining link.
    return [url.replace(prefix, "") for url in urls if prefix in url]
# Prompt for the GitHub folder URL and run stripUrls on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
stripUrls(myFolder)
Out[9]:
In [2]:
import bs4
import requests
def stripUrls(folder):
    """Print the data-file paths of a GitHub folder, with and without extensions.

    Downloads ``folder``, reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, leaves out links containing
    ``README.md`` and strips the
    ``/InsideEnergy/Data-for-stories/blob/master`` prefix. The stripped
    paths are printed first, then the same paths with ``.csv`` / ``.xlsx``
    removed. Nothing is returned.

    An HTTP error is raised through ``raise_for_status`` when the request
    comes back with an error status.
    """
    prefix = "/InsideEnergy/Data-for-stories/blob/master"
    response = requests.get(folder)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text)
    hrefs = [tag.get('href') for tag in soup.select('.js-directory-link')]
    # Filter into a new list: removing items from a list while looping over
    # it skips the element that follows each removal.
    urls = [href for href in hrefs if "README.md" not in href]
    # Strip the repository prefix off the front of each remaining link.
    halfUrls = [url.replace(prefix, "") for url in urls if prefix in url]
    print(halfUrls)

    # Take the file extensions off; a path is added once per extension it
    # contains, so a CSV/XLSX pair yields the same stem twice.
    justFolders = []
    for path in halfUrls:
        for extension in (".csv", ".xlsx"):
            if extension in path:
                justFolders.append(path.replace(extension, ""))
    print(justFolders)
# Prompt for the GitHub folder URL and run stripUrls on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
stripUrls(myFolder)
In [7]:
import bs4
import requests
def makeCode(folder):
    """Print one "Get the data" widget per data set found in a GitHub folder.

    Downloads ``folder`` and reads the ``href`` of every element with the
    ``.js-directory-link`` CSS class, ignoring links that contain
    ``README.md``. The ``/InsideEnergy/Data-for-stories/blob/master`` prefix
    and the ``.csv`` / ``.xlsx`` extensions are removed, and repeated stems
    are collapsed. For every remaining stem the user is asked for the
    matching Google Sheets URL and the widget markup is printed.

    An HTTP error is raised through ``raise_for_status`` when the request
    comes back with an error status.
    """
    repo_prefix = "/InsideEnergy/Data-for-stories/blob/master"

    page = requests.get(folder)
    page.raise_for_status()
    soup = bs4.BeautifulSoup(page.text)
    tags = soup.select('.js-directory-link')  # one tag per listed file

    # Link targets of everything except the readme file.
    hrefs = [tag.get('href') for tag in tags
             if 'README.md' not in tag.get('href')]

    # Strip the repository prefix off the front of each link.
    relative_paths = [href.replace(repo_prefix, "")
                      for href in hrefs if repo_prefix in href]

    # Take the file extensions off, keeping only the first occurrence of
    # each resulting stem.
    stems = []
    for path in relative_paths:
        for extension in (".csv", ".xlsx"):
            if extension in path:
                stem = path.replace(extension, "")
                if stem not in stems:
                    stems.append(stem)

    # Build the widget for each stem, asking for its Google Sheets URL.
    for stem in stems:
        print("Enter the Google Sheets URL for public viewing that corresponds with " + stem)
        sheet_url = raw_input()
        print("~~~~~~~~~~Widget code for " + stem + "~~~~~~~~~~")
        print("")
        print('<small><strong> Get the data: <a href="http://rawgit.com/insideenergy/Data-for-stories/master' + stem + '.csv">CSV</a> | <a href="http://rawgit.com/insideenergy/Data-for-stories/master' + stem + '.xlsx">XLS</a> | <a href="' + sheet_url + '" target="_blank">Google Sheets</a> | Source and notes: <a href="' + folder + '">Github</a> </strong></small>')
        print("")
# Prompt for the GitHub folder URL and run makeCode on it.
print('Make the "Get the Data" widget code.')
print("Enter GitHub URL of your new folder inside 'Data-for-stories':")
# Drop stray whitespace that comes along with a pasted URL.
myFolder = raw_input().strip()
makeCode(myFolder)
In [ ]: