In [1]:
import pandas as pd
import os
import glob
import requests
In [2]:
mypath = "data/satellite/colorado/summer6months/"
http_files = filter(os.path.isfile, glob.glob(mypath + "*"))
http_files.sort(key=lambda x: os.path.getmtime(x))
http_files = [os.path.basename(i) for i in http_files]
my_list_file = http_files[0] #pop out the list used to generate files in this dir
http_files = http_files[1:] #keep what is not popped out
In [3]:
with open(mypath + my_list_file) as f: #find noaa http files
begin_urls = f.read().splitlines()
In [4]:
begin_urls
Out[4]:
['http://download.class.ngdc.noaa.gov/download/1985025503/001',
'http://download.class.ngdc.noaa.gov/download/1985025513/001',
'http://download.class.ngdc.noaa.gov/download/1985025523/001',
'http://download.class.ngdc.noaa.gov/download/1985025983/001',
'http://download.class.ngdc.noaa.gov/download/1985026333/001',
'http://download.class.ngdc.noaa.gov/download/1985026393/001',
'http://download.class.ngdc.noaa.gov/download/1985026403/001',
'http://download.class.ngdc.noaa.gov/download/1985026413/001',
'http://download.class.ngdc.noaa.gov/download/1985026423/001',
'http://download.class.ngdc.noaa.gov/download/1985026433/001',
'http://download.class.ngdc.noaa.gov/download/1985026443/001',
'http://download.class.ngdc.noaa.gov/download/1985026453/001',
'http://download.class.ngdc.noaa.gov/download/1985026463/001',
'http://download.class.ngdc.noaa.gov/download/1985026473/001',
'http://download.class.ngdc.noaa.gov/download/1985026483/001',
'http://download.class.ngdc.noaa.gov/download/1985026493/001',
'http://download.class.ngdc.noaa.gov/download/1985026503/001',
'http://download.class.ngdc.noaa.gov/download/1985026513/001',
'http://download.class.ngdc.noaa.gov/download/1985026523/001',
'http://download.class.ngdc.noaa.gov/download/1985026533/001',
'http://download.class.ngdc.noaa.gov/download/1985026543/001',
'http://download.class.ngdc.noaa.gov/download/1985026553/001',
'http://download.class.ngdc.noaa.gov/download/1985026563/001',
'http://download.class.ngdc.noaa.gov/download/1985026663/001',
'http://download.class.ngdc.noaa.gov/download/1985027043/001',
'http://download.class.ngdc.noaa.gov/download/1985027053/001',
'http://download.class.ngdc.noaa.gov/download/1985027063/001',
'http://download.class.ngdc.noaa.gov/download/1985027073/001',
'http://download.class.ngdc.noaa.gov/download/1985027083/001',
'http://download.class.ngdc.noaa.gov/download/1985027613/001',
'http://download.class.ngdc.noaa.gov/download/1985027783/001',
'http://download.class.ngdc.noaa.gov/download/1985027793/001',
'http://download.class.ngdc.noaa.gov/download/1985027803/001',
'http://download.class.ngdc.noaa.gov/download/1985027813/001',
'http://download.class.ngdc.noaa.gov/download/1985027823/001',
'http://download.class.ngdc.noaa.gov/download/1985027863/001',
'http://download.class.ngdc.noaa.gov/download/1985027873/001',
'http://download.class.ngdc.noaa.gov/download/1985027883/001',
'http://download.class.ngdc.noaa.gov/download/1985027893/001',
'http://download.class.ngdc.noaa.gov/download/1985027903/001',
'http://download.class.ngdc.noaa.gov/download/1985027913/001',
'http://download.class.ngdc.noaa.gov/download/1985027923/001',
'http://download.class.ngdc.noaa.gov/download/1985027933/001',
'http://download.class.ngdc.noaa.gov/download/1985027943/001',
'http://download.class.ngdc.noaa.gov/download/1985027953/001',
'http://download.class.ngdc.noaa.gov/download/1985027963/001',
'http://download.class.ngdc.noaa.gov/download/1985027973/001',
'http://download.class.ngdc.noaa.gov/download/1985027983/001',
'http://download.class.ngdc.noaa.gov/download/1985027993/001',
'http://download.class.ngdc.noaa.gov/download/1985028003/001',
'http://download.class.ngdc.noaa.gov/download/1985028013/001',
'http://download.class.ngdc.noaa.gov/download/1985028023/001',
'http://download.class.ngdc.noaa.gov/download/1985028033/001',
'http://download.class.ngdc.noaa.gov/download/1985028043/001',
'http://download.class.ngdc.noaa.gov/download/1985028053/001',
'http://download.class.ngdc.noaa.gov/download/1985028063/001',
'http://download.class.ngdc.noaa.gov/download/1985028073/001',
'http://download.class.ngdc.noaa.gov/download/1985028083/001',
'http://download.class.ngdc.noaa.gov/download/1985028133/001',
'http://download.class.ngdc.noaa.gov/download/1985028143/001',
'http://download.class.ngdc.noaa.gov/download/1985028153/001',
'http://download.class.ngdc.noaa.gov/download/1985028163/001',
'http://download.class.ngdc.noaa.gov/download/1985028173/001',
'http://download.class.ngdc.noaa.gov/download/1985028183/001',
'http://download.class.ngdc.noaa.gov/download/1985028193/001',
'http://download.class.ngdc.noaa.gov/download/1985028203/001',
'http://download.class.ngdc.noaa.gov/download/1985028213/001',
'http://download.class.ngdc.noaa.gov/download/1985028223/001',
'http://download.class.ngdc.noaa.gov/download/1985028233/001',
'http://download.class.ngdc.noaa.gov/download/1985028243/001',
'http://download.class.ngdc.noaa.gov/download/1985028253/001',
'http://download.class.ngdc.noaa.gov/download/1985028263/001',
'http://download.class.ngdc.noaa.gov/download/1985028273/001',
'http://download.class.ngdc.noaa.gov/download/1985028283/001',
'http://download.class.ngdc.noaa.gov/download/1985028293/001',
'http://download.class.ngdc.noaa.gov/download/1985028303/001',
'http://download.class.ngdc.noaa.gov/download/1985028313/001',
'http://download.class.ngdc.noaa.gov/download/1985028323/001',
'http://download.class.ngdc.noaa.gov/download/1985028333/001',
'http://download.class.ngdc.noaa.gov/download/1985028383/001',
'http://download.class.ngdc.noaa.gov/download/1985028393/001',
'http://download.class.ngdc.noaa.gov/download/1985028403/001',
'http://download.class.ngdc.noaa.gov/download/1985028413/001',
'http://download.class.ngdc.noaa.gov/download/1985028423/001',
'http://download.class.ngdc.noaa.gov/download/1985028433/001',
'http://download.class.ngdc.noaa.gov/download/1985028443/001',
'http://download.class.ngdc.noaa.gov/download/1985028453/001',
'http://download.class.ngdc.noaa.gov/download/1985028463/001',
'http://download.class.ngdc.noaa.gov/download/1985028473/001',
'http://download.class.ngdc.noaa.gov/download/1985028483/001',
'http://download.class.ngdc.noaa.gov/download/1985028493/001',
'http://download.class.ngdc.noaa.gov/download/1985028503/001',
'http://download.class.ngdc.noaa.gov/download/1985028513/001',
'http://download.class.ngdc.noaa.gov/download/1985028523/001',
'http://download.class.ngdc.noaa.gov/download/1985028533/001',
'http://download.class.ngdc.noaa.gov/download/1985028543/001',
'http://download.class.ngdc.noaa.gov/download/1985028553/001',
'http://download.class.ngdc.noaa.gov/download/1985028563/001',
'http://download.class.ngdc.noaa.gov/download/1985028573/001',
'http://download.class.ngdc.noaa.gov/download/1985028583/001',
'http://download.class.ngdc.noaa.gov/download/1985028833/001',
'http://download.class.ngdc.noaa.gov/download/1985029473/001',
'http://download.class.ngdc.noaa.gov/download/1985030953/001',
'http://download.class.ngdc.noaa.gov/download/1985032673/001',
'http://download.class.ngdc.noaa.gov/download/1985034303/001',
'http://download.class.ngdc.noaa.gov/download/1985035353/001',
'http://download.class.ngdc.noaa.gov/download/1985035363/001',
'http://download.class.ngdc.noaa.gov/download/1985035373/001',
'http://download.class.ngdc.noaa.gov/download/1985035383/001',
'http://download.class.ngdc.noaa.gov/download/1985035393/001',
'http://download.class.ngdc.noaa.gov/download/1985035403/001',
'http://download.class.ngdc.noaa.gov/download/1985035413/001',
'http://download.class.ngdc.noaa.gov/download/1985035423/001',
'http://download.class.ngdc.noaa.gov/download/1985035433/001',
'http://download.class.ngdc.noaa.gov/download/1985035443/001',
'http://download.class.ngdc.noaa.gov/download/1985035453/001',
'http://download.class.ngdc.noaa.gov/download/1985035463/001',
'http://download.class.ngdc.noaa.gov/download/1985035473/001',
'http://download.class.ngdc.noaa.gov/download/1985035483/001',
'http://download.class.ngdc.noaa.gov/download/1985035493/001',
'http://download.class.ngdc.noaa.gov/download/1985035503/001',
'http://download.class.ngdc.noaa.gov/download/1985035513/001',
'http://download.class.ngdc.noaa.gov/download/1985035523/001',
'http://download.class.ngdc.noaa.gov/download/1985035533/001',
'http://download.class.ngdc.noaa.gov/download/1985035543/001',
'http://download.class.ngdc.noaa.gov/download/1985035553/001',
'http://download.class.ngdc.noaa.gov/download/1985035563/001',
'http://download.class.ngdc.noaa.gov/download/1985035573/001',
'http://download.class.ngdc.noaa.gov/download/1985035583/001',
'http://download.class.ngdc.noaa.gov/download/1985035593/001',
'http://download.class.ngdc.noaa.gov/download/1985035603/001',
'http://download.class.ngdc.noaa.gov/download/1985035613/001',
'http://download.class.ngdc.noaa.gov/download/1985035623/001',
'http://download.class.ngdc.noaa.gov/download/1985035633/001',
'http://download.class.ngdc.noaa.gov/download/1985035643/001',
'http://download.class.ngdc.noaa.gov/download/1985035653/001',
'http://download.class.ngdc.noaa.gov/download/1985035663/001',
'http://download.class.ngdc.noaa.gov/download/1985035673/001',
'http://download.class.ngdc.noaa.gov/download/1985035683/001',
'http://download.class.ngdc.noaa.gov/download/1985035693/001',
'http://download.class.ngdc.noaa.gov/download/1985035703/001',
'http://download.class.ngdc.noaa.gov/download/1985035713/001',
'http://download.class.ngdc.noaa.gov/download/1985035723/001',
'http://download.class.ngdc.noaa.gov/download/1985035733/001',
'http://download.class.ngdc.noaa.gov/download/1985035743/001',
'http://download.class.ngdc.noaa.gov/download/1985035753/001',
'http://download.class.ngdc.noaa.gov/download/1985035763/001',
'http://download.class.ngdc.noaa.gov/download/1985035773/001',
'http://download.class.ngdc.noaa.gov/download/1985035783/001',
'http://download.class.ngdc.noaa.gov/download/1985035793/001',
'http://download.class.ngdc.noaa.gov/download/1985035803/001',
'http://download.class.ngdc.noaa.gov/download/1985035813/001',
'http://download.class.ngdc.noaa.gov/download/1985035823/001',
'http://download.class.ngdc.noaa.gov/download/1985035833/001',
'http://download.class.ngdc.noaa.gov/download/1985035843/001',
'http://download.class.ngdc.noaa.gov/download/1985035853/001',
'http://download.class.ngdc.noaa.gov/download/1985035863/001',
'http://download.class.ngdc.noaa.gov/download/1985035873/001',
'http://download.class.ngdc.noaa.gov/download/1985035883/001',
'http://download.class.ngdc.noaa.gov/download/1985035893/001',
'http://download.class.ngdc.noaa.gov/download/1985035903/001',
'http://download.class.ngdc.noaa.gov/download/1985035913/001',
'http://download.class.ngdc.noaa.gov/download/1985035923/001',
'http://download.class.ngdc.noaa.gov/download/1985035933/001',
'http://download.class.ngdc.noaa.gov/download/1985035943/001',
'http://download.class.ngdc.noaa.gov/download/1985035953/001',
'http://download.class.ngdc.noaa.gov/download/1985035963/001',
'http://download.class.ngdc.noaa.gov/download/1985035973/001',
'http://download.class.ngdc.noaa.gov/download/1985035983/001',
'http://download.class.ngdc.noaa.gov/download/1985035993/001',
'http://download.class.ngdc.noaa.gov/download/1985036003/001',
'http://download.class.ngdc.noaa.gov/download/1985036013/001',
'http://download.class.ngdc.noaa.gov/download/1985036023/001',
'http://download.class.ngdc.noaa.gov/download/1985036033/001',
'http://download.class.ngdc.noaa.gov/download/1985036043/001',
'http://download.class.ngdc.noaa.gov/download/1985036053/001',
'http://download.class.ngdc.noaa.gov/download/1985036063/001',
'http://download.class.ngdc.noaa.gov/download/1985036073/001',
'http://download.class.ngdc.noaa.gov/download/1985036083/001',
'http://download.class.ngdc.noaa.gov/download/1985036093/001',
'http://download.class.ngdc.noaa.gov/download/1985036103/001',
'http://download.class.ngdc.noaa.gov/download/1985036113/001']
In [5]:
#base_begin_url = 'http://download.class.ngdc.noaa.gov/download/1985025513/001'
for i, begin_url in enumerate(begin_urls):
df = pd.read_html(mypath + http_files[i],header=0)[0] #read local http file into df
filenames = []
for j in range(len(df)): #generate filenames from dataframe for data
filenames.append(df.loc[j, 'Name'])
for filename in filenames: #for every filename, make a request and save the data
req = requests.get(begin_url + filename)
with open( mypath + 'data/' + filename , 'wb' ) as fout: #save data!
fout.write(req.content)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-18fa24b9b95b> in <module>()
3 for i, begin_url in enumerate(begin_urls):
4
----> 5 df = pd.read_html(mypath + http_files[i],header=0)[0] #read local http file into df
6
7 filenames = []
/Users/scott/anaconda/lib/python2.7/site-packages/pandas/io/html.pyc in read_html(io, match, flavor, header, index_col, skiprows, infer_types, attrs, parse_dates, tupleize_cols, thousands, encoding)
863 'data (you passed a negative value)')
864 return _parse(flavor, io, match, header, index_col, skiprows, infer_types,
--> 865 parse_dates, tupleize_cols, thousands, attrs, encoding)
/Users/scott/anaconda/lib/python2.7/site-packages/pandas/io/html.pyc in _parse(flavor, io, match, header, index_col, skiprows, infer_types, parse_dates, tupleize_cols, thousands, attrs, encoding)
726 break
727 else:
--> 728 raise_with_traceback(retained)
729
730 ret = []
/Users/scott/anaconda/lib/python2.7/site-packages/pandas/io/html.pyc in _parse(flavor, io, match, header, index_col, skiprows, infer_types, parse_dates, tupleize_cols, thousands, attrs, encoding)
720
721 try:
--> 722 tables = p.parse_tables()
723 except Exception as caught:
724 retained = caught
/Users/scott/anaconda/lib/python2.7/site-packages/pandas/io/html.pyc in parse_tables(self)
191
192 def parse_tables(self):
--> 193 tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
194 return (self._build_table(table) for table in tables)
195
/Users/scott/anaconda/lib/python2.7/site-packages/pandas/io/html.pyc in _parse_tables(self, doc, match, attrs)
418
419 if not tables:
--> 420 raise ValueError('No tables found')
421
422 result = []
ValueError: No tables found
In [ ]:
Content source: scottlittle/solar-sensors
Similar notebooks: