In [1]:
import pandas as pd
In [2]:
cd c:/users/rsignell/downloads
In [3]:
# Load the sample table from a whitespace-delimited text file, keeping only
# the four columns listed in `usecols` and using the parsed 'sample_dt'
# column as the index. skiprows=[1] drops the second line of the file (the
# line immediately after the header row).
df = pd.read_csv(
    'SS_data_edited.txt',
    sep=r'\s+',  # equivalent of the deprecated delim_whitespace=True
    index_col='sample_dt',
    parse_dates=True,
    skiprows=[1],
    usecols=['sample_dt', 'agency_cd', 'site_no', 'body_part_id'],
)
In [4]:
# Preview the first five rows of the sample table.
df.head(n=5)
Out[4]:
In [5]:
# Distinct site numbers present in the sample table.
stations = df['site_no'].unique()
# Call-style print behaves the same on Python 2 (single argument) and is
# required on Python 3.
print(stations)
In [6]:
def read_flowfile(infile, skiprows=28):
    """Read a gzip-compressed, whitespace-delimited flow file.

    Parameters
    ----------
    infile : str
        Path of the gzip-compressed file to read.
    skiprows : optional
        Passed straight to ``pandas.read_csv``; the leading lines to skip
        before the first data row. Defaults to 28, the value that used to be
        hard-coded here.

    Returns
    -------
    pandas.DataFrame
        Columns 'agency_cd', 'site_no', 'Q' and 'Qcd', indexed by the
        'sample_dt' column (parsed as dates).
    """
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True flag. Column names are supplied explicitly
    # because the skipped lines include whatever header the file carries.
    return pd.read_csv(
        infile,
        sep=r'\s+',
        index_col='sample_dt',
        parse_dates=True,
        skiprows=skiprows,
        names=['agency_cd', 'site_no', 'sample_dt', 'Q', 'Qcd'],
        compression='gzip',
    )
In [7]:
# For each station: read its flow file, select that station's rows from the
# sample table `df`, outer-join the two, and write the result to
# '<station>_sed.csv'.
for station in stations:
    # NOTE(review): the input file name assumes exactly one leading zero in
    # front of the integer site number -- confirm this holds for every station.
    infile = '0%d.rdb.gz' % station
    outfile = '%d_sed.csv' % station

    print('reading %s ' % infile)
    leftDF = read_flowfile(infile)
    rightDF = df[df['site_no'] == station]

    # pandas raises MergeError when `on=` is combined with
    # left_index/right_index. Promote the 'sample_dt' index to a column on
    # both sides, outer-join on all three keys, then restore it as the index.
    newDF = pd.merge(
        leftDF.reset_index(),
        rightDF.reset_index(),
        on=['sample_dt', 'agency_cd', 'site_no'],
        how='outer',
    ).set_index('sample_dt')

    # Announce the write before it happens so a failure is attributable.
    print('writing %s ' % outfile)
    newDF.to_csv(outfile)
In [8]:
# `newDF` holds whatever the final iteration of the station loop assigned,
# so this previews the first 20 rows for the last station only.
newDF.head(n=20)
Out[8]:
In [8]: