In [2]:
from pathlib import Path
import csv
import pandas as pd
# Number of leading identifier columns in each state file; everything after
# them is treated as a feature column to be summed.
_N_ID_COLUMNS = 3


def _annual_rows(state_name, state_data):
    """Yield one output row per calendar year found in *state_data*.

    Rows are grouped by the year part of the ``YearMonth`` column
    (``YearMonth // 100``, i.e. the column is assumed to hold numeric
    YYYYMM values) rather than by counting 12 rows at a time, so the final
    year is emitted as well and a file that does not start in January is
    still labelled correctly. A year with fewer than 12 rows is summed over
    the rows that are present.

    Each yielded row is ``[state_name, state_code, year, *feature_totals]``.
    """
    years = state_data['YearMonth'] // 100
    # sort=False keeps the years in the order they appear in the file.
    for year, months in state_data.groupby(years, sort=False):
        # skipna=False: a missing monthly value makes the annual total NaN
        # instead of being silently counted as zero.
        totals = months.iloc[:, _N_ID_COLUMNS:].sum(skipna=False)
        state_code = int(months['StateCode'].iloc[0])
        yield [state_name, state_code, int(year)] + list(totals)


def write_annual_totals(state_files, out_path='annual.txt'):
    """Aggregate monthly per-state data files into one annual summary file.

    Args:
        state_files: Paths of whitespace-delimited text files with a header
            row. Each file must contain ``StateCode`` and ``YearMonth``
            columns; the first three columns are treated as identifiers and
            all remaining columns are summed per year.
        out_path: Destination of the space-delimited summary. Its header is
            ``State StateCode Year`` followed by the feature column names
            of the first input file.

    Raises:
        ValueError: If *state_files* is empty. Raised before the output
            file is opened, so an existing summary is not truncated.
    """
    state_files = list(state_files)
    if not state_files:
        raise ValueError('no state data files to process')

    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=' ')
        header_written = False
        for state_file in state_files:
            # sep=r'\s+' is the supported spelling of the deprecated
            # delim_whitespace=True.
            state_data = pd.read_csv(state_file, sep=r'\s+')
            if not header_written:
                # Header row, i.e., State StateCode Year [Features]
                features = list(state_data.columns[_N_ID_COLUMNS:])
                writer.writerow(['State', 'StateCode', 'Year'] + features)
                header_written = True
            # The state name is the input file name without its extension.
            writer.writerows(_annual_rows(Path(state_file).stem, state_data))


if __name__ == '__main__':
    curr_dir = Path('climate data')
    # Sorted so the output row order does not depend on filesystem order.
    data_states = sorted(curr_dir.glob('**/*.txt'))
    print('processing...')
    write_annual_totals(data_states, 'annual.txt')
    print('done.')
In [ ]: