In [71]:
from os import listdir
from os.path import isdir, join
import pandas as pd
from paper_reader import *
In [97]:
# Root data folder; the cells below read the per-week sub-folders and the grades CSV from it.
# NOTE(review): this is an absolute, machine-specific path -- adjust it when running elsewhere.
INPUT_PATH = '/Users/Tom/Desktop/IntroSoc/data'
# Show the folder contents; reuse INPUT_PATH so the listing can never drift from the configured path.
listdir(INPUT_PATH)
Out[97]:
In [23]:
# Keep only the entries of INPUT_PATH that are directories AND whose name
# starts with 'week' (the name check guards against unrelated folders).
weeks = [
    entry for entry in listdir(INPUT_PATH)
    if entry.startswith('week') and isdir(join(INPUT_PATH, entry))
]
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(weeks)
In [64]:
# Load the gradebook from the configured data folder (path derived from
# INPUT_PATH rather than repeated as a second hard-coded literal).
grades_path = join(INPUT_PATH, 'full_grades.csv')
# Missing cells become 0, and rows are labelled by Username so later cells can
# use .loc[username, column]. drop=False keeps 'Username' available as a
# regular column too, because a later cell still reads all_student_info['Username'].
all_student_info = (
    pd.read_csv(grades_path)
    .fillna(value=0)
    .set_index('Username', drop=False)
)
columns = list(all_student_info.columns)
In [42]:
# Map each week folder name ('week1' .. 'week11') to the prefix of the
# matching gradebook column ('M1' .. 'M11').
lookup = {'week%d' % n: 'M%d' % n for n in range(1, 12)}
In [43]:
# Scratch helper: prints one "'weekN': 'MN'," line for N = 1..11
# (presumably used to generate the entries of the `lookup` dict literal).
for i in range(1, 12):
    # A single formatted string prints identically on Python 2 and 3.
    print("'week%d': 'M%d'," % (i, i))
In [44]:
# Goal: find the (paper, grade) pair (p, g)_iw for student i in week w.
# Only do this if the folder for week w is present in `weeks`.
# week_num = int(weeks[0].split('week')[1])
#
Out[44]:
In [51]:
# Use the student's net_id to fetch their paper from the relevant week's folder.
#column_start = lookup[week]
Out[51]:
In [99]:
# Resolve the gradebook column for week 1: the first column whose name starts
# with the week's prefix followed by a space (e.g. 'M1 ').
week_prefix = lookup['week1'] + ' '
matching_columns = [col for col in all_student_info.columns if col.startswith(week_prefix)]
if not matching_columns:
    # Same exception type as indexing an empty list, but with a message that
    # says what was being looked for.
    raise IndexError("no grade column starts with %r" % week_prefix)
grade_column_name = matching_columns[0]
In [100]:
# Display the resolved column name to confirm the prefix match picked the intended column.
grade_column_name
Out[100]:
In [70]:
# Spot-check: value in the selected grade column for the row labelled 'dba54'
# (rows are labelled by Username, so this is a single student's entry).
all_student_info.loc['dba54', grade_column_name]
Out[70]:
In [73]:
# Gather the week-1 submissions under INPUT_PATH.
# NOTE(review): get_files is not defined in this notebook -- presumably it comes
# from the `paper_reader` star import; confirm what it returns.
files = get_files(INPUT_PATH, '/week1')
In [ ]:
In [78]:
# Read the week-1 papers, passing along every username from the gradebook.
# The result is consumed below as a mapping from username to essay text.
usernames = list(all_student_info['Username'])
texts = get_text(INPUT_PATH, '/week1', files, usernames)
In [ ]:
In [ ]:
In [102]:
#all_student_info.loc['xxx', grade_column_name]
In [92]:
# Pair every student's essay text with their grade from the selected column.
# The key order is fixed once so essays and grades stay aligned.
students = list(texts.keys())
essays = [texts[student] for student in students]
grades = [all_student_info.loc[student, grade_column_name] for student in students]
# Sanity check: the three counts should be equal. A single formatted string
# prints "a b c" identically on Python 2 and 3.
print('%d %d %d' % (len(essays), len(grades), len(students)))
In [93]:
# Start from an empty frame; the next cell fills in the students/essays/grades columns.
df = pd.DataFrame()
In [94]:
# Assemble the modelling table in one step: one row per student with their
# essay text and grade. `columns=` pins the column order explicitly.
df = pd.DataFrame(
    {'students': list(texts.keys()), 'essays': essays, 'grades': grades},
    columns=['students', 'essays', 'grades'],
)
In [ ]:
In [ ]: