In [71]:
from os import listdir
from os.path import isdir, join
import pandas as pd
from paper_reader import *

In [97]:
INPUT_PATH = '/Users/Tom/Desktop/IntroSoc/data'
listdir('/Users/Tom/Desktop/IntroSoc/data/')


Out[97]:
['.DS_Store',
 'full_grades.csv',
 'intro_grades.csv',
 'intro_students.csv',
 'week1',
 'week1_final_grades.csv',
 'week1_graded_final.p',
 'week2',
 'week2_graded_final.p',
 'week3',
 'week3_graded_final.p',
 'week4']

In [23]:
weeks = [w for w in listdir(INPUT_PATH) if isdir(join(INPUT_PATH, w))]
weeks = [x for x in weeks if x.startswith('week')] #additional check
print weeks


['week1', 'week2', 'week3', 'week4']

In [64]:
all_student_info = pd.read_csv('/Users/Tom/Desktop/IntroSoc/data/full_grades.csv')
all_student_info = all_student_info.fillna(value=0)
all_student_info.index  = all_student_info['Username']
columns = list(all_student_info.columns)

In [42]:
lookup = {'week1': 'M1',
'week2': 'M2',
'week3': 'M3',
'week4': 'M4',
'week5': 'M5',
'week6': 'M6',
'week7': 'M7',
'week8': 'M8',
'week9': 'M9',
'week10': 'M10',
'week11': 'M11'}

In [43]:
for i in range(1,12):
    print "'week"+str(i)+"'"+": 'M"+str(i)+"'"+","


'week1': 'M1',
'week2': 'M2',
'week3': 'M3',
'week4': 'M4',
'week5': 'M5',
'week6': 'M6',
'week7': 'M7',
'week8': 'M8',
'week9': 'M9',
'week10': 'M10',
'week11': 'M11',

In [44]:
#To find the paper,grade pair for student i in week w (p,g)iw
    #if weekw in weeks
        #week_num = int(weeks[0].split('week')[1])
        #


Out[44]:
['M11', 'M10', 'M9', 'M8', 'M1', 'M3', 'M2', 'M5', 'M4', 'M7', 'M6']

In [51]:
#Use net_id to get the paper from relevant week folder
#column_start = lookup[week]


Out[51]:
1

In [99]:
grade_column_name = [x for x in all_student_info.columns if x.startswith(lookup['week1']+' ')]
grade_column_name = grade_column_name[0]

In [100]:
grade_column_name


Out[100]:
'M1 (Aug 29 & Aug 31) [Total Pts: 100] |495817'

In [70]:
all_student_info.loc['dba54', grade_column_name]


Out[70]:
'80.00'

In [73]:
files = get_files(INPUT_PATH, '/week1')

In [ ]:


In [78]:
texts = get_text(INPUT_PATH, '/week1', files, list(all_student_info['Username']))

In [ ]:


In [ ]:


In [102]:
#all_student_info.loc['xxx', grade_column_name]

In [92]:
essays = []
grades = []
for student in texts.keys():
    text = texts[student]
    grade = all_student_info.loc[student, grade_column_name]
    essays.append(text)
    grades.append(grade)
    print len(essays), len(grades), len(texts.keys())


1 1 211
2 2 211
3 3 211
4 4 211
5 5 211
6 6 211
7 7 211
8 8 211
9 9 211
10 10 211
11 11 211
12 12 211
13 13 211
14 14 211
15 15 211
16 16 211
17 17 211
18 18 211
19 19 211
20 20 211
21 21 211
22 22 211
23 23 211
24 24 211
25 25 211
26 26 211
27 27 211
28 28 211
29 29 211
30 30 211
31 31 211
32 32 211
33 33 211
34 34 211
35 35 211
36 36 211
37 37 211
38 38 211
39 39 211
40 40 211
41 41 211
42 42 211
43 43 211
44 44 211
45 45 211
46 46 211
47 47 211
48 48 211
49 49 211
50 50 211
51 51 211
52 52 211
53 53 211
54 54 211
55 55 211
56 56 211
57 57 211
58 58 211
59 59 211
60 60 211
61 61 211
62 62 211
63 63 211
64 64 211
65 65 211
66 66 211
67 67 211
68 68 211
69 69 211
70 70 211
71 71 211
72 72 211
73 73 211
74 74 211
75 75 211
76 76 211
77 77 211
78 78 211
79 79 211
80 80 211
81 81 211
82 82 211
83 83 211
84 84 211
85 85 211
86 86 211
87 87 211
88 88 211
89 89 211
90 90 211
91 91 211
92 92 211
93 93 211
94 94 211
95 95 211
96 96 211
97 97 211
98 98 211
99 99 211
100 100 211
101 101 211
102 102 211
103 103 211
104 104 211
105 105 211
106 106 211
107 107 211
108 108 211
109 109 211
110 110 211
111 111 211
112 112 211
113 113 211
114 114 211
115 115 211
116 116 211
117 117 211
118 118 211
119 119 211
120 120 211
121 121 211
122 122 211
123 123 211
124 124 211
125 125 211
126 126 211
127 127 211
128 128 211
129 129 211
130 130 211
131 131 211
132 132 211
133 133 211
134 134 211
135 135 211
136 136 211
137 137 211
138 138 211
139 139 211
140 140 211
141 141 211
142 142 211
143 143 211
144 144 211
145 145 211
146 146 211
147 147 211
148 148 211
149 149 211
150 150 211
151 151 211
152 152 211
153 153 211
154 154 211
155 155 211
156 156 211
157 157 211
158 158 211
159 159 211
160 160 211
161 161 211
162 162 211
163 163 211
164 164 211
165 165 211
166 166 211
167 167 211
168 168 211
169 169 211
170 170 211
171 171 211
172 172 211
173 173 211
174 174 211
175 175 211
176 176 211
177 177 211
178 178 211
179 179 211
180 180 211
181 181 211
182 182 211
183 183 211
184 184 211
185 185 211
186 186 211
187 187 211
188 188 211
189 189 211
190 190 211
191 191 211
192 192 211
193 193 211
194 194 211
195 195 211
196 196 211
197 197 211
198 198 211
199 199 211
200 200 211
201 201 211
202 202 211
203 203 211
204 204 211
205 205 211
206 206 211
207 207 211
208 208 211
209 209 211
210 210 211
211 211 211

In [93]:
df = pd.DataFrame()

In [94]:
df['students'] = list(texts.keys())
df['essays'] = essays
df['grades'] = grades

In [ ]:


In [ ]: