Query:

select mt.metadata_value_id, asa.sequence_id

from metadata_taggings as mt

join problems as pr

on pr.id = mt.object_id

join assistment_to_sequence_associations as asa

on asa.assistment_id = pr.assistment_id

where mt.metadata_definition_id = 1 and mt.metadata_value_id >= 5 and mt.metadata_value_id <= 317

order by mt.metadata_value_id, asa.sequence_id;


In [1]:
import csv

In [2]:
def most_common(lst):
    return max(set(lst), key=lst.count)

In [3]:
f = open('C:\Users\BearDeer\Dropbox\skill_seq_map.csv', 'rb')
f2 = open('C:\Users\BearDeer\Dropbox\skill_groups.csv', 'rb')
f3 = open('output.sql', 'wb')
reader = csv.reader(f)
reader_2 = csv.reader(f2)

In [4]:
raw_data = {}
skill_seq = {}
output_data = []

In [5]:
for row in reader:
    skill_id, seq_id = row
    skills = raw_data.get(seq_id, [])
    skills.append(skill_id)
    raw_data[seq_id] = skills

In [6]:
print len(raw_data)


447175

In [7]:
for seq_id, skills in raw_data.items():
    main_skill = most_common(skills)
    seqs = skill_seq.get(main_skill, [])
    seqs.append(seq_id)
    skill_seq[main_skill] = seqs

In [8]:
print len(skill_seq)


275

In [9]:
for row in reader_2:
    skill_id, group_id = row
    seqs = skill_seq.get(skill_id, [])
    for seq in seqs:
        output_sql = 'INSERT INTO experiment_settings(student_id, class_id, experiment_condition, experiment_setting_default_id) VALUES (%s, %s, %s, 3);\n' % (skill_id, seq, group_id)
        f3.write(output_sql)

In [10]:
f.close()
f2.close()
f3.close()