In [1]:
import os
# Switch to the examples directory (presumably where the relative 'spam.txt'
# read by iter_data lives -- confirm).
# The location can be overridden through the CROWDFLOWER_EXAMPLES_DIR
# environment variable; when it is unset, the original hard-coded path is used,
# so existing setups behave exactly as before.
os.chdir(os.environ.get('CROWDFLOWER_EXAMPLES_DIR',
                        '/Users/chbrown/github/crowdflower/examples'))
In [38]:
import crowdflower
# Connection object used further down to upload the rows (conn.upload).
# NOTE(review): no credentials are passed here; presumably the library picks the
# API key up from the environment or a config file -- confirm.
conn = crowdflower.Connection()
In [29]:
def iter_data(filename='spam.txt', labels=None):
    """Yield one dict per labelled line of a tab-separated file.

    Each non-blank line is expected to look like ``<label>\\t<text>``; only the
    first tab splits the line, so the text itself may contain tabs.

    Args:
        filename: Path of the file to read.
        labels: Container of labels to keep. ``None`` keeps every row.

    Yields:
        Dicts with the keys ``'id'`` (``'<filename>:<line number>'``, counting
        from 1), ``'text'`` and ``'label'``.

    Raises:
        ValueError: If a non-blank line contains no tab once surrounding
            whitespace has been stripped.
    """
    # The context manager closes the file even when the consumer stops
    # iterating early or a malformed line raises.
    with open(filename) as lines:
        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record. They
                # still count towards the line number so ids stay stable.
                continue
            label, text = stripped.split('\t', 1)
            if labels is None or label in labels:
                yield {'id': '{}:{}'.format(filename, i), 'text': text, 'label': label}
In [30]:
# Materialise the rows of each class as a list: one pass over the default file
# per class, 'ham' first and 'spam' second.
ham_data, spam_data = (
    list(iter_data(labels=[wanted])) for wanted in ('ham', 'spam')
)
In [31]:
# For gold data, take the first 25 rows of each class.
# Every gold row is a copy of the source dict with an extra 'spam_gold' key
# holding the known label (the column name passed to job.gold_add below).
# Copying leaves the dicts still referenced by ham_data / spam_data untouched,
# so re-running this cell has no side effects on earlier results.
gold_data = []
for datum in ham_data[:25] + spam_data[:25]:
    gold_data.append(dict(datum, spam_gold=datum['label']))
In [37]:
# For real (non-gold) data, take the next 100 rows of each class, i.e. the rows
# immediately after the 25 used as gold. These rows have no 'spam_gold' key.
test_data = ham_data[25:125] + spam_data[25:125]
In [ ]:
# Upload everything in one request: gold rows first, then the test rows.
upload_rows = gold_data + test_data
job = conn.upload(upload_rows)
In [2]:
# HTML snippet sent as the job's 'instructions' setting.
instructions_html = '''
<h3>Spam detection</h3>
<p>Judge whether these text messages are spam or not.</p>
'''

# CML sent as the job's 'cml' setting: the message text followed by one required
# radio question named "spam" with the values 'spam' and 'ham'.
question_cml = '''
{{text}}
<cml:radios label="spam" validates="required" gold="true">
<cml:radio label="Spam" value="spam"></cml:radio>
<cml:radio label="Not spam" value="ham"></cml:radio>
</cml:radios>
'''

# All job settings, gathered in one place before they are sent.
job_settings = {
    'title': 'Spam detection',
    'max_judgments_per_worker': 50,
    'units_per_assignment': 10,
    'judgments_per_unit': 2,
    'payment_cents': 10,
    'instructions': instructions_html,
    'cml': question_cml,
    'options': {
        'front_load': 1,  # quiz mode = 1; turn off with 0
    },
}

# The response is kept so it can be inspected afterwards.
job_update_result = job.update(job_settings)
In [ ]:
# 'spam' is the label of the cml:radios question in the job's CML, and
# 'spam_gold' is the key added to every gold row.
# NOTE(review): presumably this registers the 'spam_gold' column as the answer
# key for the 'spam' question -- confirm against the crowdflower library.
job.gold_add('spam', 'spam_gold')
In [35]:
# Last expression of the cell, so whatever ping() returns is shown as the cell
# output.
# NOTE(review): presumably a status check on the job -- confirm against the
# crowdflower library.
job.ping()
Out[35]: