notebook.community

Edit and run



In [1]:

    
import os
# Switch to the directory that holds the example data so that relative paths
# used later (e.g. the default 'spam.txt' in iter_data) resolve.
# The location can be overridden with the CROWDFLOWER_EXAMPLES_DIR environment
# variable; the default is the original author's checkout path.
EXAMPLES_DIR = os.environ.get(
    'CROWDFLOWER_EXAMPLES_DIR',
    '/Users/chbrown/github/crowdflower/examples',
)
os.chdir(EXAMPLES_DIR)



In [38]:

    
import crowdflower
# Create the CrowdFlower connection object used for every API call below.
# No credentials are passed here, so the client presumably reads the API key
# from its environment/configuration -- TODO confirm against the client docs.
conn = crowdflower.Connection()



In [29]:

    
def iter_data(filename='spam.txt', labels=None):
    """Yield one dict per labelled line of a tab-separated text file.

    Every non-blank line is expected to have the form ``label<TAB>text``;
    only the first tab is treated as the separator, so the text itself may
    contain tabs. Leading/trailing whitespace of the line is stripped first.

    Args:
        filename: path of the file to read.
        labels: optional container of labels to keep. When None, every
            line is yielded regardless of its label.

    Yields:
        dict with the keys 'id' ('<filename>:<line number>', 1-based),
        'text' and 'label'.

    Raises:
        ValueError: if a non-blank line has no tab separator.
    """
    # `with` guarantees the file handle is closed once the generator is
    # exhausted or discarded, instead of leaking it.
    with open(filename) as lines:
        for i, line in enumerate(lines, 1):
            line = line.strip()
            if not line:
                # Blank lines carry no record. Skip them, but keep counting
                # so that ids still match the file's real line numbers.
                continue
            label, text = line.split('\t', 1)
            if labels is None or label in labels:
                yield {'id': '{}:{}'.format(filename, i), 'text': text, 'label': label}



In [30]:

    
# Parse the data file a single time, then split the records by label
# (previously the file was opened and parsed once per class).
all_data = list(iter_data())
ham_data = [datum for datum in all_data if datum['label'] == 'ham']
spam_data = [datum for datum in all_data if datum['label'] == 'spam']



In [31]:

    
# For gold data, take the first 25 messages of each class.
# Each gold datum gets a 'spam_gold' key holding its known label; this is the
# key later passed to job.gold_add. Copies are built with dict(...) so the
# records still held in ham_data / spam_data are not modified in place.
gold_data = [
    dict(datum, spam_gold=datum['label'])
    for datum in ham_data[:25] + spam_data[:25]
]



In [37]:

    
# For the real (non-gold) data, take the next 100 messages of each class,
# i.e. positions 25..124, which come right after the 25 used as gold.
# These records are uploaded as-is, without a 'spam_gold' key.
test_data = ham_data[25:125] + spam_data[25:125]



In [ ]:

    
# Upload the gold rows followed by the test rows in a single call.
# The returned object is kept as `job` and used by the cells below
# (job.update, job.gold_add, job.ping).
job = conn.upload(gold_data + test_data)



In [2]:

    
# Settings applied to the uploaded job. The 'instructions' and 'cml' values
# are HTML/CML snippets; the CML shows each unit's {{text}} and asks a single
# required radio question labelled "spam" with the values "spam" / "ham".
job_settings = dict(
    title='Spam detection',
    max_judgments_per_worker=50,
    units_per_assignment=10,
    judgments_per_unit=2,
    payment_cents=10,
    instructions='''
        <h3>Spam detection</h3>
        <p>Judge whether these text messages are spam or not.</p>
    ''',
    cml='''
        {{text}}
        <cml:radios label="spam" validates="required" gold="true">
            <cml:radio label="Spam" value="spam"></cml:radio>
            <cml:radio label="Not spam" value="ham"></cml:radio>
        </cml:radios>
    ''',
    # front_load: quiz mode = 1; turn off with 0
    options=dict(front_load=1)
)
# Requires `job` from the upload cell; running this cell before the upload
# raises NameError. Inspect job_update_result to see the API's response.
job_update_result = job.update(job_settings)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-e0a2864b68e4> in <module>()
----> 1 job_update_result = job.update({
      2     'title': 'Spam detection',
      3     'max_judgments_per_worker': 50,
      4     'units_per_assignment': 10,
      5     'judgments_per_unit': 2,

NameError: name 'job' is not defined



In [ ]:

    
# 'spam' is the label of the cml:radios question in the job's CML, and
# 'spam_gold' is the key added to every gold datum before upload.
# Presumably this tells CrowdFlower which uploaded column holds the gold
# answer for that question -- confirm against the crowdflower client docs.
job.gold_add('spam', 'spam_gold')



In [35]:

    
# Show the job's current status counters (judgments, units, golden units, ...)
# as the cell output.
job.ping()









    Out[35]:





{u'all_judgments': 0,
 u'all_units': 250,
 u'completed_gold_estimate': 0,
 u'completed_non_gold_estimate': 0,
 u'completed_units_estimate': 0,
 u'golden_units': 50,
 u'needed_judgments': 0,
 u'ordered_units': 0,
 u'tainted_judgments': 0}