In [1]:
import json
import itertools
import numpy as np
import pandas as pd
from collections import Counter

In [2]:
with open("ner_units.json") as thefile:
    unitfile = json.load(thefile)

In [13]:
list_of_all_measurements = [x for x in itertools.chain(*[x['ner_units'] for x in unitfile['response']['docs']])]

In [20]:
Counter(list_of_all_measurements).most_common(200)


Out[20]:
[('2 Accent', 138),
 ('1 Accent', 137),
 ('2F %', 126),
 ('95 %', 114),
 ('3A %', 107),
 ('6/13/2014 Signe', 94),
 ('2Fabcnews.go.com %', 80),
 ('100 %', 79),
 ('2Bat %', 62),
 ('2FwireStory %', 60),
 ('5/12/2014 Signe', 58),
 ('6/20/2014 Signe', 58),
 ('2BShot %', 58),
 ('3 Accent', 46),
 ('50 %', 38),
 ('10 %', 36),
 ('2FUS %', 30),
 ('9 ]', 29),
 ('30 %', 23),
 ('five years', 22),
 ('20 %', 20),
 ('2FSports %', 20),
 ('3Fid %', 20),
 ('6/2/2014 Signe', 20),
 ('2Fstory %', 20),
 ('0px auto', 20),
 ('two decades', 19),
 ('28ISSN %', 19),
 ('15 %', 18),
 ('0 %', 18),
 ('2014 Society', 18),
 ('18 U.S.C', 17),
 ('27 %', 17),
 ('1 %', 17),
 ('2015 John', 17),
 ('80 %', 17),
 ('70 %', 16),
 ('5B1 %', 16),
 ('2015 Hearing', 16),
 ('three years', 16),
 ('two weeks', 16),
 ('one thing', 15),
 ('5 %', 14),
 ('90 %', 14),
 ('two types', 14),
 ('1200 UTC', 14),
 ('1:29 PM', 14),
 ('two years', 13),
 ('0px inherit', 12),
 ('1.2 http', 12),
 ('four decades', 12),
 ('60 %', 12),
 ('10 years', 12),
 ('2014 AND', 11),
 ('2.9 http', 11),
 ('3 %', 11),
 ('six months', 10),
 ('3A+Is+Sex+Becoming+More+Social %', 10),
 ('30 years', 10),
 ('20 years', 10),
 ('27Personalized+Porn %', 10),
 ('2FPolitics %', 10),
 ('2FEntertainment %', 10),
 ('2FLifestyle %', 10),
 ('million people', 10),
 ('6 h', 9),
 ('4.9 http', 9),
 ('nine months', 9),
 ('2014 Wiley', 9),
 ('75 %', 9),
 ('22 %', 9),
 ('six categories', 8),
 ('1 trivia game', 8),
 ('18 %', 8),
 ('two orders', 8),
 ('2014 Big', 8),
 ('63 %', 8),
 ('2Ffeed %', 8),
 ('15 Presente', 8),
 ('2Fwww.discoverclarksville.com %', 8),
 ('40 years', 8),
 ('21 %', 8),
 ('24 hours', 8),
 ('2Fatom %', 8),
 ('4 %', 8),
 ('three months', 8),
 ('one place', 8),
 ('100 thousand questions', 8),
 ('million members', 8),
 ('2015 Universal', 8),
 ('25 %', 8),
 ('2Farticles %', 8),
 ('two nights', 8),
 ('16 %', 8),
 ('29 %', 7),
 ('2014 winstgevend', 7),
 ('14 %', 7),
 ('hundred years', 7),
 ('12 years', 7),
 ('23 %', 7),
 ('two approaches', 7),
 ('q= %', 7),
 ('10 percent', 7),
 ('one hour', 7),
 ('11 %', 7),
 ('1px solid rgb', 7),
 ('3 km', 7),
 ('18 months', 7),
 ('11.9 http', 7),
 ('9.9 http', 7),
 ('2 years', 7),
 ('four years', 7),
 ('2014 William', 6),
 ('100 kyr', 6),
 ('2BBembry %', 6),
 ('10 ]', 6),
 ('3.7 W', 6),
 ('two countries', 6),
 ('20:00:00 GMT+', 6),
 ('twenty years', 6),
 ('million homes', 6),
 ('2BHonors %', 6),
 ('million cubic feet', 6),
 ('2008 Apple', 6),
 ('72 %', 6),
 ('28 U.S.C', 6),
 ('one day', 6),
 ('2014 Asylum', 6),
 ('78 %', 6),
 ('one week', 6),
 ('one tap', 6),
 ('19 miles', 6),
 ('2014 KIDinaKORNER/Interscope', 6),
 ('12.9 http', 6),
 ('18:00:00 GMT+', 6),
 ('30 wt', 6),
 ('two population', 6),
 ('six species', 6),
 ('3Dblogger %', 6),
 ('2013 Warner', 6),
 ('1 ]', 6),
 ('million years', 6),
 ('24 %', 6),
 ('6 years', 6),
 ('2BBET %', 6),
 ('28 days', 6),
 ('million barrels', 5),
 ('2 M', 5),
 ('700 mb', 5),
 ('99 %', 5),
 ('seven years', 5),
 ('3D seismic reflection', 5),
 ('one year', 5),
 ('30 countries', 5),
 ('0px rgb', 5),
 ('2015 Island', 5),
 ('0 replie', 5),
 ('2.5 %', 5),
 ('three categories', 5),
 ('40 %', 5),
 ('2014 al', 5),
 ('94 %', 5),
 ('5 kg', 5),
 ('two days', 5),
 ('three species', 5),
 ('two months', 5),
 ('1.2 m3', 5),
 ('2.24 kg·ha1', 5),
 ('30 days', 5),
 ('15 years', 5),
 ('6 months', 5),
 ('100 m', 5),
 ('10 inches', 5),
 ('two groups', 5),
 ('three groups', 5),
 ('76 %', 5),
 ('four times', 5),
 ('5 days', 5),
 ('21 July', 4),
 ('28 %', 4),
 ('one type', 4),
 ('2014 Marvel', 4),
 ('23.9 °C', 4),
 ('12 hours', 4),
 ('3 genera', 4),
 ('100 meters', 4),
 ('100 union', 4),
 ('30 m', 4),
 ('05 MAR', 4),
 ('30° N', 4),
 ('7:30 PM', 4),
 ('2015 BIG', 4),
 ('million tons', 4),
 ('800 hPa', 4),
 ('12 km', 4),
 ('4 days', 4),
 ('seven days', 4),
 ('2BDonor %', 4),
 ('2014 Universal', 4),
 ('24 h', 4)]

In [23]:
def F_to_C(F):
    return((Fahrenheit - 32) * 5.0/9.0)

In [25]:
F_to_C(list_of_all_measurements['F'])


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-25-32dfc62fc1d9> in <module>()
----> 1 F_to_C(list_of_all_measurements['F'])

TypeError: list indices must be integers, not str

In [ ]: