In [1]:
!ls


genome_list (1).xlsx          parse-soil-annotaitons.ipynb
genome_list_adina.xlsx        peerj-545-10000otus.pdf
genome_list_adina_blanks.txt  summary_anl.docx
genome_list_adina_blanks.xlsx summary_anl_email.pdf

In [14]:
d = {}
d2 = {}
d3 = {}
for n, line in enumerate(open("genome_list_adina_blanks.txt", 'rU')):
    if n < 1000:
        dat = line.rstrip().split('\t')
        habitat = dat[3]
        isolation = dat[4]
        isolation2 = dat[5]
        gold_id = dat[8]
        #print habitat, isolation, isolation2, gold_id
        d[isolation2] = d.get(isolation2, 0) + 1
        if isolation2 == "none":
            d2[isolation] = d2.get(isolation,0) + 1
        if isolation == "none":
            d3[habitat] = d2.get(habitat, 0) + 1
            
print d
print d2
print d3


{'Industrial wastewater': 3, 'none': 527, 'Solid animal waste': 2, 'Fermented beverages': 2, 'Digestive system': 26, 'Non-marine Saline and Alkaline': 14, 'Excretory system': 3, 'Lympathic system': 1, 'Respiratory system': 7, 'Silage fermentation': 1, 'Nervous system': 1, 'Skin': 4, 'Thermal springs': 51, 'Freshwater': 48, 'Marine': 31, 'Sediment': 7, 'Dairy products': 6}
{'Animal': 4, 'Industrial production': 2, 'Fish': 6, 'Mammals': 32, 'Algae': 2, 'Plants': 9, 'Built environment': 4, 'Mollusca': 1, 'Fungi': 1, 'Air': 3, 'Food production': 19, 'Biotransformation': 4, 'Amphibia': 1, 'Lab synthesis': 7, 'Wastewater': 21, 'none': 321, 'Lab enrichment': 1, 'Human': 75, 'Protozoa': 1, 'Insecta': 6, 'Solid waste': 1, 'Birds': 6}
{'Terrestrial': 1, 'Dairy isolates': 1, 'Host-Intestinal tract': 1, 'Fish-Host': 1, 'Bird-Host': 1, 'Fish brain': 1, 'Hydrothermal vent': 1, 'Bird': 1, 'Host-Mouse': 1, 'Plant': 1, 'Host-Nasopharyngeal microflora': 1, 'Human skin': 1, 'Oil fields': 1, 'Blood-Host': 1, 'Hot spring-Marine': 1, 'Biogas reactor': 1, 'Host': 1, 'Fresh water-Ponds': 1, 'Blood': 1, 'Animal-Human': 1, 'Aquatic-Hot spring': 1, 'Colon-Gastrointestinal tract-Host-Oral cavity-Respiratory tract': 1, 'Host-Human oral cavity': 1, 'Host-Human airways': 1, 'Host-Human intestinal microflora': 1, 'Human throat': 1, 'Host-Wastewater': 1, 'Animal gastrointestinal tract-Aquaculture': 1, 'Feces': 1, 'Fresh water-Host': 1, 'Goat-Host-Human': 1, 'Insect': 1, 'Host-Human nasopharyngeal microflora': 1, 'Cheese': 1, 'none': 320, 'Host-Oral cavity': 1, 'Sediment-Terrestrial': 1, 'Human feces': 1, 'Host-Human': 1, 'Fresh water-Marine': 1, 'Milk': 1, 'Intestinal tract-Oral cavity-Skin-Urogenital tract': 1, 'Fresh water': 1, 'Host-Human wound': 1, 'Meat food product': 1, 'Host-Skin': 1, 'Fresh water-Groundwater': 1, 'Human airways-Sputum': 1, 'Fish': 5, 'Host-Human skin': 1}

In [ ]: