In [34]:
# Flat file to parse (presumably UniProt's subcellular-location vocabulary,
# judging by the file name -- TODO confirm the source/release).
SUBCELL_PATH = '/home/martin/Skrivbord/subcell.txt'


def _parse_subcell_lines(lines):
    """Turn the lines of the flat file into a list of per-record dicts.

    Each line is expected to look like ``CC   value`` where ``CC`` is a
    two-character line code. A line starting with ``//`` terminates the
    current record. Only the codes ``ID``, ``HI``, ``HP`` and ``GO`` are kept:

    * ``ID`` / ``HI`` / ``HP``: the value with surrounding whitespace and one
      trailing period removed.
    * ``GO``: the part of the value before the first ``;``.

    If a code occurs several times in a record, the last occurrence wins.

    Parameters
    ----------
    lines : iterable of str
        The lines of the file (an open file object works).

    Returns
    -------
    list of dict
        One dict per non-empty record, in file order.
    """
    records = []
    current = {}
    for line in lines:
        code = line[0:2]
        if code == '//':
            # Record terminator: store what was collected and start afresh.
            if current:
                records.append(current)
            current = {}
            continue
        if code not in ('ID', 'HI', 'HP', 'GO'):
            continue
        # maxsplit=1 keeps values that themselves contain three spaces intact.
        fields = line.split('   ', 1)
        if len(fields) < 2:
            # No value on this line. Skip it instead of silently reusing the
            # value left over from a previous line.
            continue
        value = fields[1].strip()
        if code == 'GO':
            current['GO'] = value.split(';')[0].strip()
        else:
            # Drop the terminating period only when it is actually there,
            # rather than blindly cutting the last two characters.
            if value.endswith('.'):
                value = value[:-1]
            current[code] = value
    # Keep a final record even if the file does not end with '//'.
    if current:
        records.append(current)
    return records


with open(SUBCELL_PATH, 'r') as f:
    locations = _parse_subcell_lines(f)

In [35]:
# Records that have no 'HP' key at all.
no_HP = [loc for loc in locations if 'HP' not in loc]

# Of those, the records that also have no 'HI' key.
no_HP_and_HI = [loc for loc in no_HP if 'HI' not in loc]

In [29]:
import csv
def write_csv(path, dict_list, keys):
    """Write a list of dicts to ``path`` as a CSV file with a header row.

    Parameters
    ----------
    path : str
        Destination file; it is created or overwritten.
    dict_list : iterable of dict
        Rows to write. A key listed in ``keys`` but missing from a row is
        written as an empty field (``csv.DictWriter`` default); a row
        containing a key not listed in ``keys`` raises ``ValueError``.
    keys : sequence of str
        Column names, in output order.
    """
    import sys  # local import: only needed to pick the file mode below

    if sys.version_info[0] >= 3:
        # Python 3: csv writes text; newline='' lets the writer control the
        # line endings itself.
        f = open(path, 'w', newline='')
    else:
        # Python 2: csv expects a file opened in binary mode.
        f = open(path, 'wb')
    with f:
        w = csv.DictWriter(f, keys)
        w.writeheader()
        w.writerows(dict_list)

In [36]:
# Export the records that have neither an 'HP' nor an 'HI' key,
# keeping only their 'ID' and 'GO' columns.
output_path = '/home/martin/Skrivbord/loc_no_parent_uniprot_2.csv'
output_columns = ['ID', 'GO']
write_csv(path=output_path, dict_list=no_HP_and_HI, keys=output_columns)

In [37]:
# Show the records that have neither an 'HP' nor an 'HI' key.
no_HP_and_HI


Out[37]:
[{'GO': 'GO:0020022', 'ID': 'Acidocalcisome'},
 {'GO': 'GO:0009288', 'ID': 'Archaeal flagellum'},
 {'GO': 'GO:0009288', 'ID': 'Bacterial flagellum'},
 {'GO': 'GO:0005933', 'ID': 'Bud'},
 {'GO': 'GO:0031470', 'ID': 'Carboxysome'},
 {'GO': 'GO:0031975', 'ID': 'Cell envelope'},
 {'GO': 'GO:0030054', 'ID': 'Cell junction'},
 {'GO': 'GO:0042995', 'ID': 'Cell projection'},
 {'GO': 'GO:0030428', 'ID': 'Cell septum'},
 {'GO': 'GO:0009986', 'ID': 'Cell surface'},
 {'GO': 'GO:0051286', 'ID': 'Cell tip'},
 {'GO': 'GO:0046858', 'ID': 'Chlorosome'},
 {'GO': 'GO:0005694', 'ID': 'Chromosome'},
 {'GO': 'GO:0032154', 'ID': 'Cleavage furrow'},
 {'GO': 'GO:0001533', 'ID': 'Cornified envelope'},
 {'GO': 'GO:0005737', 'ID': 'Cytoplasm'},
 {'ID': 'Cytoplasmic granule'},
 {'GO': 'GO:0016023', 'ID': 'Cytoplasmic vesicle'},
 {'GO': 'GO:0005783', 'ID': 'Endoplasmic reticulum'},
 {'GO': 'GO:0005793',
  'ID': 'Endoplasmic reticulum-Golgi intermediate compartment'},
 {'GO': 'GO:0005768', 'ID': 'Endosome'},
 {'GO': 'GO:0009289', 'ID': 'Fimbrium'},
 {'GO': 'GO:0020016', 'ID': 'Flagellar pocket'},
 {'GO': 'GO:0042763', 'ID': 'Forespore'},
 {'GO': 'GO:0005794', 'ID': 'Golgi apparatus'},
 {'GO': 'GO:0018995', 'ID': 'Host'},
 {'GO': 'GO:0043657', 'ID': 'Host cell'},
 {'ID': 'Host chloroplast envelope'},
 {'ID': 'Host peroxisome'},
 {'GO': 'GO:0042566', 'ID': 'Hydrogenosome'},
 {'GO': 'GO:0005811', 'ID': 'Lipid droplet'},
 {'GO': 'GO:0005764', 'ID': 'Lysosome'},
 {'GO': 'GO:0042470', 'ID': 'Melanosome'},
 {'GO': 'GO:0016020', 'ID': 'Membrane'},
 {'GO': 'GO:0030496', 'ID': 'Midbody'},
 {'GO': 'GO:0005739', 'ID': 'Mitochondrion'},
 {'GO': 'GO:0032047', 'ID': 'Mitosome'},
 {'GO': 'GO:0042151', 'ID': 'Nematocyst'},
 {'GO': 'GO:0033009', 'ID': 'Nucleomorph'},
 {'GO': 'GO:0005634', 'ID': 'Nucleus'},
 {'GO': 'GO:0020003', 'ID': 'Parasitophorous vacuole'},
 {'GO': 'GO:0043204', 'ID': 'Perikaryon'},
 {'GO': 'GO:0005777', 'ID': 'Peroxisome'},
 {'GO': 'GO:0001917', 'ID': 'Photoreceptor inner segment'},
 {'GO': 'GO:0009536', 'ID': 'Plastid'},
 {'GO': 'GO:0000407', 'ID': 'Preautophagosomal structure'},
 {'GO': 'GO:0005770', 'ID': 'Prevacuolar compartment'},
 {'GO': 'GO:0042763', 'ID': 'Prospore'},
 {'GO': 'GO:0005622', 'ID': 'Protoplasm'},
 {'GO': 'GO:0016529', 'ID': 'Sarcoplasmic reticulum'},
 {'GO': 'GO:0005576', 'ID': 'Secreted'},
 {'ID': 'Spore core'},
 {'GO': 'GO:0044099', 'ID': 'Spore polar tube'},
 {'GO': 'GO:0031160', 'ID': 'Spore wall'},
 {'GO': 'GO:0043659', 'ID': 'Symbiosome'},
 {'GO': 'GO:0055044', 'ID': 'Symplast'},
 {'GO': 'GO:0044216', 'ID': 'Target cell'},
 {'ID': 'Tegument'},
 {'GO': 'GO:0009579', 'ID': 'Thylakoid'},
 {'GO': 'GO:0055039', 'ID': 'Trichocyst'},
 {'GO': 'GO:0005773', 'ID': 'Vacuole'},
 {'GO': 'GO:0019012', 'ID': 'Virion'}]

In [ ]:
# Parse the flat file into one dict per record (this repeats the parsing
# step from the first cell of the notebook).
# Lines look like "CC   value"; a line starting with '//' ends a record.
# Only the line codes ID, HI, HP and GO are kept; if a code occurs several
# times in one record, the last occurrence wins.
locations = []
temp_dict = {}
with open('/home/martin/Skrivbord/subcell.txt', 'r') as f:
    for line in f:
        line_code = line[0:2]
        if line_code == '//':
            # Record terminator: store what was collected and start afresh.
            if temp_dict:
                locations.append(temp_dict)
            temp_dict = {}
            continue
        if line_code not in ('ID', 'HI', 'HP', 'GO'):
            continue
        # maxsplit=1 keeps values that themselves contain three spaces intact.
        fields = line.split('   ', 1)
        if len(fields) < 2:
            # No value on this line. Skip it instead of silently reusing the
            # value left over from a previous line.
            continue
        value = fields[1].strip()
        if line_code == 'GO':
            # Keep only the part before the first ';'.
            temp_dict['GO'] = value.split(';')[0].strip()
        else:
            # Drop the terminating period only when it is actually there,
            # rather than blindly cutting the last two characters.
            temp_dict[line_code] = value[:-1] if value.endswith('.') else value

# Keep a final record even if the file does not end with '//'.
if temp_dict:
    locations.append(temp_dict)
    temp_dict = {}