Find words in the report file and count their occurrences



In [1]:

    
from collections import Counter



In [2]:

    
# Path of the text report whose words are counted further down.
filename = 'report_8_nonALMACAL_priority.txt'



In [3]:

    
# Tally how many times each whitespace-separated token occurs in the report.
# Tokens are streamed line by line; a token can never span a line break,
# so this yields the same tokens, in the same order, as splitting the
# whole file contents at once.
with open(filename, mode='r') as ifile:
    wordcount = Counter(
        token
        for line in ifile
        for token in line.split()
    )

Our current data (mostly only split so far, not yet reduced)



In [4]:

    
# Names of the objects in our current data. The list is kept exactly as
# collected, so a few names appear more than once (see `duplicate` below).
current = [
    '3c454.3', 'J0006-0623', 'J0137+3309', 'J0211+1051', 'J0237+2848',
    'J0241-0815', 'J0334-4008', 'J0440+2728', 'J0517-0520', 'J0538-4405',
    'J0730-1141', 'J1037-2934', 'J1159+2914', 'J1449-004', 'J2232+1143',
    'J0057-0024', 'J0138-0540', 'J0215-0222', 'J0238+166', 'J0301+0118',
    'J0339-0133', 'J0426+0518', 'J0501-0159', 'J0521+1638', 'J0541-0541',
    'J0750+1231', 'J1048-1909', 'J1225+1253', 'J1550+0527', 'J2258-2758',
    'J0108+0135', 'J0141-0202', 'J0219+0120', 'J0239-0234', 'J0309+1029',
    'J0339-0146', 'J0427-0700', 'J0509+1806', 'J0522-3627', 'J0604+2429',
    'J1008+0621', 'J1058+0133', 'J1229+0203', 'J1650+0824', 'J0121+1149',
    'J0149+0555', 'J0224+0659', 'J0239+0416', 'J0327+0044', 'J0423-0120',
    'J0438+3004', 'J0510+180', 'J0532-0307', 'J0607-0834', 'J1011-0423',
    'J1146+3958', 'J1337-1257', 'J2148+0657',
    'J0538-4405', 'J0747-3310', 'J0922-3959', 'J1833-2103', 'J2148+0657',
    'J0701-4634', 'J0828-3731', 'J1832-2039', 'J1924-2914', 'J2258-2758',
]

# Names listed more than once in `current`, in sorted order. Derived from
# `current` rather than typed by hand so it cannot fall out of sync when
# the list above is edited.
duplicate = sorted(
    name for name, hits in Counter(current).items() if hits > 1
)

Objects in the report that are already part of the data we have taken



In [5]:

    
# Collect the distinct words from the report that also appear in `current`.
# Membership is tested against a set so each lookup is a hash probe rather
# than a scan of the whole `current` list for every word in the file.
current_lookup = set(current)
already = [word for word in wordcount if word in current_lookup]

# Print each match on its own line, in the order the words were first
# counted; nothing is printed when there are no matches.
for item in already:
    print(item)



In [ ]: