In [1]:
# Parse data/sectors.txt as comma-separated rows and collect
# (second field, first field) pairs, both lower-cased.
with open('data/sectors.txt') as sectors_file:
    sector_rows = [line.strip().split(',') for line in sectors_file]

# The pair built from the file's first line is dropped from the result.
brand_cat = [(fields[1].lower(), fields[0].lower()) for fields in sector_rows][1:]
In [ ]:
# Dictionary view of data/sectors.txt: lower-cased second field -> lower-cased
# first field (presumably brand handle -> sector; confirm against the file).
#
# The previous version of this cell bound `brand_cat` to a dict and then called
# `.append(...)` and sliced it with `[1:]`; a dict supports neither, so the cell
# raised on a fresh kernel. The mapping is stored under its own name so that
# `brand_cat` keeps being the list of pairs that other cells index with `x[0]`.
brand_cat_lookup = {}
with open('data/sectors.txt') as data_file:
    # Skip the first line, mirroring the `[1:]` slice used for the list version.
    next(data_file, None)
    for row in data_file:
        data = row.strip().split(',')
        # If a key occurs on several lines, the last line wins.
        brand_cat_lookup[data[1].lower()] = data[0].lower()
In [2]:
# Take the first whitespace-delimited token of every line in the followers
# file, lower-case it, and keep the resulting list in sorted order.
with open('data/brand_followers_corrected_full.tsv') as followers_file:
    brand_names = sorted(
        line.strip().split()[0].lower() for line in followers_file
    )
In [3]:
# Distinct first elements of the pairs stored in brand_cat.
brands_sect = {pair[0] for pair in brand_cat}
# Distinct entries of brand_names.
brand_data = set(brand_names)
In [4]:
# One entry per line of data/correct_brand_names.txt with surrounding
# whitespace stripped; duplicates collapse because the result is a set.
with open('data/correct_brand_names.txt') as names_file:
    brand_in_progress = {line.strip() for line in names_file}
In [8]:
# Names that are subtracted from the set differences computed in later cells.
exclude = {
    'acne_free',
    'acura_insider',
    'airwickus',
    'ancientminerals',
    'bachflowerdfn',
    'biothermusa',
    'goldpeaktea',
}
In [9]:
# How many names in brands_sect appear in none of the other three sets.
len(brands_sect.difference(brand_data, brand_in_progress, exclude))
Out[9]:
In [10]:
# Print, in sorted order, every name in brands_sect that is absent from
# brand_data, brand_in_progress and exclude.
unmatched = brands_sect.difference(brand_data, brand_in_progress, exclude)
for name in sorted(unmatched):
    print(name)
In [7]:
# Names present in brand_data that do not occur in brands_sect.
brand_data.difference(brands_sect)
Out[7]:
In [50]:
import time
from datetime import datetime, timedelta

import numpy as np

# Rough micro-benchmark: time 8000 calls to np.arange(500).
#
# time.perf_counter() is used instead of datetime.now() because the wall clock
# can be adjusted while the loop runs, whereas perf_counter() is a monotonic,
# high-resolution timer meant for measuring short durations.
start = time.perf_counter()
for _ in range(8000):
    x = np.arange(500)

# Wrap the float seconds in a timedelta so the printed value keeps the same
# H:MM:SS.ffffff format that subtracting two datetimes produced.
elapsed = timedelta(seconds=time.perf_counter() - start)
print('elapsed', elapsed)
In [ ]:
In [ ]: