In [1]:
import re, json
from BeautifulSoup import *
from pprint import pprint
from selenium import webdriver
In [2]:
browser = webdriver.Chrome()
url = "https://www.buy.am/en/partners"
browser.get(url)
In [3]:
browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
In [4]:
page = browser.page_source
soup = BeautifulSoup(page)
browser.close()
In [5]:
my_dict = {}
sections = soup.findAll("h2",attrs={'style':'text-transform:uppercase'})
for section in sections:
unordered = section.findNext("ul")
elements = unordered.findAll("li")
partner_dict = {}
for element in elements:
for_partner_name = element.find("a").get("href")
if element.find("script") is not None:
almost_partner_name = element.findChild().text
partner_name = re.findall('console.+"(.+)".+',almost_partner_name)
partner_link = element.findAll("a")[1].get("href")
partner_dict.update({partner_name[0]:partner_link})
else:
partner_name = re.findall('^.+/(.+)',for_partner_name)
partner_link = element.find('a').get("href")
partner_dict.update({partner_name[0]:partner_link})
section_dict = {"number_of_partners":len(partner_dict),"partners":partner_dict}
my_dict.update({section.text:section_dict})
pprint(my_dict)
{u'Armenian products': {'number_of_partners': 58,
'partners': {u'Armphone': u'/en/partner/armphone',
u'LKR': u'/en/partner/lkr',
u'Margo': u'/en/partner/margo',
u'aco-by-astghik-safaryan': u'/en/partner/aco-by-astghik-safaryan',
u'aeon-handmade': u'/en/partner/aeon-handmade',
u'agata': u'/en/partner/agata',
u'aj-jewelry': u'/en/partner/aj-jewelry',
u'amore': u'/en/partner/amore',
u'ararat': u'/en/partner/ararat',
u'ararta': u'/en/partner/ararta',
u'armad': u'/en/partner/armad',
u'armenia-in-puzzle': u'/en/partner/armenia-in-puzzle',
u'armine-tumanian': u'/en/partner/armine-tumanian',
u'artsocks': u'/en/partner/artsocks',
u'aylkerp': u'/en/partner/aylkerp',
u'beauty-products': u'/en/partner/beauty-products',
u'bebetta': u'/en/partner/bebetta',
u'beeart': u'/en/partner/beeart',
u'berdashen': u'/en/partner/berdashen',
u'blot-club': u'/en/partner/blot-club',
u'borsaa-jewellery': u'/en/partner/borsaa-jewellery',
u'bow-x': u'/en/partner/bow-x',
u'chernozem': u'/en/partner/chernozem',
u'christins-handmade-boutique': u'/en/partner/christins-handmade-boutique',
u'dark-black': u'/en/partner/dark-black',
u'dee-am': u'/en/partner/dee-am',
u'esco-pharm': u'/en/partner/esco-pharm',
u'ethnos': u'/en/partner/ethnos',
u'forty-47-seven': u'/en/partner/forty-47-seven',
u'go-green-armenia': u'/en/partner/go-green-armenia',
u'haykav': u'/en/partner/haykav',
u'hello': u'/en/partner/hello',
u'honey-am': u'/en/partner/honey-am',
u'ideas': u'/en/partner/ideas',
u'jena-art': u'/en/partner/jena-art',
u'karabakh': u'/en/partner/karabakh',
u'little-einstein': u'/en/partner/little-einstein',
u'madatov': u'/en/partner/madatov',
u'mag-honey': u'/en/partner/mag-honey',
u'maran-winery': u'/en/partner/maran-winery',
u'mkhitaryans': u'/en/partner/mkhitaryans',
u'nena': u'/en/partner/nena',
u'nm': u'/en/partner/nm',
u'nuri-design': u'/en/partner/nuri-design',
u'papier-darmenie': u'/en/partner/papier-darmenie',
u'plywood-design': u'/en/partner/plywood-design',
u'ritea': u'/en/partner/ritea',
u's-luxury': u'/en/partner/s-luxury',
u'single-item': u'/en/partner/single-item',
u'soap-art-studio': u'/en/partner/soap-art-studio',
u'stefani-van': u'/en/partner/stefani-van',
u'teco': u'/en/partner/teco',
u'tm-production': u'/en/partner/tm-production',
u'umba-handmade': u'/en/partner/umba-handmade',
u'wood-style-design': u'/en/partner/wood-style-design',
u'yayak': u'/en/partner/yayak',
u'you-me': u'/en/partner/you-me',
u'zettawood': u'/en/partner/zettawood'}},
u'Clothes / Accessories': {'number_of_partners': 24,
'partners': {u'Elegant': u'/en/partner/elegant',
u'LKR': u'/en/partner/lkr',
u'Margo': u'/en/partner/margo',
u'XL Shop': u'/en/partner/xl-shop',
u'aco-by-astghik-safaryan': u'/en/partner/aco-by-astghik-safaryan',
u'amore': u'/en/partner/amore',
u'ararta': u'/en/partner/ararta',
u'armine-tumanian': u'/en/partner/armine-tumanian',
u'artsocks': u'/en/partner/artsocks',
u'blot-club': u'/en/partner/blot-club',
u'bow-x': u'/en/partner/bow-x',
u'christins-handmade-boutique': u'/en/partner/christins-handmade-boutique',
u'combat': u'/en/partner/combat',
u'ethnos': u'/en/partner/ethnos',
u'franzoni': u'/en/partner/franzoni',
u'hello': u'/en/partner/hello',
u'jena-art': u'/en/partner/jena-art',
u'live-and-wear': u'/en/partner/live-and-wear',
u'megasport': u'/en/partner/megasport',
u'mini-raxevsky': u'/en/partner/mini-raxevsky',
u'mkhitaryans': u'/en/partner/mkhitaryans',
u'pavlovoposadskaja': u'/en/partner/pavlovoposadskaja',
u'uniform': u'/en/partner/uniform',
u'you-me': u'/en/partner/you-me'}},
u'Jewelry / Watches': {'number_of_partners': 14,
'partners': {u'aco-by-astghik-safaryan': u'/en/partner/aco-by-astghik-safaryan',
u'aeon-handmade': u'/en/partner/aeon-handmade',
u'aj-jewelry': u'/en/partner/aj-jewelry',
u'armad': u'/en/partner/armad',
u'armine-tumanian': u'/en/partner/armine-tumanian',
u'borsaa-jewellery': u'/en/partner/borsaa-jewellery',
u'calypso-silver-accessories': u'/en/partner/calypso-silver-accessories',
u'christins-handmade-boutique': u'/en/partner/christins-handmade-boutique',
u'newest': u'/en/partners/newest',
u'plywood-design': u'/en/partner/plywood-design',
u's-luxury': u'/en/partner/s-luxury',
u'siamoods': u'/en/partner/siamoods',
u'soap-art-studio': u'/en/partner/soap-art-studio',
u'time': u'/en/partner/time'}},
u"Kids' World": {'number_of_partners': 21,
'partners': {u'barbie': u'/en/partner/barbie',
u'boo-be': u'/en/partner/boo-be',
u'fancy': u'/en/partner/fancy',
u'first-builders-mega-bloks': u'/en/partner/first-builders-mega-bloks',
u'fisher-price': u'/en/partner/fisher-price',
u'happy-land': u'/en/partner/happy-land',
u'hot-wheels': u'/en/partner/hot-wheels',
u'little-einstein': u'/en/partner/little-einstein',
u'mega-bloks': u'/en/partner/mega-bloks',
u'megatoys': u'/en/partner/megatoys',
u'mini-raxevsky': u'/en/partner/mini-raxevsky',
u'monster-high': u'/en/partner/monster-high',
u'nena': u'/en/partner/nena',
u'newest': u'/en/partners/newest',
u'nuri-design': u'/en/partner/nuri-design',
u'orange-elephant': u'/en/partner/orange-elephant',
u'sims-club': u'/en/partner/sims-club',
u'umba-handmade': u'/en/partner/umba-handmade',
u'vesna-fabrika-igrushek': u'/en/partner/vesna-fabrika-igrushek',
u'walt-disney': u'/en/partner/walt-disney',
u'zettawood': u'/en/partner/zettawood'}},
u'Shoes': {'number_of_partners': 6,
'partners': {u'bebetta': u'/en/partner/bebetta',
u'camel-sandals': u'/en/partner/camel-sandals',
u'combat': u'/en/partner/combat',
u'dark-black': u'/en/partner/dark-black',
u'megasport': u'/en/partner/megasport',
u'nm': u'/en/partner/nm'}},
u'Sports': {'number_of_partners': 4,
'partners': {u'dark-black': u'/en/partner/dark-black',
u'megasport': u'/en/partner/megasport',
u'newest': u'/en/partners/newest',
u'sportlandia': u'/en/partner/sportlandia'}}}
In [6]:
with open("buy.json",'w') as f:
json.dump(my_dict,f,indent=4)
Content source: HrantDavtyan/Data_Scraping
Similar notebooks: