In [2]:
import requests
from bs4 import BeautifulSoup
import re
import json
import pickle
import datetime
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import wikipedia

In [314]:
binary = FirefoxBinary(r'/usr/bin/firefox')
caps = DesiredCapabilities.FIREFOX.copy()
#Set ‘marionette’ browser to True
caps['marionette'] = True
#Launch the Firefox instance by specifying the geckodriver executable path
#driver = webdriver.Firefox(firefox_binary=binary,capabilities=caps)
#driver.wait = WebDriverWait(driver, 5)

In [242]:
def formatOriginalNameToWikiName(originalname):
    """
    return goodname if we return a better format from wiki
    if not we just return empty string
    we need this function,since if not formated yet, most of the time when you search wiki with the bad 
    name in the url it will return nothing.
    """
    wikipedia.set_lang("en")
    allWikiResults=wikipedia.search(originalname)
    if(len(allWikiResults)==0):
        return ""
    else:
        return str(wikipedia.search(originalname)[0])

In [317]:
#inorder to be parallel we need to pass in a new driver
def MakeSeleniumToSearchWithOriginalName(originalname,driver):
    wikiname=formatOriginalNameToWikiName(originalname)
    nametosearch=originalname if(wikiname=="") else wikiname
    driver.get("https://en.wikipedia.org/wiki/"+str(nametosearch))
    result={}
    result["findresult"]={}
    result["nofind"]={}
    try:
        elem = driver.find_element_by_css_selector(".interwiki-ar a")
        """
        these two lines of code needs to run before elem.click(), since it will goto
        another page never find it any more.
        """
        tempdic={}
        #print("me"+str(elem.get_attribute("href")))
        tempdic["arurl"]=str(elem.get_attribute("href"))
        elem.click()
        tempdic["originalname"]=originalname
        tempdic["wikiname"]=wikiname
        firstheading=driver.find_element_by_id("firstHeading")
        #arabic is from left to right that why u need to get the first one that returns.
        tempdic["arname"]=firstheading.text.split("\n")[0]
        #print(tempdic["arname"])
        result["findresult"]=tempdic
    except Exception as e:
        #print(e)
        tempno={}
        tempno["originalname"]=originalname
        result["nofind"]=tempno
        pass
    return result

In [285]:
def clean_line(line):
    # Take out extra space, underscores, comments, etc.
    cleaned = re.sub("_* .+", "", line).strip()
    cleaned = re.sub("_$", "", cleaned, flags=re.MULTILINE)
    return cleaned

def ingest_dictionary(dict_path):
    """
    Read in the country (or other) actor dictionaries.
    """
    with open(dict_path) as f:
        country_file = f.read()
    split_file = country_file.split("\n")
    
    dict_dict = []
    key_name = ""
    alt_names = [] 
    roles = []

    for line in split_file:
        if not line:
            pass
        elif line[0] == "#":
            pass
        elif re.match("[A-Z]", line[0]):
            # handle the previous
            entry = {"actor_en" : key_name,
                    "alt_names_en" : alt_names,
                    "roles" : roles}
            dict_dict.append(entry)
            # zero everything out
            alt_names = []
            roles = []
            # make new key name
            key_name = clean_line(line)
            # check to see if the role is built in
            if bool(re.search("\[[A-Z]{3}\]", line)):
                roles = re.findall("\[(.+?)\]", line)
        elif line[0] == "+":
            cleaned = clean_line(line[1:])
            alt_names.append(cleaned)
        elif re.match("\s", line):
            roles.append(line.strip())
    return dict_dict 
dp = "./Phoenix.Countries.actors.txt"
dict_dict = ingest_dictionary(dp)

In [233]:
len(dict_dict)


Out[233]:
18390

In [315]:
def buildMultiLanguageActorDictionary(dict_dict):
    finalResult={}
    finalResult["goodones"]=[]
    finalResult["badones"]=[]
    driver = webdriver.Firefox(firefox_binary=binary,capabilities=caps)
    driver.wait = WebDriverWait(driver, 5)
    for item in dict_dict:
        originalname=item["actor_en"]
        if(originalname!=""):
            temp=MakeSeleniumToSearchWithOriginalName(originalname,driver)
            if(temp["findresult"]):
                finalResult["goodones"].append(temp["findresult"])
            else:
                finalResult["badones"].append(temp["nofind"])
    return finalResult

In [310]:
len(dict_dict)


Out[310]:
18390

In [318]:
from multiprocessing import Process

def func1():
      print('driver 1 start')
      rst1=buildMultiLanguageActorDictionary(dict_dict[0:40])
      wholeresults.append(rst1)
      print('driver 1 finish')

def func2():
      print('driver 2 start')
      rst2=buildMultiLanguageActorDictionary(dict_dict[40:80])
      wholeresults.append(rst2)
      print('driver 2 finish')
def func3():
      print('driver 3 start')
      rst3=buildMultiLanguageActorDictionary(dict_dict[80:120])
      wholeresults.append(rst3)
      print('driver 3 finish')
def func4():
      print('driver 4 start')
      rst4=buildMultiLanguageActorDictionary(dict_dict[120:160])#len(dict_dict)])
      wholeresults.append(rst4)
      print('driver 4 finish')
        

if __name__ == '__main__':
      wholeresults=[]
      p1 = Process(target=func1)
      p1.start()
      p2 = Process(target=func2)
      p2.start()
      p3= Process(target=func3)
      p3.start()
      p4 = Process(target=func4)
      p4.start()
      
      p1.join()
      p2.join()
      p3.join()
      p4.join()


driver 1 start
driver 2 start
driver 3 start
driver 4 start
Process Process-7:
Process Process-9:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/usr/lib/python3.5/socket.py", line 732, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
socket.gaierror: [Errno -3] Temporary failure in name resolution
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/usr/lib/python3.5/socket.py", line 732, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):

During handling of the above exception, another exception occurred:

socket.gaierror: [Errno -3] Temporary failure in name resolution
Traceback (most recent call last):

During handling of the above exception, another exception occurred:

  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 600, in urlopen
    chunked=chunked)
Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 356, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 600, in urlopen
    chunked=chunked)
  File "/usr/lib/python3.5/http/client.py", line 1106, in request
    self._send_request(method, url, body, headers)
  File "/usr/lib/python3.5/http/client.py", line 1151, in _send_request
    self.endheaders(body)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 356, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/usr/lib/python3.5/http/client.py", line 1102, in endheaders
    self._send_output(message_body)
  File "/usr/lib/python3.5/http/client.py", line 1106, in request
    self._send_request(method, url, body, headers)
  File "/usr/lib/python3.5/http/client.py", line 934, in _send_output
    self.send(msg)
  File "/usr/lib/python3.5/http/client.py", line 1151, in _send_request
    self.endheaders(body)
  File "/usr/lib/python3.5/http/client.py", line 877, in send
    self.connect()
  File "/usr/lib/python3.5/http/client.py", line 1102, in endheaders
    self._send_output(message_body)
  File "/usr/lib/python3.5/http/client.py", line 934, in _send_output
    self.send(msg)
  File "/usr/lib/python3.5/http/client.py", line 877, in send
    self.connect()
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 166, in connect
    conn = self._new_conn()
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 166, in connect
    conn = self._new_conn()
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 150, in _new_conn
    self, "Failed to establish a new connection: %s" % e)
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7ff21fac1898>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 649, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/retry.py", line 388, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=GIOVANNI_SPADOLINI (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21fac1898>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 150, in _new_conn
    self, "Failed to establish a new connection: %s" % e)
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7ff21fab1278>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:


During handling of the above exception, another exception occurred:

Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 649, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "<ipython-input-318-da492f6c002b>", line 16, in func3
    rst3=buildMultiLanguageActorDictionary(dict_dict[8000:12000])
  File "<ipython-input-315-5b46402ef819>", line 10, in buildMultiLanguageActorDictionary
    temp=MakeSeleniumToSearchWithOriginalName(originalname,driver)
  File "<ipython-input-317-df5ad56cea67>", line 3, in MakeSeleniumToSearchWithOriginalName
    wikiname=formatOriginalNameToWikiName(originalname)
  File "<ipython-input-242-916760ec7eb8>", line 9, in formatOriginalNameToWikiName
    allWikiResults=wikipedia.search(originalname)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/util.py", line 28, in __call__
    ret = self._cache[key] = self.fn(*args, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 103, in search
    raw_results = _wiki_request(search_params)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 737, in _wiki_request
    r = requests.get(API_URL, params=params, headers=headers)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/retry.py", line 388, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=LIAMINE_Z%1AROUAL (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21fab1278>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 502, in request
    resp = self.send(prep, **send_kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-318-da492f6c002b>", line 5, in func1
    rst1=buildMultiLanguageActorDictionary(dict_dict[0:4000])
  File "<ipython-input-315-5b46402ef819>", line 10, in buildMultiLanguageActorDictionary
    temp=MakeSeleniumToSearchWithOriginalName(originalname,driver)
  File "<ipython-input-317-df5ad56cea67>", line 3, in MakeSeleniumToSearchWithOriginalName
    wikiname=formatOriginalNameToWikiName(originalname)
  File "<ipython-input-242-916760ec7eb8>", line 9, in formatOriginalNameToWikiName
    allWikiResults=wikipedia.search(originalname)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/util.py", line 28, in __call__
    ret = self._cache[key] = self.fn(*args, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 103, in search
    raw_results = _wiki_request(search_params)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 612, in send
    r = adapter.send(request, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 737, in _wiki_request
    r = requests.get(API_URL, params=params, headers=headers)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 504, in send
    raise ConnectionError(e, request=request)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 502, in request
    resp = self.send(prep, **send_kwargs)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=GIOVANNI_SPADOLINI (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21fac1898>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 612, in send
    r = adapter.send(request, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 504, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=LIAMINE_Z%1AROUAL (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21fab1278>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))
Process Process-10:
Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/usr/lib/python3.5/socket.py", line 732, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 600, in urlopen
    chunked=chunked)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 356, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/usr/lib/python3.5/http/client.py", line 1106, in request
    self._send_request(method, url, body, headers)
  File "/usr/lib/python3.5/http/client.py", line 1151, in _send_request
    self.endheaders(body)
  File "/usr/lib/python3.5/http/client.py", line 1102, in endheaders
    self._send_output(message_body)
  File "/usr/lib/python3.5/http/client.py", line 934, in _send_output
    self.send(msg)
  File "/usr/lib/python3.5/http/client.py", line 877, in send
    self.connect()
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 166, in connect
    conn = self._new_conn()
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 150, in _new_conn
    self, "Failed to establish a new connection: %s" % e)
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7ff21fac11d0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 649, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/retry.py", line 388, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=DAVID_OSBORNE_HAY (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21fac11d0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-318-da492f6c002b>", line 21, in func4
    rst4=buildMultiLanguageActorDictionary(dict_dict[12000:len(dict_dict)])
  File "<ipython-input-315-5b46402ef819>", line 10, in buildMultiLanguageActorDictionary
    temp=MakeSeleniumToSearchWithOriginalName(originalname,driver)
  File "<ipython-input-317-df5ad56cea67>", line 3, in MakeSeleniumToSearchWithOriginalName
    wikiname=formatOriginalNameToWikiName(originalname)
  File "<ipython-input-242-916760ec7eb8>", line 9, in formatOriginalNameToWikiName
    allWikiResults=wikipedia.search(originalname)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/util.py", line 28, in __call__
    ret = self._cache[key] = self.fn(*args, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 103, in search
    raw_results = _wiki_request(search_params)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 737, in _wiki_request
    r = requests.get(API_URL, params=params, headers=headers)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 502, in request
    resp = self.send(prep, **send_kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 612, in send
    r = adapter.send(request, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 504, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=DAVID_OSBORNE_HAY (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21fac11d0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))
Process Process-8:
Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/usr/lib/python3.5/socket.py", line 732, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 600, in urlopen
    chunked=chunked)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 356, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/usr/lib/python3.5/http/client.py", line 1106, in request
    self._send_request(method, url, body, headers)
  File "/usr/lib/python3.5/http/client.py", line 1151, in _send_request
    self.endheaders(body)
  File "/usr/lib/python3.5/http/client.py", line 1102, in endheaders
    self._send_output(message_body)
  File "/usr/lib/python3.5/http/client.py", line 934, in _send_output
    self.send(msg)
  File "/usr/lib/python3.5/http/client.py", line 877, in send
    self.connect()
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 166, in connect
    conn = self._new_conn()
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connection.py", line 150, in _new_conn
    self, "Failed to establish a new connection: %s" % e)
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7ff21f9c9e48>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/connectionpool.py", line 649, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/urllib3/util/retry.py", line 388, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=GUILLERMO_RODRIGUEZ_LARA (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21f9c9e48>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-318-da492f6c002b>", line 11, in func2
    rst2=buildMultiLanguageActorDictionary(dict_dict[4000:8000])
  File "<ipython-input-315-5b46402ef819>", line 10, in buildMultiLanguageActorDictionary
    temp=MakeSeleniumToSearchWithOriginalName(originalname,driver)
  File "<ipython-input-317-df5ad56cea67>", line 3, in MakeSeleniumToSearchWithOriginalName
    wikiname=formatOriginalNameToWikiName(originalname)
  File "<ipython-input-242-916760ec7eb8>", line 9, in formatOriginalNameToWikiName
    allWikiResults=wikipedia.search(originalname)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/util.py", line 28, in __call__
    ret = self._cache[key] = self.fn(*args, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 103, in search
    raw_results = _wiki_request(search_params)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/wikipedia/wikipedia.py", line 737, in _wiki_request
    r = requests.get(API_URL, params=params, headers=headers)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 502, in request
    resp = self.send(prep, **send_kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/sessions.py", line 612, in send
    r = adapter.send(request, **kwargs)
  File "/home/yan/python_vir/env/lib/python3.5/site-packages/requests/adapters.py", line 504, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='en.wikipedia.org', port=80): Max retries exceeded with url: /w/api.php?action=query&srlimit=10&limit=10&srprop=&format=json&list=search&srsearch=GUILLERMO_RODRIGUEZ_LARA (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff21f9c9e48>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))

In [3]:
wikipedia.set_lang("ar")

In [4]:
test=wikipedia.search("الاونة")

In [5]:
test


Out[5]:
['أوني',
 'أون',
 'بورغ أون بريس',
 'أون إي',
 'أمبيريو-أون-بوغي (أين)',
 'إوني (كاليفورنيا)',
 'فوكس-أون-بوغيي (أين)',
 'أمبيريو-أون-دومب (أين)',
 'تشات أون',
 'شامانيا-أون-فالرومي (أين)']
from googleapiclient.discovery import build import pprint my_api_key = "AIzaSyBBulleVoiDN9i8NITQqH_BUNGgyWX-nmA" my_cse_id = "003461024781403571159:p4qrcenq1l0" def google_search(search_term, api_key, cse_id, **kwargs): service = build("customsearch", "v1", developerKey=api_key) res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute() #return res['spelling']['correctedQuery'] # return res['item'] return res['items'] results = google_search( 'SIBGHATULLAH_MOJADEDI', my_api_key, my_cse_id, num=1) for result in results: pprint.pprint(result['formattedUrl']) #print(results)
wikipedia.set_lang("en") test=wikipedia.page("SIBGHATULLAH_MOJADEDI").html() soup=BeautifulSoup(test,'lxml') hi=soup.find("li",{"class":"interwiki-ar"})

In [ ]:
wikipedia.page("OBAMA").references

In [10]:
ny = wikipedia.page("New York")
#ny.title
#ny.url
#ny.links[0]
#wikipedia.set_lang("en")
wikipedia.summary("افغانستان", sentences=2)


Out[10]:
'أفغانستان، رسمياً جمهورية أفغانستان الإسلامية هي دولة تقع في آسيا الوسطى تحدها كل من طاجكستان وأوزبكستان وتركمانستان من الشمال وإيران من الغرب والصين من الشرق فيما تحدها باكستان من الجنوب. ومعنى كلمة "أفغانستان" هو أرض الأفغان، وتعتبر إحدى نقاط الاتصال القديمة لطريق الحرير والهجرات البشرية السابقة.'

In [ ]: