In [15]:
import urllib
import numpy as np
# Search-results URL for the query "Natural Language Processing".
job_list_base_url = 'https://www.indeed.com/jobs?q=Natural+Language+Processing'

# Values for the `start` query parameter: 10, 20, ..., 1000.
starts = np.arange(10, 1010, 10)

# The first entry is the bare search URL (no `start` parameter); every
# following entry appends one of the offsets above.
job_list_urls = [job_list_base_url]
job_list_urls.extend(
    '{}&start={}'.format(job_list_base_url, offset) for offset in starts
)

# Preview the first few URLs as the cell output.
job_list_urls[:5]
Out[15]:
['https://www.indeed.com/jobs?q=Natural+Language+Processing',
'https://www.indeed.com/jobs?q=Natural+Language+Processing&start=10',
'https://www.indeed.com/jobs?q=Natural+Language+Processing&start=20',
'https://www.indeed.com/jobs?q=Natural+Language+Processing&start=30',
'https://www.indeed.com/jobs?q=Natural+Language+Processing&start=40']
In [17]:
import urllib.request
In [31]:
In [28]:
In [ ]:
In [40]:
from bs4 import BeautifulSoup
In [41]:
# Fetch the first search-results page and parse it with the lxml parser.
# The response is opened in a `with` block so the underlying HTTP connection
# is closed as soon as the markup has been handed to BeautifulSoup, instead of
# being left open until garbage collection.
with urllib.request.urlopen(job_list_base_url) as response:
    pg = BeautifulSoup(response, "lxml")
In [44]:
# List the elements of the parsed page whose `class` attribute is the string
# "jobtitle turnstileLink" (the job-title links shown in the output below).
pg.find_all(class_='jobtitle turnstileLink')
Out[44]:
[<a class="jobtitle turnstileLink" data-tn-element="jobTitle" href="/pagead/clk?mo=r&ad=-6NYlbfkN0DAgAc92q6iRaEfcJAuaSGKVeICOvW3P0dpzElS8ir45AVr2KnXyohmCH6uVD67Lw5E64Md99dDeRaYTl6qtLkPZ9v1uE2M91Fjw_9FP1MmpotvjNTqVoyIVQqxfSI9jCHw6FlFXSs2-0gHUP-4RK6DYwOCEa0NAGlmrl8gVExJGC1e1RiR1RKmWEeL9m68eLF4d_-_mrygE2Wu39_ueEgEDT0M-YJ4mLNATZ4BtRIttVcil9uqNJJofcH408zSciI-OCyF7LZEbUOfyeWDvFHkse8FCawMbfl-fkdZulGhvLVXB7Yy7p4NeXInFUQ6QecBc4EkhTyy42aXcETr0cREOnInoBzp-sGoQdVibGgC_rXDTo8Ba4XOK5dQW2VZukVFuy-qCO-0ZQ==&p=1&sk=&fvj=0" id="sja1" onclick="setRefineByCookie([]); sjoc('sja1',0); convCtr('SJ', pingUrlsForGA)" onmousedown="sjomd('sja1'); clk('sja1');" rel="noopener noreferrer nofollow" target="_blank" title="Data Scientist, Natural Language Processing">Data Scientist, <b>Natural</b> <b>Language</b> <b>Processing</b></a>,
<a class="jobtitle turnstileLink" data-tn-element="jobTitle" href="/pagead/clk?mo=r&ad=-6NYlbfkN0Ab67y_gTDH9tSaT0HLOcX4Q3W4UsL2WfVRyJV-qqww-YJ_ltwPKUIOQtlXfcv8uV2CRBX2DQYg6vJA7IJKdNdheFY2-hitiQRVaB-eifqs62egZeaTdzkc_q6rLl-vsVasJBbm2bfzsZ2e6AIqggwb_fKDYdcXk_MsSCP3hBCUf9gYrR3LoW-kmb6frW4xt4gc5KB1zUHttoKgxLTsAQD1u82_-Ya4t5c1PEIcpB1Q7MTzlmuPy8BVyKc9kFMOuJr404237IA6r2veFFj-4hhEuN25TcBuR7wszt0eeykzFGOil5v-MptlyNbX8ZOelHTqT6ZKsoaUy-Aj_niISeVFddyeCQUrdpmBNTGj7ntqCIacAE8NO2FS8XTpIIlS2PKWbN_jmVXdDFXrjRgd4dzh0lb_MzvS8sqEHzPftqVt_krXk1d-cmAhrPVVn9yeXZ4j9eRO9_7Xu7l-JpN7NZ7R-9js_XC_53febb-9ZvQcd8vMxfD12Vfjwdy0uXPyENSZTk3XFD4pEJyFxwoAIkmlZWaQtvMdKuBdk0YXkEFExTYFtsp_uFzpnnUkvT54PXSo5iYKUJPO6vg6NQw44Pb6jMAbwCanYQLoNCAPc4oRMVsIa6WBGyJzarMYSuga24TsvpbDf0f1Uwv7so_qwbOedq57BehHBBIqYkUlsimrJm7bOFpOo9PbdZnINgFspVEEnziHiwxijdfnpT8glIic_bR-VrPgxTbGgBCSVE-oBw==&p=2&sk=&fvj=0" id="sja2" onclick="setRefineByCookie([]); sjoc('sja2',0); convCtr('SJ', pingUrlsForGA)" onmousedown="sjomd('sja2'); clk('sja2');" rel="noopener noreferrer nofollow" target="_blank" title="Natural Language Processing (NLP) Research Scientist"><b>Natural</b> <b>Language</b> <b>Processing</b> (NLP) Research Scientist</a>,
<a class="jobtitle turnstileLink" data-tn-element="jobTitle" href="/pagead/clk?mo=r&ad=-6NYlbfkN0DAgAc92q6iRaEfcJAuaSGKVeICOvW3P0dpzElS8ir45AVr2KnXyohmCH6uVD67Lw6r1gOBlKj0P_aoEp47XVDDXifx9pmFaL4aE8UYLrZupeH2CuWJh3ZJ3HH3tu8rQczBbvYQ6Bs0DckvZJK7h0Q3eI9wlv1VChlb89NQOIrjazrVGFujm4dvl03iwgVnH275dvw21h1C1E24Q-7vPEvAyUl2Mkc9h0ZG6ph0dFimNjVbiPrAQ2IMZT3PL6AOe5-I-0aRUW_SRDZtcKeWkAr2P9rkKkxksop0ORQ9ez-rXlQ_ikKSJxKF62VwwwSjTRaUoeInFbOn349PdmMVpb1xBu6zh1ADeXAfR9UYrchadnSRtocbHPX5qHGunJmaxnA-ZJs3FIcLjQ==&p=3&sk=&fvj=0" id="sja3" onclick="setRefineByCookie([]); sjoc('sja3',0); convCtr('SJ', pingUrlsForGA)" onmousedown="sjomd('sja3'); clk('sja3');" rel="noopener noreferrer nofollow" target="_blank" title="Senior Associate - Cognitive Data Scientist Natural Language Processing">Senior Associate - Cognitive Data Scientist <b>Natural</b> <b>Language</b>...</a>,
<a class="jobtitle turnstileLink" data-tn-element="jobTitle" href="/pagead/clk?mo=r&ad=-6NYlbfkN0CpFJQzrgRR8WqXWK1qKKEqALWJw739KlKqr2H-MSI4eqIyHF2MMS4ja4aJ3X8EHZ1KlMMBygRDgqsX_n7BbQRN3HKa6pSTVUiS7YM0ghYYUcJoMCcBNbcM1k1SKzdtXjzWB3oVgZM-Qqj-8yMzcfhOmjufCuWrqOZyhnuWYiCJJeiIEeFyQFrCwHoC8BKeNdU-kQyxrHH7uLRqaNLZX9euIMM4y9GqxwWy_oIptzMvAluwhRGeOT1HltSsg_QB8mAkNHwDc5yAGi-mBqY1Emw9rqauoryNDMl2TI39A25uzMuQ5jqBeqdXHdmDt0S1KO7TYlrd_5TaOOFnNVCbWicrsXwUEl928n10ojBK4Mxj5kMbae59UT9O9cnap4FETqZ967t6DY-rv5WJcSxc3ZWrrZfPE4_AlzMgJTJvmeXKZS5nxebys460rnscwAe-p_3vS6Mu3K4aw9VlHo7OJ70sKn8hXKn3NWsJ7zP8ikkKx9u-tHBbVVKJktskeTrmC8tc4LckIOFCiGqJ7nNEbR5i76vMYYZdYDvI8utyH7Tv5PJAZXt1KqWL3pHB_ttQc5s-DH77oO-NKKV065RtFCcnf_Qp8A5g27iPe7j4d7ruFAJUhGD6ZNBcASWtYLA0qEYT-hXNUD4LiBigfq9vRqKxtctw8diUVGd0sA6cnExg1oroDzUbPtAy&p=4&sk=&fvj=0" id="sja4" onclick="setRefineByCookie([]); sjoc('sja4',0); convCtr('SJ', pingUrlsForGA)" onmousedown="sjomd('sja4'); clk('sja4');" rel="noopener noreferrer nofollow" target="_blank" title="Natural Language Processing Scientist"><b>Natural</b> <b>Language</b> <b>Processing</b> Scientist</a>,
<a class="jobtitle turnstileLink" data-tn-element="jobTitle" href="/pagead/clk?mo=r&ad=-6NYlbfkN0D9Z_NigMRFBqnj4_9rPbMnaYMSgnKsRu0gcL5XJCnD3bsAiFyqvhMpeKr6mnD6uolc_eyG06a3mEDy9HBN4jCa4urkBW4SPz44YosiJV05ZcU-5SWuifw6AnNq5XN5w9Kfe-bjPpCsQ7oQzlZnOJ9Hph0w3vFMJyQW-fSlhM7qLv7uLqv0iSlKwIBt4s4Z3UF38DX_-gSFnC0GHzzrSpzZQEQYbykX2OOoWuQsNQs_KRVfxo-PbDpIA8Muq5D9ytw8GTitD0hdnO0wOmFHndMMogBtZbhDq2Y5VIyRRobKQTIT1yjGugU1_yDVz5qLgeTLLdF1AyyAXEZkjFCgfKTGzQWcWj3zYcC0cVNNRrH143jbgkMOwVturb2YcQNOz5S2PVGON8XA5vChs5DWcHzoJmd-vv3jqNKtX0AJBvzmXirQcQ-F9Y4ORvC8STHTm8o4EPP4P5GO2Z6kumM445B1Zz5O4vzxOZJU9Ipb4eFeQwVKksD_usyDHYDzhl8h5wAbifwvVG8GH7chxFiC8hsdHBShLlDjd8dJzUxMU9lshC6kcmlf8dcms_KSKzs9t6gen_FZKXw-KjJWE_ULFle5KGOvlUrXhiWy2DTP4vG8tpG74rXlLa_1x_aQovgTg8XN0rm-kqSqRkgUryFBwQ9rklhJr6kxPDTYJKJG-j2_KslY09LANONOA4nUmu9gHJjmCdIkMJhVBQj6KuV9mFZMWyRgLDWaJBeJ4zRcUcMctVaIKfE_soZnCTB-qM5vaKT64gv7m0h6SE4-8oybXJkjdl3clAm4lPiotQkffvZKnJVBPaNN5dZL3mV6qD6GBy5Dbe6J4bddVAf92rYce9vXbo8dLQcf8A3P1Bl4ZDEIivOqTpfns44-1_27JMIvOEHTbS6bh7bX64H9IVNgBI53&p=5&sk=&fvj=0" id="sja5" onclick="setRefineByCookie([]); sjoc('sja5',0); convCtr('SJ', pingUrlsForGA)" onmousedown="sjomd('sja5'); clk('sja5');" rel="noopener noreferrer nofollow" target="_blank" title="Language Engineer"><b>Language</b> Engineer</a>]
In [ ]:
Content source: MingChen0919/learning-apache-spark
Similar notebooks: