ISWCs (International Standard Musical Work Codes) extracted from the SACEM website, which is annoying to browse by hand.
In [1]:
import re
import requests
import bs4
In [2]:
# Fetch page 1 of the SACEM repertoire search results for the party
# "Claude DEBUSSY" (the title filter is left empty in the query string).
resp = requests.request(
    'GET',
    'https://sigried.sacem.fr/oeuvres/oeuvre/rechercheOeuvre.do?'
    'titre=&tiers=Claude+DEBUSSY&searchoption=default&ftad=true&page=1',
    # requests applies no timeout by default; without one a stalled
    # connection would block this cell forever.
    timeout=30,
)
# Fail loudly on an HTTP error status instead of handing an error page to
# the HTML parser.
resp.raise_for_status()
In [3]:
# Parse the search-results page and print, for every listed work that is kept:
# its title, the text of the last <span> of its description, and the URL of
# its printable detail page, followed by a blank separator line.
soup = bs4.BeautifulSoup(resp.content, 'html.parser')
dt = soup.find_all('dt')
dd = soup.find_all('dd')

# Loop invariants: the detail-page URL prefix and the pattern that pulls the
# argument out of links of the form "javascript:getDetail(<id>);".
detail_url_prefix = (
    'https://sigried.sacem.fr/oeuvres/oeuvre/printOeuvre.do?cocv='
)
detail_href_pattern = re.compile(r'javascript:getDetail\((.*)\);')

# Each <dt> is paired positionally with the <dd> at the same index.
# NOTE(review): presumably <dt> holds the title link and <dd> the credits of
# the same work -- confirm against the page markup.
for t, d in zip(dt, dd):
    # Skip entries whose description mentions 'Arrangeur' (arranger).
    if 'Arrangeur' in d.get_text():
        continue

    link = t.div.a
    print(link.span.string)
    # NOTE(review): the last <span> is presumably the ISWC -- confirm.
    print(d.find_all('span')[-1].string)

    href = link.get('href') or ''
    found = detail_href_pattern.match(href)
    if found:
        print(detail_url_prefix + found.group(1))
    else:
        # Unexpected link format: show the raw href rather than crashing
        # with AttributeError on a failed match.
        print(href)
    print()