왓챠 내가본 영화 title_url뽑기

  • 추가 피쳐들 뽑기위한 주소

In [1]:
import requests
import json
import pandas as pd

1~22 페이지까지 (range(1, 23), 페이지당 24개)


In [2]:
# Collect (title_url, code) for every movie card on pages 1-22 of the user's
# Watcha movie list, then build the DataFrame in a single step.
records = []
for page_num in range(1, 23):  # range end is exclusive, so this covers pages 1..22
    response = requests.get("https://watcha.net/v2/users/jXaIHl0ZtdYZ/movies.json?filter%5Bsorting%5D=time&page={page}".format(
        page=page_num))
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()
    watcha_dict = json.loads(response.text)
    watcha_list = watcha_dict.get('cards')

    # Iterate over the cards actually returned rather than assuming exactly 24
    # per page: a short page no longer raises IndexError and a longer one is
    # no longer truncated.
    for card in watcha_list:
        item = card.get('items')[0].get('item')
        records.append([item.get('title_url'), item.get('code')])

# Building the frame once avoids the quadratic cost of growing it row by row.
df = pd.DataFrame(records, columns=['title_url', 'code'])

In [3]:
# Display the DataFrame of (title_url, code) rows for a visual check.
df


Out[3]:
title_url code
0 spotlight-2015 mnp3sa
1 wiheomhan-somun-jjirasi-2013 mizwpr
2 before-midnight-2013 mkrc6e
3 iyagi-2012 m3vcz7
4 transcendence-2014 mef2j2
5 inganjungdog-2013 mkpnzb
6 the-hunger-games-2014 mfvxi2
7 the-intern-2015 mlqosw
8 byuti-insaideu-2014 mndtm0
9 geomeun-sajedeul-2015 mtb5m7
10 naebujadeul-2014 mtndts
11 silent-hill-2006 mluxpt
12 the-martian-2015 mc4mas
13 the-time-travelers-wife-2009 mwfm8t
14 gundo-2012 mnerjj
15 sin-city-2005 mo0weh
16 mudeomggaji-ganda-2013 muiysd
17 big-hero-6-2014 mxy831
18 lucy-2014 my1nf4
19 the-bucket-list-2007 mjoctb
20 guns-and-talks-2001 m4g420
21 amsal-gaje-2015 mfbyqj
22 beterang-2014 m0e0ng
23 the-maze-runner-2014 mitzzb
24 d-war-2007 m0g5q8
25 geoncughaggaeron-2012 mi2x6x
26 august-rush-2007 mfk8wd
27 bruce-almighty-2003 mp8xml
28 lust-caution-2007 mp0bz7
29 pulp-fiction-1994 ml4cgi
... ... ...
498 silmido-2003 mzqvrj
499 taegukgi-brotherhood-of-2003 mynhon
500 the-man-from-nowhere-2010 mreqp6
501 the-dark-knight-rises-2012 ms8t2k
502 welcome-to-dongmakgol-2005 mzvmtt
503 a-werewolf-boy-2012 mhptrv
504 the-good-the-bad-the-2008 muuvio
505 the-avengers-2012 m3smdg
506 the-berlin-file-2012 magenu
507 sunny-2011 myrr0v
508 transformers-revenge-of-2009 mnm1h8
509 take-off-2009 mgdnhf
510 haeundae-2009 m8gkma
511 the-host-2006 ms5fec
512 speed-scandal-2008 m2izec
513 er-king-and-the-clown-2005 m1l5lg
514 gwanghae-wangi-doen-2012 motei7
515 7beonbangyi-seonmul-2012-1 mw4vzn
516 the-thieves-2012 m2vxj4
517 spirit-of-jeet-keun-do-2004 mv3b4e
518 71-into-the-fire-2010 mco8ts
519 spider-man-2-2004 mvqagi
520 qian-toqian-xun-falseshen-2001 mvzdp0
521 the-butterfly-effect-2004 mdoyxv
522 shutter-island-2010 m0gk77
523 the-truman-show-1998 mkmevg
524 if-only-2004 mxmz5o
525 joint-security-area-2000 m35387
526 the-bourne-supremacy-2004 miogpj
527 haunters-2010 mksqaw

528 rows × 2 columns

23페이지 (마지막 페이지)


In [4]:
# Fetch page 23 of the same movie list separately; it is handled on its own
# because it is not a full 24-card page (16 cards when this notebook was run).
response = requests.get("https://watcha.net/v2/users/jXaIHl0ZtdYZ/movies.json?filter%5Bsorting%5D=time&page=23")
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
watcha_dict = json.loads(response.text)
watcha_list = watcha_dict.get('cards')

# Walk whatever cards the page returned instead of a hard-coded count, so the
# cell keeps working when the number of cards on this page changes.
records = []
for card in watcha_list:
    item = card.get('items')[0].get('item')
    records.append([item.get('title_url'), item.get('code')])

# Build the frame once from the collected rows.
df1 = pd.DataFrame(records, columns=['title_url', 'code'])

In [5]:
# Display the rows collected from page 23 for a visual check.
df1


Out[5]:
title_url code
0 beulraindeu-2011 ms0tq6
1 the-school-of-rock-2003 mywkgp
2 500-days-of-summer-2009 mrcy2s
3 the-shawshank-redemption-1994 m32kz6
4 love-letter-1995 mkecxr
5 beomjoewayi-jeonjaeng-2011 mh7hlp
6 the-devil-wears-prada-2006 mthg0d
7 moss-2010 m2r7cf
8 real-steel-2011 mash3e
9 leon-1994 m4phlv
10 naega-salinbeomida-2012 mj6jfk
11 malaton-2005 mwq1h6
12 the-classic-2003 mq867z
13 iron-man-3-2013 mm8zqe
14 my-little-bride-2004 mm2fh0
15 all-about-my-wife-2012 mk035p

df + df1 합침


In [6]:
# Stack the two tables into one with a fresh 0..n-1 index.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat with ignore_index=True is the supported equivalent.
url_df = pd.concat([df, df1], ignore_index=True)
url_df


Out[6]:
title_url code
0 spotlight-2015 mnp3sa
1 wiheomhan-somun-jjirasi-2013 mizwpr
2 before-midnight-2013 mkrc6e
3 iyagi-2012 m3vcz7
4 transcendence-2014 mef2j2
5 inganjungdog-2013 mkpnzb
6 the-hunger-games-2014 mfvxi2
7 the-intern-2015 mlqosw
8 byuti-insaideu-2014 mndtm0
9 geomeun-sajedeul-2015 mtb5m7
10 naebujadeul-2014 mtndts
11 silent-hill-2006 mluxpt
12 the-martian-2015 mc4mas
13 the-time-travelers-wife-2009 mwfm8t
14 gundo-2012 mnerjj
15 sin-city-2005 mo0weh
16 mudeomggaji-ganda-2013 muiysd
17 big-hero-6-2014 mxy831
18 lucy-2014 my1nf4
19 the-bucket-list-2007 mjoctb
20 guns-and-talks-2001 m4g420
21 amsal-gaje-2015 mfbyqj
22 beterang-2014 m0e0ng
23 the-maze-runner-2014 mitzzb
24 d-war-2007 m0g5q8
25 geoncughaggaeron-2012 mi2x6x
26 august-rush-2007 mfk8wd
27 bruce-almighty-2003 mp8xml
28 lust-caution-2007 mp0bz7
29 pulp-fiction-1994 ml4cgi
... ... ...
514 gwanghae-wangi-doen-2012 motei7
515 7beonbangyi-seonmul-2012-1 mw4vzn
516 the-thieves-2012 m2vxj4
517 spirit-of-jeet-keun-do-2004 mv3b4e
518 71-into-the-fire-2010 mco8ts
519 spider-man-2-2004 mvqagi
520 qian-toqian-xun-falseshen-2001 mvzdp0
521 the-butterfly-effect-2004 mdoyxv
522 shutter-island-2010 m0gk77
523 the-truman-show-1998 mkmevg
524 if-only-2004 mxmz5o
525 joint-security-area-2000 m35387
526 the-bourne-supremacy-2004 miogpj
527 haunters-2010 mksqaw
528 beulraindeu-2011 ms0tq6
529 the-school-of-rock-2003 mywkgp
530 500-days-of-summer-2009 mrcy2s
531 the-shawshank-redemption-1994 m32kz6
532 love-letter-1995 mkecxr
533 beomjoewayi-jeonjaeng-2011 mh7hlp
534 the-devil-wears-prada-2006 mthg0d
535 moss-2010 m2r7cf
536 real-steel-2011 mash3e
537 leon-1994 m4phlv
538 naega-salinbeomida-2012 mj6jfk
539 malaton-2005 mwq1h6
540 the-classic-2003 mq867z
541 iron-man-3-2013 mm8zqe
542 my-little-bride-2004 mm2fh0
543 all-about-my-wife-2012 mk035p

544 rows × 2 columns


In [7]:
# Hard-coded absolute directory that receives the exported file.
path = 'C:/Users/JKEUN/ipython notebook/project-02-watcha/resource/'

# Write the combined table as a UTF-8 CSV without the index column.
url_df.to_csv(
    path + 'url_df.csv',
    encoding='utf8',
    index=False,
)

In [ ]: