In [120]:
import pandas as pd
import numpy as np
import os
from urllib import urlretrieve
import time
import glob
In [3]:
# Switch to the project data directory that every relative path below is
# resolved against. Set the VIEW2BUY_DIR environment variable to run the
# notebook on another machine; the original author's location is the default.
os.chdir(os.environ.get(
    'VIEW2BUY_DIR',
    '/Users/Walkon302/Desktop/deep-learning-models-master/view2buy'))
In [10]:
# Load the image url file.
# The file is read without a header row (header=None), so the columns are
# named explicitly here: the following cells address them as url['spu'] and
# url['url'], which would raise KeyError with the default 0/1 column labels.
url = pd.read_csv('eval_img_url.csv', header=None, names=['spu', 'url'])
In [15]:
# Preview the first rows of the url table to check that it loaded as expected.
url.head()
Out[15]:
spu
url
0
100000000317431808
http://a.vpimg2.com/upload/merchandise/pdc/808...
1
100000001066491904
http://a.vpimg2.com/upload/merchandise/pdc/904...
2
100000005622976512
http://a.vpimg2.com/upload/merchandise/pdc/512...
3
100000009136676865
http://a.vpimg2.com/upload/merchandise/pdcvis/...
4
100000009494401198
http://a.vpimg2.com/upload/merchandise/pdcvis/...
In [7]:
# Load the filtered view2buy data (described by the author as containing
# around 300k files -- presumably rows; TODO confirm).
# NOTE(review): unpickling runs arbitrary code embedded in the file, so only
# load this pickle when it comes from a trusted source.
df = pd.read_pickle('user_fea_for_eval.pkl')
In [29]:
# Filter the view2buy data again: keep the rows whose bought and viewed items
# both have an image url, then count the non-null entries of each column per user.
df_url_count = (
    df.loc[df['buy_spu'].isin(url['spu']) & df['view_spu'].isin(url['spu'])]
    .groupby('user_id')
    .count()
)
In [54]:
# Rows of df whose buy_spu and view_spu both appear in the url table.
df_url = df.loc[
    df['buy_spu'].isin(url['spu']) & df['view_spu'].isin(url['spu'])
]
In [53]:
# Shape of the url-covered subset in which the bought item is the viewed item.
df.loc[
    df['buy_spu'].isin(url['spu'])
    & df['view_spu'].isin(url['spu'])
    & (df['buy_spu'] == df['view_spu'])
].shape
Out[53]:
(30203, 13)
In [39]:
# Reset the index so that the current index of df_url_count becomes a regular
# column again and the frame gets a fresh positional index.
# NOTE(review): re-running this cell on the already-reset frame adds an extra
# 'index' column -- run it once per kernel session.
df_url_count = df_url_count.reset_index()
In [44]:
# Keep the ids of the users whose view_spu count is above 20, i.e. users with
# more than 20 browsing-history entries, for further examination.
user_id = df_url_count.loc[df_url_count['view_spu'] > 20, 'user_id']
In [70]:
# Narrow df_url down to the rows that belong to the selected users.
df_url = df_url.loc[df_url['user_id'].isin(user_id)]
In [73]:
# Attach the image url of the viewed item to every row (inner join on the
# viewed item's spu, which is the default join type of merge).
df_url2 = df_url.merge(url, how='inner', left_on='view_spu', right_on='spu')
In [79]:
# Preview the merged frame: the df_url columns followed by the spu and url
# columns brought in from the url table.
df_url2.head()
Out[79]:
0
user_id
buy_spu
buy_sn
buy_ct3
view_spu
view_sn
view_ct3
time_interval
view_cnt
view_secondes
view_features
buy_features
spu
url
0
4209887493\t453532580309307392\t10004616\t334\...
4209887493
453532580309307392
10004616
334
14150170026959126
10010102
334
21114
1
11
[0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039...
[0.1, 1.804, 0.049, 0.883, 0.092, 0.053, 0.042...
14150170026959126
http://a.vpimg2.com/upload/merchandise/pdcvis/...
1
529805243\t103096245561765919\t10010102\t334\t...
529805243
103096245561765919
10010102
334
14150170026959126
10010102
334
37794
4
66
[0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039...
[0.467, 0.385, 0.0, 0.043, 0.292, 0.0, 0.448, ...
14150170026959126
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2
3748045464\t446777176556679168\t10005711\t334\...
3748045464
446777176556679168
10005711
334
14150170026959126
10010102
334
18820
1
34
[0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039...
[0.018, 0.161, 0.088, 0.141, 0.231, 0.0, 0.036...
14150170026959126
http://a.vpimg2.com/upload/merchandise/pdcvis/...
3
4209887493\t438895881520357521\t10004616\t334\...
4209887493
438895881520357521
10004616
334
14150170026959126
10010102
334
13978
1
11
[0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039...
[0.036, 0.439, 0.0, 0.074, 0.194, 0.0, 0.331, ...
14150170026959126
http://a.vpimg2.com/upload/merchandise/pdcvis/...
4
4209887493\t74104320184119307\t10004616\t334\t...
4209887493
74104320184119307
10004616
334
14150170026959126
10010102
334
14313
1
11
[0.135, 1.078, 0.06, 0.241, 0.213, 0.22, 0.039...
[0.078, 2.304, 0.132, 0.191, 0.0, 0.087, 0.341...
14150170026959126
http://a.vpimg2.com/upload/merchandise/pdcvis/...
In [76]:
#df_url2.to_pickle('view2buy_url.pkl')
In [110]:
# Distinct (view_spu, url) pairs found in the merged data, sorted by the
# group keys; only the two key columns are kept.
view_url = (
    df_url2.groupby(['view_spu', 'url'])
    .size()
    .reset_index()[['view_spu', 'url']]
)
In [111]:
# Display the distinct (view_spu, url) pairs; pandas abbreviates the middle
# rows of a long frame in the rendered output.
view_url
Out[111]:
view_spu
url
0
357872333107204
http://a.vpimg2.com/upload/merchandise/pdc/204...
1
357875526680651
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2
357882254983171
http://a.vpimg2.com/upload/merchandise/pdcvis/...
3
357901107539985
http://a.vpimg2.com/upload/merchandise/pdcvis/...
4
639360131194904
http://a.vpimg2.com/upload/merchandise/pdcvis/...
5
639369692328147
http://a.vpimg2.com/upload/merchandise/pdcvis/...
6
639371126526005
http://a.vpimg2.com/upload/merchandise/pdcvis/...
7
639371126526005
http://a.vpimg2.com/upload/merchandise/pdcvis/...
8
639389503180805
http://a.vpimg2.com/upload/merchandise/pdcvis/...
9
639389503180805
http://a.vpimg2.com/upload/merchandise/pdcvis/...
10
639392717246493
http://a.vpimg2.com/upload/merchandise/pdcvis/...
11
920816362999808
http://a.vpimg2.com/upload/merchandise/pdcvis/...
12
920832942268416
http://a.vpimg2.com/upload/merchandise/pdcvis/...
13
920846404657158
http://a.vpimg2.com/upload/merchandise/pdcvis/...
14
1202299096490170
http://a.vpimg2.com/upload/merchandise/pdcvis/...
15
1202310084616220
http://a.vpimg2.com/upload/merchandise/pdcvis/...
16
1202315469205505
http://a.vpimg2.com/upload/merchandise/pdcvis/...
17
1202326037671948
http://a.vpimg2.com/upload/merchandise/pdcvis/...
18
1202342670667978
http://a.vpimg2.com/upload/merchandise/pdcvis/...
19
1202342670671910
http://a.vpimg2.com/upload/merchandise/pdcvis/...
20
1483782161825904
http://a.vpimg2.com/upload/merchandise/pdcvis/...
21
1765254792024080
http://a.vpimg2.com/upload/merchandise/pdcvis/...
22
2046724026622135
http://a.vpimg2.com/upload/merchandise/pdcvis/...
23
2046767967338535
http://a.vpimg2.com/upload/merchandise/pdcvis/...
24
2046769978417582
http://a.vpimg2.com/upload/merchandise/pdcvis/...
25
2609679722156042
http://a.vpimg2.com/upload/merchandise/pdcvis/...
26
2891145450029056
http://a.vpimg2.com/upload/merchandise/pdcvis/...
27
2891152447287296
http://a.vpimg2.com/upload/merchandise/pdcvis/...
28
2891160600990330
http://a.vpimg2.com/upload/merchandise/pdcvis/...
29
2891169552208298
http://a.vpimg2.com/upload/merchandise/pdcvis/...
...
...
...
2567
9014593909249236993
http://a.vpimg2.com/upload/merchandise/pdc/993...
2568
9015719808079347717
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2569
9016845703713148936
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2570
9017127181215457486
http://a.vpimg2.com/upload/merchandise/pdc/486...
2571
9017971603620016137
http://a.vpimg2.com/upload/merchandise/pdc/137...
2572
9017971606044979200
http://a.vpimg2.com/upload/merchandise/pdc/200...
2573
9018816028550127616
http://a.vpimg2.com/upload/merchandise/pdc/616...
2574
9022193728259383296
http://a.vpimg2.com/upload/merchandise/pdc/296...
2575
9022475203247398916
http://a.vpimg2.com/upload/merchandise/pdc/916...
2576
9022756682497105932
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2577
9023882578131013643
http://a.vpimg2.com/upload/merchandise/pdc/643...
2578
9024727008410832903
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2579
9026415852921315334
http://a.vpimg2.com/upload/merchandise/pdc/334...
2580
9026697332171030534
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2581
9027823227804868619
http://a.vpimg2.com/upload/merchandise/pdc/619...
2582
9031763960910209025
http://a.vpimg2.com/upload/merchandise/pdc/025...
2583
9033171252362444801
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2584
9033734206588821512
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2585
9035141577199349769
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2586
9036830509787594752
http://a.vpimg2.com/upload/merchandise/pdc/752...
2587
9037674851989819400
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2588
9038237806216196097
http://a.vpimg2.com/upload/merchandise/pdc/097...
2589
9039645182176481286
http://a.vpimg2.com/upload/merchandise/pdc/286...
2590
9039926651803541506
http://a.vpimg2.com/upload/merchandise/pdc/506...
2591
9041615583843246080
http://a.vpimg2.com/upload/merchandise/pdc/080...
2592
9088621908743286785
http://a.vpimg2.com/upload/merchandise/pdc/785...
2593
9089747807251402752
http://a.vpimg2.com/upload/merchandise/pdc/752...
2594
9090029283626840066
http://a.vpimg2.com/upload/merchandise/pdc/066...
2595
9090592232181542912
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2596
9094251405871296512
http://a.vpimg2.com/upload/merchandise/pdc/512...
2597 rows × 2 columns
In [114]:
# Download every image listed in view_url to view_data_image/<view_spu>.jpg.
#
# * Each file is first written to '<target>.part' and only renamed to its
#   final name after urlretrieve returned without error, so an interrupted
#   transfer (e.g. ContentTooShortError) never leaves a truncated .jpg behind.
# * A failed download is retried up to max_attempts times; the failure is
#   reported only when the last attempt also fails.
# * The pause runs after every attempt, successful or not, so the server is
#   throttled uniformly.
# NOTE: view_url can hold several urls for one view_spu. The file name depends
# on view_spu only, so a later row overwrites the image of an earlier row.
max_attempts = 3
pause_seconds = 10
for index, row in view_url.iterrows():
    target = 'view_data_image/{}.jpg'.format(row['view_spu'])
    partial = target + '.part'
    for attempt in range(1, max_attempts + 1):
        try:
            urlretrieve(row['url'], partial)
            # os.rename does not replace an existing file on every platform,
            # so clear the destination explicitly first.
            if os.path.exists(target):
                os.remove(target)
            os.rename(partial, target)
            break
        except Exception as e:
            # Drop whatever was written before the transfer failed.
            if os.path.exists(partial):
                os.remove(partial)
            if attempt == max_attempts:
                print('index:{}, filename: {}, url: {}'.format(index, row['view_spu'], row['url']))
                print('exception type: {}, args: {}, exception: {}'.format(type(e), e.args, e))
        finally:
            time.sleep(pause_seconds)
index:2, filename: 357882254983171, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/09/19/38/4dd42d3c-ac7f-47f4-9e00-787e63dce01e.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 4380 out of 60704 bytes',), exception: retrieval incomplete: got only 4380 out of 60704 bytes
index:13, filename: 920846404657158, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/29/68/a40ef312-845c-41d5-a852-8401d16c1a59.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:15, filename: 1202310084616220, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/07/130/9a5a7f60-14f6-42d3-8ba0-302a0c4e6dc9.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 0 out of 50167 bytes',), exception: retrieval incomplete: got only 0 out of 50167 bytes
index:22, filename: 2046724026622135, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/08/27/35/8b7971f0-ad0b-466d-aa32-7902f85189fb.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 1460 out of 25366 bytes',), exception: retrieval incomplete: got only 1460 out of 25366 bytes
index:27, filename: 2891152447287296, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/09/06/13/bbe7d4f9-1758-4bb6-9527-26813985d859.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 40444 out of 58193 bytes',), exception: retrieval incomplete: got only 40444 out of 58193 bytes
index:216, filename: 24001791427289098, url: http://a.vpimg2.com/upload/merchandise/pdc/098/289/24001791427289098/1/1YY3035460050-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 12572 out of 33562 bytes',), exception: retrieval incomplete: got only 12572 out of 33562 bytes
index:320, filename: 34134875562242082, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/09/08/136/7883e8f6-2b89-4303-b0cb-4094f3c12416.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 13768 out of 71537 bytes',), exception: retrieval incomplete: got only 13768 out of 71537 bytes
index:329, filename: 34697864820551702, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/12/30/17/20ca2d85-f7d3-4b72-ad5b-df7cce6d052e.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 4096 out of 56464 bytes',), exception: retrieval incomplete: got only 4096 out of 56464 bytes
index:341, filename: 72415513913565187, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/14/153/4f8105f6-6838-41a6-8982-36887e6cb9fe.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 4942 out of 31535 bytes',), exception: retrieval incomplete: got only 4942 out of 31535 bytes
index:514, filename: 89304008517624104, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/12/23/175/9dadc353-0dfb-4a71-9ca3-d08f0c949707.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:601, filename: 97185265860571162, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/08/26/64/8c23e513-ce72-499d-bc62-218d2bc78304.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 4380 out of 25253 bytes',), exception: retrieval incomplete: got only 4380 out of 25253 bytes
index:770, filename: 294217768972480514, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/25/109/2225d9da-eef5-4014-b9e3-df23bf0a4fc8.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 8760 out of 72331 bytes',), exception: retrieval incomplete: got only 8760 out of 72331 bytes
index:829, filename: 298439914808242231, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/19/65/098476fc-706b-4882-bd41-df02b2dbe844.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:1046, filename: 316735778810167299, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/23/103/0c26dc30-5b38-4893-be67-4d747e9f5208.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:1072, filename: 319550517313740802, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/23/5/51fff803-0eaf-432c-8f2d-188f11a4d7ec.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 0 out of 14962 bytes',), exception: retrieval incomplete: got only 0 out of 14962 bytes
index:1170, filename: 438614409363570705, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/09/01/91/c87ecf96-b1fb-44c4-83ac-2693d546d578.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 7016 out of 41875 bytes',), exception: retrieval incomplete: got only 7016 out of 41875 bytes
index:1172, filename: 438614435236745234, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/01/136/ba7c3051-469d-43fd-91f0-41023fa1caf1.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 2920 out of 46864 bytes',), exception: retrieval incomplete: got only 2920 out of 46864 bytes
index:1179, filename: 439458834293702666, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/09/14/118/91645600-82b6-4790-b56d-ed1074915229.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 25448 out of 37777 bytes',), exception: retrieval incomplete: got only 25448 out of 37777 bytes
index:1182, filename: 439458877074387002, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/12/14/63/4c25d43d-d240-47bd-8467-1326bfea3959.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 13768 out of 45119 bytes',), exception: retrieval incomplete: got only 13768 out of 45119 bytes
index:1420, filename: 457191792472825882, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/12/05/19/693e7396-b294-463a-acc2-01de3498c72b.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 29200 out of 39912 bytes',), exception: retrieval incomplete: got only 29200 out of 39912 bytes
index:1467, filename: 461413925257830545, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/24/94/bb9d4d9c-a99a-492a-80ea-444288151669.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:1468, filename: 461695358101766158, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/08/23/33/5e31957b-7d46-4c4f-93ac-ef8c76cbc6aa.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:1583, filename: 963565080649408512, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/25/6/597de7ee-205a-47bf-9622-90806598dee5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 4380 out of 16794 bytes',), exception: retrieval incomplete: got only 4380 out of 16794 bytes
index:1619, filename: 1082629013079781376, url: http://a.vpimg2.com/upload/merchandise/pdc/376/781/1082629013079781376/1/1051190-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 0 out of 19793 bytes',), exception: retrieval incomplete: got only 0 out of 19793 bytes
index:1722, filename: 2191359016057393152, url: http://a.vpimg2.com/upload/merchandise/pdc/152/393/2191359016057393152/0/1682430-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 7300 out of 22591 bytes',), exception: retrieval incomplete: got only 7300 out of 22591 bytes
index:1850, filename: 2826929451589525504, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/09/138/57b8d95c-2270-4efc-af22-44adda57040d_t.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 0 out of 98835 bytes',), exception: retrieval incomplete: got only 0 out of 98835 bytes
index:1852, filename: 2827492402281959424, url: http://a.vpimg2.com/upload/merchandise/pdc/424/959/2827492402281959424/0/1060546-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 0 out of 26327 bytes',), exception: retrieval incomplete: got only 0 out of 26327 bytes
index:1862, filename: 2832840426724958208, url: http://a.vpimg2.com/upload/merchandise/pdc/208/958/2832840426724958208/0/fd38ee57-7a10-4230-a078-cc3433e93abf.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 0 out of 278 bytes',), exception: retrieval incomplete: got only 0 out of 278 bytes
index:1875, filename: 2836781075773157376, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/10/157/cf97fe2b-1b9d-448d-8cfe-a2b4c72a5e25.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 2920 out of 25230 bytes',), exception: retrieval incomplete: got only 2920 out of 25230 bytes
index:1887, filename: 2845506800768520192, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/09/7/6c5695d8-5188-4efa-9bf6-0d2a495c239d_t.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 41508 out of 96337 bytes',), exception: retrieval incomplete: got only 41508 out of 96337 bytes
index:1997, filename: 3822225051725512705, url: http://a.vpimg2.com/upload/merchandise/pdc/705/512/3822225051725512705/0/1901500-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 14600 out of 18553 bytes',), exception: retrieval incomplete: got only 14600 out of 18553 bytes
index:2048, filename: 4063730591896084758, url: http://a.vpimg2.com/upload/merchandise/pdc/758/084/4063730591896084758/3/1759647-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 7016 out of 16669 bytes',), exception: retrieval incomplete: got only 7016 out of 16669 bytes
index:2191, filename: 4428803637838594048, url: http://a.vpimg2.com/upload/merchandise/pdc/048/594/4428803637838594048/0/2123114-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 14168 out of 14506 bytes',), exception: retrieval incomplete: got only 14168 out of 14506 bytes
index:2200, filename: 4496076157272420353, url: http://a.vpimg2.com/upload/merchandise/pdc/353/420/4496076157272420353/0/2123099-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 13160 out of 17378 bytes',), exception: retrieval incomplete: got only 13160 out of 17378 bytes
index:2203, filename: 4627806448492449792, url: http://a.vpimg2.com/upload/merchandise/pdc/792/449/4627806448492449792/0/1755203-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 12626 out of 19516 bytes',), exception: retrieval incomplete: got only 12626 out of 19516 bytes
index:2209, filename: 4706619441972056064, url: http://a.vpimg2.com/upload/merchandise/pdc/064/056/4706619441972056064/1/1755209-5.jpg
exception type: <class 'urllib.ContentTooShortError'>, args: ('retrieval incomplete: got only 5556 out of 15390 bytes',), exception: retrieval incomplete: got only 5556 out of 15390 bytes
index:2300, filename: 6586027870691340619, url: http://a.vpimg2.com/upload/merchandise/pdc/619/340/6586027870691340619/0/1587363-5.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:2314, filename: 6853429096767770624, url: http://a.vpimg2.com/upload/merchandise/pdc/624/770/6853429096767770624/0/1903601-5.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:2414, filename: 8462621471316647936, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/10/24/97/a8c9d912-2f59-4c20-8fe7-977e2d3f8a2e_t.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
In [182]:
# Paths of all .jpg files inside the NeedFixed/ folder (relative to the
# current working directory).
# NOTE(review): presumably these are the images whose download failed or was
# incomplete and that were sorted out by hand -- confirm.
needfixed = glob.glob('NeedFixed/*.jpg')
In [183]:
# Recover the spu id from every path by dropping the directory part and the
# file extension, instead of slicing with offsets tied to the literal length
# of 'NeedFixed/' and '.jpg'.
a = pd.DataFrame([os.path.splitext(os.path.basename(x))[0] for x in needfixed])
In [184]:
# Name the single column of a 'spu' (this relabels the frame in place).
a.columns = ['spu']
In [188]:
# Convert the spu strings to integers so they can be matched against
# view_url['view_spu']. The ids need up to 19 decimal digits, so request a
# 64-bit integer explicitly rather than the platform-dependent 'int'.
a['spu'] = a['spu'].astype(np.int64)
In [189]:
# Look up the url of every spu in a (inner join, the default join type of
# merge) so those images can be fetched again.
b = a.merge(view_url, how='inner', left_on='spu', right_on='view_spu')
In [191]:
# Preview the first rows of b.
b.head()
Out[191]:
spu
view_spu
url
0
10491016902120013
10491016902120013
http://a.vpimg2.com/upload/merchandise/pdcvis/...
1
1202342670667978
1202342670667978
http://a.vpimg2.com/upload/merchandise/pdcvis/...
2
1202342670671910
1202342670671910
http://a.vpimg2.com/upload/merchandise/pdcvis/...
3
12461341739094214
12461341739094214
http://a.vpimg2.com/upload/merchandise/pdcvis/...
4
16120516436332544
16120516436332544
http://a.vpimg2.com/upload/merchandise/pdcvis/...
In [192]:
# (rows, columns) of b.
b.shape
Out[192]:
(51, 3)
In [193]:
# Download the images listed in b to fixed_data_image/<view_spu>.jpg.
#
# * Each file is first written to '<target>.part' and only renamed to its
#   final name after urlretrieve returned without error, so an interrupted
#   transfer never leaves a truncated .jpg behind.
# * A failed download is retried up to max_attempts times; the failure is
#   reported only when the last attempt also fails.
# * The pause runs after every attempt, successful or not, so the server is
#   throttled uniformly.
max_attempts = 3
pause_seconds = 3
for index, row in b.iterrows():
    target = 'fixed_data_image/{}.jpg'.format(row['view_spu'])
    partial = target + '.part'
    for attempt in range(1, max_attempts + 1):
        try:
            urlretrieve(row['url'], partial)
            # os.rename does not replace an existing file on every platform,
            # so clear the destination explicitly first.
            if os.path.exists(target):
                os.remove(target)
            os.rename(partial, target)
            break
        except Exception as e:
            # Drop whatever was written before the transfer failed.
            if os.path.exists(partial):
                os.remove(partial)
            if attempt == max_attempts:
                print('index:{}, filename: {}, url: {}'.format(index, row['view_spu'], row['url']))
                print('exception type: {}, args: {}, exception: {}'.format(type(e), e.args, e))
        finally:
            time.sleep(pause_seconds)
index:29, filename: 448747540332511261, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/18/138/95c7b5ec-73f8-440a-a1f1-3086dcdc32fa.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:30, filename: 449873443790426124, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/28/45/cf305005-cd37-4ab1-9c94-f62141686804.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:31, filename: 450154915216064523, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/18/156/fb3eb7cb-da23-4011-9976-1dcf46d86e7d.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
index:32, filename: 451843765076328474, url: http://a.vpimg2.com/upload/merchandise/pdcvis/2016/11/18/140/3db1a419-59c3-4a44-8deb-04f066383ce2.jpg
exception type: <type 'exceptions.IOError'>, args: ('http protocol error', 0, 'got a bad status line', None), exception: ('http protocol error', 0, 'got a bad status line', None)
In [ ]:
Content source: walkon302/CDIPS_Recommender
Similar notebooks: