Standard Python/Pandas Opening


In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import matplotlib.pyplot as plt

In [4]:
%matplotlib inline

Read in CSV File from Downloads


In [5]:
prod_df = pd.read_csv('/home/saisons/Code/zazzle-product-analysis/inputs/cl_pr_lst.csv', 
                      dtype={'category_id': np.str, 'product_id': np.str, 'num_of_products': np.int}, 
                      keep_default_na=False)
prod_df.head()


Out[5]:
orig_cat_link category_link_page store category_id category_string product_name product_url dnu_-_product_type category_1 category_2 category_3 category_4 extra_tag product_id final_product_type
0 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Card https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Card Products Dark-Surreal-Art Almon The Strangeling 137406801029247286 Card
1 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Shower Curtain https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Shower Curtain Products Dark-Surreal-Art Almon The Strangeling 256067872012429792 Shower Curtain
2 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Cloth Placemat https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Cloth Placemat Products Dark-Surreal-Art Almon The Strangeling 193201327518871590 Cloth Placemat
3 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Lunch Box https://www.zazzle.com/almon_the_strangeling_l... Almon The Strangeling Lunch Box Products Dark-Surreal-Art Almon The Strangeling 256468844099954287 Lunchbox
4 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Sleeve For MacBooks https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Sleeve For MacBooks Products Dark-Surreal-Art Almon The Strangeling 204328820852931558 Sleeve

In [6]:
cats_df = pd.read_csv('/home/saisons/Code/zazzle-product-analysis/inputs/cl_cat_lst.csv', 
                      dtype={'category_id': np.str, 'product_id': np.str},
                      keep_default_na=False)
cats_df.head()


Out[6]:
category_id store category_string num_of_products category_1 category_2 category_3 category_4
0 196000178886082210 HandDrawnRemastered Products>Altered Vintage Designs>Venus &The Se... 0 Products Altered Vintage Designs Venus &The Sea Monster
1 196002243590151072 HennaHarmony Products>Paisley Passion 6 colours!>Paisley Pa... 94 Products Paisley Passion 6 colours! Paisley Passion (Yellow)
2 196005363647255781 Lace9lives Products>Dark-Surreal-Art>Let Me Out (Escaping... 87 Products Dark-Surreal-Art Let Me Out (Escaping Skeleton)
3 196006792562816884 Lace9lives Products>Symbology>Peace & War>Peace & War (Ye... 52 Products Symbology Peace & War Peace & War (Yellow)
4 196013568156140664 Lace9lives Products>Mythical & Magical>Ticket to a Fantas... 125 Products Mythical & Magical Ticket to a Fantasy World

In [7]:
sale_df = pd.read_csv('/home/saisons/Code/zazzle-product-analysis/inputs/cur_zsales.csv', 
                      dtype={'category_id': np.str, 'product_id': np.str, 'num_of_products': np.int},#, 'royalty_usd': np.float},
                      keep_default_na=False)
sale_df.head()


Out[7]:
date product_id product_title product_type is_customized is_canceled store referred shipped_to order_item_id ... subtotal royalty_rate royalty royalty_usd status month year date.1 state country
0 8/20/2017 11:23 180043446901566033 Henna Elephant (Pink/Purple) zazzle_teapot Yes No HennaHarmony 3rd Party Katie - Naples, FL 169-75976147-6421820 ... $22.18 20.0%+ $3.33 USD 3.33 pending 8 2017 8/20/2017 FL US
1 8/20/2017 10:29 256723023590458263 Humpty Dumpty groupestahl_highsierrabackpack Yes No Lace9lives 3rd Party Marilyn - Middletown, NY 169-48320115-5633619 ... $41.50 20.0%+ $6.22 USD 6.22 pending 8 2017 8/20/2017 NY US
2 8/19/2017 14:40 146674783110809982 The Claddagh (Silver) zazzle_keychain No No Lace9lives None Gazo - Saint Lubin En Vergonnois, France 169-29749457-1488590 ... €4.45 20.0%+ €0.85 EUR fr 0.94 pending 8 2017 8/19/2017 France
3 8/19/2017 4:21 256662535921095525 Music Lover pexagon_usbdriveswivel Yes No Lace9lives None Wendy - Salisbury, NC 169-82559494-4515371 ... $20.14 20.0%+ $3.83 USD 3.83 pending 8 2017 8/19/2017 NC US
4 8/15/2017 8:00 256646834181039898 Henna Elephant (Orange/Red) veronicastreats_cakepop Yes No HennaHarmony None Nanda - Skokie, IL 169-90247570-8878793 ... $105.36 20.0%+ $20.01 USD 20.01 pending 8 2017 8/15/2017 IL US

5 rows × 21 columns


In [8]:
saletotesprod_df = pd.read_csv('/home/saisons/Code/zazzle-product-analysis/inputs/zsales_totes.csv', 
                      dtype={'category_id': np.str, 'product_id': np.str},
                      keep_default_na=False)
saletotesprod_df.head()


Out[8]:
product_id num_sales quant_sold royal_total
0 106002679567361856 1 5 2.65
1 106047590835097129 1 5 2.85
2 106054391572452342 1 5 2.10
3 106059344619901354 1 5 2.65
4 106081164712238321 1 5 2.90

Code Below to Extract Input Sample Headers for Github

prod_df_example = prod_df.head(0)

cats_df_example = cats_df.head(0)

sale_df_example = sale_df.head(0)

saletotesprod_df_example = saletotesprod_df.head(0)

prod_df_example.to_csv("/home/saisons/Code/zazzle-product-analysis/inputs/prod_df_example.csv")

cats_df_example.to_csv("/home/saisons/Code/zazzle-product-analysis/inputs/cats_df_example.csv")

sale_df_example.to_csv("/home/saisons/Code/zazzle-product-analysis/inputs/sale_df_example.csv")

saletotesprod_df_example.to_csv("/home/saisons/Code/zazzle-product-analysis/inputs/saletotes_prod_df_example.csv")

Below import not needed at this time.

top_30 = pd.read_csv('/home/saisons/Downloads/top_sellers30_srtd.csv', dtype={'category_id': np.str})

Info Data for Review

Below lines # out as only needed to verify data integrity while building merges


In [9]:
prod_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61755 entries, 0 to 61754
Data columns (total 15 columns):
orig_cat_link         61755 non-null object
category_link_page    61755 non-null object
store                 61755 non-null object
category_id           61755 non-null object
category_string       61755 non-null object
product_name          61755 non-null object
product_url           61755 non-null object
dnu_-_product_type    61755 non-null object
category_1            61755 non-null object
category_2            61755 non-null object
category_3            61755 non-null object
category_4            61755 non-null object
extra_tag             61755 non-null object
product_id            61755 non-null object
final_product_type    61755 non-null object
dtypes: object(15)
memory usage: 7.1+ MB

In [10]:
cats_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 585 entries, 0 to 584
Data columns (total 8 columns):
category_id        585 non-null object
store              585 non-null object
category_string    585 non-null object
num_of_products    585 non-null object
category_1         585 non-null object
category_2         585 non-null object
category_3         585 non-null object
category_4         585 non-null object
dtypes: object(8)
memory usage: 36.6+ KB

In [11]:
sale_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3703 entries, 0 to 3702
Data columns (total 21 columns):
date             3703 non-null object
product_id       3703 non-null object
product_title    3703 non-null object
product_type     3703 non-null object
is_customized    3703 non-null object
is_canceled      3703 non-null object
store            3703 non-null object
referred         3703 non-null object
shipped_to       3703 non-null object
order_item_id    3703 non-null object
quantity         3703 non-null int64
subtotal         3703 non-null object
royalty_rate     3703 non-null object
royalty          3703 non-null object
royalty_usd      3703 non-null object
status           3703 non-null object
month            3703 non-null int64
year             3703 non-null int64
date.1           3703 non-null object
state            3703 non-null object
country          3703 non-null object
dtypes: int64(3), object(18)
memory usage: 607.6+ KB

In [12]:
saletotesprod_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2015 entries, 0 to 2014
Data columns (total 4 columns):
product_id     2015 non-null object
num_sales      2015 non-null int64
quant_sold     2015 non-null int64
royal_total    2015 non-null float64
dtypes: float64(1), int64(2), object(1)
memory usage: 63.0+ KB

below infos not needed at this time.

top_30.info()

Merge Product and Category DFs

this line not needed, was used to confirm category ID data fix cats_df[cats_df.category_id == '196094447518203739']


In [13]:
prodjn1 = pd.merge(prod_df, cats_df, on='category_id', left_index=True, how='outer')
prodjn1.head()


Out[13]:
orig_cat_link category_link_page store_x category_id category_string_x product_name product_url dnu_-_product_type category_1_x category_2_x ... extra_tag product_id final_product_type store_y category_string_y num_of_products category_1_y category_2_y category_3_y category_4_y
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Card https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Card Products Dark-Surreal-Art ... 137406801029247286 Card Lace9lives Products>Dark-Surreal-Art>Almon The Strangeling 41 Products Dark-Surreal-Art Almon The Strangeling
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Shower Curtain https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Shower Curtain Products Dark-Surreal-Art ... 256067872012429792 Shower Curtain Lace9lives Products>Dark-Surreal-Art>Almon The Strangeling 41 Products Dark-Surreal-Art Almon The Strangeling
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Cloth Placemat https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Cloth Placemat Products Dark-Surreal-Art ... 193201327518871590 Cloth Placemat Lace9lives Products>Dark-Surreal-Art>Almon The Strangeling 41 Products Dark-Surreal-Art Almon The Strangeling
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Lunch Box https://www.zazzle.com/almon_the_strangeling_l... Almon The Strangeling Lunch Box Products Dark-Surreal-Art ... 256468844099954287 Lunchbox Lace9lives Products>Dark-Surreal-Art>Almon The Strangeling 41 Products Dark-Surreal-Art Almon The Strangeling
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Sleeve For MacBooks https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Sleeve For MacBooks Products Dark-Surreal-Art ... 204328820852931558 Sleeve Lace9lives Products>Dark-Surreal-Art>Almon The Strangeling 41 Products Dark-Surreal-Art Almon The Strangeling

5 rows × 22 columns


In [14]:
prodjn1.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 61760 entries, 51 to 449
Data columns (total 22 columns):
orig_cat_link         61755 non-null object
category_link_page    61755 non-null object
store_x               61755 non-null object
category_id           61760 non-null object
category_string_x     61755 non-null object
product_name          61755 non-null object
product_url           61755 non-null object
dnu_-_product_type    61755 non-null object
category_1_x          61755 non-null object
category_2_x          61755 non-null object
category_3_x          61755 non-null object
category_4_x          61755 non-null object
extra_tag             61755 non-null object
product_id            61755 non-null object
final_product_type    61755 non-null object
store_y               61760 non-null object
category_string_y     61760 non-null object
num_of_products       61760 non-null object
category_1_y          61760 non-null object
category_2_y          61760 non-null object
category_3_y          61760 non-null object
category_4_y          61760 non-null object
dtypes: object(22)
memory usage: 10.8+ MB

In [15]:
prodjn1.drop(['store_y'],inplace=True,axis=1)
prodjn1.drop(['category_string_y'],inplace=True,axis=1)
prodjn1.drop(['category_1_y'],inplace=True,axis=1)
prodjn1.drop(['category_2_y'],inplace=True,axis=1)
prodjn1.drop(['category_3_y'],inplace=True,axis=1)
prodjn1.drop(['category_4_y'],inplace=True,axis=1)
prodjn1.head()


Out[15]:
orig_cat_link category_link_page store_x category_id category_string_x product_name product_url dnu_-_product_type category_1_x category_2_x category_3_x category_4_x extra_tag product_id final_product_type num_of_products
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Card https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Card Products Dark-Surreal-Art Almon The Strangeling 137406801029247286 Card 41
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Shower Curtain https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Shower Curtain Products Dark-Surreal-Art Almon The Strangeling 256067872012429792 Shower Curtain 41
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Cloth Placemat https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Cloth Placemat Products Dark-Surreal-Art Almon The Strangeling 193201327518871590 Cloth Placemat 41
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Lunch Box https://www.zazzle.com/almon_the_strangeling_l... Almon The Strangeling Lunch Box Products Dark-Surreal-Art Almon The Strangeling 256468844099954287 Lunchbox 41
51 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Sleeve For MacBooks https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Sleeve For MacBooks Products Dark-Surreal-Art Almon The Strangeling 204328820852931558 Sleeve 41

In [16]:
prodjn1.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 61760 entries, 51 to 449
Data columns (total 16 columns):
orig_cat_link         61755 non-null object
category_link_page    61755 non-null object
store_x               61755 non-null object
category_id           61760 non-null object
category_string_x     61755 non-null object
product_name          61755 non-null object
product_url           61755 non-null object
dnu_-_product_type    61755 non-null object
category_1_x          61755 non-null object
category_2_x          61755 non-null object
category_3_x          61755 non-null object
category_4_x          61755 non-null object
extra_tag             61755 non-null object
product_id            61755 non-null object
final_product_type    61755 non-null object
num_of_products       61760 non-null object
dtypes: object(16)
memory usage: 8.0+ MB

In [17]:
prodjn2 = pd.merge(prodjn1, saletotesprod_df, on='product_id', left_index=True, how='outer')
prodjn2.head()


Out[17]:
orig_cat_link category_link_page store_x category_id category_string_x product_name product_url dnu_-_product_type category_1_x category_2_x category_3_x category_4_x extra_tag product_id final_product_type num_of_products num_sales quant_sold royal_total
201 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Card https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Card Products Dark-Surreal-Art Almon The Strangeling 137406801029247286 Card 41 1.0 1.0 0.14
2014 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Shower Curtain https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Shower Curtain Products Dark-Surreal-Art Almon The Strangeling 256067872012429792 Shower Curtain 41 NaN NaN NaN
2014 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Cloth Placemat https://www.zazzle.com/almon_the_strangeling_c... Almon The Strangeling Cloth Placemat Products Dark-Surreal-Art Almon The Strangeling 193201327518871590 Cloth Placemat 41 NaN NaN NaN
2014 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Lunch Box https://www.zazzle.com/almon_the_strangeling_l... Almon The Strangeling Lunch Box Products Dark-Surreal-Art Almon The Strangeling 256468844099954287 Lunchbox 41 NaN NaN NaN
2014 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196094447518203739 Products>Dark-Surreal-Art>Almon The Strangeling Almon The Strangeling Sleeve For MacBooks https://www.zazzle.com/almon_the_strangeling_s... Almon The Strangeling Sleeve For MacBooks Products Dark-Surreal-Art Almon The Strangeling 204328820852931558 Sleeve 41 NaN NaN NaN

In [18]:
prodjn2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 62050 entries, 201 to 2014
Data columns (total 19 columns):
orig_cat_link         61755 non-null object
category_link_page    61755 non-null object
store_x               61755 non-null object
category_id           61760 non-null object
category_string_x     61755 non-null object
product_name          61755 non-null object
product_url           61755 non-null object
dnu_-_product_type    61755 non-null object
category_1_x          61755 non-null object
category_2_x          61755 non-null object
category_3_x          61755 non-null object
category_4_x          61755 non-null object
extra_tag             61755 non-null object
product_id            62045 non-null object
final_product_type    61755 non-null object
num_of_products       61760 non-null object
num_sales             2016 non-null float64
quant_sold            2016 non-null float64
royal_total           2016 non-null float64
dtypes: float64(3), object(16)
memory usage: 9.5+ MB

In [19]:
prod_info1 = prodjn2.groupby(
    ["product_id", "category_id"]
).num_sales.sum().reset_index().sort_values("num_sales", ascending=False)
prod_info1.head()


Out[19]:
product_id category_id num_sales
57155 256866073303201414 196983102186638194 20.0
13753 172744702617404847 196917132511372151 19.0
27675 240655745434825411 196725522850429377 19.0
13443 172334587813319837 196845705324780293 15.0
33573 256159898166220748 196261136230877731 15.0

In [20]:
prod_info2 = prodjn2.groupby(
    ["product_id", "category_id"]
).quant_sold.sum().reset_index().sort_values("quant_sold", ascending=False)
prod_info2.head()


Out[20]:
product_id category_id quant_sold
12001 161670850774986886 196322371922991352 360.0
11953 161562242323931117 196803349085736354 341.0
2357 121641832254102633 196739766766069967 330.0
12142 161890250660972983 196917132511372151 295.0
25750 229879596692946003 196809909809659306 250.0

In [21]:
prod_info3 = prodjn2.groupby(
    ["product_id", "category_id"]
).royal_total.sum().reset_index().sort_values("royal_total", ascending=False)
prod_info3.head()


Out[21]:
product_id category_id royal_total
13443 172334587813319837 196845705324780293 299.09
3403 127693927069360036 196717574142457136 204.20
13753 172744702617404847 196917132511372151 165.29
4079 130120506914147195 196247206110906638 163.60
27675 240655745434825411 196725522850429377 108.11

In [22]:
prod_info1.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 61638 entries, 57155 to 61637
Data columns (total 3 columns):
product_id     61638 non-null object
category_id    61638 non-null object
num_sales      1725 non-null float64
dtypes: float64(1), object(2)
memory usage: 1.9+ MB

In [23]:
prod_info2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 61638 entries, 12001 to 61637
Data columns (total 3 columns):
product_id     61638 non-null object
category_id    61638 non-null object
quant_sold     1725 non-null float64
dtypes: float64(1), object(2)
memory usage: 1.9+ MB

In [24]:
prod_info3.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 61638 entries, 13443 to 61637
Data columns (total 3 columns):
product_id     61638 non-null object
category_id    61638 non-null object
royal_total    1725 non-null float64
dtypes: float64(1), object(2)
memory usage: 1.9+ MB

prod_info1_fix = prod_info1.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False) prod_info1_fix.info()

prod_info2_fix = prod_info2.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False) prod_info2_fix.info()

prod_info3_fix = prod_info3.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False) prod_info3_fix.info()

If NaN sorting out is required at a later date, enable 3 markdown lines above and add _fix to merge values below


In [25]:
prod_info_jn1 = pd.merge(prod_info1, prod_info2, on='product_id', left_index=True, how='outer')
prod_info_jn1.drop(['category_id_y'],inplace=True,axis=1)
prod_info_jn1.head()


Out[25]:
product_id category_id_x num_sales quant_sold
57155 256866073303201414 196983102186638194 20.0 46.0
13753 172744702617404847 196917132511372151 19.0 67.0
27675 240655745434825411 196725522850429377 19.0 28.0
13443 172334587813319837 196845705324780293 15.0 96.0
33573 256159898166220748 196261136230877731 15.0 19.0

In [26]:
prod_info_jn1.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 61642 entries, 57155 to 61637
Data columns (total 4 columns):
product_id       61642 non-null object
category_id_x    61642 non-null object
num_sales        1725 non-null float64
quant_sold       1725 non-null float64
dtypes: float64(2), object(2)
memory usage: 2.4+ MB

If NaN sorting out is required at a later date, enable 3 markdown lines above and add _fix to merge values below


In [27]:
prod_info_jn2 = pd.merge(prod_info_jn1, prod_info3, on='product_id', left_index=True, how='outer')
prod_info_jn2.drop(['category_id_x'],inplace=True,axis=1)
prod_info_jn2.head()


Out[27]:
product_id num_sales quant_sold category_id royal_total
57155 256866073303201414 20.0 46.0 196983102186638194 33.66
13753 172744702617404847 19.0 67.0 196917132511372151 165.29
27675 240655745434825411 19.0 28.0 196725522850429377 108.11
13443 172334587813319837 15.0 96.0 196845705324780293 299.09
33573 256159898166220748 15.0 19.0 196261136230877731 62.86

In [28]:
prod_info_jn2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 61650 entries, 57155 to 61637
Data columns (total 5 columns):
product_id     61650 non-null object
num_sales      1725 non-null float64
quant_sold     1725 non-null float64
category_id    61650 non-null object
royal_total    1725 non-null float64
dtypes: float64(3), object(2)
memory usage: 2.8+ MB

In [29]:
cat_info_1 = prodjn2.groupby(
    ["category_id"]
).royal_total.sum().reset_index().sort_values("royal_total", ascending=False)
cat_info_1.head()


Out[29]:
category_id royal_total
546 196917132511372151 710.01
150 196247206110906638 537.36
158 196261136230877731 534.60
490 196827874078752949 487.79
498 196845705324780293 436.19

In [30]:
cat_info_1.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 581 entries, 546 to 580
Data columns (total 2 columns):
category_id    581 non-null object
royal_total    371 non-null float64
dtypes: float64(1), object(1)
memory usage: 13.6+ KB

In [31]:
cat_info_2 = prodjn2.groupby(
    ["category_id"]
).num_sales.sum().reset_index().sort_values("num_sales", ascending=False)
cat_info_2.head()


Out[31]:
category_id num_sales
546 196917132511372151 135.0
158 196261136230877731 120.0
490 196827874078752949 101.0
150 196247206110906638 85.0
276 196470141465254086 76.0

In [32]:
cat_info_2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 581 entries, 546 to 580
Data columns (total 2 columns):
category_id    581 non-null object
num_sales      371 non-null float64
dtypes: float64(1), object(1)
memory usage: 13.6+ KB

In [33]:
cat_info_3 = prodjn2.groupby(
    ["category_id"]
).quant_sold.sum().reset_index().sort_values("quant_sold", ascending=False)
cat_info_3.head()


Out[33]:
category_id quant_sold
546 196917132511372151 1209.0
158 196261136230877731 619.0
104 196175407903508848 524.0
467 196803349085736354 480.0
195 196322371922991352 476.0

In [34]:
cat_info_3.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 581 entries, 546 to 580
Data columns (total 2 columns):
category_id    581 non-null object
quant_sold     371 non-null float64
dtypes: float64(1), object(1)
memory usage: 13.6+ KB

In [35]:
cat_info_jn1 = pd.merge(cat_info_1, cat_info_2, on='category_id', left_index=True, how='outer')
#prod_info_jn2.drop(['category_id_x'],inplace=True,axis=1)
cat_info_jn1.head()


Out[35]:
category_id royal_total num_sales
546 196917132511372151 710.01 135.0
150 196247206110906638 537.36 85.0
158 196261136230877731 534.60 120.0
490 196827874078752949 487.79 101.0
498 196845705324780293 436.19 34.0

In [36]:
cat_info_jn1.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 581 entries, 546 to 580
Data columns (total 3 columns):
category_id    581 non-null object
royal_total    371 non-null float64
num_sales      371 non-null float64
dtypes: float64(2), object(1)
memory usage: 18.2+ KB

In [37]:
cat_info_jn2 = pd.merge(cat_info_jn1, cat_info_3, on='category_id', left_index=True, how='outer')
#prod_info_jn2.drop(['category_id_x'],inplace=True,axis=1)
cat_info_jn2.head()


Out[37]:
category_id royal_total num_sales quant_sold
546 196917132511372151 710.01 135.0 1209.0
150 196247206110906638 537.36 85.0 314.0
158 196261136230877731 534.60 120.0 619.0
490 196827874078752949 487.79 101.0 468.0
498 196845705324780293 436.19 34.0 281.0

In [38]:
cat_info_jn2.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 581 entries, 546 to 580
Data columns (total 4 columns):
category_id    581 non-null object
royal_total    371 non-null float64
num_sales      371 non-null float64
quant_sold     371 non-null float64
dtypes: float64(3), object(1)
memory usage: 22.7+ KB

In [39]:
cat_info_jn3 = pd.merge(cats_df, cat_info_jn2, on='category_id', left_index=True, how='outer')
#prod_info_jn2.drop(['category_id_x'],inplace=True,axis=1)
cat_info_jn3.head()


Out[39]:
category_id store category_string num_of_products category_1 category_2 category_3 category_4 royal_total num_sales quant_sold
1 196000178886082210 HandDrawnRemastered Products>Altered Vintage Designs>Venus &The Se... 0 Products Altered Vintage Designs Venus &The Sea Monster NaN NaN NaN
2 196002243590151072 HennaHarmony Products>Paisley Passion 6 colours!>Paisley Pa... 94 Products Paisley Passion 6 colours! Paisley Passion (Yellow) 27.61 6.0 84.0
3 196005363647255781 Lace9lives Products>Dark-Surreal-Art>Let Me Out (Escaping... 87 Products Dark-Surreal-Art Let Me Out (Escaping Skeleton) 21.35 4.0 5.0
4 196006792562816884 Lace9lives Products>Symbology>Peace & War>Peace & War (Ye... 52 Products Symbology Peace & War Peace & War (Yellow) NaN NaN NaN
5 196013568156140664 Lace9lives Products>Mythical & Magical>Ticket to a Fantas... 125 Products Mythical & Magical Ticket to a Fantasy World 8.33 4.0 7.0

In [40]:
cat_info_jn3.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 585 entries, 1 to 580
Data columns (total 11 columns):
category_id        585 non-null object
store              585 non-null object
category_string    585 non-null object
num_of_products    585 non-null object
category_1         585 non-null object
category_2         585 non-null object
category_3         585 non-null object
category_4         585 non-null object
royal_total        371 non-null float64
num_sales          371 non-null float64
quant_sold         371 non-null float64
dtypes: float64(3), object(8)
memory usage: 54.8+ KB

In [41]:
cat_info_jn3_srt = cat_info_jn3.sort_values("quant_sold", ascending=False)
# cat_info_jn3_srt.drop([1,inplace=True,axis=1)
# cat_info_jn3_srt.drop(['index'],inplace=True,axis=1)
cat_info_jn3.head()


Out[41]:
category_id store category_string num_of_products category_1 category_2 category_3 category_4 royal_total num_sales quant_sold
1 196000178886082210 HandDrawnRemastered Products>Altered Vintage Designs>Venus &The Se... 0 Products Altered Vintage Designs Venus &The Sea Monster NaN NaN NaN
2 196002243590151072 HennaHarmony Products>Paisley Passion 6 colours!>Paisley Pa... 94 Products Paisley Passion 6 colours! Paisley Passion (Yellow) 27.61 6.0 84.0
3 196005363647255781 Lace9lives Products>Dark-Surreal-Art>Let Me Out (Escaping... 87 Products Dark-Surreal-Art Let Me Out (Escaping Skeleton) 21.35 4.0 5.0
4 196006792562816884 Lace9lives Products>Symbology>Peace & War>Peace & War (Ye... 52 Products Symbology Peace & War Peace & War (Yellow) NaN NaN NaN
5 196013568156140664 Lace9lives Products>Mythical & Magical>Ticket to a Fantas... 125 Products Mythical & Magical Ticket to a Fantasy World 8.33 4.0 7.0

In [42]:
cat_info_jn3_srt.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 585 entries, 546 to 580
Data columns (total 11 columns):
category_id        585 non-null object
store              585 non-null object
category_string    585 non-null object
num_of_products    585 non-null object
category_1         585 non-null object
category_2         585 non-null object
category_3         585 non-null object
category_4         585 non-null object
royal_total        371 non-null float64
num_sales          371 non-null float64
quant_sold         371 non-null float64
dtypes: float64(3), object(8)
memory usage: 54.8+ KB

In [43]:
cat_info_jn3.to_csv("/home/saisons/Code/zazzle-product-analysis/outputs/cat_info_jn3.csv")

In [44]:
prodjn2.to_csv("/home/saisons/Code/zazzle-product-analysis/outputs/final_output_products.csv")

In [45]:
cat_list = ['196717574142457136', '196739766766069967', '196990207237662364', '196959432771907138', '196189645389229864']

In [46]:
five_cats_list = prodjn2[prodjn2['category_id'].isin(cat_list)]
five_cats_list.head(5)


Out[46]:
orig_cat_link category_link_page store_x category_id category_string_x product_name product_url dnu_-_product_type category_1_x category_2_x category_3_x category_4_x extra_tag product_id final_product_type num_of_products num_sales quant_sold royal_total
1056 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196990207237662364 Products>Holidays & Special Occassions>St Patr... A Pint of the Black Stuff Postcard https://www.zazzle.com/a_pint_of_the_black_stu... A Pint of the Black Stuff Postcard Products Holidays & Special Occassions St Patricks Day A Pint of the Black Stuff 239207052013421039 Postcard 32 3.0 4.0 0.56
1720 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196990207237662364 Products>Holidays & Special Occassions>St Patr... A Pint of the Black Stuff Wrapping Paper https://www.zazzle.com/a_pint_of_the_black_stu... A Pint of the Black Stuff Wrapping Paper Products Holidays & Special Occassions St Patricks Day A Pint of the Black Stuff 256636982936103797 Wrapping Paper 32 4.0 4.0 3.31
1734 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196990207237662364 Products>Holidays & Special Occassions>St Patr... A Pint of the Black Stuff Snowflake Pewter Chr... https://www.zazzle.com/a_pint_of_the_black_stu... A Pint of the Black Stuff Snowflake Pewter Chr... Products Holidays & Special Occassions St Patricks Day A Pint of the Black Stuff 256658203144719145 Premium Ornament 32 1.0 1.0 3.13
499 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196990207237662364 Products>Holidays & Special Occassions>St Patr... A Pint of the Black Stuff Drink Coaster https://www.zazzle.com/a_pint_of_the_black_stu... A Pint of the Black Stuff Drink Coaster Products Holidays & Special Occassions St Patricks Day A Pint of the Black Stuff 163752179919501672 Sandstone Coaster 32 1.0 1.0 3.84
469 https://www.zazzle.com/Lace9lives/gifts?ps=120... https://www.zazzle.com/lace9lives/gifts?ps=120... Lace9lives 196990207237662364 Products>Holidays & Special Occassions>St Patr... A Pint of the Black Stuff Card https://www.zazzle.com/a_pint_of_the_black_stu... A Pint of the Black Stuff Card Products Holidays & Special Occassions St Patricks Day A Pint of the Black Stuff 161399044306790705 Card 32 6.0 185.0 37.05

In [47]:
five_cats_list.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 465 entries, 1056 to 2014
Data columns (total 19 columns):
orig_cat_link         465 non-null object
category_link_page    465 non-null object
store_x               465 non-null object
category_id           465 non-null object
category_string_x     465 non-null object
product_name          465 non-null object
product_url           465 non-null object
dnu_-_product_type    465 non-null object
category_1_x          465 non-null object
category_2_x          465 non-null object
category_3_x          465 non-null object
category_4_x          465 non-null object
extra_tag             465 non-null object
product_id            465 non-null object
final_product_type    465 non-null object
num_of_products       465 non-null object
num_sales             60 non-null float64
quant_sold            60 non-null float64
royal_total           60 non-null float64
dtypes: float64(3), object(16)
memory usage: 72.7+ KB

In [48]:
five_cats_list.to_csv("/home/saisons/Code/zazzle-product-analysis/outputs/five_cats_prods_list.csv")

In [51]:
full_prod_list = prodjn2.groupby(
    ["final_product_type"]
).quant_sold.sum().reset_index().sort_values("quant_sold", ascending=False)
full_prod_list.head()


Out[51]:
final_product_type quant_sold
54 Card 4532.0
86 Envelope 1355.0
222 Stationery 975.0
200 Postcard 844.0
132 Label 610.0

In [52]:
full_prod_list.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 270 entries, 54 to 261
Data columns (total 2 columns):
final_product_type    270 non-null object
quant_sold            191 non-null float64
dtypes: float64(1), object(1)
memory usage: 6.3+ KB

In [53]:
full_prod_list.to_csv("/home/saisons/Code/zazzle-product-analysis/outputs/full_prod_list.csv")

In [ ]:

Code Unused At this Time stored below as Markdown

prodjnfix = pd.merge(cats_df, prod_df, on="category_id", left_index=True, right_index=True, how='outer')

prodjnfix.head()

top_sellers = prodjn2.groupby( ["category_id", "num_of_products"] ).royal_total.sum().reset_index().sort_values("royal_total", ascending=False).head(50)

top_sellers.head(5).reset_index()

merged1 = pd.merge(prod_df, cats_df, on="category_id") merged1 = pd.merge(prod_df, cats_df[['#_of_products']], left_index=True, right_index=True, how='outer')

chart2 = top_sellers.head(5).plot(x="category_id", y="num_of_products")

ax.set_xticklabels(top_sellers."royal_total", rotation=90)

chart3 = top_sellers.head(5).plot(x="category_id", y="royal_total")

top_sellers_50 = top_sellers.head(50)

top_sellers_50.reset_index()

top_50_stats = pd.merge(top_sellers_50, cats_df, on='category_id', right_index=True, how='outer')

top_50_stats.head(5)

top_50_stats.info()

top_50_stats.drop(['num_of_products_y'],inplace=True,axis=1)

top_50_stats.info()

top_50_stats.sort_values("num_of_products_x", ascending=False)

top_50_stats.to_csv("top_50_stats.csv")

weight = top_50_stats.assign(weighted_value = lambda x: x.royal_total/x.num_of_products_x)

top_30_designs = pd.merge(top_30, cats_df, on='category_id', left_index=True, right_index=True, how='outer')

top_30_designs.info()

top_30_designs.head(5)

top_30_designs.info()

top_30_designs.head()

weight.sort_values("Weighted_Value", ascending=False).head(5)

weight.to_csv("weighted_prods.csv")

It all breaks down in the charting....

chart1 = top_sellers.head(5).plot.line()

chart1.set_xticklabels(top_sellers.category_id)

chart1.set_yticklabels(top_sellers.royal_total)

fig = plt.figure() # Create matplotlib figure

ax = fig.add_subplot(111) # Create matplotlib axes ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.

width = 0.4

top_sellers.head(5).Num_of_products.plot(kind='line', color='red', ax=ax, width=width, position=1) top_sellers.head(5).royal_total.plot(kind='line', color='blue', ax=ax2, width=width, position=0)

ax.set_ylabel('Num_of_Products') ax2.set_ylabel('royal_total')

plt.show()

top_sellers.head(5).to_csv("top_supporters5.csv")

fig, ax1 = plt.subplots() x = ()"category_id') ax1.plot('category_id') ax1.set_xlabel('time (s)')

Make the y-axis label, ticks and tick labels match the line color.

ax1.set_ylabel('exp', color='b') ax1.tick_params('y', colors='b')

ax2 = ax1.twinx() s2 = np.sin(2 np.pi t) ax2.plot(t, s2, 'r.') ax2.set_ylabel('sin', color='r') ax2.tick_params('y', colors='r')

fig.tight_layout() plt.show()

chart3 = top_sellers.groupby("category_id").head(5) x=top_sellers.grouby("category_id").head(5) fig,ax=plt.subplots() ax2=ax.twinx() x.plot(ax=ax) y.plot(ax=ax2,style='g:') plt.show()

merged2.groupby("category_id").royal_total.sum().reset_index().sort_values("royal_total", ascending=False)

cats_df[cats_df.category_id == '196094447518203739']

prodjn1 = prod_df.join(cats_df, on="category_id", how='left', lsuffix='', rsuffix='_y', sort=False)

Below line # out as only needed to verify data integrity of column delete

prodjn1.info()

prodjn1.head()

del merged1['category_string_y'] del merged1['store_y'] del merged1['category_1_y'] del merged1['category_2_y'] del merged1['category_3_y'] del merged1['category_4_y']

prodjn1.#_of_products.value_counts()

prodjn1.head()

Merge prodjn1 and saletotesprod_df

prodjn2 = prodjn1.join(cats_df, on="product_id", how='left', lsuffix='', rsuffix='_y', sort=False)

prodjn2.head().reset_index()

Merge Category and Sale Summaries

merged3 = pd.merge(cats_df, saletotesprod_df, on="product_id")

Count of Product Types

merged2.sort_values("royal_total", ascending=False)

def combine_names(row): if row.true_final.startswith('Business'): return 'Business Card' if row.true_final.startswith('Coaster'): return 'Coaster' if row.true_final.startswith('Coaster'): return 'Coaster' return row.true_final return row.true_final

def combine_names1(row): if row.true_final.startswith('Coaster'): return 'Coaster' return row.true_final

merged2.apply(combine_names, axis=1)

merged2.apply(combine_names1, axis=1)

~~ Need to Create:~~

Separate Table of Category Data Totals from Pivot Table of Sale Info

print(merged2).reset_index()

merged1.to_csv('merged1.csv')

merged2.to_csv('merged2.csv')

merged3

merged2.info()

prod_df.info()

merged1.info()