notebook.community

Edit and run



In [1]:

    
#supress warning message
import warnings; warnings.simplefilter("ignore")



In [2]:

    
#import some extension libraries
import pandas as pd #import pandas library



In [3]:

    
superstore = pd.ExcelFile(r'/Users/ishmaelamin/Documents/Northwestern_University/2016_Fall/Predict_480/480_Group_Final_Project/Superstore_Sales_Data.xlsx')



In [4]:

    
sheet_1 = pd.read_excel(superstore,sheetname=0)



In [5]:

    
sheet_2 = pd.read_excel(superstore,sheetname=1)



In [6]:

    
#display worksheet names in the workbook
read_sheets_name = superstore.sheet_names
print(read_sheets_name)

#count sheets in the workbook
print("There are %d sheets in the workbook" % len(read_sheets_name))









    



['External', 'Internal']
There are 2 sheets in the workbook



In [7]:

    
#Display the recordsin the first sheet
sheet_1.head()
len(sheet_1)









    Out[7]:





403



In [8]:

    
len(sheet_2)









    Out[8]:





18



In [9]:

    
sheet_1.ix[:,0:4].head()









    Out[9]:






  
    
      
      id
      Product_Type_ID
      Order_Date
      Price
    
  
  
    
      0
      1
      1
      2009-03-22
      152.48
    
    
      1
      2
      1
      2010-12-22
      152.48
    
    
      2
      3
      1
      2012-10-28
      152.48
    
    
      3
      4
      1
      2012-09-24
      152.48
    
    
      4
      5
      1
      2010-04-20
      152.48



In [10]:

    
#sheet_1[["id","Order_Date"]].head()
sheet_1.set_index('Product_Type_ID',inplace=True)



In [11]:

    
sheet_2.set_index('Product_Type_ID',inplace=True)



In [12]:

    
sheet_1.head()









    Out[12]:






  
    
      
      id
      Order_Date
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
    
    
      1
      2
      2010-12-22
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
    
    
      1
      3
      2012-10-28
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
    
    
      1
      4
      2012-09-24
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
    
    
      1
      5
      2010-04-20
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso



In [13]:

    
sheet_2.head()









    Out[13]:






  
    
      
      Internal_Product_Name
      Internal_Cost
      Internal_Sale_Price
    
    
      Product_Type_ID
      
      
      
    
  
  
    
      1
      CPU
      50
      200
    
    
      2
      Graphics/video card
      35
      75
    
    
      3
      Speakers
      15
      25
    
    
      6
      Wi-Fi
      20
      50
    
    
      7
      Sound card
      15
      40



In [14]:

    
#result = pd.merge(sheet_1, sheet_2, how='left', on=['Product_Type_ID'])
internal = sheet_1.join(sheet_2)



In [15]:

    
len(internal)









    Out[15]:





403



In [16]:

    
internal = internal[["id","Order_Date","Internal_Cost","Internal_Sale_Price","Customer_Segment","Product_Category","Product_Sub_Category","Product_Type","Internal_Product_Name","Vendor"]]



In [17]:

    
internal.head()









    Out[17]:






  
    
      
      id
      Order_Date
      Internal_Cost
      Internal_Sale_Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Internal_Product_Name
      Vendor
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
    
    
      1
      2
      2010-12-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
    
    
      1
      3
      2012-10-28
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
    
    
      1
      4
      2012-09-24
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
    
    
      1
      5
      2010-04-20
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso



In [18]:

    
internal['Business_Type'] = pd.Series('internal', index=internal.index)



In [19]:

    
internal.head()









    Out[19]:






  
    
      
      id
      Order_Date
      Internal_Cost
      Internal_Sale_Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Internal_Product_Name
      Vendor
      Business_Type
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      2
      2010-12-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      3
      2012-10-28
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      4
      2012-09-24
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      5
      2010-04-20
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal



In [20]:

    
internal.columns = ["id","Order_Date","Cost","Price","Customer_Segment","Product_Category","Product_Sub_Category","Product_Type","Product_Name","Vendor","Business_Type"]



In [21]:

    
internal.head()









    Out[21]:






  
    
      
      id
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      2
      2010-12-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      3
      2012-10-28
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      4
      2012-09-24
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      5
      2010-04-20
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal



In [22]:

    
external=sheet_1



In [23]:

    
# append column to both datasets that distinguish them as either internal or external
external['Business_Type'] = pd.Series('external', index=external.index)



In [24]:

    
external.head()









    Out[24]:






  
    
      
      id
      Order_Date
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      2
      2010-12-22
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      3
      2012-10-28
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      4
      2012-09-24
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      5
      2010-04-20
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external



In [25]:

    
#Add cost column so that the two dataframes can be concatenated
external['Cost'] = pd.Series(None, index=external.index)



In [26]:

    
external.head()









    Out[26]:






  
    
      
      id
      Order_Date
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
      Cost
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      NaN
    
    
      1
      2
      2010-12-22
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      NaN
    
    
      1
      3
      2012-10-28
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      NaN
    
    
      1
      4
      2012-09-24
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      NaN
    
    
      1
      5
      2010-04-20
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      NaN



In [27]:

    
# get a list of columns
cols = list(external)



In [28]:

    
# move the column to head of list using index, pop and insert
cols.insert(2, cols.pop(cols.index('Cost')))
cols









    Out[28]:





['id',
 'Order_Date',
 'Cost',
 'Price',
 'Customer_Segment',
 'Product_Category',
 'Product_Sub_Category',
 'Product_Type',
 'Product_Name',
 'Vendor',
 'Business_Type']



In [29]:

    
# use ix to reorder
external = external.ix[:, cols]
external.head()









    Out[29]:






  
    
      
      id
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      2
      2010-12-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      3
      2012-10-28
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      4
      2012-09-24
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
    
    
      1
      5
      2010-04-20
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external



In [30]:

    
internal.head()









    Out[30]:






  
    
      
      id
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      2
      2010-12-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      3
      2012-10-28
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      4
      2012-09-24
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
    
    
      1
      5
      2010-04-20
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal



In [31]:

    
#Preserve the value of product_type_id as a column
internal['Product_Type_ID1'] = internal.index



In [32]:

    
internal.head()









    Out[32]:






  
    
      
      id
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
      Product_Type_ID1
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
      1
    
    
      1
      2
      2010-12-22
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
      1
    
    
      1
      3
      2012-10-28
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
      1
    
    
      1
      4
      2012-09-24
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
      1
    
    
      1
      5
      2010-04-20
      50
      200
      Enterprise
      Technology
      Computer Components
      CPU
      CPU
      Adesso
      internal
      1



In [33]:

    
external['Product_Type_ID1'] = external.index



In [34]:

    
external.head()









    Out[34]:






  
    
      
      id
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
      Product_Type_ID1
    
    
      Product_Type_ID
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1
      1
      2009-03-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      1
      2
      2010-12-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      1
      3
      2012-10-28
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      1
      4
      2012-09-24
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      1
      5
      2010-04-20
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1



In [35]:

    
internal.set_index('id',inplace=True)
external.set_index('id',inplace=True)



In [36]:

    
result = pd.concat([external, internal], ignore_index=True)



In [37]:

    
len(result)









    Out[37]:





806



In [38]:

    
result.head()









    Out[38]:






  
    
      
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
      Product_Type_ID1
    
  
  
    
      0
      2009-03-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      1
      2010-12-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      2
      2012-10-28
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      3
      2012-09-24
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      4
      2010-04-20
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1



In [ ]:

    
#results.columns = ["Business_Type","Cost","Customer_Segment","Order_Date","Price","Product_Category","Product_Name","Product_Sub_Category","Product_Type",Product_Type_ID","Vendor"]



In [39]:

    
result.columns = ["Order_Date","Cost","Price","Customer_Segment","Product_Category","Product_Sub_Category","Product_Type","Product_Name","Vendor","Business_Type","Product_Type_ID"]



In [40]:

    
result.head()









    Out[40]:






  
    
      
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
      Product_Type_ID
    
  
  
    
      0
      2009-03-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      1
      2010-12-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      2
      2012-10-28
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      3
      2012-09-24
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      4
      2010-04-20
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1



In [41]:

    
result.reset_index()









    Out[41]:






  
    
      
      index
      Order_Date
      Cost
      Price
      Customer_Segment
      Product_Category
      Product_Sub_Category
      Product_Type
      Product_Name
      Vendor
      Business_Type
      Product_Type_ID
    
  
  
    
      0
      0
      2009-03-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      1
      1
      2010-12-22
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      2
      2
      2012-10-28
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      3
      3
      2012-09-24
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      4
      4
      2010-04-20
      NaN
      152.48
      Enterprise
      Technology
      Computer Components
      CPU
      Adesso CPU
      Adesso
      external
      1
    
    
      5
      5
      2011-12-24
      NaN
      120.97
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon BP1200DH 12-Digit graphics card
      Canon
      external
      2
    
    
      6
      6
      2009-09-17
      NaN
      120.97
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon BP1200DH graphics card
      Canon
      external
      2
    
    
      7
      7
      2012-05-21
      NaN
      120.97
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon BP1200DH graphics card
      Canon
      external
      2
    
    
      8
      8
      2010-06-07
      NaN
      140.99
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP100DHII graphics card
      Canon
      external
      2
    
    
      9
      9
      2012-03-02
      NaN
      150.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP41DH graphics card
      Canon
      external
      2
    
    
      10
      10
      2010-08-30
      NaN
      150.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP41DH graphics card
      Canon
      external
      2
    
    
      11
      11
      2012-04-11
      NaN
      150.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP41DH graphics card
      Canon
      external
      2
    
    
      12
      12
      2010-03-08
      NaN
      150.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP41DH graphics card
      Canon
      external
      2
    
    
      13
      13
      2009-01-26
      NaN
      150.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP41DH graphics card
      Canon
      external
      2
    
    
      14
      14
      2009-03-25
      NaN
      150.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP41DH graphics card
      Canon
      external
      2
    
    
      15
      15
      2012-05-07
      NaN
      150.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon MP41DH graphics card
      Canon
      external
      2
    
    
      16
      16
      2010-08-15
      NaN
      17.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon P1-DHIII Palm graphics card
      Canon
      external
      2
    
    
      17
      17
      2012-05-28
      NaN
      17.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon P1-DHIII Palm graphics card
      Canon
      external
      2
    
    
      18
      18
      2011-12-07
      NaN
      17.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon P1-DHIII Palm graphics card
      Canon
      external
      2
    
    
      19
      19
      2011-12-02
      NaN
      17.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon P1-DHIII Palm graphics card
      Canon
      external
      2
    
    
      20
      20
      2010-07-03
      NaN
      17.98
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Canon P1-DHIII Palm graphics card
      Canon
      external
      2
    
    
      21
      21
      2009-04-07
      NaN
      40.48
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Keytronic Designer graphics card
      Keytronic
      external
      2
    
    
      22
      22
      2011-04-17
      NaN
      40.48
      Enterprise
      Technology
      Computer Components
      Graphics/video card
      Keytronic Designer graphics card
      Keytronic
      external
      2
    
    
      23
      23
      2012-12-21
      NaN
      20.97
      Enterprise
      Technology
      Computer Components
      Wi-Fi
      Microsoft WiFi
      Microsoft
      external
      6
    
    
      24
      24
      2011-03-12
      NaN
      20.97
      Enterprise
      Technology
      Computer Components
      Wi-Fi
      Microsoft WiFi
      Microsoft
      external
      6
    
    
      25
      25
      2011-04-20
      NaN
      20.97
      Enterprise
      Technology
      Computer Components
      Wi-Fi
      Microsoft WiFi
      Microsoft
      external
      6
    
    
      26
      26
      2012-05-07
      NaN
      20.97
      Enterprise
      Technology
      Computer Components
      Wi-Fi
      Microsoft WiFi
      Microsoft
      external
      6
    
    
      27
      27
      2011-07-22
      NaN
      20.97
      Enterprise
      Technology
      Computer Components
      Wi-Fi
      Microsoft WiFi
      Microsoft
      external
      6
    
    
      28
      28
      2010-01-10
      NaN
      20.97
      Enterprise
      Technology
      Computer Components
      Wi-Fi
      Microsoft WiFi
      Microsoft
      external
      6
    
    
      29
      29
      2009-05-13
      NaN
      30.42
      Enterprise
      Technology
      Computer Components
      Wi-Fi
      Microsoft WiFi
      Microsoft
      external
      6
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      776
      776
      2012-05-06
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Brother
      internal
      19
    
    
      777
      777
      2012-07-13
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Brother
      internal
      19
    
    
      778
      778
      2012-05-11
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Canon
      internal
      19
    
    
      779
      779
      2010-01-09
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Canon
      internal
      19
    
    
      780
      780
      2011-02-06
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Canon
      internal
      19
    
    
      781
      781
      2012-03-30
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Canon
      internal
      19
    
    
      782
      782
      2011-09-02
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Canon
      internal
      19
    
    
      783
      783
      2009-03-23
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Canon
      internal
      19
    
    
      784
      784
      2012-03-05
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Canon
      internal
      19
    
    
      785
      785
      2011-11-16
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Hewlett Packard
      internal
      19
    
    
      786
      786
      2010-02-27
      345.0
      500.00
      Enterprise
      Technology
      Computer Components
      RAM
      RAM
      Hewlett Packard
      internal
      19
    
    
      787
      787
      2011-11-16
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      788
      788
      2012-03-03
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      789
      789
      2010-05-10
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      790
      790
      2011-09-15
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      791
      791
      2011-02-03
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      792
      792
      2009-09-19
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      793
      793
      2012-04-04
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      794
      794
      2012-10-27
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      795
      795
      2009-12-01
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      796
      796
      2010-09-19
      10.0
      50.00
      Enterprise
      Technology
      Computer Components
      microprocessor
      microprocessor
      Unknown
      internal
      20
    
    
      797
      797
      2010-06-14
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      Micro Innovations
      internal
      21
    
    
      798
      798
      2011-06-25
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      Micro Innovations
      internal
      21
    
    
      799
      799
      2010-12-05
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      Micro Innovations
      internal
      21
    
    
      800
      800
      2010-02-22
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      Microsoft
      internal
      21
    
    
      801
      801
      2011-08-16
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      Microsoft
      internal
      21
    
    
      802
      802
      2010-11-01
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      Microsoft
      internal
      21
    
    
      803
      803
      2010-06-14
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      PC Concepts
      internal
      21
    
    
      804
      804
      2012-02-10
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      PC Concepts
      internal
      21
    
    
      805
      805
      2010-04-19
      35.0
      45.00
      Enterprise
      Technology
      Computer Components
      harddrive
      harddrive
      PC Concepts
      internal
      21
    
  

806 rows × 12 columns



In [42]:

    
#export to file .csv
result.to_csv(r'/Users/ishmaelamin/Documents/Northwestern_University/2016_Fall/Predict_480/superstore_businesstypes.csv')



In [ ]:

	id	Product_Type_ID	Order_Date	Price
0	1	1	2009-03-22	152.48
1	2	1	2010-12-22	152.48
2	3	1	2012-10-28	152.48
3	4	1	2012-09-24	152.48
4	5	1	2010-04-20	152.48

	id	Order_Date	Price	Customer_Segment	Product_Category	Product_Sub_Category	Product_Type	Product_Name	Vendor
Product_Type_ID
1	1	2009-03-22	152.48	Enterprise	Technology	Computer Components	CPU	Adesso CPU	Adesso
1	2	2010-12-22	152.48	Enterprise	Technology	Computer Components	CPU	Adesso CPU	Adesso
1	3	2012-10-28	152.48	Enterprise	Technology	Computer Components	CPU	Adesso CPU	Adesso
1	4	2012-09-24	152.48	Enterprise	Technology	Computer Components	CPU	Adesso CPU	Adesso
1	5	2010-04-20	152.48	Enterprise	Technology	Computer Components	CPU	Adesso CPU	Adesso

	Internal_Product_Name	Internal_Cost	Internal_Sale_Price
Product_Type_ID
1	CPU	50	200
2	Graphics/video card	35	75
3	Speakers	15	25
6	Wi-Fi	20	50
7	Sound card	15	40