In [1]:
%pylab inline
import pandas as pd


Populating the interactive namespace from numpy and matplotlib

In [20]:
# Load the bill-of-materials table (one tube_assembly_id column followed by
# component_id_N / quantity_N column pairs) from the local competition data folder.
bill_of_materials = pd.read_csv(
    filepath_or_buffer='./competition_data/bill_of_materials.csv'
)

In [21]:
# Show the dtype pandas inferred for every column of the loaded table.
# The parenthesised call prints the same text on Python 2 and Python 3, whereas
# the bare `print x` statement is a SyntaxError on Python 3.
print(bill_of_materials.dtypes)


tube_assembly_id     object
component_id_1       object
quantity_1          float64
component_id_2       object
quantity_2          float64
component_id_3       object
quantity_3          float64
component_id_4       object
quantity_4          float64
component_id_5       object
quantity_5          float64
component_id_6       object
quantity_6          float64
component_id_7       object
quantity_7          float64
component_id_8       object
quantity_8          float64
dtype: object

In [13]:
# NOTE(review): disabled experiment. If re-enabled, this would overwrite
# bill_of_materials with a 0:3 column slice of itself, so later cells that read
# the other columns would break. `.ix` was deprecated in pandas 0.20 and removed
# in 1.0; use `.iloc[:, 0:3]` if this is ever revived.
# bill_of_materials = bill_of_materials.ix[: ,0:3]

In [22]:
# Reshape the first component slot into a wide table: one row per
# tube_assembly_id, one column per distinct component_id_1 value, with
# quantity_1 as the cell value.
component_1_wide = bill_of_materials.pivot(
    index='tube_assembly_id',
    columns='component_id_1',
    values='quantity_1',
)
# The pivot can produce a column labelled NaN (presumably from rows whose
# component_id_1 is missing). Keep only the non-null column labels with a boolean
# mask: unlike `drop([nan], axis=1, inplace=True)` this does not rely on the bare
# `nan` that `%pylab` injects into the namespace, does not raise KeyError when
# there is no NaN column, and does not mutate a frame in place.
bill_of_materials1 = component_1_wide.loc[:, pd.notnull(component_1_wide.columns)]
bill_of_materials1.columns


Out[22]:
Index([u'9999', u'C-0002', u'C-0004', u'C-0006', u'C-0007', u'C-0008',
       u'C-0009', u'C-0010', u'C-0011', u'C-0012', 
       ...
       u'C-2021', u'C-2022', u'C-2030', u'C-2038', u'C-2039', u'C-2040',
       u'C-2041', u'C-2042', u'C-2043', u'C-2044'],
      dtype='object', name=u'component_id_1', length=1079)

In [23]:
# Reshape the second component slot into a wide table: one row per
# tube_assembly_id, one column per distinct component_id_2 value, with
# quantity_2 as the cell value.
component_2_wide = bill_of_materials.pivot(
    index='tube_assembly_id',
    columns='component_id_2',
    values='quantity_2',
)
# The pivot can produce a column labelled NaN (presumably from rows whose
# component_id_2 is missing). Keep only the non-null column labels with a boolean
# mask: unlike `drop([nan], axis=1, inplace=True)` this does not rely on the bare
# `nan` that `%pylab` injects into the namespace, does not raise KeyError when
# there is no NaN column, and does not mutate a frame in place.
bill_of_materials2 = component_2_wide.loc[:, pd.notnull(component_2_wide.columns)]
bill_of_materials2.columns


Out[23]:
Index([u'C-0001', u'C-0002', u'C-0003', u'C-0004', u'C-0008', u'C-0012',
       u'C-0013', u'C-0014', u'C-0015', u'C-0016', 
       ...
       u'C-2029', u'C-2030', u'C-2032', u'C-2033', u'C-2034', u'C-2036',
       u'C-2037', u'C-2044', u'C-2045', u'C-2046'],
      dtype='object', name=u'component_id_2', length=834)

In [ ]: