create_data_descriptions



In [3]:
import pandas as pd

In [4]:
data = pd.read_csv('./UCB.csv')


/Users/choldgraf/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1159: DtypeWarning: Columns (12,51,53,55,58,59,66,94,100,101,102,107,109,139) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

In [38]:
n_na = data.isnull().sum(0)

In [42]:
data.shape


Out[42]:
(934638, 148)

In [41]:
for val in n_na:
    print val


0
0
0
7
7
306299
2
386525
0
0
60
657578
931079
4073
0
0
0
0
518204
319746
4
507087
507487
416434
793633
934638
520175
521149
622230
685575
902443
612691
889252
515339
145574
0
0
0
0
0
0
0
0
922941
934638
0
934638
0
0
603679
0
615202
615129
673407
673558
691886
690914
0
0
933944
0
0
0
0
0
0
0
0
0
4
934638
934638
0
0
934638
1
1
1
332727
78760
934638
934638
15
934638
934638
934638
934638
1
1
1
1
0
0
3691
208303
74857
934638
934638
0
31583
769706
871568
930946
0
0
0
0
929008
0
496664
536345
932381
932333
934638
0
0
934638
78407
317983
502548
502548
502548
502548
502548
934638
934638
518204
934638
934638
603679
934638
934638
934638
934638
934638
934638
934638
934638
0
929478
934638
0
0
0
0
0
16
16

In [33]:
for ix, dtype in zip(data.dtypes.index.values, data.dtypes.values):
    print ix, ',', dtype


PO ID , int64
PO # , object
Creation Date , object
Original Revision Date , object
Last Revision Date , object
Last Distribution Date , object
Workflow Completion Date , object
PO Closed Date , object
Supplier ID , int64
Supplier Name , object
BFS Supplier Number , object
Supplier Preference , float64
Suppl-Cust Acct # , object
Cart Line ID , float64
PO Line ID , int64
PO Line # , int64
Item Type , object
Spot Buy Flag , object
Form Type , object
SKU/Catalog # , object
Product Description , object
Manufacturer , object
Mfr Catalog # , object
Amount/UOM & UOM , object
Product Size , object
Category Preference , float64
Category Level 1 , object
Category Level 2 , object
Category Level 3 , object
Category Level 4 , object
Category Level 5 , object
Category Name , object
CAS # , float64
UNSPSC , float64
Commodity Code , object
Radioactive , object
Hazmat , object
Controlled , object
RadMinor , object
Select Agent , object
Toxin , object
Recycled , object
Green Product , object
Green Product Description , object
LEED Compliance Details , float64
Energy Star , object
ProdFlag 10 , float64
Quantity , float64
Unit Price , object
Unit Price Date , object
Extended Price , object
List Price , object
List Price Date , object
Current - 1 Unit Price , object
Current - 1 Unit Price Date , object
Current - 1 List Price , object
Current - 1 List Price Date , object
List Price Savings , object
Contract Savings , object
Special Pricing Code , object
Shipping Charge , float64
Handling Charge , float64
Taxable Flag , object
Tax 1 , float64
Tax 2 , float64
Capital Expense Flag , object
Buyer:  username , object
Buyer: First Name , object
Buyer: Last Name , object
Buyer: Email , object
Department , float64
Position , float64
Carrier , object
Shipping Method , object
Carrier Account # , float64
ShipTo Address Internal Name , object
ShipTo Address Code , object
ShipTo Address 1 , object
ShipTo Address 2 , object
ShipTo Address 3 , object
ShipTo Address 4 , float64
ShipTo Address 5 , float64
ShipTo Contact 1 , object
ShipTo Contact 2 , float64
ShipTo Contact 3 , float64
ShipTo Contact 4 , float64
ShipTo Contact 5 , float64
ShipTo City , object
ShipTo State , object
ShipTo Postal Code , object
ShipTo Country , object
BillTo Address Internal Name , object
BillTo Address Code , object
BillTo Address 1 , object
BillTo Address 2 , object
BillTo Address 3 , object
BillTo Address 4 , float64
BillTo Address 5 , float64
BillTo Contact 1 , object
BillTo Contact 2 , object
BillTo Contact 3 , object
BillTo Contact 4 , object
BillTo Contact 5 , object
BillTo City , object
BillTo State , object
BillTo Postal Code , object
BillTo Country , object
Accounting Date , object
PO Counter , int64
Supplier Duns No , object
Federal ID No , object
SIC , float64
NAICS , float64
Austin-Tetra , float64
Fulfillment Center Name , object
Fulfillment Center Address ID , int64
3rd Party Address ID , float64
Supplier Commodity Code , object
Supplier Phone , object
Contract No , object
Contract Renewal No , float64
Contract Name , object
Contract Effective Date , object
Contract Expiration Date , object
Contract Unit Price , float64
Contract Unit Price Variance , float64
FormId , float64
CreditCard Name , float64
Last 4 Digits of Credit Card Number , float64
Price Set Name , object
Replenishment Order , float64
Stock Item ID , float64
Stock Item Name , float64
Stock Units , float64
Stock Supplier Name , float64
Stock Supplier ID , float64
Stock FC Name , float64
Stock FC ID , float64
Line Status , object
SS , object
RFx Status , float64
Qty Net Received , float64
Qty Net Invoiced , float64
Qty Variance , float64
Cost Net Invoiced , float64
Cost Variance , float64
Receive Status , object
Voucher Status , object