graph_cleaning_and_analysis



In [40]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import bipartite

In [2]:
#this has org level 5 data dept names and number
data = pd.read_csv('/Users/dariushbozorgmehri/Documents/My Work/Berkeley Classes Spring 2015/BIDS/data/modified_data_4_15/departmentDataV3_ALL_DATA_CLEAN.csv', low_memory=False)

In [3]:
data = data.rename(columns={'Org.Level.4': 'org_level_4', 'Org.Level.4.Number': 'org_level_4_number','Org.Level.5': 'org_level_5', 'Org.Level.5.Number': 'org_level_5_number'})

In [4]:
data.head(3)


Out[4]:
Unnamed: 0 Tier.3...UC.Category.Group PO.Date PO.Number PO.Commitment PO.Line.Number Org.Level.1 Org.Level.1.Number Org.Level.2.Number Org.Level.2 Org.Level.3 Org.Level.3.Number org_level_4 org_level_4_number org_level_5 org_level_5_number Org.Level.6 Org.Level.6.Number Org.Level.7 Org.Level.7.Number
0 1 NaN NaN NaN 1,378,222,902.63 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 2 Building Construction 07/05/2012 BB00040468 300.00 1 UC Berkeley UCBKL CAMSU Campus Support Facilities Services VCCPD Capital Projects Operations ALPDC CP Operations ALOPS CP Operations-L6 ALOP6 Null from Data Source Null from Data Source
2 3 Building Construction 07/16/2012 BB00044826 2,500.00 1 UC Berkeley UCBKL CAMSU Campus Support Administration & Finance VCBAS Business & Administrative Svcs BSAVC University Police FUPOL UCPD Operations FUOPS Null from Data Source Null from Data Source

In [7]:
data4Number = data[["org_level_4","org_level_4_number"]]
data4Number = data4Number.drop_duplicates()
data4Number = data4Number[["org_level_4_number", "org_level_4"]]
#reindex after drop dup
data4Number.index = range(1,len(data4Number) + 1)

In [8]:
data5Number = data[["org_level_5","org_level_5_number"]]
data5Number = data5Number.drop_duplicates()
data5Number = data5Number[["org_level_5_number", "org_level_5"]]
data5Number.index = range(1,len(data5Number) + 1)

In [10]:
len(data4Number)
#data4Number = data4Number.drop(data4Number.index[[0]])
#data4Number


Out[10]:
291

In [40]:
#data4Number['org_level_4_number'] = data4['org_level_4_number'].to_string()

In [52]:
#a = data4Number.org_level_4_number[3]
#type(data4Number)

In [93]:
#data4Number['org_level_4_number'] =  data4Number['org_level_4_number'].map(str.strip)

In [94]:
#convert data frame into a dictionary
#data4Dic = data4Number.set_index('org_level_4_number').to_dict()

In [11]:
data5Number.org_level_5_number[7]


Out[11]:
'DACE5'

In [12]:
data4Number.head(10)
#reindiex
#data4Number = data4Number.reindex(index = [0,291])

#data4Number.org_level_4_number[1]


Out[12]:
org_level_4_number org_level_4
1 NaN NaN
2 ALPDC Capital Projects Operations
3 BSAVC Business & Administrative Svcs
4 EXADM UNEX Administrative Depts
5 EDDNO Eng Dean's Office
6 EHEEC Elec Engr & Computer Sc
7 DACED Envir Design Dean's Off
8 UKHDS Housing & Dining Services
9 FNATH Intercollegiate Athletics
10 VRIST Info Services & Technology

In [13]:
data4Dic = {}

In [14]:
#CREATE A DICITONARY for org elevel 4
#data4Dic2[data4Number.org_level_4_number[1]] = data4Number.org_level_4[1]
i=1
for i in range(1,len(data4Number)):
    #while i < 3:
    data4Dic.update({data4Number.org_level_4_number[i]:data4Number.org_level_4[i]})

In [15]:
data5Dic = {}

In [16]:
i=1
for i in range(1,len(data5Number)):
    #while i < 3:
    data5Dic.update({data5Number.org_level_5_number[i]:data5Number.org_level_5[i]})

In [17]:
data5Dic
len(data5Number)


Out[17]:
606

In [18]:
data5Number.org_level_5_number[1]


Out[18]:
nan

In [19]:
data4Number.org_level_4_number[1]
#len(data4Number)


Out[19]:
nan

In [20]:
dataOrig = pd.read_csv('/Users/dariushbozorgmehri/Documents/My Work/Berkeley Classes Spring 2015/BIDS/data/UCB_dept_merge.csv', low_memory=False)

In [21]:
dataOrig.head(7)


Out[21]:
po_id po_num creation_date supplier_name item_type product_description manufacturer quantity unit_price department buyer__first_name buyer__last_name po_closed_date department_name spend
0 29847876 BB00195887 2013-05-31 00:00:00 GIVE SOMETHING BACK SQ Hosted Product PORTABLE COMBINATION LAPTOP LOCK, 6 FT. CARBON... KENSINGTON 30 24.32 NaN Dustin Miller 2013-07-16 00:00:00 UIAPA UB Academic Year 795.26
1 29847864 BB00195886 2013-05-31 00:00:00 GRAINGER INC PunchOut Product Wall Mount Fan, Oscillating, Number of Speeds ... AIR KING 1 35.58 NaN Erin Pinkston 2013-08-08 00:00:00 UKHDS Unit 1 Apt Admin 38.78
2 29847796 BB00195884 2013-05-31 00:00:00 BELLCO GLASS INC NonCatalog Product Septum Stopper, 20mm Blue Butyl Rubber QtyPerC... NaN 1 77.41 NaN William Wolf NaN CCHEM RES Research 104.67
3 29847820 BB00195885 2013-05-31 00:00:00 CHEMGLASS LIFE SCIENCES LLC NonCatalog Product Column, Chromatography, 24/40 Outer Joint, 100... NaN 1 108.00 NaN William Wolf 2014-10-30 00:00:00 CCHEM RES Research 71.77
4 29847565 BB00195881 2013-05-31 00:00:00 FISHER SCIENTIFIC SQ Hosted Product Bottles, Media/Lab; Wheaton;Graduated; With ru... Wheaton Science Products Inc 1 135.38 NaN Donald C. RIO NaN IMMCB BH Research 317.31
5 29847565 BB00195881 2013-05-31 00:00:00 FISHER SCIENTIFIC SQ Hosted Product Flasks, Cell Culture; Corning; Wide neck; Stac... Corning Life Sciences Plastic 1 81.29 NaN Donald C. RIO NaN IMMCB BH Research 317.31
6 29847565 BB00195881 2013-05-31 00:00:00 FISHER SCIENTIFIC SQ Hosted Product Filter unit; EMD Millipore; Steriflip; Disposa... Emd Millipore Corp 1 96.56 NaN Donald C. RIO NaN IMMCB BH Research 317.31

In [22]:
dataOrig['department_number'] = dataOrig["department_name"].apply(lambda x: x[:5])
dataOrig['department_name_stripped'] = dataOrig["department_name"].apply(lambda x: x[6:])

In [23]:
dataOrig['department_name_update'] = ""

In [24]:
dataOrig.head()


Out[24]:
po_id po_num creation_date supplier_name item_type product_description manufacturer quantity unit_price department buyer__first_name buyer__last_name po_closed_date department_name spend department_number department_name_stripped department_name_update
0 29847876 BB00195887 2013-05-31 00:00:00 GIVE SOMETHING BACK SQ Hosted Product PORTABLE COMBINATION LAPTOP LOCK, 6 FT. CARBON... KENSINGTON 30 24.32 NaN Dustin Miller 2013-07-16 00:00:00 UIAPA UB Academic Year 795.26 UIAPA UB Academic Year
1 29847864 BB00195886 2013-05-31 00:00:00 GRAINGER INC PunchOut Product Wall Mount Fan, Oscillating, Number of Speeds ... AIR KING 1 35.58 NaN Erin Pinkston 2013-08-08 00:00:00 UKHDS Unit 1 Apt Admin 38.78 UKHDS Unit 1 Apt Admin
2 29847796 BB00195884 2013-05-31 00:00:00 BELLCO GLASS INC NonCatalog Product Septum Stopper, 20mm Blue Butyl Rubber QtyPerC... NaN 1 77.41 NaN William Wolf NaN CCHEM RES Research 104.67 CCHEM RES Research
3 29847820 BB00195885 2013-05-31 00:00:00 CHEMGLASS LIFE SCIENCES LLC NonCatalog Product Column, Chromatography, 24/40 Outer Joint, 100... NaN 1 108.00 NaN William Wolf 2014-10-30 00:00:00 CCHEM RES Research 71.77 CCHEM RES Research
4 29847565 BB00195881 2013-05-31 00:00:00 FISHER SCIENTIFIC SQ Hosted Product Bottles, Media/Lab; Wheaton;Graduated; With ru... Wheaton Science Products Inc 1 135.38 NaN Donald C. RIO NaN IMMCB BH Research 317.31 IMMCB BH Research

In [48]:
"""#this creates a list
dataOrigEdit = dataOrig["department_number"]
#convert back to dataframe
dataOrigEdit = pd.DataFrame(dataOrigEdit)
dataOrigEdit """


Out[48]:
'#this creates a list\ndataOrigEdit = dataOrig["department_number"]\n#convert back to dataframe\ndataOrigEdit = pd.DataFrame(dataOrigEdit)\ndataOrigEdit '

In [114]:
dataMerged = pd.merge(dataOrig, data4Number, left_on='department_number', right_index=True)
dataMerged


Out[114]:
po_id po_num creation_date supplier_name item_type product_description manufacturer quantity unit_price department buyer__first_name buyer__last_name po_closed_date department_name spend department_number department_name_update org_level_4_number org_level_4

In [58]:
i=0
#dataOrig.department_number[i] 

j=1
data4Number.org_level_4_number[j]


Out[58]:
'ALPDC'

WRITE SCRIPT TO INSERT NEW DEPARTMENTS


In [151]:
""" i=0
for i in range(10000,50000):
    j=0
    for j in range(1, len(data4Number)):
        #print j
        if  dataOrig.department_number[i] == data4Number.org_level_4_number[j]:
            dataOrig.department_name_update[i] = data4Number.org_level_4[j]   """


Out[151]:
' i=0\nfor i in range(10000,50000):\n    j=0\n    for j in range(1, len(data4Number)):\n        #print j\n        if  dataOrig.department_number[i] == data4Number.org_level_4_number[j]:\n            dataOrig.department_name_update[i] = data4Number.org_level_4[j]   '

In [25]:
#USE DICTIONARY!!!
i=0
for i in range(1,len(dataOrig)):
    if  dataOrig.department_number[i] in data4Dic:
        #print "in there"
        dataOrig.department_name_update[i] = data4Dic[dataOrig.department_number[i]]
    elif dataOrig.department_number[i] in data5Dic:
        dataOrig.department_name_update[i] = data5Dic[dataOrig.department_number[i]]
    else:
        dataOrig.department_name_update[i] = dataOrig.department_name_stripped[i]

In [27]:
dataOrig.head()


Out[27]:
po_id po_num creation_date supplier_name item_type product_description manufacturer quantity unit_price department buyer__first_name buyer__last_name po_closed_date department_name spend department_number department_name_stripped department_name_update
0 29847876 BB00195887 2013-05-31 00:00:00 GIVE SOMETHING BACK SQ Hosted Product PORTABLE COMBINATION LAPTOP LOCK, 6 FT. CARBON... KENSINGTON 30 24.32 NaN Dustin Miller 2013-07-16 00:00:00 UIAPA UB Academic Year 795.26 UIAPA UB Academic Year
1 29847864 BB00195886 2013-05-31 00:00:00 GRAINGER INC PunchOut Product Wall Mount Fan, Oscillating, Number of Speeds ... AIR KING 1 35.58 NaN Erin Pinkston 2013-08-08 00:00:00 UKHDS Unit 1 Apt Admin 38.78 UKHDS Unit 1 Apt Admin Housing & Dining Services
2 29847796 BB00195884 2013-05-31 00:00:00 BELLCO GLASS INC NonCatalog Product Septum Stopper, 20mm Blue Butyl Rubber QtyPerC... NaN 1 77.41 NaN William Wolf NaN CCHEM RES Research 104.67 CCHEM RES Research Dept Of Chemistry
3 29847820 BB00195885 2013-05-31 00:00:00 CHEMGLASS LIFE SCIENCES LLC NonCatalog Product Column, Chromatography, 24/40 Outer Joint, 100... NaN 1 108.00 NaN William Wolf 2014-10-30 00:00:00 CCHEM RES Research 71.77 CCHEM RES Research Dept Of Chemistry
4 29847565 BB00195881 2013-05-31 00:00:00 FISHER SCIENTIFIC SQ Hosted Product Bottles, Media/Lab; Wheaton;Graduated; With ru... Wheaton Science Products Inc 1 135.38 NaN Donald C. RIO NaN IMMCB BH Research 317.31 IMMCB BH Research Molecular & Cell Biology

In [28]:
dataOrig.department_name_update.describe()


Out[28]:
count                       611110
unique                         524
top       Molecular & Cell Biology
freq                         48808
dtype: object

MAKE THE GRAPH


In [3]:
#READ IN CLEANED UP DATA
#dataOrig.to_csv('/Users/dariushbozorgmehri/Documents/My Work/Berkeley Classes Spring 2015/BIDS/data/temp.csv')
dataOrig = pd.read_csv('/Users/dariushbozorgmehri/Documents/My Work/Berkeley Classes Spring 2015/BIDS/data/temp.csv')

In [4]:
#clean up data
dataOrig['supplier_name'] =  dataOrig['supplier_name'].str.replace(',', ' ')
dataOrig['supplier_name'] =  dataOrig['supplier_name'].str.replace(' ', '_')
dataOrig['department_name_update'] =  dataOrig['department_name_update'].str.replace(',', ' ')
dataOrig['department_name_update'] =  dataOrig['department_name_update'].str.replace(' ', '_')

In [5]:
dataOrig['creation_date'] = dataOrig['creation_date'].str.replace('00:00:00', '')

In [6]:
#create seperate year, month, day colums

dataOrig['year'] = dataOrig["creation_date"].apply(lambda x: x[:4])
dataOrig['month'] = dataOrig['creation_date'].apply(lambda x: x[5:7])
dataOrig['day'] = dataOrig['creation_date'].apply(lambda x: x[8:10])

In [8]:
dataOrig.head(3)


Out[8]:
Unnamed: 0 po_id po_num creation_date supplier_name item_type product_description manufacturer quantity unit_price ... buyer__last_name po_closed_date department_name spend department_number department_name_stripped department_name_update year month day
0 0 29847876 BB00195887 2013-05-31 GIVE_SOMETHING_BACK SQ Hosted Product PORTABLE COMBINATION LAPTOP LOCK, 6 FT. CARBON... KENSINGTON 30 24.32 ... Miller 2013-07-16 00:00:00 UIAPA UB Academic Year 795.26 UIAPA UB Academic Year NaN 2013 05 31
1 1 29847864 BB00195886 2013-05-31 GRAINGER_INC PunchOut Product Wall Mount Fan, Oscillating, Number of Speeds ... AIR KING 1 35.58 ... Pinkston 2013-08-08 00:00:00 UKHDS Unit 1 Apt Admin 38.78 UKHDS Unit 1 Apt Admin Housing_&_Dining_Services 2013 05 31
2 2 29847796 BB00195884 2013-05-31 BELLCO_GLASS_INC NonCatalog Product Septum Stopper, 20mm Blue Butyl Rubber QtyPerC... NaN 1 77.41 ... Wolf NaN CCHEM RES Research 104.67 CCHEM RES Research Dept_Of_Chemistry 2013 05 31

3 rows × 22 columns


In [9]:
#subset data
#data = data[['creation_date', 'supplier_name','item_type','org_level_3']]
dataOrigSubset = dataOrig[[ 'supplier_name','department_name_update','item_type','year','month','day']]

In [11]:
dataOrigSubset.head(50)


Out[11]:
supplier_name department_name_update item_type year month day
0 GIVE_SOMETHING_BACK NaN SQ Hosted Product 2013 05 31
1 GRAINGER_INC Housing_&_Dining_Services PunchOut Product 2013 05 31
2 BELLCO_GLASS_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
3 CHEMGLASS_LIFE_SCIENCES_LLC Dept_Of_Chemistry NonCatalog Product 2013 05 31
4 FISHER_SCIENTIFIC Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
5 FISHER_SCIENTIFIC Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
6 FISHER_SCIENTIFIC Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
7 EPPENDORF_NORTH_AMERICA_INC Molecular_&_Cell_Biology NonCatalog Product 2013 05 31
8 VWR_INTERNATIONAL_INC Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
9 EMD_MILLIPORE_CORPORATION_(Formerly_EMD_Chemic... Nutritional_Sci_&_Tox_Dept SQ Hosted Product 2013 05 31
10 MATRIX_PROMOTIONAL_MARKETING Summer_Sessions_&_Study_Abroad NonCatalog Product 2013 05 31
11 EMD_MILLIPORE_CORPORATION Nutritional_Sci_&_Tox_Dept SQ Hosted Product 2013 05 31
12 OFFICE_MAX Dept_of_Chemical_E SQ Hosted Product 2013 05 31
13 OFFICE_MAX Dept_of_Chemical_E SQ Hosted Product 2013 05 31
14 OFFICE_MAX Dept_of_Chemical_E SQ Hosted Product 2013 05 31
15 FISHER_SCIENTIFIC Environment__Health_&_Safety SQ Hosted Product 2013 05 31
16 Life_Technologies_Corporation Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
17 SIGMA-ALDRICH Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
18 SIGMA-ALDRICH Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
19 Life_Technologies_Corporation Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
20 DigiKey_Corp. Space_Sciences_Laboratory NonCatalog Product 2013 05 31
21 DigiKey_Corp. Space_Sciences_Laboratory NonCatalog Product 2013 05 31
22 DELL_MARKETING_LP Inst_of_Personality_&_Soc_Res PunchOut Product 2013 05 31
23 PARTNERS_IN_COMMUNICATIONS_LLC Disabled_Students_Program NonCatalog Product 2013 05 31
24 Dead_End_Productions Ctrs_Trans_Reent_Studt_Parents NonCatalog Product 2013 05 31
25 PARTNERS_IN_COMMUNICATIONS_LLC Disabled_Students_Program NonCatalog Product 2013 05 31
26 PARTNERS_IN_COMMUNICATIONS_LLC Disabled_Students_Program NonCatalog Product 2013 05 31
27 PARTNERS_IN_COMMUNICATIONS_LLC Disabled_Students_Program NonCatalog Product 2013 05 31
28 OFFICE_MAX Professional_Development_Prog SQ Hosted Product 2013 05 31
29 Linked_In_Corporation Human_Resources NonCatalog Product 2013 05 31
30 Linked_In_Corporation Human_Resources NonCatalog Product 2013 05 31
31 ASHBY_FLOWERS Student_Life_Advising_Svs NonCatalog Product 2013 05 31
32 ASHBY_FLOWERS Student_Life_Advising_Svs NonCatalog Product 2013 05 31
33 CDW_G Mathematics PunchOut Product 2013 05 31
34 ONE_WORKPLACE_L_FERRARI_LLC Geography PunchOut Product 2013 05 31
35 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
36 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
37 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
38 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
39 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
40 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
41 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
42 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
43 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
44 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
45 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
46 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
47 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
48 TED_PELLA_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
49 MCMASTER_CARR_SUPPLY_CO Coll_of_Chem_Dean NonCatalog Product 2013 05 31

In [12]:
dataOrigSubset.year.describe()
#data4['year']= data4['year'].astype(int)
#data4['year'].isnull().sum()
#data4[50000:100000]
#len(data4)
len(dataOrigSubset)


Out[12]:
611110

In [26]:
#subsetting by element in row, subset data for year only, drop all 2104
#data = data[data['year'] == '2013']
dataOrigSubset = dataOrigSubset[dataOrigSubset['year'] == '2013']
dataOrigSubset = dataOrigSubset[dataOrigSubset.supplier_name != 'OFFICE_MAX']
dataOrigSubset = dataOrigSubset[dataOrigSubset.supplier_name != 'GIVE_SOMETHING_BACK']
dataOrigSubset = dataOrigSubset[dataOrigSubset.supplier_name != 'ALKO_OFFICE_SUPPLY']


dataOrigSubset.head()


Out[26]:
supplier_name department_name_update item_type year month day
1 GRAINGER_INC Housing_&_Dining_Services PunchOut Product 2013 05 31
2 BELLCO_GLASS_INC Dept_Of_Chemistry NonCatalog Product 2013 05 31
3 CHEMGLASS_LIFE_SCIENCES_LLC Dept_Of_Chemistry NonCatalog Product 2013 05 31
4 FISHER_SCIENTIFIC Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31
5 FISHER_SCIENTIFIC Molecular_&_Cell_Biology SQ Hosted Product 2013 05 31

In [37]:
dataOrigSubset.notnull().sum()


Out[37]:
supplier_name             171444
department_name_update    171444
item_type                 171444
year                      171444
month                     171444
day                       171444
dtype: int64

In [48]:
"""data13Feb = data[data['month'].isin(['02'])]
#data13Feb = data13Feb.drop_duplicates()
data13March = data[data['month'].isin(['03'])]"""


Out[48]:
27

In [27]:
#PREPARE DATA FOR GRAPH
dataToGraph = dataOrigSubset[dataOrigSubset['month'].isin(['02'])]
dataToGraph = dataToGraph[dataToGraph['item_type'].isin(['NonCatalog Product'])]
dataToGraph = dataToGraph[['supplier_name','department_name_update']]
dataToGraph = dataToGraph.drop_duplicates()
#GraphData(dataToGraph, month)
dataToGraph.to_csv('/Users/dariushbozorgmehri/Documents/My Work/Berkeley Classes Spring 2015/BIDS/data/DATA_CUT_JAN13ALL.csv')

In [23]:
dataToGraph.to_csv('/Users/dariushbozorgmehri/Documents/My Work/Berkeley Classes Spring 2015/BIDS/data/DATA2013FEB_NONCAT.csv')

In [142]:
"""#CREATE GRAPH AND ADD NODES AND EDGES
#def GraphData(dataToGraph, month):
G = nx.Graph()
G.add_nodes_from(dataToGraph['department_name_update'], bipartite = 0)
G.add_nodes_from(dataToGraph['supplier_name'], bipartite = 1)
edgeList = [tuple(x) for x in dataToGraph.values]
G.add_edges_from(edgeList)

nx.draw(G)
plt.show()"""

In [69]:
#CLEAR THE GRAPH FOR REGRAPHING
print "clearing graph"
G = G.clear()
#print "number of nodes and edges:"
#if G.nodes() == 0 and G.edges() == 0:
    #print "node and edges are clear"
nodelistDept = []
nodelistSup = []
edgeList = []
print "clearing nodelistDept", nodelistDept, nodelistSup, edgeList


clearing graph
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-69-98c275afe9b7> in <module>()
      1 #CLEAR THE GRAPH FOR REGRAPHING
      2 print "clearing graph"
----> 3 G = G.clear()
      4 #print "number of nodes and edges:"
      5 #if G.nodes() == 0 and G.edges() == 0:

AttributeError: 'NoneType' object has no attribute 'clear'

In [71]:
#CREATE GRAPH AND ADD NODES AND EDGES
#def GraphData(dataToGraph, month):
G=nx.Graph()
#G = nx.Graph()
G.add_nodes_from(dataToGraph['department_name_update'], bipartite = 0)
G.add_nodes_from(dataToGraph['supplier_name'], bipartite = 1)
edgeList = [tuple(x) for x in dataToGraph.values]
G.add_edges_from(edgeList)

"""nx.draw(G)
plt.show()  """

#CREATE A GRAPH WITH NODES COLORED ACCORDING TO DEPARTMENT OR SUPPLIER, AND SIZED
#WITH DEPT LARGER THAN SUPPLIER, CREATE NETWORK X NODELIST FROM DATAFRAME
nodelistDept = dataToGraph['department_name_update'].tolist()
nodelistSup = dataToGraph['supplier_name'].tolist()

#nodes, node_color=(.2,.4,.6,.8) is in R,G,B, A color range, or for shade just use 0.1 ro 1
#Also, b: blue, g: green, r: red, c: cyan, m: magenta, y: yellow, k: black, w: white
#network layout
pos=nx.networkx.spring_layout(G)
#draw nodes
nx.draw_networkx_nodes(G, pos, nodelist = nodelistDept, node_color = 'r', node_size = 50)
nx.draw_networkx_nodes(G, pos, nodelist = nodelistSup, node_color= 'w', node_size=50)
#draw edges
nx.draw_networkx_edges(G,pos,width=0.5,alpha=0.5)
nx.draw_networkx_edges(G,pos,edgelist=edgeList)

"""#draw the labels
labels = {k:k for k in nodelistDept}    
plt.clf()
nx.draw_networkx_labels(G,pos, labels)"""


#nx.draw_networkx_labels(G,pos,font_size=12,font_family='sans-serif')
#plt.show()
#NEWpicName = 'data2013' + month 
plt.savefig('/Users/dariushbozorgmehri/Documents/My Work/Berkeley Classes Spring 2015/BIDS/data/temp_pics/NEWpicName.png')

In [272]:



Out[272]:
(1719, 2251)

In [79]:


In [48]:
cent = nx.bipartite.betweenness_centrality(G, G.nodes())
centDf = pd.DataFrame(cent.items(), columns=['actor', 'centrality']).sort(columns="centrality", ascending=False)


---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-48-034abe483c8a> in <module>()
----> 1 cent = nx.bipartite.betweenness_centrality(G, G.nodes())
      2 centDf = pd.DataFrame(cent.items(), columns=['actor', 'centrality']).sort(columns="centrality", ascending=False)

/Users/dariushbozorgmehri/anaconda/lib/python2.7/site-packages/networkx/algorithms/bipartite/centrality.pyc in betweenness_centrality(G, nodes)
    151     n = float(len(top))
    152     m = float(len(bottom))
--> 153     s = (n-1) // m
    154     t = (n-1) % m
    155     bet_max_top = (((m**2)*((s+1)**2))+

ZeroDivisionError: float divmod()

In [61]:
bottom_nodes, top_nodes = bipartite.sets(G)

In [62]:
nx.bipartite.density(G, top_nodes), nx.bipartite.density(G, bottom_nodes)


Out[62]:
(0.007556263084438242, 0.007556263084438242)

In [63]:
nx.bipartite.average_clustering(G)


Out[63]:
0.592544711369541

In [64]:
#nx.bipartite.clustering(G)

In [53]:
#nx.bipartite.color(G)

In [65]:
GProjected = nx.bipartite.projected_graph(G, top_nodes)

In [66]:
nx.draw(GProjected)
plt.show()


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
/Users/dariushbozorgmehri/anaconda/lib/python2.7/site-packages/matplotlib/backend_bases.pyc in enter_notify_event(self, guiEvent, xy)
   1931         self._lastx, self._lasty = None, None
   1932 
-> 1933     def enter_notify_event(self, guiEvent=None, xy=None):
   1934         """
   1935         Backend derived classes should call this function when entering

KeyboardInterrupt: 

In [67]:
nx.density(GProjected)


Out[67]:
0.05082897033158813

In [ ]: