In [1]:
import pandas as pd
import numpy as np

In [11]:
# Preview the first five rows of the input frame.
# `df` is not created in any visible cell, so it must already be loaded in the
# kernel before this cell runs.
df.head(n=5)


Out[11]:
RESOURCE MGR_ID ROLE_ROLLUP_1 ROLE_ROLLUP_2 ROLE_DEPTNAME ROLE_TITLE ROLE_FAMILY_DESC ROLE_FAMILY ROLE_CODE
0 39353 85475 117961 118300 123472 117905 117906 290919 117908
1 17183 1540 117961 118343 123125 118536 118536 308574 118539
2 36724 14457 118219 118220 117884 117879 267952 19721 117880
3 36135 5396 117961 118343 119993 118321 240983 290919 118322
4 42680 5905 117929 117930 119569 119323 123932 19793 119325

In [40]:
# Preview the first five rows of the column-by-column statistic table.
# `Dfg` is created by the `pd.DataFrame(columns=..., index=...)` cell and filled
# by the nested groupby loop; both appear later in this file, so they have to be
# executed before this cell.
Dfg.head(n=5)


Out[40]:
RESOURCE MGR_ID ROLE_ROLLUP_1 ROLE_ROLLUP_2 ROLE_DEPTNAME ROLE_TITLE ROLE_FAMILY_DESC ROLE_FAMILY ROLE_CODE
RESOURCE NaN 6 2 3 4 4 5 3 4
MGR_ID 16 NaN 1 1 2 3 3 2 3
ROLE_ROLLUP_1 94.4 49.5 NaN 2 13.3 19.3 37.9 11 19.3
ROLE_ROLLUP_2 123.4 60.4 1 NaN 22.4 32.4 48.4 13.4 32.4
ROLE_DEPTNAME 92 20 5 6 NaN 14 23 7 14

In [38]:
# Empty square table with df's column names on both axes; every entry starts
# out as NaN and is filled pair by pair by the nested groupby loop.
# (A leftover `sum(df[j].values==25)` probe was removed from this cell: its
# result was discarded and it failed with NameError whenever `j` was not
# already defined, which blocked the creation of Dfg on a fresh kernel.)
Dfg = pd.DataFrame(index=df.columns.values, columns=df.columns.values)

In [54]:
# List every ordered (row, column) pair whose entry in Dfg is exactly 1.
# Dfg.loc[a, b] holds the 0.98 quantile of the number of distinct `b` values
# found inside each group of `a`, so a value of 1 means that nearly every `a`
# group carries a single `b` value. Unfilled entries (NaN, including the
# diagonal) never compare equal to 1 and are skipped.
for row_label in Dfg.columns.values:
    for col_label in Dfg.columns.values:
        value = Dfg.loc[row_label, col_label]
        if value == 1:
            # One pre-formatted string prints the same text under both the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s" % (row_label, col_label, value))


ROLE_ROLLUP_2 ROLE_ROLLUP_1 1.0
ROLE_TITLE ROLE_FAMILY 1.0
ROLE_TITLE ROLE_CODE 1.0
ROLE_CODE ROLE_TITLE 1.0
ROLE_CODE ROLE_FAMILY 1.0

In [52]:
# Fill Dfg with a cardinality statistic for every ordered pair of distinct
# columns: Dfg.loc[i, j] is the 0.98 quantile, taken over the groups of column
# `i`, of the number of distinct values column `j` takes within a group.
# The diagonal (i == j) is left untouched.
# Requires `Dfg` to exist already with df's column names as index and columns.
#
# Leftover from an earlier run, kept for reference:
#df = df.drop('ACTION' , axis = 1 )

for i in df.columns.values:
    print(i)
    for j in df.columns.values:
        if i != j:
            print("==========")  # progress marker: one line per (i, j) pair
            # Built-in distinct count per group. nunique() ignores NaN, so it
            # differs from the former np.unique-based count only if the data
            # has missing values -- TODO confirm there are none.
            x = df.groupby(i)[j].nunique()
            # .loc replaces .ix, which is deprecated and removed in pandas 1.0+.
            Dfg.loc[i, j] = x.quantile(0.98)


RESOURCE
==========
==========
==========
==========
==========
==========
==========
==========
MGR_ID
==========
==========
==========
==========
==========
==========
==========
==========
ROLE_ROLLUP_1
==========
==========
==========
==========
==========
==========
==========
==========
ROLE_ROLLUP_2
==========
==========
==========
==========
==========
==========
==========
==========
ROLE_DEPTNAME
==========
==========
==========
==========
==========
==========
==========
==========
ROLE_TITLE
==========
==========
==========
==========
==========
==========
==========
==========
ROLE_FAMILY_DESC
==========
==========
==========
==========
==========
==========
==========
==========
ROLE_FAMILY
==========
==========
==========
==========
==========
==========
==========
==========
ROLE_CODE
==========
==========
==========
==========
==========
==========
==========
==========

In [9]:
# Distinct values of column `j`, most frequent first (the index of
# value_counts()). `j` is whatever column name an earlier cell left bound in
# the kernel. The previous trailing `[]` was an empty subscript, which is a
# SyntaxError; the recorded output is the full index, so no subscript is used.
df[j].value_counts().index


Out[9]:
Int64Index([ 4675, 79092, 25993, 75078,  3853,  6977, 75834, 32270, 42085,
            17308,
            ...
            28793, 18243, 20290, 26407, 43202, 35014, 75535, 30936, 89856,
            16376],
           dtype='int64', length=7518)

In [15]:
# Show whatever is currently bound to `x`; the parenthesised form prints the
# same text under Python 2 and Python 3.
# NOTE(review): the recorded output below has a two-level (RESOURCE, MGR_ID)
# index, which the single-key groupby in the loop cell would not produce --
# presumably `x` came from a cell that is no longer in the notebook.
print(x)


RESOURCE  MGR_ID
0         33         9
          36         9
          4417      10
          4468      10
          4583       9
          51138     12
          71215      9
          72347      9
          87908     11
38        47         9
          70         9
          8283       9
          13848      9
          20818      9
          60121     11
136       4500       9
138       3053       9
          17713      9
153       154       13
          163        9
          168        9
          169        9
          174        9
          190        9
          193        9
          196       17
          8203       9
          19760      9
          71215      9
199       5432       9
                    ..
312039    52426      9
312046    1954       9
          24169      9
          49655      9
          50799      9
          55805      9
          101472     9
312081    16922     12
312088    15781      9
312089    15566      8
          15781     10
312103    16922      9
312116    105908    13
312121    49574      9
312122    3869       9
          7012       9
312129    8415       9
          18190      9
          19832      9
312130    1325       9
          14638     11
          54618     13
312131    25813      9
312132    4084       9
312136    51178      9
312139    3966       9
312140    3966       9
312152    4641       9
312153    3048       9
          50781      9
dtype: int64

In [ ]: