WebKB



In [1]:
from kcat import datasets as ds
import pandas as pd
from random import sample

In [2]:
# Load the WebKB dataset arrays from kcat. Only X is used in the cells shown
# here (it is wrapped in a DataFrame and tabulated).
# NOTE(review): Y and y are presumably two encodings of the class labels —
# confirm against kcat.datasets before relying on them.
X, Y, y = ds.WebKB().data_arrays

In [3]:
# Wrap X in a DataFrame so that individual columns can be selected by label
# and their value distribution tabulated.
df = pd.DataFrame(X)

In [4]:
# Tabulate, for up to 50 randomly chosen columns of df, how many rows hold each
# of the values 0-4. Columns are drawn from df's own column labels: sampling
# from range(len(X)) used the *row* count, which only lines up with the column
# labels when X happens to be square.
values = range(5)
n_cols = min(50, len(df.columns))  # sample() raises if asked for more than exist

print('\t' + '\t'.join([str(v).rjust(4) for v in values]))
print('-' * 45)
for col in sample(list(df.columns), n_cols):
    column = df[col]
    # Count matches per value directly rather than materialising every group.
    counts = [str(int(column.eq(v).sum())).rjust(4) for v in values]
    print('{}:\t{}'.format(str(col).rjust(5), '\t'.join(counts)))


	   0	   1	   2	   3	   4
---------------------------------------------
 2707:	2737	   1	   0	   2	   1
 2591:	2740	   0	   0	   1	   0
 2033:	2733	   0	   0	   3	   5
 1326:	2719	   1	   6	  11	   4
 2548:	2718	   0	   0	  11	  12
 1730:	2735	   0	   1	   2	   3
 2712:	2731	   1	   1	   7	   1
 2188:	2739	   0	   0	   2	   0
 1705:	2636	   2	  19	  65	  19
 2735:	2723	   0	   3	   9	   6
    0:	2688	   1	   4	  27	  19
 2189:	2717	   1	  10	  13	   0
 2638:	2738	   0	   0	   2	   1
  487:	2736	   0	   3	   0	   2
 2394:	2733	   0	   3	   4	   1
 1846:	2737	   0	   0	   2	   2
 2203:	2722	   0	   2	   6	  11
 1407:	2714	   1	   4	  18	   4
 2297:	2737	   0	   0	   1	   3
 2399:	2716	   1	   2	  13	   9
   12:	1843	   1	  34	 300	 558
 2565:	2691	   0	   9	  30	  10
  318:	2568	   0	   7	  61	 103
  142:	2690	   0	   7	  34	  10
 2080:	2736	   0	   0	   3	   2
 2313:	2739	   0	   1	   1	   0
  682:	2721	   2	   3	  13	   2
 2602:	2727	   0	   2	   6	   6
 1374:	2738	   0	   0	   3	   0
  581:	2738	   0	   0	   1	   2
  509:	2702	   1	   5	  19	  14
  326:	2720	   0	   0	   2	  19
  953:	2653	   2	  10	  43	  33
 2140:	2727	   1	   3	   5	   5
   60:	2700	   2	   5	  21	  13
  732:	2457	   3	  35	 159	  87
 2576:	2733	   0	   2	   6	   0
 2578:	2732	   1	   1	   6	   1
 2320:	2734	   0	   0	   5	   2
 2426:	2732	   0	   1	   2	   6
 1196:	2647	   2	  14	  57	  21
 1566:	2738	   0	   0	   1	   2
 2640:	2738	   0	   1	   0	   2
 2068:	2737	   0	   0	   2	   2
   88:	2718	   0	   2	  13	   8
 1901:	2705	   0	   5	  18	  13
 1717:	2611	   3	  18	  71	  38
    1:	 365	   1	   2	 132	2201
   76:	2699	   3	  10	  21	   8
 2029:	2726	   0	   1	   5	   9

In [4]: