In [2]:
# some_file.py
import sys

sys.path.insert(0, './shared_code')



import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import sqlite3 as db
import json
import pandas as pd
import importlib
from IPython.core.display import display, HTML

import os
import RHgenerate_states
import RHcomponents
import RHdisplay
import RHdistance_partition
import RHutilities

%matplotlib inline

In [17]:
con = db.connect('data/rush_hour.db')
cur = con.cursor()

#states.to_sql('states',con,if_exists="append")

In [4]:
combinatorial_class = 36
class_36 = RHgenerate_states.generate_states(2,2)
num_nodes = len(class_36)

In [21]:
len(class_36),class_36[23]


Out[21]:
(116650, (278873228533880682009745564893224, 1))

In [22]:
comps = RHcomponents.components(class_36)

In [32]:
# prep for processing generated list of components
SAVE_BATCH_SIZE = 2000  # Number of Graphs to save in batch; Equals number of rows in DataFrame
db_components = [None]*SAVE_BATCH_SIZE

comb_class_max_depth = 0

solvable_count = 0
unsolvable_count = 0
save_counter = 0

component_columns = ['comb_class','repr_board_int_s1','repr_board_int_s2',\
                     'repr_red_col', 'is_solvable','num_nodes',\
                     'density','max_solution_distance']

comb_class_columns = ['comb_class','num_nodes','num_components','num_solvable_components',\
                      'num_unsolvable_components','max_solution_depth']

for g in  RHcomponents.gen_components(class_36):
    
    save_counter +=1
    if save_counter >= SAVE_BATCH_SIZE:
        df = pd.DataFrame([x for x in db_components if x is not None],columns = component_columns)
        df.to_sql('component',con,if_exists="append")
        
        db_components = [None]*SAVE_BATCH_SIZE
        save_counter = 1
        
    if not g.graph['solvable']:
        unsolvable_count +=1
        node_dict = g.node[0]
        max_soln_distance = None
        
    else:
        
        solvable_count += 1
        
        RHdistance_partition.distance_partition(g)
        
        max_soln_distance = max(g.graph['distance_partition'].keys())
        if max_soln_distance > comb_class_max_depth:
            comb_class_max_depth = max_soln_distance
        for node in g.graph['distance_partition'][max_soln_distance]:
            break
        node_dict = g.node[node]
        
    
    s1,s2 = RHutilities.split_int( node_dict['board_int'])    
    db_components.append( [\
                             combinatorial_class
                            ,s1
                            ,s2
                            ,node_dict['red_col']
                            ,g.graph['solvable']
                            ,len(g.nodes())
                            ,nx.density(g)
                            ,max_soln_distance                         
                          ]
                        )
        
        
if  db_components.count(None) != len(db_components):
    df = pd.DataFrame([x for x in db_components if x is not None],columns = component_columns)
    df.to_sql('component',con,if_exists="append",index=False)

    
#comb_class_columns = ['comb_class','num_nodes','num_components','num_solvable_components',\
#                      'num_unsolvable_components','max_solution_depth']

comb_class_data = [combinatorial_class,num_nodes,unsolvable_count + solvable_count,\
                   solvable_count,unsolvable_count,  \
                   comb_class_max_depth]

df_class = pd.DataFrame(comb_class_data,columns = comb_class_columns)
df_class.to_sql('comb_class',con,if_exists="append",index=False)

In [39]:


In [33]:
df


Out[33]:
comb_class repr_board_int_s1 repr_board_int_s2 repr_red_col is_solvable num_nodes density max_solution_distance
0 36 11254635381874688 1071644672 1 True 308 0.019184 4.0
1 36 21990316663104 34493825024 1 True 112 0.037162 12.0
2 36 9007233614700544 35116724256768 1 True 297 0.019429 4.0
3 36 15221639059038528 133955584 1 True 136 0.031264 10.0
4 36 221238 22058901700928 1 False 24 0.141304 NaN
5 36 21990325051744 133955584 1 True 114 0.038503 11.0
6 36 21990316663108 275011862528 1 True 128 0.035064 11.0
7 36 35115652836736 11259042018263040 3 True 57 0.075815 2.0
8 36 343598919045 11259042018263040 3 True 33 0.102273 5.0
9 36 224640 11259043089907712 3 True 51 0.083137 2.0
10 36 2247401834504448 11259042018263040 1 True 209 0.024798 6.0
11 36 221188 11259317030125568 1 True 233 0.023938 6.0
12 36 1071866112 11276634204307456 1 True 166 0.030741 5.0
13 36 221188 275011863039 1 True 313 0.018883 6.0
14 36 22058901922112 905969664 1 True 126 0.032762 8.0
15 36 22333923746149 0 1 True 204 0.026079 11.0
16 36 67330304 11259042018267128 1 True 204 0.024244 5.0
17 36 134176768 7248018944 1 True 400 0.015539 4.0
18 36 7247978496 1407381323927552 1 True 152 0.034681 4.0
19 36 134177024 17592186044927 1 True 180 0.027250 5.0
20 36 201623699942208 0 1 True 171 0.025731 10.0
21 36 197912848198464 237494511599616 1 True 102 0.031838 12.0
22 36 9205146462677824 0 1 True 173 0.025541 10.0
23 36 2250150556950568 14155776 1 True 186 0.029178 9.0
24 36 9009982404255784 1407380252282880 1 True 152 0.031544 7.0
25 36 537094144 1583302783818240 1 True 32 0.114919 5.0
26 36 175922531756544 1407381158252544 1 True 126 0.037968 8.0
27 36 30030412865541 11259042018263040 1 True 254 0.022595 7.0
28 36 343598915589 11262752870006784 1 True 194 0.028204 8.0
29 36 386548558853 11259033428295680 2 True 39 0.107962 6.0
... ... ... ... ... ... ... ... ...
1331 36 175922531756544 68698505216 1 False 6 0.466667 NaN
1332 36 262064 1935071745409024 4 True 9 0.333333 1.0
1333 36 11259042018291120 11259042018263040 4 True 3 0.666667 1.0
1334 36 28159 15480573963272192 2 False 9 0.333333 NaN
1335 36 2207604829695 0 2 False 6 0.466667 NaN
1336 36 2199031672319 17979214137393152 2 False 12 0.257576 NaN
1337 36 2199031672319 8573157376 2 False 12 0.257576 NaN
1338 36 17981413169065471 0 2 False 6 0.466667 NaN
1339 36 28159 1644874763882496 2 False 9 0.333333 NaN
1340 36 140738025488304 2748789555240 4 True 3 0.666667 1.0
1341 36 21990330597750 21990316441920 5 True 1 0.000000 0.0
1342 36 221695 21990316441974 1 False 3 0.666667 NaN
1343 36 175922531535792 175922531535414 4 True 4 0.666667 1.0
1344 36 15221639058817398 21990316441920 5 True 2 1.000000 0.0
1345 36 223744 175922531794486 1 False 9 0.333333 NaN
1346 36 1407380252286390 1407380252282880 5 True 1 0.000000 0.0
1347 36 21990316441974 21990324830560 5 True 2 1.000000 0.0
1348 36 17996806390771704 0 1 False 3 0.666667 NaN
1349 36 17592253378552 17979214137393152 1 False 12 0.257576 NaN
1350 36 175922531756544 670828544 1 False 3 0.666667 NaN
1351 36 175922531756544 17979214674266112 1 False 6 0.333333 NaN
1352 36 140738025488304 175922531535360 4 True 3 0.666667 1.0
1353 36 21990316441974 21990330597696 5 True 1 0.000000 0.0
1354 36 175922531756976 175922531535360 4 True 2 1.000000 1.0
1355 36 175922531756544 175980513593856 1 False 1 0.000000 NaN
1356 36 15375571274131968 175922531535360 1 False 1 0.000000 NaN
1357 36 9183156146235904 175922531535360 1 False 1 0.000000 NaN
1358 36 1301826733566464 175922531535360 1 False 1 0.000000 NaN
1359 36 175922531756544 9183156146014720 1 False 2 1.000000 NaN
1360 36 175922531756544 15375571273910784 1 False 1 0.000000 NaN

1361 rows × 8 columns


In [36]:
df2 = pd.read_sql_query("select * from comb_class",con)
df2


Out[36]:
comb_class num_nodes num_components num_solvable_components num_unsolvable_components max_solution_depth

In [69]:
if g.graph['solvable']:
    RHdistance_partition.distance_partition(g)
    max_distance = max(g.graph['distance_partition'].keys())
    for node in g.graph['distance_partition'][max_distance]:
        break
    repr_node_dict = g.node[node]
else:
    repr_node_dict = g.node[0]
    max_distance = None

s1,s2 = RHutilities.split_int(repr_node_dict['board_int'])
red_col = repr_node_dict['red_col']
            
    
36, g.graph['solvable'],s1,s2,red_col, len(g.nodes()), nx.density(g),max_distance


Out[69]:
(36, True, 67330304, 11259042018267128, 1, 204, 0.024244180430793006, 5)

In [11]:
len(comps),sum(len(x.nodes()) for x in comps)


Out[11]:
(1361, 116650)

In [12]:
len(comps[0].nodes())


Out[12]:
308

In [14]:
solvable_comps = [g for g in comps if g.graph['solvable']==True]
unsolvable_comps = [g for g in comps if g.graph['solvable'] == False]

In [15]:
df_solvable = pd.DataFrame([len(x.nodes()) for x in solvable_comps])
df_unsolvable = pd.DataFrame([len(x.nodes()) for x in unsolvable_comps])

Here -

  • Select representative for each component
  • save json to file for each component (class_2_2_comp_39823484994949)
  • draw graph with d3

  • What I really want is real time back and forth to pull out components on the fly rather than build them all and save mass files to harddrive.


In [18]:
#df.describe()
#%matplotlib inline
df_solvable.hist(bins=20)


Out[18]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x00000250BD956EB8>]], dtype=object)

In [19]:
df_unsolvable.hist(bins=20)


Out[19]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x00000250BC07E9B0>]], dtype=object)

In [82]:
comp = comps[0]
min_int = min(comp.node[node]['board_int'] for node in comp.nodes())
min_int


Out[82]:
632587360075802098027458330624

In [16]:
df_solvable.describe()


Out[16]:
0
count 1024.000000
mean 107.013672
std 100.871674
min 1.000000
25% 20.000000
50% 60.000000
75% 178.000000
max 400.000000

In [17]:
df_unsolvable.describe()


Out[17]:
0
count 337.000000
mean 20.973294
std 20.752708
min 1.000000
25% 6.000000
50% 15.000000
75% 27.000000
max 96.000000

In [83]:
#node = 0
comp_repr_node = [comp[node] for node in comp.nodes() if comp.node[node]['board_int'] == min_int]
comp_repr_node


Out[83]:
[AtlasView({233: {}, 270: {}, 231: {}, 284: {}})]

In [113]:
len([g for g in comps if len(g.nodes())==1])


Out[113]:
18

In [37]:
n = solvable_comps[40].node[0]

#HTML(RHdisplay.svg_from_state(solvable_comps[40].nodes(0)))

HTML(RHdisplay.svg_from_state(n['board_int'],n['red_col']))


Out[37]:
XLZW

In [39]:
df = pd.DataFrame( [  [len(g.nodes()),g.graph['solvable'] ] for g in comps])

In [55]:
g = solvable_comps[40]
g


Out[55]:
<networkx.classes.graph.Graph at 0x250b11594a8>

In [58]:
RHdistance_partition.distance_partition(g)


Out[58]:
{'board_int': 207885568651978971296788249575424,
 'inner_nbrs': {11, 49},
 'is_soln_state': False,
 'outer_nbrs': {9},
 'red_col': 2,
 'soln_distance': 4}

In [72]:
df_dist = pd.DataFrame( [g.node[n]['soln_distance'] for n in g.nodes() ], columns = ['distance'])

In [74]:
df_dist.groupby(['distance']).size()


Out[74]:
distance
0    41
1    41
2    67
3    71
4    41
5    11
6     1
dtype: int64

In [76]:
HTML(RHdisplay.svg_from_state( g.node[0]['board_int'] , g.node[0]['red_col']))


Out[76]:
XLZW

In [77]:
len(solvable_comps) , len(unsolvable_comps)


Out[77]:
(1024, 337)

In [79]:
len(solvable_comps) + len(unsolvable_comps)


Out[79]:
1361

In [85]:
importlib.reload(RHdistance_partition)
for g in solvable_comps:
    RHdistance_partition.distance_partition(g)

In [142]:
for g in solvable_comps:
    g.graph['max_distance'] = max(g.graph['distance_partition'].keys())
    for node in g.graph['distance_partition'][g.graph['max_distance']]:
        break
    node_dict = g.node[node]
    split_board_int = RHutilities.split_int( node_dict['board_int'])
    g.graph['repr_board_int_s1'] = split_board_int[0]
    g.graph['repr_board_int_s2'] = split_board_int[1]
    g.graph['repr_red_col'] = node_dict['red_col']

In [162]:
df_max_dist = pd.DataFrame( \
            [ [g.graph['max_distance'],\
               g.graph['repr_board_int_s1'],\
               g.graph['repr_board_int_s2'],\
               g.graph['repr_red_col']\
               ,len(g.nodes())\
               ,g.size()\
               ,nx.density(g)\
              ]\
               for g in solvable_comps], columns=['distance','int_s1','int_s2','repr_red_col','num_nodes','num_edges','density'])

In [163]:
df_max_dist.sort_values(['distance'],ascending=False)


Out[163]:
distance int_s1 int_s2 repr_red_col num_nodes num_edges density
168 19 2748789558696 179633383279104 3 56 86 0.055844
769 17 2748789558696 175922545691136 3 80 149 0.047152
642 17 343598694837 25701168185664 4 116 260 0.038981
739 16 2748789776424 25701168185664 1 102 228 0.044263
830 15 2748789558696 670828544 3 84 172 0.049340
870 15 343598694837 21990330597696 4 144 344 0.033411
387 15 2748789558696 175922531535414 3 104 214 0.039955
572 15 21990316469568 670828544 2 86 161 0.044049
408 14 2748789776424 21990330597696 1 126 299 0.037968
506 14 175922531756544 68586307588 1 112 231 0.037162
455 14 175922531756544 68652368128 1 86 161 0.044049
765 14 175922531756544 68593647648 1 64 117 0.058036
505 13 22333915357509 54 1 172 432 0.029376
402 13 24739106218344 54 1 154 388 0.032934
20 12 197912848198464 237494511599616 1 102 164 0.031838
323 12 3092388470829 3710851743744 1 174 476 0.031626
795 12 175922531756800 17996806323437568 1 66 92 0.042890
88 12 197912848198464 905969664 1 126 234 0.029714
1 12 21990316663104 34493825024 1 112 231 0.037162
135 12 3092388470829 54 1 222 634 0.025845
693 12 197912848198464 3456 1 154 320 0.027162
532 12 21990316665152 175853140967424 1 66 92 0.042890
759 12 3092388470829 14155776 1 195 544 0.028760
508 12 175922531756548 17979489015300096 1 77 121 0.041353
776 12 175922531756576 17981413160648704 1 63 89 0.045571
149 11 21990316663136 2199023256063 1 159 378 0.030093
344 11 175922531756544 8650272 1 130 270 0.032200
637 11 21990316663104 1161019854422016 1 63 89 0.045571
345 11 24739643091304 0 1 165 388 0.028677
330 11 2748789776424 133955638 1 180 492 0.030540
... ... ... ... ... ... ... ...
1012 0 21990316441974 343665803525 5 3 2 0.666667
436 0 21990316703606 17592253153280 5 3 2 0.666667
1016 0 21990330597750 21990316441920 5 1 0 0.000000
468 0 21990316605814 11276634271252480 5 12 17 0.257576
815 0 21990316444534 193514784686080 5 12 17 0.257576
1018 0 15221639058817398 21990316441920 5 2 1 1.000000
1019 0 1407380252286390 1407380252282880 5 1 0 0.000000
1020 0 21990316441974 21990324830560 5 2 1 1.000000
587 0 21990316441974 22024676311360 5 5 4 0.400000
1022 0 21990316441974 21990330597696 5 1 0 0.000000
892 0 21990316441974 21990317490500 5 2 1 1.000000
1004 0 21990316441974 21990316469568 5 2 1 1.000000
738 0 21990316441974 21990853314880 5 5 4 0.400000
944 0 24189348086134 21990316441920 5 1 0 0.000000
957 0 21990316441974 17592253414912 5 6 5 0.333333
930 0 25701168185718 21990316441920 5 1 0 0.000000
931 0 22048298500470 21990316441920 5 2 1 1.000000
992 0 18001204453835126 17592253153280 5 6 5 0.333333
993 0 21990316441974 17996806390546432 5 6 5 0.333333
994 0 21990316441974 17660838412288 5 3 2 0.666667
995 0 21990316441974 15480573963272192 5 3 2 0.666667
932 0 21990316441974 15221639058817344 5 2 1 1.000000
800 0 11259042018295798 34359869440 5 3 2 0.666667
933 0 21990316441974 1147894518251840 5 5 4 0.400000
821 0 22058901700982 17592253153280 5 3 2 0.666667
1000 0 1429370568724854 67109120 5 12 17 0.257576
1001 0 21990316441974 25701168185664 5 1 0 0.000000
1002 0 21990316441974 22048298500416 5 2 1 1.000000
1003 0 21990316441974 21990316441974 5 1 0 0.000000
997 0 21990316441974 20341042708520 5 3 2 0.666667

1024 rows × 7 columns


In [164]:
board_int = RHutilities.combine_ints(21990316605814,11276634271252480)
red_col = 5
HTML(RHdisplay.svg_from_state(board_int,red_col))


Out[164]:
XLZW

In [166]:
df_max_dist.groupby(['distance']).agg(['count'])


Out[166]:
int_s1 int_s2 repr_red_col num_nodes num_edges density
count count count count count count
distance
0 34 34 34 34 34 34
1 165 165 165 165 165 165
2 73 73 73 73 73 73
3 18 18 18 18 18 18
4 221 221 221 221 221 221
5 93 93 93 93 93 93
6 55 55 55 55 55 55
7 72 72 72 72 72 72
8 110 110 110 110 110 110
9 53 53 53 53 53 53
10 55 55 55 55 55 55
11 50 50 50 50 50 50
12 11 11 11 11 11 11
13 2 2 2 2 2 2
14 4 4 4 4 4 4
15 4 4 4 4 4 4
16 1 1 1 1 1 1
17 2 2 2 2 2 2
19 1 1 1 1 1 1

In [119]:
df_max_dist.sort_values(by=['distance'] , ascending = False)


Out[119]:
distance int_s1 int_s2 repr_red_col
168 19 175922531535792 240243301154856 4
769 17 2748789558696 175922644781568 3
642 17 343598722053 25701168185664 2
739 16 28008 1924695198793728 2
830 15 140738035933224 2749850714112 1
870 15 27968 22341162893317 2
387 15 671312424 178670650027008 1
572 15 83886454 162728391737344 5
408 14 221504 24746353754152 1
506 14 175922531756544 343464214528 1
455 14 17592253155894 175922665488384 5
765 14 178121563179574 1071644672 5
505 13 27648 22333915163973 2
402 13 2748789776424 21990316441974 1
20 12 197912848198464 29686813949952 1
323 12 343609401389 1902704871866368 1
795 12 67330304 211038184147456 1
88 12 2934 197970830032896 5
1 12 9007233614700544 22058901700928 1
135 12 2748789558696 343598694837 3
693 12 374 197912847977014 5
532 12 21990316441974 17979214674266112 5
759 12 1311157 3093292908584 4
508 12 175922531756544 17979489016348672 1
776 12 175922531756576 17981413160648704 1
149 11 27968 24189348089848 2
344 11 175922531535792 8650272 4
637 11 1125904201810294 18001204453834752 5
345 11 140738025226672 24739105997160 4
330 11 432 2817374841896 4
... ... ... ... ...
1012 0 21990316441974 343665803525 5
436 0 21990316703606 17592253153280 5
1016 0 21990330597750 21990316441920 5
468 0 21990316605814 11276634271252480 5
815 0 21990316444534 193514784686080 5
1018 0 15221639058817398 21990316441920 5
1019 0 1407380252286390 1407380252282880 5
1020 0 21990316441974 21990324830560 5
587 0 21990316441974 22024676311360 5
1022 0 21990316441974 21990330597696 5
892 0 21990316441974 21990317490500 5
1004 0 21990316441974 21990316469568 5
738 0 21990316441974 21990853314880 5
944 0 24189348086134 21990316441920 5
957 0 21990316441974 17592253414912 5
930 0 25701168185718 21990316441920 5
931 0 22048298500470 21990316441920 5
992 0 18001204453835126 17592253153280 5
993 0 21990316441974 17996806390546432 5
994 0 21990316441974 17660838412288 5
995 0 21990316441974 15480573963272192 5
932 0 21990316441974 15221639058817344 5
800 0 11259042018295798 34359869440 5
933 0 21990316441974 1147894518251840 5
821 0 22058901700982 17592253153280 5
1000 0 1429370568724854 67109120 5
1001 0 21990316441974 25701168185664 5
1002 0 21990316441974 22048298500416 5
1003 0 21990316441974 21990316441974 5
997 0 21990316441974 20341042708520 5

1024 rows × 4 columns


In [135]:
df_max_dist.loc[df_max_dist['distance'] == 19]
#HTML(RHdisplay.svg_from_state(n['board_int'],n['red_col']))


Out[135]:
distance int_s1 int_s2 repr_red_col
168 19 175922531535792 240243301154856 4

In [137]:
board_int = RHutilities.combine_ints(2748789558696,240243301154856)
red_col = 4
HTML(RHdisplay.svg_from_state(board_int,red_col))


Out[137]:
XLZW

In [139]:



Out[139]:
{0: {0}, 1: {1}}

In [ ]: