In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import integrate
from scipy.stats import chisqprob
from gmpy2 import digits

In [2]:
def read_results(filename):
    results = (File_bytes, Monte_Carlo_Pi, Rule, Serial_Correlation, Entropy, Chi_square, Mean) = [[] for _ in range(7)]
    with open(filename) as f:
        data = json.load(f)
    variables = {"File-bytes": File_bytes, "Monte-Carlo-Pi": Monte_Carlo_Pi, "Rule": Rule, "Serial-Correlation": Serial_Correlation,
                 "Entropy": Entropy, "Chi-square": Chi_square, "Mean": Mean}
    for i in range(len(data)):
        for k, v in variables.items():
            v.append(data[str(i)][k])
    results = np.array([np.array(r) for r in results]).T
    headers = ["File-bytes", "Monte-Carlo-Pi", "Rule", "Serial-Correlation", "Entropy", "Chi-square", "Mean"]
    return pd.DataFrame(results, columns=headers)

In [3]:
raw = read_results("results/results-3colors-run2.json")

In [4]:
raw.head(10)


Out[4]:
File-bytes Monte-Carlo-Pi Rule Serial-Correlation Entropy Chi-square Mean
0 100000.0 4.000000 4.113183e+11 -0.000010 0.000181 2.549949e+07 108.000130
1 100000.0 3.885755 6.605369e+12 0.250278 3.914079 1.780723e+06 110.799190
2 100000.0 2.892116 6.633363e+11 -0.164928 4.578474 1.195098e+06 134.035750
3 100000.0 3.195248 1.980947e+12 0.001541 7.256266 1.017603e+05 117.142440
4 100000.0 3.374295 1.558258e+12 0.098804 4.189589 1.554628e+06 149.702550
5 13739.0 3.349934 5.405423e+12 0.091629 6.790931 3.612954e+04 120.242812
6 54452.0 3.223361 4.294112e+12 0.013476 7.487716 4.737094e+04 124.792588
7 251.0 1.658537 1.974610e+12 0.116582 5.142711 2.516044e+03 184.549801
8 1.0 NaN 6.945126e+11 -100000.000000 0.000000 2.550000e+02 161.000000
9 100000.0 2.893796 5.312357e+12 -0.114630 4.418390 1.539882e+06 116.494340

In [5]:
sample = raw[raw['File-bytes'] == float(1E5)]
print(len(raw))
print(len(sample))


500
338

In [6]:
sample.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 338 entries, 0 to 498
Data columns (total 7 columns):
File-bytes            338 non-null float64
Monte-Carlo-Pi        338 non-null float64
Rule                  338 non-null float64
Serial-Correlation    338 non-null float64
Entropy               338 non-null float64
Chi-square            338 non-null float64
Mean                  338 non-null float64
dtypes: float64(7)
memory usage: 21.1 KB

In [7]:
sample["pi_deviation"] = np.abs(sample["Monte-Carlo-Pi"] - np.pi)
sample["mean_deviation"] = np.abs(sample["Mean"] - 255 / 2)
sample["p-value"] = chisqprob(sample["Chi-square"], 255)
sample["langton"] = [(27 - digits(r, 3).zfill(27).count("0"))/27 for r in list(map(int,sample["Rule"]))]

sample['Entropy_norm'] = sample['Entropy'] / 8
sample['Entropy'] = sample['Entropy_norm']
sample = sample.drop('Entropy_norm', axis=1)


/home/martijn/.virtualenvs/complex-systems/lib/python3.4/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
/home/martijn/.virtualenvs/complex-systems/lib/python3.4/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/home/martijn/.virtualenvs/complex-systems/lib/python3.4/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/home/martijn/.virtualenvs/complex-systems/lib/python3.4/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/martijn/.virtualenvs/complex-systems/lib/python3.4/site-packages/ipykernel/__main__.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/martijn/.virtualenvs/complex-systems/lib/python3.4/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [8]:
sample.describe()


Out[8]:
File-bytes Monte-Carlo-Pi Rule Serial-Correlation Entropy Chi-square Mean pi_deviation mean_deviation p-value langton
count 338.0 338.000000 3.380000e+02 338.000000 338.000000 3.380000e+02 338.000000 338.000000 338.000000 338.0 338.000000
mean 100000.0 3.222844 3.739090e+12 -7396.448195 0.502197 6.603855e+06 114.104770 0.592599 30.574848 0.0 0.656038
std 0.0 0.860926 2.178847e+12 26210.112173 0.316821 9.507254e+06 43.017466 0.628967 33.058610 0.0 0.095427
min 100000.0 0.000000 2.133775e+10 -100000.000000 0.000000 4.427120e+04 0.000000 0.002773 0.007560 0.0 0.370370
25% 100000.0 2.997900 1.712792e+12 -0.086712 0.283449 2.976926e+05 100.591710 0.175800 8.862285 0.0 0.592593
50% 100000.0 3.355454 3.717093e+12 -0.000486 0.525635 1.722141e+06 120.224965 0.461058 19.383025 0.0 0.666667
75% 100000.0 3.858934 5.596833e+12 0.051101 0.820136 6.551231e+06 137.145382 0.858407 39.445090 0.0 0.703704
max 100000.0 4.000000 7.604736e+12 0.783507 0.961217 2.550000e+07 242.443550 3.141593 127.500000 0.0 0.888889

In [9]:
sample[sample.mean_deviation < 1]


Out[9]:
File-bytes Monte-Carlo-Pi Rule Serial-Correlation Entropy Chi-square Mean pi_deviation mean_deviation p-value langton
36 100000.0 3.127325 9.381791e+11 -0.041062 0.601161 1.097448e+06 126.96237 0.014268 0.53763 0.0 0.629630
50 100000.0 2.981399 2.007083e+12 -0.026640 0.573680 1.306385e+06 127.07470 0.160194 0.42530 0.0 0.666667
86 100000.0 2.997240 4.072651e+12 -0.007934 0.913943 1.015266e+05 126.55526 0.144353 0.94474 0.0 0.518519
140 100000.0 3.241330 2.453827e+12 -0.057470 0.828608 3.410276e+05 127.75273 0.099737 0.25273 0.0 0.703704
156 100000.0 2.995320 2.566393e+12 0.007654 0.904705 1.145263e+05 128.37223 0.146273 0.87223 0.0 0.592593
162 100000.0 3.237730 3.162927e+12 0.003802 0.841759 2.981401e+05 127.31256 0.096137 0.18744 0.0 0.703704
220 100000.0 3.090844 4.876568e+12 0.009873 0.927911 9.445427e+04 127.31454 0.050749 0.18546 0.0 0.666667
279 100000.0 3.364455 6.630534e+12 0.051399 0.833139 2.453993e+05 127.62971 0.222862 0.12971 0.0 0.851852
309 100000.0 3.095884 6.695664e+12 -0.008912 0.912255 1.040294e+05 127.46819 0.045709 0.03181 0.0 0.592593
329 100000.0 3.235089 4.781620e+12 -0.058853 0.815225 3.324369e+05 127.90400 0.093496 0.40400 0.0 0.629630
356 100000.0 3.130205 3.036424e+12 -0.011411 0.940659 7.160753e+04 127.49244 0.011388 0.00756 0.0 0.740741
415 100000.0 3.175327 6.444096e+12 0.001555 0.942212 8.368959e+04 128.39780 0.033734 0.89780 0.0 0.740741
432 100000.0 3.131405 3.062631e+12 -0.051513 0.803990 3.820025e+05 127.35581 0.010188 0.14419 0.0 0.703704
433 100000.0 3.025081 1.858608e+12 -0.030206 0.748324 4.694376e+05 126.79656 0.116512 0.70344 0.0 0.518519
466 100000.0 2.675387 1.810948e+12 0.039682 0.558679 1.265171e+06 128.08895 0.466206 0.58895 0.0 0.444444

In [15]:
# Plot Entropy of all rules against the langton parameter
ax1 = plt.gca()
sample.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.show()

ax1 = plt.gca()
sample.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-entropy-langton.png', format='png', dpi=400)

ax1 = plt.gca()
sample.plot("langton", "Entropy", ax=ax1, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-entropy-langton.svg', format='svg', dpi=400)

In [10]:
# Plot Chisquare of all rules against the langton parameter
ax2 = plt.gca()
sample.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
plt.show()

ax2 = plt.gca()
sample.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-chisquare-langton.png', format='png', dpi=400)

ax2 = plt.gca()
sample.plot("langton", "Chi-square", ax=ax2, logy=True, kind="scatter", marker='o', alpha=.5, s=40)
plt.savefig('plots/3c-chisquare-langton.svg', format='svg', dpi=400)

In [ ]:


In [ ]:


In [ ]: