In [1]:
import networkx as nx
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import warnings
import pydotplus
from networkx.drawing.nx_pydot import graphviz_layout
import random
import nxviz as nv
import matplotlib
import math
import pandas as pd
import scipy.stats as stats
warnings.filterwarnings('ignore')
#%load_ext autoreload
#%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [5]:
# https://www.bls.gov/opub/btn/volume-4/people-who-are-not-in-the-labor-force-why-arent-they-working.htm
# About 15%
retired = 0.15
#firstshift =
#secondshift =
#thirdshift =
#timetable = [[False] * 24 for day in range(7)]
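As a rough illustration (not part of the cell above), the roughly 15% share can be turned into a per-person Bernoulli flag; the helper name, the 1000-person population, and the seed below are assumptions for the sketch.
In [ ]:
import numpy as np

def sample_retired_flags(n, retired_share=0.15, seed=None):
    # One Bernoulli draw per person: True means "not in the labor force"
    rng = np.random.default_rng(seed)
    return rng.random(n) < retired_share

flags = sample_retired_flags(1000, retired, seed=42)
print(flags.mean())   # close to 0.15 for a sample of this size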
In [ ]:
Generate some data for locations, persons, persons at locations, and persons co-present at a location.
In [227]:
In [150]:
class Location():
def __init__(self, name, meanduration):
self.name = name
self.lifecycles = self.set_lifecycles()
self.socialclasses = self.set_socialclasses()
self.psychographics = self.set_psychographics()
self.maxfreq = random.randint(1, 10)
self.minfreq = random.randint(0, int(self.maxfreq/3))
        self.capacity = self.set_capacity()
self.expense = self.set_expense()
self.meanduration = meanduration
self.current_clients = 0
def reset_clients(self):
self.current_clients = 0
def set_lifecycles(self):
start = random.randint(0, len(lifecycles) - 1)
howmany = random.randint(1, len(lifecycles))
# Append the list to itself to make circular
# List comprehension to pull out only values
# Pull out the slice
lc = [i[1] for i in lifecycles + lifecycles][start : start + howmany]
return lc
def set_socialclasses(self):
start = random.randint(0, len(socialclasses) - 1)
howmany = random.randint(1, len(socialclasses))
start = start - int(round(howmany/2))
if start < 0:
start = 0
sc = [i[1] for i in socialclasses][start : start + howmany]
return sc
def set_psychographics(self):
k = random.randint(4, len(psychographics))
pg = random.sample(psychographics, k)
return pg
    def set_capacity(self):
a, b = 1.5, 8
loc, scale = 10, 1000
return int(stats.beta.rvs(a, b, loc=loc, scale=scale, size=1))
def set_expense(self):
my_mean, my_std = 3.5, 1
a, b = (1 - my_mean) / my_std, (6 - my_mean) / my_std
return stats.truncnorm.rvs(a, b, loc=my_mean, scale=my_std, size=1)
class Person():
def __init__(self, name, mean_public, connection=None):
self.name = name
self.lifecycle = self.set_lifecycle()
self.socialclass = self.set_socialclass()
self.psychographics = self.set_psychographics()
self.mean_public = mean_public
self.connection = connection
self.current_location = None
self.current_duration = 0
self.visits_history = {}
def add_visit(self, location):
self.current_location = location.name
self.current_duration = 1
try:
self.visits_history[location.name]
except KeyError:
self.visits_history[location.name] = 1
else:
self.visits_history[location.name] += 1
def end_visit(self, location):
self.current_location = None
self.current_duration = 0
location.current_clients -= 1
def set_lifecycle(self):
r = np.random.random()
if r < lifecycles[0][0]:
lc = lifecycles[0][1]
elif r < lifecycles[1][0]:
lc = lifecycles[1][1]
elif r < lifecycles[2][0]:
lc = lifecycles[2][1]
elif r < lifecycles[3][0]:
lc = lifecycles[3][1]
elif r < lifecycles[4][0]:
lc = lifecycles[4][1]
elif r < lifecycles[5][0]:
lc = lifecycles[5][1]
else:
            raise ValueError('Error in Life Cycle')
return lc
def set_socialclass(self):
r = np.random.random()
if r < socialclasses[0][0]:
sc = socialclasses[0][1]
elif r < socialclasses[1][0]:
sc = socialclasses[1][1]
elif r < socialclasses[2][0]:
sc = socialclasses[2][1]
elif r < socialclasses[3][0]:
sc = socialclasses[3][1]
elif r < socialclasses[4][0]:
sc = socialclasses[4][1]
elif r < socialclasses[5][0]:
sc = socialclasses[5][1]
else:
            raise ValueError('Error in Social Class')
return sc
def set_psychographics(self):
pg = []
s = np.random.randint(2, size=int(len(psychographics)/2))
for idx, val in enumerate(s):
if val > 0:
pg.append(psychographics[idx*2 + np.random.randint(2)])
return pg
# A small town with a population of 1000 might have about 400 private locations
# and about 50 public locations
movements = 2 * (5 * 6 + 2 * 12)
# move every half hour for 6 hours during weekdays and 12 hours on weekends
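The Location and Person classes above read three module-level lists (lifecycles, socialclasses, and psychographics) that are never defined in this section. set_lifecycle and set_socialclass walk (cumulative probability, label) tuples in order, and set_psychographics treats the psychographics list as consecutive pairs of opposing traits. A minimal sketch with made-up labels and probabilities (assumptions, not values from the original notebook) is below.
In [ ]:
# Illustrative stand-ins for the globals the classes above expect.
lifecycles = [
    (0.20, 'single'),
    (0.40, 'couple'),
    (0.60, 'young family'),
    (0.75, 'older family'),
    (0.90, 'empty nest'),
    (1.00, 'retired'),
]   # six entries, matching the six branches in Person.set_lifecycle

socialclasses = [
    (0.15, 'lower'),
    (0.40, 'working'),
    (0.70, 'middle'),
    (0.85, 'upper middle'),
    (0.95, 'affluent'),
    (1.00, 'wealthy'),
]

# Even-length list of paired traits; Person.set_psychographics picks at most
# one trait from each consecutive pair.
psychographics = [
    'outgoing', 'reserved',
    'frugal', 'spender',
    'early bird', 'night owl',
    'homebody', 'traveler',
]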
In [35]:
100-16-16-16-24
Out[35]:
In [186]:
def generate_locations(public, private):
public_locations = []
for i in range(0, public):
name = 'L' + str(i)
meanduration = int(100 * np.random.beta(2, 80, size=None) + 1)
        public_locations.append(Location(name, meanduration))
private_locations = []
for i in range(0, private):
        # Private locations (homes); join_private is a stub, so only name and mean duration matter here
        private_locations.append(Location('R' + str(i), 20))
return public_locations, private_locations
# name, connection
def generate_persons(number):
persons = []
for i in range(0, number):
persons.append(
Person('P' + str(i), np.random.random())
)
#print(persons[-1:])
return persons
In [ ]:
In [40]:
public_locations, private_locations = generate_locations(10, 1)
In [172]:
persons = generate_persons(1)
In [42]:
# lifecycles, socialclasses, psychographics, maxfreq, minfreq, capacity, expense, meanduration
# lifecycle, socialclass, psychographics, mean_public, connection
def _join_public(idx, person):
count = 0
while count < len(public_locations):
count += 1
if public_locations[count-1].current_clients >= public_locations[count-1].capacity:
continue
if person.lifecycle not in public_locations[count-1].lifecycles:
continue
if person.socialclass not in public_locations[count-1].socialclasses:
continue
if set(person.psychographics).isdisjoint(set(public_locations[count-1].psychographics)):
continue
public_locations[count-1].current_clients += 1
        person.add_visit(public_locations[count-1])  # add_visit expects a Location, not an index
return ('L' + str(count-1), 'P' + str(idx))
print("Unable to match Person with Location")
return False
def join_public(idx, person):
for location in public_locations:
if location.current_clients >= location.capacity:
continue
if person.lifecycle not in location.lifecycles:
continue
if person.socialclass not in location.socialclasses:
continue
if set(person.psychographics).isdisjoint(set(location.psychographics)):
continue
location.current_clients += 1
person.add_visit(location)
return (location.name, person.name)
print("Unable to match Person with Location")
return False
def join_private(idx, person):
return None
def get_current_location(location_name, location_type):
if location_type == 'public_locations':
return next(x for x in public_locations if x.name == location_name)
return False
def get_movers():
movers = []
for idx, person in enumerate(persons):
#print(person.current_location)
        if person.current_location is None:
movers.append(idx)
continue
current_location = get_current_location(person.current_location, 'public_locations')
if person.current_duration >= current_location.meanduration:
movers.append(idx)
person.end_visit(current_location)
return movers
def make_edges_from_list(node_list):
    # Build every unordered pair of co-present nodes (equivalent to itertools.combinations(node_list, 2))
    edges = []
limit = len(node_list)
for idx, val in enumerate(node_list):
count = idx + 1
while count < limit:
edges.append((val, node_list[count]))
count += 1
return edges
edges = []
person_edges = []
for i in range(0, 100):
movers = get_movers()
np.random.shuffle(movers)
# [x.reset_clients() for x in public_locations]
session_edges = []
for idx in movers:
person = persons[idx]
#print(person.mean_public)
if np.random.random() < person.mean_public:
joint = join_public(idx, person)
if joint != False:
edges.append(joint)
session_edges.append(joint)
else:
join_private(idx, person)
#edges = edges + session_edges
persons_present = [d for n, d in session_edges]
person_person = make_edges_from_list(persons_present)
person_edges = person_edges + person_person
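Later cells compute degree centrality on a multigraph MG and draw a hive plot from a graph G whose nodes carry a 'color' attribute, but neither graph is built in this section. The sketch below is one plausible way to assemble them from edges (location-person visits) and person_edges (person-person co-presence); the red/blue coloring is an assumption chosen to match the grouping used in the hive-plot cell.
In [ ]:
import networkx as nx

# Visit graph: location nodes red, person nodes blue.
G = nx.Graph()
for loc_name, person_name in edges:
    G.add_node(loc_name, color='red')
    G.add_node(person_name, color='blue')
    G.add_edge(loc_name, person_name)

# Co-presence multigraph: one parallel edge per shared session.
MG = nx.MultiGraph()
MG.add_edges_from(person_edges)

print(G.number_of_nodes(), G.number_of_edges(), MG.number_of_edges())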
In [93]:
len(edges)
Out[93]:
In [ ]:
In [60]:
#nx.degree_centrality(G)
In [341]:
## assume that you have a graph called G
nodes = dict()
#nodes['group1'] = [(n,d) for n, d in G.nodes(data=True) if d == some_criteria()]
#nodes['chainA'] = [n for n, d in p.nodes(data=True) if d['chain_id'] == 'A']
nodes['group1'] = [n for n, d in G.nodes(data=True) if d['color'] == 'red']
nodes['group2'] = [n for n, d in G.nodes(data=True) if d['color'] == 'blue']
In [342]:
edges = dict()
edges['all'] = [(u,v,d) for u,v,d in G.edges(data=True)]
In [343]:
nodes_cmap = dict()
nodes_cmap['group1'] = 'red'
nodes_cmap['group2'] = 'blue'
edges_cmap = dict()
edges_cmap['all'] = 'green'
In [344]:
from hiveplot import HivePlot
#h = HivePlot(nodes, edges, nodes_cmap, edges_cmap)
h = HivePlot(nodes, edges, nodes_cmap)
h.draw()
plt.show()
In [ ]:
In [ ]:
In [39]:
G=nx.complete_graph(10)
pos=nx.spring_layout(G)
nx.draw_spring(G)
#plt.savefig("edge_colormap.png") # save as png
plt.show() # display
In [49]:
G=nx.balanced_tree(2, 5)
nx.draw(G)
plt.show() # display
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [176]:
set(persons[2].psychographics).isdisjoint(set(public_locations[3].psychographics))
Out[176]:
In [25]:
#s = np.random.poisson(5, 10000)
s = 100 * np.random.beta(2, 80, size=10000) + 1
s.astype(int)
count, bins, ignored = plt.hist(s, 14, density=True)
plt.show()
In [32]:
int(1000 * np.random.beta(2, 40, size=None))
Out[32]:
In [77]:
mu, sigma = 0.4, 0.25
s = np.random.normal(mu, sigma, 1000)
count, bins, ignored = plt.hist(s, 30, density=True)
plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *
np.exp( - (bins - mu)**2 / (2 * sigma**2) ),
linewidth=2, color='r')
plt.show()
In [72]:
int(100 * np.random.normal(mu, sigma, size=None))
Out[72]:
In [73]:
int(4 * np.random.normal(mu, sigma, size=None)) + 1
Out[73]:
In [62]:
int(4 * s.min()) + 1
Out[62]:
In [135]:
mu = 0.40
goingout = []
for i in range(0, 100000):
# indexing the tuple (0,1) with T,F
goingout.append((0, 1)[(np.random.random() < mu)])
In [136]:
np.mean(goingout)
Out[136]:
In [147]:
(0, 1)[(np.random.random() < 0.5)]
Out[147]:
In [127]:
(np.random.random() < 0.5)
Out[127]:
In [ ]:
In [56]:
In [55]:
In [152]:
#max(stats.values())
centrality = nx.degree_centrality(MG)
max(centrality.values())
max(centrality, key=centrality.get)
print(max(centrality.values()), max(centrality, key=centrality.get))
max(centrality, key=centrality.get)
In [141]:
from collections import Counter
Counter(person_edges).most_common(10)
Out[141]:
In [148]:
Counter([n[0] for n in person_edges] + [n[1] for n in person_edges]).most_common(10)
Out[148]:
In [ ]:
[n for n, d in G.nodes(data=True) if d['color'] == 'red']
In [79]:
matplotlib.rcParams['figure.figsize'] = (14,8)
In [26]:
mu = 12
variance = 20
sigma = math.sqrt(variance)
x = np.linspace(0, 24, 100)
plt.plot(x, stats.norm.pdf(x, mu, sigma))
plt.grid(True, which='both')
plt.show()
In [80]:
#from scipy.stats import norm
loc = 12.5
scale = 5
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = stats.norm.stats(moments='mvsk')
x = np.linspace(stats.norm.ppf(0.01, loc=loc, scale=scale), stats.norm.ppf(0.99, loc=loc, scale=scale), 100)
ax.plot(x, stats.norm.pdf(x, loc=loc, scale=scale), 'r-', lw=5, alpha=0.6, label='norm pdf')
plt.grid(True, which='both')
plt.show()
In [4]:
stats.norm.ppf([0.001, 0.5, 0.999])
Out[4]:
In [6]:
x = np.linspace(1, 48, 48)
print(x)
d = stats.norm.pdf(x, loc=loc, scale=scale)
l = d/stats.norm.pdf(12.5, loc=loc, scale=scale) * 0.6
r = np.random.random(size=48)
(r * d/stats.norm.pdf(12.5, loc=loc, scale=scale)) > 0.5
Out[6]:
In [8]:
day_of_week = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')
weeks = 1
day_likelihood = stats.argus.rvs(1)
slots_per_day = 5
mean = 24.5
t_table = []
for day in range(0, 7 * weeks):
#print(day)
slots_today = stats.poisson.rvs(slots_per_day)
#print(slots_today)
r = np.random.random()
#print(r)
    print(r, day_likelihood, day_of_week[day % 7], slots_today)
if r < day_likelihood:
t_table.append({day_of_week[day%7]: np.sort(stats.poisson.rvs(mean, size=slots_today)/2)})
#for idx, day in enumerate(t_table):
#print(day_of_week[idx%7])
t_table
Out[8]:
In [147]:
chi = 1
print(stats.argus.rvs(chi, size=10))
r = stats.argus.rvs(chi, size=1000)
plt.hist(r, density=True, histtype='stepfilled', alpha=0.2)
plt.legend(loc='best', frameon=False)
plt.show()
In [9]:
# Model Household Income
# https://en.wikipedia.org/wiki/Burr_distribution
# https://en.wikipedia.org/wiki/Household_income_in_the_United_States
c, d = 3, 1
r = stats.burr12.rvs(c, d, size=1000)
plt.hist(r, density=True, histtype='stepfilled', alpha=0.2)
plt.legend(loc='best', frameon=False)
plt.show()
In [38]:
# name, mean_public
persons = Person('P', np.random.random())
print(persons.socialclass, persons.lifecycle, persons.psychographics)
persons.set_socialclass(), persons.set_lifecycle(), persons.set_psychographics()
Out[38]:
In [151]:
meanduration = 2
location = Location('L', meanduration)
print(location.lifecycles, location.socialclasses, location.psychographics)
location.set_lifecycles(), location.set_socialclasses(), location.set_psychographics()
Out[151]:
In [143]:
a = 1.5
b = 8
loc = 10
scale = 1000
print(list(map(int, stats.beta.rvs(a, b, loc=loc, scale=scale, size=20))))
In [219]:
# Beta Distribution for Capacity
func = stats.beta
a, b = 1.5, 8
loc, scale = 10, 1000
r = func.rvs(a, b, loc=loc, scale=scale, size=1000)
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = func.stats(a, b, moments='mvsk')
x = np.linspace(func.ppf(0.001, a, b, loc=loc, scale=scale), func.ppf(0.999, a, b, loc=loc, scale=scale), 100)
#x = np.linspace(0, 1000, 500)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.plot(x, func.pdf(x, a, b, loc=loc, scale=scale), 'r-', lw=5, alpha=0.6, label='norm pdf')
plt.grid(True, which='both')
plt.show()
print(list(map(int, func.rvs(a, b, loc=loc, scale=scale, size=20))))
In [215]:
# Normal Distribution for Expense
func = stats.truncnorm
my_mean, my_std = 3.5, 1
a, b = (1 - my_mean) / my_std, (6 - my_mean) / my_std
loc, scale = my_mean, my_std
r = func.rvs(a, b, loc=loc, scale=scale, size=10000)
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = func.stats(a, b, moments='mvsk')
x = np.linspace(func.ppf(0.001, a, b, loc=loc, scale=scale), func.ppf(0.999, a, b, loc=loc, scale=scale), 100)
#x = np.linspace(0, 10, 500)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.plot(x, func.pdf(x, a, b, loc=loc, scale=scale), 'r-', lw=5, alpha=0.6, label='norm pdf')
plt.grid(True, which='both')
plt.show()
print(list(map(int, func.rvs(a, b, loc=loc, scale=scale, size=20))))
In [225]:
int(100 * np.random.beta(2, 80, size=None) + 1)
# Beta Distribution for Mean Duration
tuu_in_one_hour = 4   # time-use units per hour (15-minute blocks)
tuu_total = tuu_in_one_hour * 24
#tuu_sleep = tuu_in_one_hour *
func = stats.beta
a, b = 2, 10
loc, scale = 1, 100
r = func.rvs(a, b, loc=loc, scale=scale, size=1000)
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = func.stats(a, b, moments='mvsk')
x = np.linspace(func.ppf(0.001, a, b, loc=loc, scale=scale), func.ppf(0.999, a, b, loc=loc, scale=scale), 100)
#x = np.linspace(0, 1000, 500)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.plot(x, func.pdf(x, a, b, loc=loc, scale=scale), 'r-', lw=5, alpha=0.6, label='norm pdf')
plt.grid(True, which='both')
plt.show()
print(list(map(int, func.rvs(a, b, loc=loc, scale=scale, size=20))))
In [ ]:
In [149]:
Out[149]: