In [12]:
import numpy as np
import pandas as pd
from faker import Factory

# One fixed seed so that "Restart Kernel -> Run All" regenerates the same
# synthetic data set every time.
SEED = 12345

fake = Factory.create()
fake.seed_instance(SEED)  # makes fake.name() / fake.numerify() deterministic
np.random.seed(SEED)      # DataFrame.sample() draws from NumPy's global RNG by default

# Pool of 50 fake author names, stored as the index of an otherwise empty frame
# so later cells can draw from it with .sample(...).index.
first_authors = pd.DataFrame(index=[fake.name() for _ in range(50)])
first_authors.head()


Out[12]:
Stephanie Gonzalez
Karen Hammond
Mario Carter
Janet Pierce
Denise Campos

In [27]:
# Second pool of 50 fake author names, kept as the index of an empty frame
# (same layout as first_authors) so it can be sampled from later.
second_authors = pd.DataFrame(
    index=[fake.name() for _ in range(50)],
)
second_authors.head()


Out[27]:
Scott Brooks
Anna Shah
Mark Ashley
Brian House
Tim Stark

In [45]:
# Weekly timestamps from the start of 2014 up to the start of 2016.
# pd.date_range replaces DatetimeIndex(start=..., end=..., freq=...), which was
# deprecated in pandas 0.24 and removed in 1.0.
ranges = pd.date_range(
    start=pd.Timestamp("2014"),
    end=pd.Timestamp("2016"),
    freq="W",
)
makers = pd.DataFrame(ranges, columns=["date"])

# One author per week, drawn with replacement from the first author pool.
makers["key"] = first_authors.sample(len(makers), replace=True).index

# One random digit string per row (e.g. "079"); values stay strings, so
# leading zeros are kept.
makers["value"] = [fake.numerify() for _ in range(len(makers))]
makers.head()


Out[45]:
date key value
0 2014-01-05 Tracy Avila 460
1 2014-01-12 Michael Schmidt 079
2 2014-01-19 Alan Kelly 850
3 2014-01-26 Stephanie Gonzalez 472
4 2014-02-02 William Villa 497

In [46]:
# Weekly timestamps from the start of 2016 up to the start of 2018.
# pd.date_range replaces DatetimeIndex(start=..., end=..., freq=...), which was
# deprecated in pandas 0.24 and removed in 1.0.
ranges = pd.date_range(
    start=pd.Timestamp("2016"),
    end=pd.Timestamp("2018"),
    freq="W",
)
menders = pd.DataFrame(ranges, columns=["date"])

# One author per week, drawn with replacement from the second author pool.
menders["key"] = second_authors.sample(len(menders), replace=True).index

# One random digit string per row; values stay strings, so leading zeros
# are kept.
menders["value"] = [fake.numerify() for _ in range(len(menders))]
menders.head()


Out[46]:
date key value
0 2016-01-03 Alex Browning 886
1 2016-01-10 Chelsea Garcia 859
2 2016-01-17 Jennifer Scott 998
3 2016-01-24 Ana Miller 128
4 2016-01-31 Tim Stark 418

In [47]:
# Stack both periods into a single frame. ignore_index=True gives the result a
# fresh 0..n-1 index; without it both inputs contribute their own labels
# starting at 0, so a lookup such as .loc[0] would return two rows.
modifications_over_time = pd.concat([makers, menders], ignore_index=True)
modifications_over_time.head()


Out[47]:
date key value
0 2014-01-05 Tracy Avila 460
1 2014-01-12 Michael Schmidt 079
2 2014-01-19 Alan Kelly 850
3 2014-01-26 Stephanie Gonzalez 472
4 2014-02-02 William Villa 497

In [49]:
# Put the columns in key, value, date order for the CSV export. Plain column
# selection raises KeyError if a column is missing, whereas
# reindex(columns=...) would silently add an all-NaN column.
modifications_over_time = modifications_over_time[["key", "value", "date"]]

csv_filename = "interactive_steamgraph_synthetic.csv"
modifications_over_time.to_csv("vis/" + csv_filename, index=False)

# Fill the CSV file name into the HTML template. The encoding is given
# explicitly so the result does not depend on the platform default, and the
# template is closed before the output file is opened.
with open("vis/interactive_steamgraph_template.html", "r", encoding="utf-8") as template:
    content = template.read()
content = content.replace("${FILENAME}", csv_filename)

with open("vis/interactive_steamgraph_synthetic.html", "w", encoding="utf-8") as output_file:
    output_file.write(content)

modifications_over_time.head()


Out[49]:
key value date
0 Tracy Avila 460 2014-01-05
1 Michael Schmidt 079 2014-01-12
2 Alan Kelly 850 2014-01-19
3 Stephanie Gonzalez 472 2014-01-26
4 William Villa 497 2014-02-02