In [1]:
import matplotlib.pyplot as plt
import pandas as pd

from falchooser.malscraper.dbaccess import Database, Anime, Statistics
from falchooser.malscraper.scraper import read_titles

In [2]:
DB = Database()
with DB.engine.connect() as conn, conn.begin():
    data = pd.read_sql_table("statistics", conn)
SESSION = DB.get_session()

In [3]:
data


Out[3]:
anime day score users ranked popularity members favorites watching completed onhold dropped plantowatch accessed
0 34350 0 7.91450 9145 8445.0 2855 12909 2 925 0 37 4 11943 2017-04-02 07:52:59.192616+00:00
1 33371 0 NaN 1 NaN 3998 5673 5 1 0 1 0 5671 2017-04-02 07:53:00.376820+00:00
2 33475 0 NaN 123 NaN 2422 17387 43 1 0 0 0 17386 2017-04-02 07:53:01.578552+00:00
3 32407 0 NaN 148 NaN 1881 26924 62 5 0 0 0 26919 2017-04-02 07:53:02.976754+00:00
4 32901 0 NaN 273 NaN 1738 30396 75 0 0 0 0 30396 2017-04-02 07:53:04.353671+00:00
5 34427 0 NaN 0 NaN 4996 3225 5 0 0 0 0 3225 2017-04-02 07:53:05.781733+00:00
6 33203 0 NaN 54 NaN 2906 12202 25 3 0 0 0 12199 2017-04-02 07:53:07.169460+00:00
7 32936 0 6.14828 14828 5762.0 3075 12292 14 4392 3 52 272 7573 2017-04-02 07:53:08.802678+00:00
8 31629 0 7.25200 252634 2437.0 1721 31229 59 10451 0 1161 560 19057 2017-04-02 07:53:10.010223+00:00
9 33840 0 NaN 1 NaN 4096 5297 6 72 1 5 2 5217 2017-04-02 07:53:11.462138+00:00
10 34262 0 NaN 24 NaN 3791 6488 8 106 0 5 3 6374 2017-04-02 07:53:12.919561+00:00
11 34561 0 NaN 3 NaN 2098 22561 23 470 1 25 3 22062 2017-04-02 07:53:14.115359+00:00
12 34494 0 NaN 60 NaN 2754 13673 10 276 0 10 3 13384 2017-04-02 07:53:15.375891+00:00
13 34102 0 NaN 2 NaN 2045 23481 30 477 2 24 4 22974 2017-04-02 07:53:16.794478+00:00
14 33502 0 NaN 63 NaN 2801 13197 20 0 0 1 0 13196 2017-04-02 07:53:18.168118+00:00
15 34822 0 NaN 35 NaN 3064 10864 7 0 0 1 0 10863 2017-04-02 07:53:19.347115+00:00
16 34176 0 NaN 2 NaN 1806 28743 54 4 0 1 0 28738 2017-04-02 07:53:20.731804+00:00
17 34055 0 NaN 306 NaN 2037 22929 45 4 0 1 0 22924 2017-04-02 08:40:07.898840+00:00
18 34257 0 NaN 2 NaN 4776 3519 2 0 0 0 0 3519 2017-04-02 08:40:09.311978+00:00
19 33948 0 NaN 26 NaN 3586 7303 8 2 0 1 0 7300 2017-04-02 08:40:10.564080+00:00
20 33532 0 NaN 4 NaN 5820 2099 2 0 0 0 0 2099 2017-04-02 08:40:13.965136+00:00
21 34501 0 NaN 3 NaN 3776 6433 26 2 0 1 0 6430 2017-04-02 08:40:15.183940+00:00
22 34106 0 NaN 60 NaN 3944 5743 5 1 0 0 0 5742 2017-04-02 08:40:16.564693+00:00
23 34402 0 NaN 2 NaN 5954 1965 2 0 0 0 0 1965 2017-04-02 08:40:20.237650+00:00
24 34591 0 NaN 14 NaN 2575 15184 46 292 17 42 12 14821 2017-04-02 08:40:24.087024+00:00
25 33926 0 NaN 3 NaN 3436 8084 48 4 0 1 0 8079 2017-04-02 08:40:25.494157+00:00
26 32262 0 NaN 169 NaN 1938 24927 62 8 0 1 0 24918 2017-04-02 08:40:26.749173+00:00
27 32951 0 NaN 226 NaN 1866 26690 73 10 0 1 0 26679 2017-04-02 08:40:28.120921+00:00
28 34393 0 NaN 11 NaN 4974 3164 4 1 0 1 0 3162 2017-04-02 08:40:29.331586+00:00
29 32032 0 NaN 41 NaN 3162 9830 14 3 0 1 0 9826 2017-04-02 08:40:30.772765+00:00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1334 33502 43 7.75900 759418 978.0 1103 57926 204 38395 1 667 903 17960 2017-05-15 16:49:18.123035+00:00
1335 34822 43 7.65700 657318 1219.0 1305 47523 155 29084 0 664 1214 16561 2017-05-15 16:49:19.290410+00:00
1336 34176 43 7.55100 5510494 1456.0 902 73477 178 44034 0 720 1325 27398 2017-05-15 16:49:21.139002+00:00
1337 34055 43 7.41500 415867 1922.0 1531 38149 96 21117 1 314 483 16234 2017-05-15 16:49:24.457613+00:00
1338 34257 43 6.54754 54754 5237.0 3932 6009 4 2945 0 76 212 2776 2017-05-15 16:49:26.825921+00:00
1339 33948 43 6.74300 743685 4449.0 2008 24338 45 13692 1 441 1649 8555 2017-05-15 16:49:28.552470+00:00
1340 33532 43 5.46840 46840 7692.0 3977 5856 5 2399 0 133 777 2547 2017-05-15 16:49:30.459013+00:00
1341 34501 43 6.62200 622188 4945.0 2529 16380 45 9138 0 199 594 6449 2017-05-15 16:49:32.179851+00:00
1342 34106 43 7.23848 23848 2607.0 3446 8296 11 3428 0 95 98 4675 2017-05-15 16:49:34.177314+00:00
1343 34402 43 4.54946 54946 8342.0 4228 5036 3 2536 0 97 801 1602 2017-05-15 16:49:36.076951+00:00
1344 34591 43 8.80300 803878 27.0 1843 28327 140 12908 2 334 143 14940 2017-05-15 16:49:37.868706+00:00
1345 33926 43 8.18900 189034 345.0 1209 52073 419 30126 2 614 922 20409 2017-05-15 16:49:39.542088+00:00
1346 32262 43 7.07130 713099 3208.0 842 79323 217 49230 1 1073 3532 25487 2017-05-15 16:49:40.716991+00:00
1347 32951 43 7.76190 7619401 957.0 631 103006 572 71015 6 916 1623 29446 2017-05-15 16:49:43.846257+00:00
1348 34393 43 4.76100 761160 8262.0 3521 7906 17 3632 0 111 787 3376 2017-05-15 16:49:45.493496+00:00
1349 32032 43 7.37400 374910 2099.0 1609 35352 64 20909 0 528 1167 12748 2017-05-15 16:49:46.682220+00:00
1350 35069 43 4.26200 262013 8387.0 2910 12414 15 5495 1 197 1687 5034 2017-05-15 16:49:48.492952+00:00
1351 33834 43 6.01500 15591 6824.0 1408 43384 142 18574 0 649 3059 21102 2017-05-15 16:49:50.191371+00:00
1352 34823 43 5.01300 13220 8120.0 2348 18842 41 9753 0 399 2186 6504 2017-05-15 16:49:51.826303+00:00
1353 33362 43 7.14696 14696 2954.0 3789 6628 24 3042 0 81 148 3357 2017-05-15 16:49:53.066968+00:00
1354 34019 43 6.66400 664744 4794.0 1794 29581 40 17677 0 531 1800 9573 2017-05-15 16:49:54.857468+00:00
1355 34076 43 7.86100 861821 759.0 2412 17886 26 8405 0 220 137 9124 2017-05-15 16:49:56.489511+00:00
1356 34550 43 6.01100 11674 6850.0 3535 7830 6 4457 1 216 1373 1783 2017-05-15 16:49:57.875344+00:00
1357 34866 43 7.33571 33571 2247.0 3699 7052 43 2483 0 35 59 4475 2017-05-15 16:49:59.736375+00:00
1358 30727 43 7.97700 977183 572.0 1006 64335 270 29124 3 520 373 34315 2017-05-15 16:50:00.931948+00:00
1359 30736 43 8.03600 36814 509.0 1024 63115 130 29511 1 601 306 32696 2017-05-15 16:50:02.339940+00:00
1360 33486 43 8.65340 6534548 54.0 307 180264 981 127292 12 1322 352 51286 2017-05-15 16:50:03.533765+00:00
1361 34566 43 7.65180 6518269 1177.0 734 91426 715 62511 0 1199 1436 26280 2017-05-15 16:50:04.712453+00:00
1362 32887 43 7.16900 169726 2849.0 785 85880 227 40299 5 895 1714 42967 2017-05-15 16:50:05.911265+00:00
1363 25777 43 8.71530 7153100 41.0 110 329188 2755 178996 21 2142 906 147123 2017-05-15 16:50:07.102387+00:00

1364 rows × 14 columns
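
The table can be sanity-checked directly with pandas; a small sketch (not an executed cell), using the column names shown in the header above:

# Latest scraped day and number of snapshots per anime id.
print(data["day"].max())
print(data.groupby("anime").size().head())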


In [4]:
def get_latest_day() -> int:
    return SESSION.query(Statistics.day).order_by(Statistics.day.desc()).first()[0]
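
The same value could be fetched with an aggregate instead of ordering the whole table; a minimal alternative sketch using SQLAlchemy's func:

from sqlalchemy import func

# Equivalent to get_latest_day(): take the maximum day directly in SQL.
SESSION.query(func.max(Statistics.day)).scalar()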

In [5]:
def get_statistics(anime: str = "", id=None, first_day: int = 0, last_day: int = -1):
    # TODO: Returning the anime title for each day seems redundant.
    # A last_day below first_day (the default) means "up to the latest scraped day".
    if last_day < first_day:
        last_day = get_latest_day()
    if id:
        sqlfilter = Anime.id == id
    else:
        sqlfilter = Anime.title.ilike("%" + anime + "%")
    stats = SESSION.query(
        Anime.title, Statistics.day, Statistics.watching + Statistics.completed,
        Statistics.users, Statistics.ranked, Statistics.popularity, Statistics.members,
        Statistics.favorites, Statistics.dropped + Statistics.onhold,
    ).join(Statistics, Statistics.anime == Anime.id).filter(
        sqlfilter, Statistics.day >= first_day, Statistics.day <= last_day
    ).order_by(Anime.title, Statistics.day)
    return stats.all()
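
Each row returned by get_statistics is a plain tuple in the order of the columns queried above; the plotting cells below index into it positionally. A short illustrative call (not an executed cell; any of the titles printed under In [7] would work):

# Row layout: (title, day, watching + completed, users, ranked, popularity,
#              members, favorites, dropped + onhold)
rows = get_statistics(anime="Re:Creators")
print(rows[0][0], "day", rows[0][1], "watching + completed:", rows[0][2])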

In [6]:
def get_anime_and_id(filtered_titles=()):
    # Each query result is a (title, id) pair; zip(*rows) transposes them into a tuple
    # of titles and a tuple of ids (a one-shot iterator, unpacked in the next cell).
    rows = [row for row in SESSION.query(Anime.title, Anime.id) if row[0] not in filtered_titles]
    return zip(*rows)

In [7]:
filtered_titles = ["Boku no Hero Academia 2nd Season",
                   "Boruto: Naruto Next Generations",
                   "Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka Gaiden: Sword Oratoria",
                   "Shingeki no Kyojin Season 2"]
anime_titles, anime_ids = get_anime_and_id(filtered_titles=filtered_titles)
assert len(anime_titles) == 40, "There should be 40 titles with their statistics."
up_to_days = get_latest_day()
print("Evaluating the following series for the first {} days of FAL:".format(up_to_days))
print("-------------------------------------------------------------")
for anime in anime_titles:
    print(anime)


Evaluating the following series for the first 43 days of FAL:
-------------------------------------------------------------
Alice to Zouroku
Atom: The Beginning
Busou Shoujo Machiavellianism
Clockwork Planet
Eromanga-sensei
Frame Arms Girl
Fukumenkei Noise
Gin no Guardian
Granblue Fantasy The Animation
Kabukibu!
Oushitsu Kyoushi Haine
Re:Creators
Sakura Quest
Sakurada Reset
Shuumatsu Nani Shitemasu ka? Isogashii desu ka? Sukutte Moratte Ii desu ka?
Tsuki ga Kirei
Zero kara Hajimeru Mahou no Sho
Berserk (2017)
Cinderella Girls Gekijou
Hinako Note
Kaitou Tenshi Twin Angel (TV)
Kenka Banchou Otome: Girl Beats Boys
Kyoukai no Rinne (TV) 3rd Season
Love Kome: We Love Rice
Natsume Yuujinchou Roku
Quan Zhi Gao Shou
Renai Boukun
Rokudenashi Majutsu Koushi to Akashic Records
Room Mate: One Room Side M
Seikaisuru Kado
Sekai no Yami Zukan
Sin: Nanatsu no Taizai
Souryo to Majiwaru Shikiyoku no Yoru ni...
Starmyu 2nd Season
Tsugumomo
Uchouten Kazoku 2
Warau Salesman New
Shingeki no Bahamut: Virgin Soul
Yu☆Gi☆Oh! VRAINS
Saenai Heroine no Sodatekata ♭

In [8]:
# Threshold: a tenth of the largest member count among the tracked series.
watching_threshold = SESSION.query(Statistics.members).filter(
    Statistics.anime.in_(anime_ids)).order_by(Statistics.members.desc()).first()[0] / 10
# Indices into the rows returned by get_statistics.
watching_index = 2
member_index = 6
dropped_index = 8
fig = plt.figure(figsize=(15, 30))
ax1 = fig.add_subplot(3, 1, 1)
ax2 = fig.add_subplot(3, 1, 2)
ax3 = fig.add_subplot(3, 1, 3)
ax1.set_title("Watching + Completed")
ax2.set_title("Total members")
ax3.set_title("Dropped + On hold (obviously low absolute values)")
for id in anime_ids:
    anime_stats = get_statistics(id=id)
    # Only plot series whose latest watching + completed count clears the threshold.
    if anime_stats[-1][watching_index] > watching_threshold:
        title = anime_stats[0][0][:30]
        days = [stat[1] for stat in anime_stats]
        watching = [stat[watching_index] for stat in anime_stats]
        members = [stat[member_index] for stat in anime_stats]
        dropped = [stat[dropped_index] for stat in anime_stats]
        ax1.plot(days, watching, label=title)
        ax1.text(up_to_days * 1.01, watching[-1], title)
        ax2.plot(days, members, label=title)
        ax2.text(up_to_days * 1.01, members[-1], title)
        ax3.plot(days, dropped, label=title)
        ax3.text(up_to_days * 1.01, dropped[-1], title)
ax1.set_ylim(bottom=0)
for ax in (ax1, ax2, ax3):
    ax.set_xlim(0, up_to_days)
ax1.axhline(watching_threshold, color="black")
ax1.text(up_to_days, watching_threshold, "Threshold", color="black",
         horizontalalignment="right", verticalalignment="top", size="large")
plt.xticks(days)
plt.show()
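
The threshold above could equally be read from the data DataFrame loaded in In [2]; a minimal sketch, assuming anime_ids from In [7]:

# A tenth of the largest member count among the tracked series,
# computed from the DataFrame instead of a separate query.
threshold = data.loc[data["anime"].isin(anime_ids), "members"].max() / 10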



In [9]:
query = SESSION.query(Anime.title, Statistics.plantowatch).join(Statistics, Statistics.anime==Anime.id).filter(
    Statistics.day==0, ~Anime.title.in_(filtered_titles)).order_by(Statistics.plantowatch.desc()).all()
anime_titles, plantowatch_stats = zip(*query)
anime_titles = tuple(title[:30] for title in anime_titles)
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(1, 1, 1)
ax.bar(tuple(range(len(anime_titles))), plantowatch_stats, tick_label=anime_titles)
plt.xticks(tuple(range(len(anime_titles))), rotation="vertical")
plt.title("Plan to watch numbers on the first day of FAL")
plt.show()



In [10]:
weekly_interval = tuple(range(0, up_to_days + 1, 7))
if len(weekly_interval) > 1:
    fig, axs = plt.subplots(len(weekly_interval) - 1)
    fig.set_size_inches(15, 8 * (len(weekly_interval) - 1))
    for i in range(len(weekly_interval) - 1):
        t0_stats = get_statistics(first_day=weekly_interval[i], last_day=weekly_interval[i])
        t1_stats = get_statistics(first_day=weekly_interval[i + 1], last_day=weekly_interval[i + 1])
        # TODO Use numpy or pandas.
        diff = zip((stat[watching_index] for stat in t0_stats), (stat[watching_index] for stat in t1_stats))
        diff = tuple(t[1] - t[0] for t in diff)
        # plt.subplots returns a single Axes object (not an array) when there is only one subplot.
        if hasattr(axs, "__getitem__"):
            ax = axs[i]
        else:
            ax = axs
        anime_titles = tuple(stat[0][:30] for stat in t0_stats)
        rects = ax.bar(tuple(range(len(diff))), diff)
        # Bars start at y=0, so negative weekly differences show up as negative heights.
        negative_rects = filter(lambda rect: rect.get_height() < 0, rects)
        for rect in negative_rects:
            rect.set_color("r")
        ax.set_xticks(tuple(range(len(anime_titles))))
        ax.set_xticklabels(anime_titles, rotation="vertical")
        ax.set_title("Differences for week {} in Watching + Completed".format(i + 1))
    fig.tight_layout()
    plt.show()
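
As the TODO above suggests, the week-over-week differences could also be computed with pandas straight from the data DataFrame; a minimal sketch, assuming one snapshot row per anime and day (note that data only carries the anime id, not the title):

# Weekly change in watching + completed per anime id.
weekly = data[data["day"].isin(weekly_interval)].copy()
weekly["progress"] = weekly["watching"] + weekly["completed"]
diffs = (weekly.pivot(index="anime", columns="day", values="progress")
               .diff(axis=1)
               .dropna(axis=1, how="all"))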