In [2]:
import numpy as np

import pandas as pd
pd.options.display.float_format = '{:20,.4f}'.format

# from  matplotlib import pyplot
# import seaborn

import sqlite3

# pylab.rcParams['figure.figsize'] = (12.0, 8.0)

In [3]:
tweetdetails = pd.read_csv('data/BMCevolBioTweetDetails.txt', encoding='utf8', sep="\t", index_col='tweet_id')
tweetdetails.sort_index(inplace=True)

dois = list(tweetdetails.doi.unique())

tweetdetails['created_at'] = pd.to_datetime(tweetdetails.created_at)
tweetdetails['user_id_str'] = tweetdetails.user_id_str.astype(int)


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-3-b5b404d349f9> in <module>()
----> 1 tweetdetails = pd.read_csv('data/BMCevolBioTweetDetails.txt', encoding='utf8', sep="\t", index_col='tweet_id')
      2 tweetdetails.sort_index(inplace=True)
      3 
      4 dois = list(tweetdetails.doi.unique())
      5 

/usr/local/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    644                     skip_blank_lines=skip_blank_lines)
    645 
--> 646         return _read(filepath_or_buffer, kwds)
    647 
    648     parser_f.__name__ = name

/usr/local/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    387 
    388     # Create the parser.
--> 389     parser = TextFileReader(filepath_or_buffer, **kwds)
    390 
    391     if (nrows is not None) and (chunksize is not None):

/usr/local/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    728             self.options['has_index_names'] = kwds['has_index_names']
    729 
--> 730         self._make_engine(self.engine)
    731 
    732     def close(self):

/usr/local/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
    921     def _make_engine(self, engine='c'):
    922         if engine == 'c':
--> 923             self._engine = CParserWrapper(self.f, **self.options)
    924         else:
    925             if engine == 'python':

/usr/local/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1388         kwds['allow_leading_cols'] = self.index_col is not False
   1389 
-> 1390         self._reader = _parser.TextReader(src, **kwds)
   1391 
   1392         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4184)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:8449)()

FileNotFoundError: File b'data/BMCevolBioTweetDetails.txt' does not exist

In [42]:
tweetdetails = pd.read_csv('data/BMCBioTweetDetails.txt', encoding='utf8', sep="\t", index_col='tweet_id')
tweetdetails.sort_index(inplace=True)

dois = list(tweetdetails.doi.unique())

tweetdetails['created_at'] = pd.to_datetime(tweetdetails.created_at)
tweetdetails['user_id_str'] = tweetdetails.user_id_str.astype(int)

In [3]:
df = pd.read_csv('data/community_counts1930.txt', sep='\t', index_col='community')
df = df.stack()
df.div(df.sum(axis=0, level=0), level=0).multiply(100).unstack()

In [3]:
edgelist = pd.read_csv('data/evolBioAllFollowers.csv', dtype={'in': str, 'out': str, 'doi': str}).drop_duplicates()

In [43]:
con = sqlite3.connect("data/BMCbioSample.db")
friends = pd.read_sql_query("SELECT * FROM friends", con, index_col="user_id")
friends.index = friends.index.astype(int)
followers = pd.read_sql_query("SELECT * FROM followers", con, index_col="user_id")
followers.index = followers.index.astype(int)

df = tweetdetails[['doi', 'user_id_str']].drop_duplicates().set_index('user_id_str').join(friends)[['friend_id', 'doi']]
# df.index.rename('user_id', inplace=True)

edgelist = df.reset_index()
edgelist.columns = ['in', 'out', 'doi']

In [454]:
allcommunities = pd.read_csv('data/follower_communities.txt', sep='\t')
allcommunities = allcommunities[allcommunities['user_id_str'] != -1]
allcommunities['user_id_str'] = allcommunities.user_id_str.astype(int)

In [7]:



Out[7]:
[0, 1]

In [ ]:
def check_activation(user_id_str):
        global activated
        activated.add(user_id_str)
        activated = activated.union(edgelist[edgelist['in'] == user_id_str]['out'])
        return len(activated)
    
    
    def group_color(name):
        if name[1]: 
            return 'b'
        return 'r'
    
    def group_marker(name):
        if name[0]:
            return 'o'
        return '*'
    
    def mode(angle):
        if angle > 75:
            return 1
        elif angle > 15:
            return 2
        else:
            return 3

    fig, axes = plt.subplots(nrows=2, ncols=2)
    plot_map = {0: [0,0], 1: [0,1], 2: [1,0], 3: [1,1]}
    for i, doi in enumerate(dois): 
        tweets = tweetdetails[tweetdetails.doi == doi]
    #     communities = allcommunities[(allcommunities.doi == doi)].set_index('user_id_str')
        
        
        activated = set()
    #     activated_communities = set()
        
        num_activated_users = []
    #     num_activated_communities = []
        
        for row in tweets.iterrows():
            user_id = row[1]['user_id_str']
            num_activated_users.append(check_activation(user_id))
        
    #         try: 
    #             activated_communities.add(communities.ix[user_id]['community'])
    #         except KeyError:
    #             print "%s not found" % user_id
    #             print 
    #         num_activated_communities.append(len(activated_communities))
        
        tweets['activated_users'] = num_activated_users
        tweets['activated_users_p'] = tweets['activated_users'].multiply(100.0).div(tweets['activated_users'].max())
    #     tweets['activated_communities'] = num_activated_communities
    #     tweets['activated_communities_p'] = tweets['activated_communities'].multiply(100.0).div(tweets['activated_communities'].max())
        # Create matplotlib figure
        ax = axes[plot_map[i][0], plot_map[i][1]]
        ax2 = ax.twinx() 
        
        tweets['event_number'] = tweets.index.map(lambda x: tweets.index.get_loc(x))
          
    #     for name, group in [('odd', 1), ('even', 0)]:
    #         tweets[tweets.user_id_str % 2 == group].set_index('created_at').activated_users_p.plot(style='o', ms=12, label=name,ax=ax, alpha=0.5)
    #     tweets.set_index('created_at').activated_users.plot(style='o', ms=6,ax=ax, alpha=0.5)    
        
        
        tweets['new_user'] = ~tweets.duplicated('user_id_str', keep='first')
        print "%.2f%% new users (out of %s)" % (len(tweets[tweets['new_user']])*100./len(tweets), len(tweets))
        print "%.2f%% retweets (out of %s)" % (len(tweets[tweets['is_retweet']])*100./len(tweets), len(tweets))
        groups = ['new_user', 'is_retweet']
        for name, group in tweets.set_index('created_at').groupby(groups).activated_users:
            print "new user: %s, is_retweet: %s: %s%s" % (name[0], name[1], group_marker(name), group_color(name))
            group.plot(marker=group_marker(name), color=group_color(name), linestyle="", ms=6, alpha=0.5, ax=ax)
#         max is because there are cases with tweets at exact same second
    #     df = tweets.activated_users_p.diff(-1).fillna(0).divide(tweets.created_at.diff(-1).fillna(0).map(lambda x: max(x.value/1000000000.0/60/60, 1))).fillna(0)
    #     df.index = tweets.created_at
        
    #     ax.vlines(x=df[df.diff().abs() > df.abs().describe()['mean'] + df.abs().describe()['std']].index, ymin=ymin, ymax=ymax-1, color='r')
    
        
#         df=tweets.set_index('created_at').activated_users_p
#         df = df.resample('48H').max().dropna().astype(int) 
#         df.plot(style='ob', ms=6, ax=ax)
        
#         activations = df.diff(-1).fillna(0).astype(int)
#         days = df.reset_index().created_at.diff(-1).fillna(0).map(lambda x: x.days)
#         days.index = df.index
        
#         angles = np.degrees(np.arctan2(activations, days)) #.map(lambda x: (x+180) % 360)        
#         changes = angles.map(mode) != angles.shift(1).map(mode)
#         changes = changes[3:]
#         changes = changes[changes].shift(-1)
#         ymin, ymax = ax.get_ylim()
# #         angles.plot(style="oy", ax=ax2)
#         ax.vlines(x=changes.index, ymin=ymin, ymax=ymax-1, color='r')
        fig.suptitle(doi)
#         fig.savefig('figures/%s-activation.png' % doi[doi.find('/')+1:].replace('-', ''))
    #     break
    
    
    #     tweets.set_index('event_number').activated_users_p.plot(style='o', ms=12, alpha=0.5)
    #     tweets.set_index('event_number').activated_communities.plot(style='s', ms=12, secondary_y=True, alpha=.2)
    
    
    
    #     print "%s total users reached" % tweets.activated_users.max()
    #     print "%s total communities" % tweets.activated_communities.max()
        fig.savefig('figures/%s-activation-eventnumber.png' % doi.replace('-', '')[-4:])


93.44% new users (out of 61)
54.10% retweets (out of 61)
new user: False, is_retweet: False: *r
new user: False, is_retweet: True: *b
new user: True, is_retweet: False: or
new user: True, is_retweet: True: ob

In [262]:
tweets.iloc[0:50]


Out[262]:
doi created_at created_at_dayofweek screen_name user_id_str user_utc_offset user_followers_count user_friends_count user_description is_retweet retweet_of_status_id_str retweet_of_screen_name is_reply in_reply_to_status_id_str in_reply_to_screen_name text activated_users activated_users_p event_number new_user
tweet_id
526914690103115776 10.1186/s12915-014-0076-2 2014-10-28 01:53:46 Tue Science_News1 2304792937 nan 7 8 The latest live science news: medicine, health... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 9 0.0097 0 True
526983297050902528 10.1186/s12915-014-0076-2 2014-10-28 06:26:23 Tue Separations001 1967048268 nan 128 113 Cutting edge instrumentation and consumables f... False nan NaN False nan NaN New theories = new thinking...we love it! http... 119 0.1287 1 True
526983310749077505 10.1186/s12915-014-0076-2 2014-10-28 06:26:26 Tue Separations1 342623431 nan 82 11 NaN False nan NaN False nan NaN New theories = new thinking...we love it! http... 130 0.1406 2 True
526988363208065025 10.1186/s12915-014-0076-2 2014-10-28 06:46:31 Tue BaumBuzz 573640744 nan 119 30 NaN False nan NaN False nan NaN An inside-out origin for the eukaryote cell. M... 161 0.1742 3 True
527029080731176960 10.1186/s12915-014-0076-2 2014-10-28 09:28:19 Tue christlet 64658763 7,200.0000 1724 661 Researcher #CNRS #univAMU dabbling in #neurobi... False nan NaN False nan NaN Buzz Baum has an intriguing "inside-out" hypot... 818 0.8850 4 True
527029277519536128 10.1186/s12915-014-0076-2 2014-10-28 09:29:06 Tue clathrin 240286563 3,600.0000 2172 978 Cell biologist. My lab researches membrane tra... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 1610 1.7418 5 True
527029506834714624 10.1186/s12915-014-0076-2 2014-10-28 09:30:00 Tue ChercheursdActu 1856913889 7,200.0000 1192 811 L'information éclairée par la recherche http:/... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 2367 2.5608 6 True
527029756517048320 10.1186/s12915-014-0076-2 2014-10-28 09:31:00 Tue AMartinezArias 1376893698 nan 1503 330 Developmental geneticist who wants to know how... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 2590 2.8021 7 True
527030000827256832 10.1186/s12915-014-0076-2 2014-10-28 09:31:58 Tue G_Frasca 350762049 7,200.0000 1172 1220 Je sers la #science et le #PS10e, et c'est ma ... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 3583 3.8764 8 True
527031432259309568 10.1186/s12915-014-0076-2 2014-10-28 09:37:39 Tue andimime 391574909 3,600.0000 236 786 Postdoc in the Baum Lab at the MRC LMCB with a... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 4223 4.5688 9 True
527031804826746880 10.1186/s12915-014-0076-2 2014-10-28 09:39:08 Tue ManuelTHERY 387834679 7,200.0000 899 528 centrosome-centred True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 4483 4.8501 10 True
527032599496392704 10.1186/s12915-014-0076-2 2014-10-28 09:42:18 Tue susaopina 353284142 -7,200.0000 485 719 Bióloga Molecular y Bioquimica, con suerte en ... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 5184 5.6085 11 True
527034739774210048 10.1186/s12915-014-0076-2 2014-10-28 09:50:48 Tue Mad_PhD 2470699940 nan 298 520 Underpaid & overworked PhD student who sold he... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 5565 6.0207 12 True
527035457637720064 10.1186/s12915-014-0076-2 2014-10-28 09:53:39 Tue andimime 391574909 3,600.0000 236 786 Postdoc in the Baum Lab at the MRC LMCB with a... True 526,988,363,208,065,024.0000 BaumBuzz False nan NaN RT @BaumBuzz: An inside-out origin for the euk... 5565 6.0207 13 False
527039551978885120 10.1186/s12915-014-0076-2 2014-10-28 10:09:55 Tue amarois 22609105 7,200.0000 1273 1651 Altbrarian #digitalscholarship #openaccess #op... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 7033 7.6089 14 True
527039925817188353 10.1186/s12915-014-0076-2 2014-10-28 10:11:24 Tue BMCBiology 299580552 3,600.0000 1719 362 BMC Biology publishes research and methodology... False nan NaN False nan NaN Where did eukaryotes come from? From the insid... 7234 7.8264 15 True
527040104636768257 10.1186/s12915-014-0076-2 2014-10-28 10:12:07 Tue BibCCBSUFCG 2458056060 -10,800.0000 12 10 NaN False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 7245 7.8383 16 True
527040114980323329 10.1186/s12915-014-0076-2 2014-10-28 10:12:10 Tue BibCCBSUFCG 2458056060 -10,800.0000 12 10 NaN False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 7245 7.8383 17 False
527062830277591041 10.1186/s12915-014-0076-2 2014-10-28 11:42:25 Tue tomroud 50340806 -14,400.0000 2536 1709 Nanoblogger scientifique, associate professor ... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 8441 9.1322 18 True
527065845110177792 10.1186/s12915-014-0076-2 2014-10-28 11:54:24 Tue ag_benoit 1432137306 nan 527 706 We do not see the world as it is we see the wo... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 9072 9.8149 19 True
527067393965977600 10.1186/s12915-014-0076-2 2014-10-28 12:00:33 Tue pixeltoo 54980022 7,200.0000 1079 1667 dev android application mobile. Wikipédien. Bo... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 10477 11.3349 20 True
527067451474075648 10.1186/s12915-014-0076-2 2014-10-28 12:00:47 Tue albertcardona 14773239 -25,200.0000 687 130 How does the brain work? Someday, we'll figure... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 10555 11.4193 21 True
527071887612850176 10.1186/s12915-014-0076-2 2014-10-28 12:18:25 Tue DrJCThrash 317216139 -28,800.0000 4105 976 Tweeting microbiology out of the deep South. A... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 11474 12.4136 22 True
527099836890238976 10.1186/s12915-014-0076-2 2014-10-28 14:09:28 Tue jamnic77 94187179 36,000.0000 1169 1271 Wanting a world that values people over profit... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 12698 13.7378 23 True
527120109215555584 10.1186/s12915-014-0076-2 2014-10-28 15:30:02 Tue HNTweets 116276133 3,600.0000 13259 1 Tweets the stories on the front page of Hacker... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell: ... 12700 13.7400 24 True
527120572069982208 10.1186/s12915-014-0076-2 2014-10-28 15:31:52 Tue BornCor 372384808 -25,200.0000 2464 2899 Freelance web designer working on something bi... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 15594 16.8710 25 True
527120967840313344 10.1186/s12915-014-0076-2 2014-10-28 15:33:26 Tue TechnologyNomad 85072500 -18,000.0000 274 76 Technology: All over the place. Tips, tricks, ... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell |... 15659 16.9413 26 True
527121083296915456 10.1186/s12915-014-0076-2 2014-10-28 15:33:54 Tue _hnyc 1423943252 -10,800.0000 250 0 ↪ Hacker News from Ycombinator delivered. False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 15661 16.9434 27 True
527121112208244736 10.1186/s12915-014-0076-2 2014-10-28 15:34:01 Tue reclaimifytest 2584296396 7,200.0000 213 0 NaN False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 15662 16.9445 28 True
527121404370890753 10.1186/s12915-014-0076-2 2014-10-28 15:35:10 Tue pradeepbheron 235495501 25,200.0000 331 67 Pradeep Bheron Coding | ✈Travelling | Photogra... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 15728 17.0159 29 True
527121602404966400 10.1186/s12915-014-0076-2 2014-10-28 15:35:58 Tue retweetnewsorg 2677390964 nan 106 37 Retweeting WorldWide News Just For You. 1 Acco... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 15764 17.0549 30 True
527121703277961217 10.1186/s12915-014-0076-2 2014-10-28 15:36:22 Tue CMaurange 893421024 nan 452 146 Drosophila geneticist. Interested in brain siz... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 15825 17.1209 31 True
527122302002290688 10.1186/s12915-014-0076-2 2014-10-28 15:38:44 Tue WebStartupGroup 19311402 -21,600.0000 8798 7974 Making technology ideas come to life! False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 23682 25.6213 32 True
527122727099174912 10.1186/s12915-014-0076-2 2014-10-28 15:40:26 Tue HackerTheArtist 799240657 nan 208 0 NaN False nan NaN False nan NaN From HN: An inside-out origin for the eukaryot... 23683 25.6224 33 True
527122893709516800 10.1186/s12915-014-0076-2 2014-10-28 15:41:05 Tue newsycbot 15042473 -25,200.0000 20145 1 Latest from Hacker News Y Combinator at http:/... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 23685 25.6245 34 True
527123824480108545 10.1186/s12915-014-0076-2 2014-10-28 15:44:47 Tue hn_rotub 2798716526 nan 60 14 Interesting links, news, inspiration, and side... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 23699 25.6397 35 True
527123935440429057 10.1186/s12915-014-0076-2 2014-10-28 15:45:14 Tue Un1v3rs0Z3r0 368501339 7,200.0000 451 384 Visto por última vez dentro de un agujero negr... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 24049 26.0183 36 True
527125135640518656 10.1186/s12915-014-0076-2 2014-10-28 15:50:00 Tue pwebmedia 961865077 34,200.0000 10835 219 Our Mission Is Your Vision False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 24253 26.2390 37 True
527126860581593088 10.1186/s12915-014-0076-2 2014-10-28 15:56:51 Tue davidcamachoj 146692746 nan 271 413 good news, bad news, and very bad news...... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 24617 26.6328 38 True
527157079937527809 10.1186/s12915-014-0076-2 2014-10-28 17:56:56 Tue ibridazioni 83836479 7,200.0000 896 1165 Psychologist, Design Research Director at Desi... False nan NaN False nan NaN An inside-out origin for the eukaryotic cell h... 25684 27.7872 39 True
527166710046945280 10.1186/s12915-014-0076-2 2014-10-28 18:35:12 Tue ctskennerton 2158831501 -25,200.0000 138 68 Microbiologist, Bioinformatician. More of the ... True 527,071,887,612,850,176.0000 DrJCThrash False nan NaN RT @DrJCThrash: An inside-out origin for the e... 25720 27.8262 40 True
527300867972550656 10.1186/s12915-014-0076-2 2014-10-29 03:28:18 Wed rapiduplift 18282718 19,800.0000 631 153 sedimentary geologist dreaming of a Himalayan ... False nan NaN False nan NaN Interesting! a new hypothesis for the origin o... 25854 27.9711 41 True
527364538422026240 10.1186/s12915-014-0076-2 2014-10-29 07:41:18 Wed flowing 10202432 7,200.0000 145 100 +machine_learning +data_mining +human_rights +... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 25947 28.0718 42 True
527379076902707201 10.1186/s12915-014-0076-2 2014-10-29 08:39:04 Wed ayuk74 229599040 -36,000.0000 85 302 Phd,PharmD, 2007 False nan NaN False nan NaN Evolution of complex life turns current theori... 26216 28.3628 43 True
527398549273985024 10.1186/s12915-014-0076-2 2014-10-29 09:56:27 Wed BMCBiology 299580552 3,600.0000 1719 362 BMC Biology publishes research and methodology... True 527,029,080,731,176,960.0000 christlet False nan NaN RT @christlet: Buzz Baum has an intriguing "in... 26216 28.3628 44 False
527399468799651840 10.1186/s12915-014-0076-2 2014-10-29 10:00:06 Wed BioMedCentral 41561657 3,600.0000 41064 1862 The Open Access Publisher of Science, Medicine... False nan NaN False nan NaN How did complex life evolve? The answer could ... 27680 29.9467 45 True
527400537525067776 10.1186/s12915-014-0076-2 2014-10-29 10:04:21 Wed HansZauner 995259308 7,200.0000 838 800 Freund der Wissenschaft, gelernter Evolutionsb... False nan NaN False nan NaN Evolution der eukaryotischen Zelle "inside out... 28308 30.6261 46 True
527418420091162624 10.1186/s12915-014-0076-2 2014-10-29 11:15:24 Wed Lab_Journal 1060480867 nan 966 215 Forschungsmagazin, Wissenschaftsjournalismus, ... True 527,400,537,525,067,776.0000 HansZauner False nan NaN RT @HansZauner: Evolution der eukaryotischen Z... 28382 30.7061 47 True
527418942848262146 10.1186/s12915-014-0076-2 2014-10-29 11:17:29 Wed Zernunft 223383396 7,200.0000 103 403 My tweets are actually caws. Feminist introver... True 527,400,537,525,067,776.0000 HansZauner False nan NaN RT @HansZauner: Evolution der eukaryotischen Z... 28739 31.0924 48 True
527422352502833153 10.1186/s12915-014-0076-2 2014-10-29 11:31:02 Wed KonSciencePod 1304229252 7,200.0000 591 75 Der #Wissenschaft-s-#Podcast aus #Konstanz. Te... True 527,400,537,525,067,776.0000 HansZauner False nan NaN RT @HansZauner: Evolution der eukaryotischen Z... 28789 31.1465 49 True

In [244]:
fig = plt.figure() 

ax = fig.add_subplot(111) 
ax2 = ax.twinx() 
df=tweets.set_index('created_at').activated_users_p.head(30)
df = df.resample('168H').max().dropna().astype(int) 
df.plot(style='ob', ms=6, ax=ax)

activations = df.diff(1).fillna(0).astype(int)
days = df.reset_index().created_at.diff(1).fillna(0).map(lambda x: x.days)
days.index = df.index

angles = np.degrees(np.arctan2(activations, days)) #.map(lambda x: (x+180) % 360)

changes = angles.map(mode) != angles.shift(1).map(mode)
changes = changes[3:]
changes = changes[changes].shift(-1)
ymin, ymax = ax.get_ylim()
angles.plot(style="oy", ax=ax2)
ax.vlines(x=changes.index, ymin=ymin, ymax=ymax-1, color='r')


Out[244]:
<matplotlib.collections.LineCollection at 0x10def0b10>

In [243]:
24*7


Out[243]:
168

In [233]:



Out[233]:
created_at
2014-01-31     True
2014-02-01     True
2014-02-02     True
2014-02-03     True
2014-02-04     True
2014-02-05    False
2014-02-06    False
2014-02-07      NaN
Freq: 24H, Name: activated_users_p, dtype: object

In [231]:
angles.shift(1)


Out[231]:
created_at
2014-01-31                    nan
2014-02-01                 0.0000
2014-02-02                45.0000
2014-02-03                 0.0000
2014-02-04                45.0000
2014-02-05                71.5651
2014-02-06                 0.0000
2014-02-07                 0.0000
Freq: 24H, Name: activated_users_p, dtype: float64

In [221]:



Out[221]:
created_at
2014-01-31     True
2014-02-01     True
2014-02-02     True
2014-02-03     True
2014-02-04     True
2014-02-05    False
2014-02-06    False
2014-02-07    False
Freq: 24H, Name: activated_users_p, dtype: bool

In [166]:


In [167]:



Out[167]:
created_at
2014-02-25    True
2014-03-06    True
2014-03-23    True
2014-03-25    True
2014-05-28    True
2014-06-10    True
2014-11-21    True
2014-11-24    True
2015-01-11    True
2015-02-21    True
dtype: bool