In [56]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import statsmodels.formula.api as smf
%matplotlib inline

In [2]:
# Load the tab-separated metadata table and preview its first rows.
# NOTE(review): this is an absolute path on the original author's machine;
# the notebook will not run elsewhere until it points at a local copy.
metadata_path = '/Users/tunder/Dropbox/python/character/metadata/filtered_fiction_plus_18c.tsv'
metadata = pd.read_csv(metadata_path, sep='\t')
metadata.head()


Out[2]:
docid volid recordid author firstname inferreddate birthdate authgender enumcron title
0 14930 uva.x004123163 NaN Swift, Jonathan, Jonathan 1784 NaN m v.1 The works of the Rev. Dr. Jonathan Swift
1 14931 uva.x004123168 NaN Swift, Jonathan, Jonathan 1784 NaN m v.6 The works of the Rev. Dr. Jonathan Swift
2 14932 uva.x030576706 NaN Swift, Jonathan, Jonathan 1784 NaN m v.11 The works of the Rev. Dr. Jonathan Swift
3 14933 uva.x000530839 NaN Swift, Jonathan, Jonathan 1784 NaN m v.12 The works of the Rev. Dr. Jonathan Swift
4 14934 nyp.33433076096019 NaN Swift, Jonathan, Jonathan 1784 NaN m v. 14 The works of the Rev. Dr. Jonathan Swift

In [3]:
# Load the per-character table. docid is forced to the object dtype so that
# purely numeric ids are not parsed as integers.
read_options = {'sep': '\t', 'dtype': {'docid': 'object'}}
data = pd.read_csv('prestige_character_probabilities.tsv', **read_options)
data.head()


Out[3]:
docid charid gender pubdate numwords probability
0 0 0|Betsey f 1891 334 0.462642
1 0 0|Phil m 1891 12 0.140581
2 0 0|Elizabeth f 1891 82 0.366735
3 0 0|Mr.Jones m 1891 526 0.553426
4 0 0|Mr.Mitford m 1891 14 0.386104

In [4]:
# Mean probability and mean publication date for each character gender.
# The groupby's own .mean() is used instead of aggregate(np.mean): passing the
# NumPy callable is deprecated in recent pandas (FutureWarning) and currently
# just dispatches to the same groupby mean anyway.
grouped = data[['probability', 'gender', 'pubdate']].groupby('gender')
bygender = grouped.mean()
bygender.head()


Out[4]:
probability pubdate
gender
f 0.547890 1946.901165
m 0.438264 1948.086980
u 0.473054 1955.238318

In [50]:
# Load the author-level table and add a numeric author-gender code
# (f -> 1, m -> 0). Any other authgender value has no entry in the mapping
# and therefore becomes NaN in binaryauth.
gender_codes = {'f': 1, 'm': 0}
authormeta = pd.read_csv('output/authormeta.tsv', sep='\t')
authormeta['binaryauth'] = authormeta['authgender'].map(gender_codes)
authormeta.head()


Out[50]:
author num_stories reviewed authgender meandate mean_prestige mean_sales numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean binaryauth
0 Beckett, Samuel 13 1 m 1966.307692 0.820090 0.362205 13.846154 79.398322 0.368146 0.309553 0.052216 0.057434 0.045178 0.505375 0.0
1 Haggard, H. Rider 17 1 m 1898.235294 0.534054 0.899441 18.117647 343.630334 0.281828 0.369872 0.044399 0.053472 0.055781 0.481296 0.0
2 Castlemon, Harry 30 0 m 1886.433333 0.195307 0.777778 22.466667 322.592651 0.066558 0.030604 0.085897 0.078611 0.058242 0.445997 0.0
3 Pidgin, Charles Felton 12 0 m 1905.166667 0.179293 0.508571 40.000000 228.147965 0.291458 0.272297 0.072691 0.080473 0.065104 0.481867 0.0
4 Lewis, Wyndham 15 1 m 1945.533333 0.697906 0.453704 31.000000 202.572051 0.191909 0.172247 0.024949 0.030662 0.050687 0.487141 0.0

In [51]:
# Pairwise correlations between the numeric author-level columns.
# Text columns (author, authgender) are excluded explicitly: pandas >= 2.0 no
# longer drops them silently, so a bare authormeta.corr() raises there.
authormeta.select_dtypes(include='number').corr()


Out[51]:
num_stories reviewed meandate mean_prestige mean_sales numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean binaryauth
num_stories 1.000000 0.218885 0.136339 0.185862 0.546059 0.063713 0.014521 -0.067036 -0.059694 -0.032356 -0.042344 -0.072736 -0.013056 -0.148931
reviewed 0.218885 1.000000 0.136778 0.476159 0.288765 0.043027 -0.059852 -0.021178 0.002188 -0.052633 -0.054550 -0.064606 0.079513 -0.053148
meandate 0.136339 0.136778 1.000000 0.164638 -0.029856 0.043027 -0.143136 -0.144073 -0.125291 -0.265918 -0.241877 -0.284284 0.030538 -0.110871
mean_prestige 0.185862 0.476159 0.164638 1.000000 0.187538 0.031609 -0.118692 0.099911 0.105872 -0.134599 -0.150892 -0.122698 0.237385 0.033229
mean_sales 0.546059 0.288765 -0.029856 0.187538 1.000000 0.087558 0.066422 -0.049083 -0.033431 0.045883 -0.010033 -0.043694 -0.031284 -0.134423
numchars 0.063713 0.043027 0.043027 0.031609 0.087558 1.000000 -0.029548 0.047587 0.053322 0.020753 0.005668 0.191749 -0.001760 0.021899
charsize 0.014521 -0.059852 -0.143136 -0.118692 0.066422 -0.029548 1.000000 0.165401 0.167887 -0.011163 -0.013940 -0.173970 0.116147 0.142267
pct_women -0.067036 -0.021178 -0.144073 0.099911 -0.049083 0.047587 0.165401 1.000000 0.851266 -0.207798 -0.257510 -0.130680 0.685301 0.581080
wordratio -0.059694 0.002188 -0.125291 0.105872 -0.033431 0.053322 0.167887 0.851266 1.000000 -0.191358 -0.216788 -0.080728 0.608965 0.570961
prob_diff -0.032356 -0.052633 -0.265918 -0.134599 0.045883 0.020753 -0.011163 -0.207798 -0.191358 1.000000 0.815491 0.441574 -0.245904 -0.204912
weighted_diff -0.042344 -0.054550 -0.241877 -0.150892 -0.010033 0.005668 -0.013940 -0.257510 -0.216788 0.815491 1.000000 0.383396 -0.248943 -0.228787
prob_stdev -0.072736 -0.064606 -0.284284 -0.122698 -0.043694 0.191749 -0.173970 -0.130680 -0.080728 0.441574 0.383396 1.000000 -0.213965 -0.120883
prob_mean -0.013056 0.079513 0.030538 0.237385 -0.031284 -0.001760 0.116147 0.685301 0.608965 -0.245904 -0.248943 -0.213965 1.000000 0.507696
binaryauth -0.148931 -0.053148 -0.110871 0.033229 -0.134423 0.021899 0.142267 0.581080 0.570961 -0.204912 -0.228787 -0.120883 0.507696 1.000000

In [74]:
# Ordinary least squares fit of weighted_diff on pct_women, meandate and the
# 0/1 author-gender code.
# NOTE(review): rows where binaryauth is NaN are presumably dropped by the
# formula interface's default missing-value handling -- confirm against the
# observation count in the summary.
ols_formula = 'weighted_diff ~ pct_women + meandate + binaryauth'
authormodel = smf.ols(formula=ols_formula, data=authormeta).fit()
authormodel.summary()


Out[74]:
OLS Regression Results
Dep. Variable: weighted_diff R-squared: 0.164
Model: OLS Adj. R-squared: 0.161
Method: Least Squares F-statistic: 52.30
Date: Thu, 20 Jul 2017 Prob (F-statistic): 7.11e-31
Time: 09:46:10 Log-Likelihood: 1927.6
No. Observations: 804 AIC: -3847.
Df Residuals: 800 BIC: -3828.
Df Model: 3
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 0.5127 0.050 10.273 0.000 0.415 0.611
pct_women -0.0479 0.008 -6.153 0.000 -0.063 -0.033
meandate -0.0002 2.6e-05 -8.817 0.000 -0.000 -0.000
binaryauth -0.0058 0.002 -2.973 0.003 -0.010 -0.002
Omnibus: 258.429 Durbin-Watson: 2.013
Prob(Omnibus): 0.000 Jarque-Bera (JB): 9749.763
Skew: -0.733 Prob(JB): 0.00
Kurtosis: 19.997 Cond. No. 1.22e+05

In [66]:
# Correlation matrix restricted to authors coded 'm'.
# Numeric columns are selected explicitly because pandas >= 2.0 raises on the
# text columns instead of skipping them. binaryauth is constant within this
# subset, so its correlations come out as NaN.
male_authors = authormeta[authormeta.authgender == 'm']
male_authors.select_dtypes(include='number').corr()


Out[66]:
num_stories reviewed meandate mean_prestige mean_sales numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean binaryauth
num_stories 1.000000 0.233313 0.141723 0.203661 0.499386 0.072355 0.042709 0.024341 0.035975 -0.086567 -0.098265 -0.137105 0.092943 NaN
reviewed 0.233313 1.000000 0.082260 0.467277 0.290199 0.094755 -0.055158 0.103213 0.110484 -0.122033 -0.118770 -0.083917 0.209005 NaN
meandate 0.141723 0.082260 1.000000 0.097814 -0.041160 0.112701 -0.161474 -0.077726 -0.098248 -0.304985 -0.260813 -0.320776 0.147506 NaN
mean_prestige 0.203661 0.467277 0.097814 1.000000 0.208443 0.017251 -0.097868 0.210429 0.186846 -0.151419 -0.184130 -0.146116 0.364310 NaN
mean_sales 0.499386 0.290199 -0.041160 0.208443 1.000000 0.070393 0.061285 0.044880 0.085258 -0.015551 -0.080063 -0.153395 0.061477 NaN
numchars 0.072355 0.094755 0.112701 0.017251 0.070393 1.000000 0.003731 0.016576 0.020073 0.011869 0.010405 0.121162 0.028648 NaN
charsize 0.042709 -0.055158 -0.161474 -0.097868 0.061285 0.003731 1.000000 0.168346 0.153375 0.073256 0.054532 -0.170386 0.050075 NaN
pct_women 0.024341 0.103213 -0.077726 0.210429 0.044880 0.016576 0.168346 1.000000 0.817833 -0.086775 -0.162461 -0.140035 0.612956 NaN
wordratio 0.035975 0.110484 -0.098248 0.186846 0.085258 0.020073 0.153375 0.817833 1.000000 -0.063072 -0.114356 -0.056690 0.508932 NaN
prob_diff -0.086567 -0.122033 -0.304985 -0.151419 -0.015551 0.011869 0.073256 -0.086775 -0.063072 1.000000 0.831499 0.411620 -0.197177 NaN
weighted_diff -0.098265 -0.118770 -0.260813 -0.184130 -0.080063 0.010405 0.054532 -0.162461 -0.114356 0.831499 1.000000 0.350210 -0.189095 NaN
prob_stdev -0.137105 -0.083917 -0.320776 -0.146116 -0.153395 0.121162 -0.170386 -0.140035 -0.056690 0.411620 0.350210 1.000000 -0.232417 NaN
prob_mean 0.092943 0.209005 0.147506 0.364310 0.061477 0.028648 0.050075 0.612956 0.508932 -0.197177 -0.189095 -0.232417 1.000000 NaN
binaryauth NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

In [15]:
# Correlation matrix restricted to authors coded 'f'.
# Numeric columns are selected explicitly because pandas >= 2.0 raises on the
# text columns instead of skipping them.
female_authors = authormeta[authormeta.authgender == 'f']
female_authors.select_dtypes(include='number').corr()


Out[15]:
num_stories meandate mean_prestige mean_sales numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean
num_stories 1.000000 0.070293 0.151998 0.667579 0.024708 0.028403 0.015314 0.016825 0.059267 0.052446 0.005806 0.029140
meandate 0.070293 1.000000 0.237417 -0.077555 -0.069427 -0.096531 -0.097399 -0.021226 -0.148353 -0.191635 -0.187684 -0.145022
mean_prestige 0.151998 0.237417 1.000000 0.150619 0.059172 -0.153872 -0.025213 0.008218 -0.003268 -0.042805 -0.054551 -0.029945
mean_sales 0.667579 -0.077555 0.150619 1.000000 0.121972 0.131655 0.021562 0.017285 0.062698 0.061543 0.015039 0.031240
numchars 0.024708 -0.069427 0.059172 0.121972 1.000000 -0.117479 0.107331 0.127662 0.065604 -0.005340 0.340781 0.005935
charsize 0.028403 -0.096531 -0.153872 0.131655 -0.117479 1.000000 -0.000329 0.063396 0.011968 -0.023379 -0.152545 0.086531
pct_women 0.015314 -0.097399 -0.025213 0.021562 0.107331 -0.000329 1.000000 0.716555 -0.176143 -0.153280 -0.033422 0.497102
wordratio 0.016825 -0.021226 0.008218 0.017285 0.127662 0.063396 0.716555 1.000000 -0.112095 -0.079195 0.034334 0.433225
prob_diff 0.059267 -0.148353 -0.003268 0.062698 0.065604 0.011968 -0.176143 -0.112095 1.000000 0.762709 0.493984 -0.017732
weighted_diff 0.052446 -0.191635 -0.042805 0.061543 -0.005340 -0.023379 -0.153280 -0.079195 0.762709 1.000000 0.389891 -0.024579
prob_stdev 0.005806 -0.187684 -0.054551 0.015039 0.340781 -0.152545 -0.033422 0.034334 0.493984 0.389891 1.000000 -0.086264
prob_mean 0.029140 -0.145022 -0.029945 0.031240 0.005935 0.086531 0.497102 0.433225 -0.017732 -0.024579 -0.086264 1.000000

In [67]:
# Correlation matrix for authors whose mean publication date is after 1899.
# Numeric columns are selected explicitly because pandas >= 2.0 raises on the
# text columns instead of skipping them.
post_1899 = authormeta[authormeta.meandate > 1899]
post_1899.select_dtypes(include='number').corr()


Out[67]:
num_stories reviewed meandate mean_prestige mean_sales numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean binaryauth
num_stories 1.000000 0.216207 0.028832 0.192364 0.552781 0.080999 -0.008270 -0.020080 -0.026239 0.017046 -0.017367 -0.051526 0.034022 -0.154354
reviewed 0.216207 1.000000 0.118624 0.493815 0.253594 0.115210 -0.104733 0.085850 0.101939 -0.043750 -0.115606 -0.019682 0.201517 -0.008467
meandate 0.028832 0.118624 1.000000 0.144864 0.020717 0.285391 -0.090324 0.039179 0.045959 -0.108303 -0.142281 -0.097999 0.121877 0.032213
mean_prestige 0.192364 0.493815 0.144864 1.000000 0.173665 0.077410 -0.155905 0.231569 0.235436 -0.159304 -0.234630 -0.102519 0.398523 0.089821
mean_sales 0.552781 0.253594 0.020717 0.173665 1.000000 0.180814 -0.002249 -0.039367 -0.009546 0.078740 0.000745 -0.008500 0.019888 -0.124467
numchars 0.080999 0.115210 0.285391 0.077410 0.180814 1.000000 -0.157878 0.044582 0.041489 0.046356 -0.019275 0.173617 0.026330 -0.053770
charsize -0.008270 -0.104733 -0.090324 -0.155905 -0.002249 -0.157878 1.000000 0.104278 0.155188 -0.017621 -0.032118 -0.227254 0.167334 0.165798
pct_women -0.020080 0.085850 0.039179 0.231569 -0.039367 0.044582 0.104278 1.000000 0.874111 -0.231020 -0.333915 -0.161123 0.717269 0.575442
wordratio -0.026239 0.101939 0.045959 0.235436 -0.009546 0.041489 0.155188 0.874111 1.000000 -0.242582 -0.357869 -0.159709 0.667903 0.590426
prob_diff 0.017046 -0.043750 -0.108303 -0.159304 0.078740 0.046356 -0.017621 -0.231020 -0.242582 1.000000 0.824731 0.421907 -0.249271 -0.214917
weighted_diff -0.017367 -0.115606 -0.142281 -0.234630 0.000745 -0.019275 -0.032118 -0.333915 -0.357869 0.824731 1.000000 0.353381 -0.318739 -0.275348
prob_stdev -0.051526 -0.019682 -0.097999 -0.102519 -0.008500 0.173617 -0.227254 -0.161123 -0.159709 0.421907 0.353381 1.000000 -0.229075 -0.167212
prob_mean 0.034022 0.201517 0.121877 0.398523 0.019888 0.026330 0.167334 0.717269 0.667903 -0.249271 -0.318739 -0.229075 1.000000 0.493687
binaryauth -0.154354 -0.008467 0.032213 0.089821 -0.124467 -0.053770 0.165798 0.575442 0.590426 -0.214917 -0.275348 -0.167212 0.493687 1.000000

In [31]:
# Correlation matrix for authors coded 'm' whose mean publication date is
# after 1920. Numeric columns are selected explicitly because pandas >= 2.0
# raises on the text columns instead of skipping them.
is_post_1920 = authormeta.meandate > 1920
is_male = authormeta.authgender == 'm'
authormeta[is_post_1920 & is_male].select_dtypes(include='number').corr()


Out[31]:
num_stories meandate mean_prestige mean_sales numchars charsize pct_women wordratio prob_diff prob_stdev prob_mean
num_stories 1.000000 -0.065257 0.285475 0.508761 0.040698 0.056041 0.067147 0.109698 -0.024528 -0.027788 0.131550
meandate -0.065257 1.000000 0.134988 -0.046628 0.194593 -0.133596 0.003914 0.032082 -0.207532 -0.019468 0.019617
mean_prestige 0.285475 0.134988 1.000000 0.257232 0.022541 -0.085217 0.396409 0.379550 -0.156451 -0.044407 0.511218
mean_sales 0.508761 -0.046628 0.257232 1.000000 0.194626 0.013702 0.062069 0.175694 -0.071750 0.089569 0.169740
numchars 0.040698 0.194593 0.022541 0.194626 1.000000 -0.125138 0.012126 0.053806 0.037956 0.340389 -0.028815
charsize 0.056041 -0.133596 -0.085217 0.013702 -0.125138 1.000000 0.106259 0.095825 0.038795 -0.217598 0.103200
pct_women 0.067147 0.003914 0.396409 0.062069 0.012126 0.106259 1.000000 0.866698 -0.146658 -0.082226 0.659543
wordratio 0.109698 0.032082 0.379550 0.175694 0.053806 0.095825 0.866698 1.000000 -0.182480 -0.094531 0.604170
prob_diff -0.024528 -0.207532 -0.156451 -0.071750 0.037956 0.038795 -0.146658 -0.182480 1.000000 0.382050 -0.108680
prob_stdev -0.027788 -0.019468 -0.044407 0.089569 0.340389 -0.217598 -0.082226 -0.094531 0.382050 1.000000 -0.126740
prob_mean 0.131550 0.019617 0.511218 0.169740 -0.028815 0.103200 0.659543 0.604170 -0.108680 -0.126740 1.000000

In [17]:
# Comma-separated table whose `author` column is compared with authormeta below.
otherauthor = pd.read_csv('pairedwithprestige.csv', sep=',')

In [18]:
def trim_to_24(aname):
    """Normalize an author name and cap it at 24 characters.

    Parameters
    ----------
    aname : object
        Raw author field. Anything that is not a string (for example the
        float NaN pandas uses for a missing value) is treated as anonymous.

    Returns
    -------
    str
        'Anonymous' for non-string input; otherwise the name with leading and
        trailing parentheses, commas, periods, spaces, square brackets and
        digits removed, cut to at most 24 characters.

    Notes
    -----
    Truncation happens after stripping, so a shortened name may itself end in
    a space or punctuation character.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses, which would otherwise be mislabelled as anonymous.
    if not isinstance(aname, str):
        return 'Anonymous'

    # Slicing is a no-op for names of 24 characters or fewer, so no explicit
    # length check is needed.
    return aname.strip('(),. .[0123456789]')[:24]

# Distinct normalized author names found in the paired-with-prestige table.
other_author = {trim_to_24(raw_name) for raw_name in otherauthor['author']}

In [19]:
# Normalized names present in the paired table but absent from authormeta.
missing_from_authormeta = other_author - set(authormeta.author)
print(missing_from_authormeta)


{'Overstolz, Marie Emelie ', 'Leigh, Alfred', 'Montagu, Lily H', 'Grey', 'Elton, Arthur Hallam', 'Andrews, Anabel (Follanb', 'Holyoke, Hetty', 'Pardoe', 'Fogerty, J', 'O. Douglas', 'Chatterji, Bankim Chandr', 'Post, Helen (Wilmans', 'Maria', 'Engles, William M', 'Johnston, Sir Harry', 'Aytoun, William Edmondst', 'Ingram, J. Forsyth', 'Lean, Florence', 'Newall, John', 'Vereker, Charles Smyth', 'Yale, Catharine Brooks', 'Harbert, Lizzie Boynton', 'Goff, H. N. K', 'McLain, Mary Webster', 'Hoffman, Mary J', 'Christie-Murray, David', 'Newell, Charles Martin', 'Rex, Beach', 'Leonowens, Anna Harriett', 'Hannay, James', 'Chittenden, L. E', 'Radecliffe, Noell', 'Glenn, Isa', 'Perelaer, Michael Theoph', 'Veitch, Sophie F. F', 'Reddin, Kenneth', 'Buckley, William', 'Smith, Francis Hopkinson', 'Watson, William', 'Spencer, Lillian', 'Volckhausen, Adeline', 'Colvill, Helen Hester', 'Swift, John Franklin', 'Perry, Alice', 'Bradford, O. K', 'Aïdé, Hamilton', 'Smythies, Harriet Maria ', 'Châteauclair, Wilfrid', 'Edwards, Matilda Betham', 'Scott, Geo. G', 'Zack', 'Valentine, L', 'Estvan, Mathilde', 'Adderley, James Granvill', 'Fox, Richard A', 'Smith, William', 'Goulding, F. R', 'Pomeroy, John', 'Rives, Hallie', 'Conybeare, William John', 'anonymous', 'Houstoun'}

In [35]:
# Reverse check: authormeta authors that do not appear in the paired table.
missing_from_other = set(authormeta.author) - other_author
print(missing_from_other)


set()

In [68]:
# Load the tab-separated story-level table that carries a genre label.
genremeta_path = 'output/genre_storymeta.tsv'
genremeta = pd.read_csv(genremeta_path, sep='\t')
genremeta.head()


Out[68]:
docid author title authgender pubdate genre numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean
0 uc1.32106011196133 Heinlein, Robert A. Starship troopers m 1959 scifi 29 58.862069 0.153846 0.048659 0.002086 0.045627 0.049584 0.478987
1 8469 Brontë, Emily Wuthering Heights f 1847 historical 32 421.593750 0.433333 0.443459 0.013174 0.036966 0.063450 0.491630
2 10651 Austen, Jane Pride and Prejudice f 1813 romance 42 346.928571 0.714286 0.640862 0.068306 0.058855 0.063728 0.509821
3 mdp.39015034269400 Leonard, Elmore, Riding the rap m 1995 detective 25 360.520000 0.250000 0.153210 0.027186 0.070882 0.058211 0.496419
4 mdp.39015063511748 Berkeley, Anthony, The poisoned chocolates c m 1929 detective 19 314.842105 0.500000 0.243402 0.037797 0.043456 0.042703 0.486050

In [69]:
# Per-genre, per-author-gender means for stories published after 1900.
# Only numeric columns are averaged: the text columns (docid, author, title)
# make the mean raise on pandas >= 2.0, where they are no longer dropped
# silently. The groupby's own .mean() replaces aggregate(np.mean), which is
# deprecated in recent pandas.
recent_stories = genremeta[genremeta.pubdate > 1900]
numeric_columns = list(recent_stories.select_dtypes(include='number').columns)
grouped = recent_stories.groupby(['genre', 'authgender'])
genreavg = grouped[numeric_columns].mean()
genreavg


Out[69]:
pubdate numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean
genre authgender
detective f 1940.500000 36.166667 204.851237 0.358923 0.385034 0.043938 0.046218 0.063361 0.481287
m 1942.319149 31.680851 194.327318 0.253157 0.200722 0.048396 0.056077 0.057398 0.483436
u 1994.000000 73.000000 330.356164 0.308824 0.484858 0.034783 0.011604 0.055324 0.479031
romance f 1947.375000 42.875000 214.062008 0.428041 0.518899 0.036215 0.033877 0.057505 0.512110
scifi f 1981.333333 43.888889 189.844360 0.347950 0.329206 0.018747 0.029946 0.053799 0.488649
m 1953.883333 24.866667 243.646166 0.220020 0.214166 0.034972 0.035996 0.053741 0.480353
u 1990.000000 41.000000 214.048780 0.206897 0.136118 -0.032973 -0.018758 0.056144 0.475397
western m 1935.454545 23.454545 251.631583 0.185665 0.202370 0.061239 0.057680 0.052471 0.452596

In [7]:
# Pairwise correlations between the numeric story-level columns.
# Text columns are excluded explicitly: pandas >= 2.0 no longer drops them
# silently, so a bare genremeta.corr() raises there.
genremeta.select_dtypes(include='number').corr()


Out[7]:
pubdate numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean
pubdate 1.000000 0.087487 -0.047805 -0.203835 -0.179622 -0.290928 -0.315559 -0.284285 -0.037632
numchars 0.087487 1.000000 -0.116675 0.191617 0.213407 -0.070770 -0.087493 0.118414 0.095820
charsize -0.047805 -0.116675 1.000000 0.083578 0.045047 -0.029180 -0.024740 -0.246300 0.179546
pct_women -0.203835 0.191617 0.083578 1.000000 0.786075 -0.104107 -0.100844 0.065427 0.561660
wordratio -0.179622 0.213407 0.045047 0.786075 1.000000 -0.037690 -0.101150 0.058492 0.463602
prob_diff -0.290928 -0.070770 -0.029180 -0.104107 -0.037690 1.000000 0.714091 0.483968 -0.107519
weighted_diff -0.315559 -0.087493 -0.024740 -0.100844 -0.101150 0.714091 1.000000 0.383959 -0.084137
prob_stdev -0.284285 0.118414 -0.246300 0.065427 0.058492 0.483968 0.383959 1.000000 -0.068062
prob_mean -0.037632 0.095820 0.179546 0.561660 0.463602 -0.107519 -0.084137 -0.068062 1.000000

In [35]:
def after1900(date):
    """Return 0 for a date earlier than 1900 and 1 otherwise.

    The year 1900 itself maps to 1, as does any value for which the
    comparison ``date < 1900`` is false (for example NaN).
    """
    return 0 if date < 1900 else 1

# Flag each author as pre-1900 (0) or 1900-and-later (1), then average the
# numeric columns within every (century, author gender) group.
authormeta['century'] = authormeta.meandate.apply(after1900)

grouped = authormeta.groupby(['century', 'authgender'])
# Average numeric columns only: the text `author` column makes the mean raise
# on pandas >= 2.0, and aggregate(np.mean) is deprecated in recent pandas.
# `century` is a grouping key, so it is left out of the averaged columns.
value_columns = list(
    authormeta.select_dtypes(include='number').columns.drop('century')
)
authoravg = grouped[value_columns].mean()
authoravg


Out[35]:
num_stories reviewed meandate mean_prestige mean_sales numchars charsize pct_women wordratio prob_diff weighted_diff prob_stdev prob_mean
century authgender
0 f 8.447514 0.397790 1877.738949 0.478870 0.495331 33.654888 232.038433 0.437561 0.485522 0.055409 0.056436 0.066894 0.500467
m 11.641860 0.483721 1878.095249 0.481033 0.581735 30.629130 214.476230 0.292293 0.296706 0.068334 0.068545 0.069302 0.480436
u 1.909091 0.227273 1871.607792 0.363867 0.248613 32.154329 204.644244 0.395810 0.413640 0.061165 0.068886 0.064771 0.487076
1 f 9.563910 0.496241 1930.825268 0.534538 0.441817 31.713569 224.399359 0.423866 0.483194 0.047738 0.047177 0.059574 0.501259
m 15.491039 0.501792 1929.428001 0.496075 0.520256 33.337437 190.748340 0.275554 0.270708 0.057418 0.059390 0.063218 0.484201
u 5.000000 0.392857 1927.096812 0.520311 0.229838 30.158466 194.269540 0.315528 0.343114 0.046244 0.051748 0.058423 0.489751

In [29]:
# Scatter plot of each author's weighted_diff against their mean publication date.
authormeta.plot(kind='scatter', x='meandate', y='weighted_diff')


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x11003a898>

In [ ]: