In [59]:
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import path
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter, LogFormatterMathtext
#from matplotlib.pyplot import figure, axes, plot, xlabel, ylabel, title, \
#grid, savefig, show
In [60]:
from tools import FixedOrderFormatter, get_psycinfo_database, GetTemporalPsyc
In [61]:
%matplotlib inline
In [62]:
# Load the PsycINFO article counts.
# `DataFrame.from_csv` was removed in pandas 1.0; `read_csv` with
# `index_col=0, parse_dates=True` reproduces its defaults (the first column
# becomes the index and is parsed as dates).
df = pd.read_csv("data/PsycInfo/PsycInfo Articles Review.csv",
                 index_col=0, parse_dates=True)

# Re-key the rows by calendar year: later cells look rows up by year
# (`df.loc[terms.index]`) and use the index directly as the x-axis.
# Reassigning instead of `inplace=True` keeps the cell safe to re-read.
df = df.set_index(df.index.year)
In [63]:
# Build the term table with the project helper imported from `tools`.
# NOTE(review): presumably a year-indexed DataFrame with one column per
# cultural term -- confirm against tools.GetTemporalPsyc.
terms = GetTemporalPsyc()
In [64]:
# Quick sanity check: number of rows in the article table and the term table.
len(df), len(terms)
Out[64]:
In [65]:
# Row-wise total over the term columns (missing values count as 0).
# The derived "Prob"/"Count" columns are excluded explicitly so that
# re-running this cell does not fold its own previous output back into the
# sum; on a first run this is identical to summing every column.
# NOTE(review): assumes each term column holds a per-year proportion of
# articles -- confirm against GetTemporalPsyc.
term_values = terms.drop(columns=["Prob", "Count"], errors="ignore")
terms["Prob"] = term_values.fillna(0).sum(axis=1)

# Scale by the article totals of the matching years. The element-wise
# product only lines up when `df` has a single column, since the selected
# rows are flattened to 1-D.
terms["Count"] = terms["Prob"].values * df.loc[terms.index].values.flatten()
In [66]:
# --- Yearly publication counts with exponential fits, on a log y-axis ---
x = df.index
y = df.values

# A straight line fitted to log2(count) against year is an exponential trend
# in the original units; its slope is "doublings per year".
y_ = np.log2(y)
m1, b1 = np.polyfit(x, y_, 1)

# The term-based series is fitted on 1940 <= year < 2016 only.
in_fit_window = (terms.index >= 1940) & (terms.index < 2016)
z = terms.Count[in_fit_window]
z_ = np.log2(z.values)
m2, b2 = np.polyfit(z.index, z_, 1)

fig = plt.figure(figsize=(9.0, 6.0))
ax = fig.add_subplot(1, 1, 1)

# Draw order determines legend order: data + fit, then term series + fit.
ax.plot(x, y, marker='+', label="Publications")
ax.plot(x, 2**(m1*x.values + b1), 'r-', label="Best fit")
ax.plot(terms.index, terms.Count, label="Cultural Publications")
ax.plot(z.index, 2**(m2*z.index.values + b2), 'k-', label="Best fit")

ax.set_yscale('log')
ax.yaxis.set_major_formatter(LogFormatterMathtext())
ax.set_ylabel("Articles")
ax.set_xlabel("Year")
ax.tick_params(axis='y', which='minor', labelsize=7)
_ = ax.legend(loc="upper left")
In [67]:
# m2 and m1 are slopes of log2(count) against year (term-based series and all
# publications respectively), so their reciprocals are years per doubling.
1.0/m2, 1.0/m1
Out[67]:
In [68]:
# Intercept of the straight-line fit to log2 of the term-based counts.
print (b2)
In [69]:
# Slope of the straight-line fit to log2 of the term-based counts
# (doublings per year).
print (m2)
In [71]:
# --- Figure saved as PsycINFO1.png: publications per year, in thousands ---
fig = plt.figure(figsize=(9.0, 6.0))
ax = fig.add_subplot(1, 1, 1)
font = {'family': "Times New Roman",
        'color': 'black',
        'size': 18}

# Take the x-axis straight from `df` so this cell does not rely on a
# variable left behind by an earlier plotting cell.
x = df.index

# All publications, rescaled to thousands to match the y-axis label.
y = df.values / 1000.0
ax.plot(x, y, marker='d', markevery=3, linewidth=2,
        label="All PsycINFO Publications", color='gray')

# Exponential trend: fit a line to log10(y) and map it back with 10**.
# The format string carries only the line style; the original 'r-' also
# named red, which the explicit color='black' silently overrode.
y_ = np.log10(y)
m, b = np.polyfit(x, y_, 1)
ax.plot(x, 10**(m*x.values + b), '-', linewidth=2,
        label="Curve of Best Fit", color='black')

ax.set_ylabel("Number of Publications (in thousands)", font)
ax.set_xlabel("Year", font)
ax.tick_params(axis='y', which='minor', labelsize=7)

# Term-based series in thousands; the fit window is 1940 <= year < 2016.
z = terms.Count[terms.index >= 1940]/1000
z = z[z.index < 2016]
z_ = np.log2(z.values)
ax.plot(terms.index, terms.Count/1000, marker='o', markevery=4, linewidth=2,
        label="Psychology Publications with Cultural Terms",
        color='lightslategray')
# The fit is still computed (m2, b2 stay defined as before) but its curve
# is intentionally not drawn on this figure.
m2, b2 = np.polyfit(z.index, z_, 1)

ax.legend(loc='best')
ax.set_xlim(1845, 2020)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
for tick in ax.get_xticklabels():
    tick.set_fontname("Times New Roman")
for tick in ax.get_yticklabels():
    tick.set_fontname("Times New Roman")

# `pylab` is never imported in this notebook, so `pylab.savefig` raised
# NameError on a fresh kernel; save through the figure object instead.
fig.savefig('PsycINFO1.png')
In [15]:
# Base-2 fit of the raw yearly counts against year.
# Reading `df` directly (rather than undoing the "/ 1000" applied to `y` by a
# plotting cell) makes the result independent of which cell ran last.
y_ = np.log2(df.values)
m, b = np.polyfit(df.index, y_, 1)
# `m` and `b` are indexed because the fit on the 2-D `df.values` yields
# arrays rather than scalars.
print("2**({}*x+{})".format(m[0], b[0]))
In [16]:
# With `m` taken from the base-2 fit in the preceding cell, its reciprocal is
# the number of years it takes publications to double.
1.0/m
Out[16]:
$y = 2^{\frac{13x}{200} - 112.79}$
Publications double every 15.50 years (15 years and 6 months).
In [72]:
# --- Figure saved as PsycINFO2.png: same series, y-axis limited to 0-10 ---
# This variant is drawn without axis labels or a legend.
fig = plt.figure(figsize=(9.0, 6.0))
ax = fig.add_subplot(1, 1, 1)

# Take the x-axis straight from `df` so this cell does not rely on a
# variable left behind by an earlier plotting cell.
x = df.index

# All publications, in thousands.
y = df.values / 1000.0
ax.plot(x, y, marker='d', markevery=4, linewidth=2,
        label="All PsycINFO Publications", color='gray')

# Exponential trend: fit a line to log10(y) and map it back with 10**.
# The format string carries only the line style; the original 'r-' also
# named red, which the explicit color='black' silently overrode.
y_ = np.log10(y)
m, b = np.polyfit(x, y_, 1)
ax.plot(x, 10**(m*x.values + b), '-', linewidth=2,
        label="Curve of Best Fit", color='black')

# Term-based series in thousands, fitted on 1940 <= year < 2016.
z = terms.Count[terms.index >= 1940]/1000
z = z[z.index < 2016]
z_ = np.log2(z.values)
ax.plot(terms.index, terms.Count/1000.0, marker='o', markevery=4, linewidth=2,
        label="Psychology Publications with Cultural Terms",
        color='lightslategray')
m2, b2 = np.polyfit(z.index, z_, 1)
ax.plot(z.index, 2**(m2*z.index.values + b2), '-', linewidth=2,
        label="Curve of Best Fit", color='black')

ax.set_xlim(1845, 2020)
ax.set_ylim(0, 10)

# One tick-label size for both axes. The earlier 16/14 settings were
# overwritten by these calls before rendering, so they are dropped.
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
for tick in ax.get_xticklabels():
    tick.set_fontname("Times New Roman")
for tick in ax.get_yticklabels():
    tick.set_fontname("Times New Roman")

# `pylab` is never imported in this notebook, so `pylab.savefig` raised
# NameError on a fresh kernel; save through the figure object instead.
fig.savefig('PsycINFO2.png')
In [ ]: