The purpose of this file is to quickly and easily compute and display a statistical report of the generated patent databases.
In [23]:
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
# Names of the tables to report on. Each table is read from its own
# '<name>.sqlite3' file, resolved relative to the current working directory.
databases = ['assignee','citation','class','inventor','patent','patdesc','lawyer','sciref','usreldoc']

# Print the number of records stored in every table.
for table in databases:
    conn = sqlite3.connect('{0}.sqlite3'.format(table))
    try:
        cursor = conn.cursor()
        # Table names cannot be bound as SQL parameters; `table` only ever
        # comes from the hard-coded list above, so formatting it in is safe.
        cursor.execute('select count(*) from {0}'.format(table))
        # count(*) without GROUP BY always yields exactly one row.
        record_count = cursor.fetchone()[0]
    finally:
        # Release the database file even when the query fails
        # (e.g. the table does not exist in that file).
        conn.close()
    # Single formatted string so the output is identical on Python 2 and 3.
    print('{0} : {1} records'.format(table, record_count))
Histogram of the number of inventors per patent, with the mean
In [51]:
# Fetch one row count per distinct Patent value from the inventor table.
conn = sqlite3.connect('inventor.sqlite3')
try:
    cursor = conn.cursor()
    results = cursor.execute('select count(*) from inventor group by Patent;')
    # fetchall() returns a list of 1-tuples; flatten it to a plain list of ints.
    inventor_counts = [row[0] for row in results.fetchall()]
finally:
    # Close before plotting: plt.show() may block, and the data is already
    # in memory, so there is no reason to keep the file open (or leak it on error).
    conn.close()

# Plot the distribution of inventors per patent.
fig = plt.figure()
histogram = fig.add_subplot(111)
# hist() returns the bin counts, the bin edges and the drawn patches.
n, bins, patches = histogram.hist(inventor_counts)
histogram.set_xlabel('Number of Inventors')
histogram.set_ylabel('Patent Count')
plt.show()

# np.mean([]) emits a RuntimeWarning; report NaN directly for an empty table.
mean_inventors = np.mean(inventor_counts) if inventor_counts else float('nan')
# Single formatted string so the output is identical on Python 2 and 3.
print('Average Number of Inventors per Patent {0}'.format(mean_inventors))
Histogram of the number of citations per patent (log-scaled x-axis), with the mean
In [104]:
# Fetch one row count per distinct Patent value from the citation table.
conn = sqlite3.connect('citation.sqlite3')
try:
    cursor = conn.cursor()
    results = cursor.execute('select count(*) from citation group by Patent;')
    # fetchall() returns a list of 1-tuples; flatten it to a plain list of ints.
    citation_counts = [row[0] for row in results.fetchall()]
finally:
    # Close before plotting: plt.show() may block, and the data is already
    # in memory, so there is no reason to keep the file open (or leak it on error).
    conn.close()

# Plot the distribution of citations per patent on a logarithmic x-axis.
fig = plt.figure()
histogram = fig.add_subplot(111)
histogram.set_xscale('log')
# Bin edges: unit-width bins for 1..4, then log-spaced edges from 5**1 to 5**5
# (np.logspace's default of 50 points). Built by concatenation because
# range() is not a list on Python 3 and therefore has no .extend().
bins = list(range(1, 5)) + list(np.logspace(1, 5, base=5))
# hist() returns the bin counts, the bin edges and the drawn patches.
n, bins, patches = histogram.hist(citation_counts, bins=bins, histtype='stepfilled')
histogram.set_xlabel('Number of Citations Cited per Patent')
histogram.set_ylabel('Patent Count')
plt.show()

# np.mean([]) emits a RuntimeWarning; report NaN directly for an empty table.
mean_citations = np.mean(citation_counts) if citation_counts else float('nan')
# Single formatted string so the output is identical on Python 2 and 3.
print('Average Number of Citations per Patent {0}'.format(mean_citations))
Histogram with number of citations per patent, with mean
In [ ]: