create indices for faster lookups


In [ ]:
import sqlite3
conn = sqlite3.connect('pmcv2-full.db')
c = conn.cursor()

In [ ]:
c.execute('''CREATE INDEX pmidix ON refs(pmid)''')
c.execute('''CREATE INDEX pmcidix ON pmcidmap(pmid)''')
c.execute('''CREATE INDEX metaix ON meta(pmid)''')
c.execute('''CREATE INDEX authorsix ON authors(pmid)''')
c.execute('''CREATE INDEX keywordsix ON keywords(pmid)''')
c.execute('''CREATE INDEX abstractsix ON abstracts(pmid)''')
#c.execute('''CREATE INDEX tfidfix ON tfidf(pmid)''')

In [ ]:
c.execute('''COMMIT''')
c.close()

add author abbreviations to author table, create index for this, to allow generation of per-author statistics (this will map author abbreviations to their publication PMIDs)


In [5]:
import sqlite3
conn = sqlite3.connect('pmcv2-full.db')
c = conn.cursor()
#c.execute('''DROP TABLE authors2''')
c.execute('''CREATE TABLE authors2 (pmid integer, authnum integer, fn text, ln text, afil text, abbr text)''')


Out[5]:
<sqlite3.Cursor at 0x10f12f260>

In [6]:
c.execute('''SELECT * FROM authors''')
authtab = c.fetchall()
for entry in authtab:
    authorabbr = (entry[2]+entry[3]).replace(" ", "").lower()
    c.execute("INSERT INTO authors2 (pmid, authnum, fn, ln, afil, abbr) VALUES (?, ?, ?, ?, ?, ?)",
              (entry[0], entry[1], entry[2], entry[3], entry[4], authorabbr))
c.execute('''COMMIT''')


---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
<ipython-input-6-f72574975837> in <module>()
      5     c.execute("INSERT INTO authors2 (pmid, authnum, fn, ln, afil, abbr) VALUES (?, ?, ?, ?, ?, ?)",
      6               (entry[0], entry[1], entry[2], entry[3], entry[4], authorabbr))
----> 7 c.execute('''COMMIT''')

OperationalError: cannot commit - no transaction is active

In [7]:
c.execute('''DROP TABLE authors''')


Out[7]:
<sqlite3.Cursor at 0x10f12f260>

In [8]:
c.execute('''ALTER TABLE authors2 RENAME TO authors''')


Out[8]:
<sqlite3.Cursor at 0x10f12f260>

In [9]:
c.execute('''CREATE INDEX authorsabbrix ON authors(abbr)''')


Out[9]:
<sqlite3.Cursor at 0x10f12f260>

In [10]:
c.execute('''CREATE INDEX authorsix ON authors(pmid)''')


Out[10]:
<sqlite3.Cursor at 0x10f12f260>

In [11]:
c.execute('''COMMIT''')
c.close()


---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
<ipython-input-11-dbcb85cf7302> in <module>()
----> 1 c.execute('''COMMIT''')
      2 c.close()

OperationalError: cannot commit - no transaction is active

create authors full name dict


In [21]:
import sqlite3
conn = sqlite3.connect('pmcv2-full.db')
c = conn.cursor()
c.execute('''CREATE TABLE authorfndict (authorabbr text, authorfn text, PRIMARY KEY (authorabbr))''')


Out[21]:
<sqlite3.Cursor at 0x140ec03b0>

In [20]:
c.execute('''DROP TABLE authorfndict''')


Out[20]:
<sqlite3.Cursor at 0x140ec0420>

In [29]:
c.execute('''SELECT fn, ln FROM authors''')
authnames = c.fetchall()
authorabbrs = dict()
for entry in authnames:
    authorabbr = (entry[0]+entry[1]).replace(" ", "").lower()
    authorabbrs[authorabbr] = entry
for entry in authorabbrs.iteritems(): #example item: (u'jiarongmiao', (u'Jiarong', u'Miao'))
    c.execute("INSERT INTO authorfndict (authorabbr, authorfn) VALUES (?, ?)",
              (entry[0], entry[1][0] +" " + entry[1][1]))
c.execute('''COMMIT''')
c.close()


---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
<ipython-input-29-d4559bdf1e5d> in <module>()
      8     c.execute("INSERT INTO authorfndict (authorabbr, authorfn) VALUES (?, ?)",
      9               (entry[0], entry[1][0] +" " + entry[1][1]))
---> 10 c.execute('''COMMIT''')
     11 c.close()

OperationalError: cannot commit - no transaction is active

In [ ]: