Import sqlite3 library


In [ ]:
import sqlite3
import sys
from glob import glob

Connect to a database file or create an in-memory database


In [ ]:
conn = sqlite3.connect('/users/mikespears/Desktop/mydb.db') #file-based db
#conn = sqlite3.connect(':memory:') # in-memory db
c = conn.cursor()

SQLite basics: Create a regular (non fts) table, insert and search for values


In [ ]:
c.execute('''DROP TABLE IF EXISTS uoftcoders''') 
c.execute('''CREATE TABLE uoftcoders (date, title, attendance)''')
conn.commit()

In [ ]:
c.execute('''INSERT INTO uoftcoders VALUES (?, ?, ?)''', ["March 7, 2019", "Full Text Search with SQLITE", 12])
c.execute('''INSERT INTO uoftcoders VALUES (?, ?, ?)''', ["March 14, 2019", "Software defined radio", 15])
conn.commit()

In [ ]:
c.execute('''SELECT * FROM uoftcoders WHERE date = ?''', ["March 7, 2019"])
print(c.fetchall())

In [ ]:
c.execute('''SELECT * FROM uoftcoders WHERE attendance > ?''', [10])
print(c.fetchall())

In [ ]:
#basic pattern matching with LIKE
c.execute('''SELECT * FROM uoftcoders WHERE date LIKE ?''', ["%march_%"])
print(c.fetchall())

Full-Text-Search: Create a table, insert data and search for matches


In [ ]:
c.execute('''DROP TABLE IF EXISTS files''')
c.execute('''CREATE VIRTUAL TABLE files USING fts4(filename, text)''')
#c.execute('''CREATE VIRTUAL TABLE files USING fts4(filename, text, tokenize=porter)''')
#c.execute('''CREATE VIRTUAL TABLE files USING fts4(filename, text, tokenize=unicode61)''')
conn.commit()

In [ ]:
# Insert all the files in a directory (that contains only plaintext files)
textFileDir = "/users/mikespears/downloads/science/"
allFiles = glob(textFileDir + "*.*")

for path in allFiles:
    with open(path) as file: 
        try:
            text = file.read()
            c.execute('''INSERT INTO files(filename, text) VALUES(?, ?)''', [path, text])
            #print(path)
        except:
            e = sys.exc_info()[0]
            print("Read or insert error: %s (in %s)" % (e, path))
            continue
    
    
conn.commit()

In [ ]:
c.execute('''SELECT COUNT(*) from files''')
print(c.fetchall())

In [ ]:
#basic token search text column
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['relativity']):
    print(row)

In [ ]:
#implicit 'AND'
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['special relativity']):
    print(row)

In [ ]:
# OR
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['special OR relativity']):
    print(row)

In [ ]:
#search for a phrase
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['"special relativity"']):
    print(row)

In [ ]:
#exclude a token
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['relativity -special']):
    print(row)

In [ ]:
#NEAR operator
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ["relativity NEAR special"]):
    print(row)

In [ ]:
#specify a column in the search term
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['filename:faq']):
    print(row)

In [ ]:
for row in c.execute('''SELECT snippet(files, ">>>>", "<<<<", "..."), filename FROM FILES WHERE text MATCH ?''', ['relativity']):
    print ("\n\n--------%s\n\n" % row[1])
    print(row[0])

In [ ]:
#get offset
for row in c.execute('''SELECT offsets(FILES) FROM FILES WHERE text MATCH ?''', ['filename:faq']):
    print(row)

In [ ]:
#get matchinfo
for row in c.execute('''SELECT matchinfo(FILES) FROM FILES WHERE files MATCH ?''', ['filename:faq']):
    print(row)

In [ ]:
#direct access to full-text index
c.execute('''DROP TABLE IF EXISTS files_terms''')
c.execute('''CREATE VIRTUAL TABLE files_terms USING fts4aux(files)''')
conn.commit()
for row in c.execute('''SELECT * from files_terms ORDER BY occurrences DESC'''):
    print(row)

In [ ]:
conn.close()