In [ ]:
import sqlite3
import sys
from glob import glob
In [ ]:
conn = sqlite3.connect('/users/mikespears/Desktop/mydb.db') #file-based db
#conn = sqlite3.connect(':memory:') # in-memory db
c = conn.cursor()
In [ ]:
c.execute('''DROP TABLE IF EXISTS uoftcoders''')
c.execute('''CREATE TABLE uoftcoders (date, title, attendance)''')
conn.commit()
In [ ]:
c.execute('''INSERT INTO uoftcoders VALUES (?, ?, ?)''', ["March 7, 2019", "Full Text Search with SQLITE", 12])
c.execute('''INSERT INTO uoftcoders VALUES (?, ?, ?)''', ["March 14, 2019", "Software defined radio", 15])
conn.commit()
In [ ]:
c.execute('''SELECT * FROM uoftcoders WHERE date = ?''', ["March 7, 2019"])
print(c.fetchall())
In [ ]:
c.execute('''SELECT * FROM uoftcoders WHERE attendance > ?''', [10])
print(c.fetchall())
In [ ]:
#basic pattern matching with LIKE
c.execute('''SELECT * FROM uoftcoders WHERE date LIKE ?''', ["%march_%"])
print(c.fetchall())
In [ ]:
c.execute('''DROP TABLE IF EXISTS files''')
c.execute('''CREATE VIRTUAL TABLE files USING fts4(filename, text)''')
#c.execute('''CREATE VIRTUAL TABLE files USING fts4(filename, text, tokenize=porter)''')
#c.execute('''CREATE VIRTUAL TABLE files USING fts4(filename, text, tokenize=unicode61)''')
conn.commit()
In [ ]:
# Insert all the files in a directory (that contains only plaintext files)
textFileDir = "/users/mikespears/downloads/science/"
allFiles = glob(textFileDir + "*.*")
for path in allFiles:
with open(path) as file:
try:
text = file.read()
c.execute('''INSERT INTO files(filename, text) VALUES(?, ?)''', [path, text])
#print(path)
except:
e = sys.exc_info()[0]
print("Read or insert error: %s (in %s)" % (e, path))
continue
conn.commit()
In [ ]:
c.execute('''SELECT COUNT(*) from files''')
print(c.fetchall())
In [ ]:
#basic token search text column
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['relativity']):
print(row)
In [ ]:
#implicit 'AND'
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['special relativity']):
print(row)
In [ ]:
# OR
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['special OR relativity']):
print(row)
In [ ]:
#search for a phrase
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['"special relativity"']):
print(row)
In [ ]:
#exclude a token
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['relativity -special']):
print(row)
In [ ]:
#NEAR operator
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ["relativity NEAR special"]):
print(row)
In [ ]:
#specify a column in the search term
for row in c.execute('''SELECT filename FROM FILES WHERE text MATCH ?''', ['filename:faq']):
print(row)
In [ ]:
for row in c.execute('''SELECT snippet(files, ">>>>", "<<<<", "..."), filename FROM FILES WHERE text MATCH ?''', ['relativity']):
print ("\n\n--------%s\n\n" % row[1])
print(row[0])
In [ ]:
#get offset
for row in c.execute('''SELECT offsets(FILES) FROM FILES WHERE text MATCH ?''', ['filename:faq']):
print(row)
In [ ]:
#get matchinfo
for row in c.execute('''SELECT matchinfo(FILES) FROM FILES WHERE files MATCH ?''', ['filename:faq']):
print(row)
In [ ]:
#direct access to full-text index
c.execute('''DROP TABLE IF EXISTS files_terms''')
c.execute('''CREATE VIRTUAL TABLE files_terms USING fts4aux(files)''')
conn.commit()
for row in c.execute('''SELECT * from files_terms ORDER BY occurrences DESC'''):
print(row)
In [ ]:
conn.close()