In [ ]:
import database
import importlib
import os
from datetime import datetime
from snowballing.operations import load_work, load_citations, reload
from snowballing.operations import work_by_varname, load_work_map_all_years
from snowballing.models import Place, DB
from snowballing.jupyter_utils import idisplay, work_button, new_button
from snowballing.selenium_scholar import SeleniumScholarQuerier
from snowballing.dbmanager import set_attribute
In [ ]:
# After calling reload(), show a button for every work that still has a
# "place1" attribute or whose `place` is not a Place instance.
# The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work))
    for _key, work in load_work_map_all_years()
    if hasattr(work, "place1") or not isinstance(work.place, Place)
]
len(result)
In [ ]:
# Places that have no "type" attribute at all.
[place for place in DB.places() if not hasattr(place, "type")]
In [ ]:
# Pair each place with its type whenever that type is not one of the
# recognized values listed below.
[
    (place, place.type)
    for place in DB.places()
    if place.type not in {
        "Archive",
        "Book",
        "Conference",
        "Journal",
        "Lang",
        "Magazine",
        "Other",
        "Patent",
        "Proceedings",
        "Sponsor",
        "Standard",
        "Tech Report",
        "Thesis",
        "Unpublished",
        "Web",
    }
]
In [ ]:
# After calling reload(), show a button for every work whose category is
# exactly "work". The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work))
    for _key, work in load_work_map_all_years()
    if work.category == "work"
]
len(result)
In [ ]:
# After calling reload(), show a button for every "unrelated" work that has
# no "due" attribute. The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work))
    for _key, work in load_work_map_all_years()
    if work.category == "unrelated"
    if not hasattr(work, "due")
]
len(result)
In [ ]:
# After calling reload(), show a button for every work that has no "name"
# attribute or whose name ends with a period.
# The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work))
    for _key, work in load_work_map_all_years()
    if not hasattr(work, "name") or work.name.endswith(".")
]
len(result)
In [ ]:
# After calling reload(), show a button for every work whose "authors"
# attribute is missing, or whose authors string ends with a period that is
# neither the " al." suffix nor a single letter preceded by a space or hyphen.
# The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(w))
    for _, w in load_work_map_all_years()
    if not hasattr(w, "authors")
    or (
        w.authors.endswith(".")
        and not w.authors.endswith(" al.")
        # Strings shorter than three characters (e.g. "A.") have no
        # authors[-3]; without this guard the next test raised IndexError.
        and len(w.authors) >= 3
        and w.authors[-3] not in "- "
        and w.authors[-2].isalpha()
    )
]
len(result)
In [ ]:
def check_file(arg):
    """Classify the state of the local file declared by a work.

    Args:
        arg: A ``(key, work)`` pair.

    Returns:
        A ``(key, work, reason)`` tuple, where ``reason`` is:

        * ``""`` when nothing needs attention: the work has a ``link``
          attribute, its category is ``"nofile"`` or ``"site"``, its place is
          named ``"Patent"``, or its file exists under ``files/``;
        * ``"inexistent"`` when ``file`` is a string but no such path exists
          under ``files/``;
        * ``"no attribute"`` when ``file`` is missing or ``None``;
        * ``"type error"`` when ``file`` is any other non-string value.
    """
    k, w = arg
    if hasattr(w, "link") or w.category in ("nofile", "site") or w.place.name == "Patent":
        return (k, w, "")
    # Default to None so a work without a `file` attribute is reported as
    # "no attribute" instead of raising AttributeError.
    filename = getattr(w, "file", None)
    if isinstance(filename, str):
        # Only the part before "#" is looked up on disk.
        if os.path.exists(os.path.join("files", filename.split("#")[0])):
            return (k, w, "")
        return (k, w, "inexistent")
    if filename is None:
        return (k, w, "no attribute")
    return (k, w, "type error")
# After calling reload(), run check_file over every work and show a button
# plus the reason string for each work that produced a non-empty reason.
# The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work), reason)
    for _key, work, reason in map(check_file, load_work_map_all_years())
    if reason
]
len(result)
In [ ]:
# File names (the part before "#") declared by works whose `file` is a string.
# getattr with a None default keeps works without a `file` attribute from
# raising AttributeError; they simply declare no file.
declared_files = {
    work.file.split("#")[0]
    for _key, work in load_work_map_all_years()
    if isinstance(getattr(work, "file", None), str)
}
# Entries of the files/ directory that no work declares, ignoring the
# 'alternatives' entry.
set(os.listdir('files')) - declared_files - {'alternatives'}
In [ ]:
from IPython.display import display
# After calling reload(), show a button for every "nofile" work whose
# "request" attribute (empty string when absent) is not "done", "wont" or
# "researchgate", and whose place is not named "Patent".
# The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work))
    for _key, work in load_work_map_all_years()
    if work.category == "nofile"
    and getattr(work, "request", "") not in ("done", "wont", "researchgate")
    and work.place.name != "Patent"
]
len(result)
In [ ]:
# Reset the querier, call reload(), and collect the keys of all works whose
# "scholar_ok" attribute is missing or falsy.
querier = None
reload()
worklist = sorted(
    (
        key
        for key, work in load_work_map_all_years()
        if not getattr(work, "scholar_ok", False)
    ),
    # Order by the integer parsed from characters [-5:-1] of the key, then by
    # the key itself (presumably keys end in a 4-digit year plus one letter;
    # verify against the database).
    key=lambda name: (int(name[-5:-1]), name),
)
len(worklist)
In [ ]:
# Create and configure a SeleniumScholarQuerier only when none exists yet and
# there is at least one key left in the worklist.
if querier is None and worklist:
    querier = SeleniumScholarQuerier()
    querier.apply_settings(10, 4)
In [ ]:
from snowballing.snowballing import ScholarUpdate
# Build a ScholarUpdate from the current querier and the pending worklist.
# NOTE(review): the effect of force=False is defined by
# snowballing.snowballing.ScholarUpdate and is not visible here — confirm.
supdate = ScholarUpdate(querier, worklist, force=False)
In [ ]:
# Leave `supdate` as the cell's last expression so the notebook displays it.
supdate
In [ ]:
# Temp
In [ ]:
# Distinct truthy values of "scholar_ok" found across all works.
{
    work.scholar_ok
    for _key, work in load_work_map_all_years()
    if getattr(work, "scholar_ok", False)
}
How to find the Google Scholar cluster ID of a work: http://webapps.stackexchange.com/questions/45333/how-to-create-a-citation-alert-for-a-paper-without-citation-in-google-scholar
In [ ]:
def set_snowball_date_button(key, date):
    """Return a "Set date" button bound to one work key and one date.

    Args:
        key: Passed as the first argument to ``set_attribute``.
        date: Object with ``year``, ``month`` and ``day`` attributes.

    Returns:
        Whatever ``new_button("Set date", callback)`` returns. The callback
        calls ``set_attribute(key, "snowball", text)``, where ``text`` is the
        source-code form ``datetime(<year>, <month>, <day>)`` of ``date``.
    """
    def on_click(_widget):
        # The attribute value is written as a datetime(...) expression string.
        text = "datetime({0.year}, {0.month}, {0.day})".format(date)
        set_attribute(key, "snowball", text)

    return new_button("Set date", on_click)
# Reference date that each "snowball" attribute is compared against.
date = datetime(2017, 3, 6)
reload()
# For every work in category "snowball" that has a "snowball" attribute
# different from `date`, show: its work button, a "Set date" button, and
# either a key/citation_file snippet (when the work has a "scholar"
# attribute) or the work's name.
result = [
    idisplay(
        work_button(work),
        set_snowball_date_button(key, date),
        (
            work.name
            if not hasattr(work, 'scholar')
            else '"{}", citation_file="{}"'.format(key, work.citation_file)
        ),
        label=False,
    )
    for key, work in load_work_map_all_years()
    if hasattr(work, 'snowball')
    if work.category == "snowball"
    if work.snowball != date
]
len(result)
In [ ]:
def set_tracking_button(key):
    """Return a "Set track" button bound to one work key.

    Args:
        key: Passed as the first argument to ``set_attribute``.

    Returns:
        Whatever ``new_button("Set track", callback)`` returns. The callback
        calls ``set_attribute(key, "tracking", "alert")``.
    """
    def on_click(_widget):
        set_attribute(key, "tracking", "alert")

    return new_button("Set track", on_click)
# After calling reload(), go through every "snowball" work whose "tracking"
# attribute (empty string when absent) is neither "alert" nor "impossible".
# For each, show: its work button, a "Set track" button, a Google Scholar
# cluster URL (or the work's name when it has no "cluster_id"), and its
# "scholar" attribute (empty string when absent).
reload()
result = [
    idisplay(
        work_button(work),
        set_tracking_button(key),
        (
            work.name
            if not hasattr(work, 'cluster_id')
            else "https://scholar.google.com/scholar?cluster={}&hl=en&as_sdt=2005&sciodt=0,5".format(work.cluster_id)
        ),
        getattr(work, 'scholar', ''),
        label=False,
    )
    for key, work in load_work_map_all_years()
    if getattr(work, "tracking", "") not in ("alert", "impossible")
    if work.category == "snowball"
]
len(result)
In [ ]:
# Number of distinct keys whose work is in the "snowball" category.
len({
    key
    for key, work in load_work_map_all_years()
    if work.category == "snowball"
})
In [ ]:
# After calling reload(), show a button for every "snowball" work whose place
# is named "Tech Report" or "Unpublished".
# The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work))
    for _key, work in load_work_map_all_years()
    if work.category == "snowball"
    and work.place.name in ("Tech Report", "Unpublished")
]
len(result)
In [ ]:
def check_dash(ele):
    """Collect fix-up buttons for the "pp", "volume" and "number" attributes.

    An attribute gets a button when its value is an ``int``, or when it is a
    string whose number of "-" characters is neither 0 nor 2. Missing
    attributes and other value types (e.g. ``None``) are skipped; calling
    ``.count`` on them used to raise AttributeError.

    Args:
        ele: A ``(key, work)`` pair.

    Returns:
        A ``(key, work, reasons)`` tuple. ``reasons`` is a list of buttons in
        the order pp, volume, number. It is always empty for works whose place
        is named "Thesis" or "Tech Report".
    """
    k, w = ele

    def create_button(attr):
        def click(_):
            # Store the attribute as a string with every "-" doubled.
            set_attribute(k, attr, str(getattr(w, attr)).replace("-", "--"))
        return new_button("Set " + attr, click)

    reasons = []
    if w.place.name in ("Thesis", "Tech Report"):
        return k, w, reasons
    for attr in ("pp", "volume", "number"):
        value = getattr(w, attr, "")
        if isinstance(value, int):
            reasons.append(create_button(attr))
        elif isinstance(value, str) and value.count("-") not in (0, 2):
            reasons.append(create_button(attr))
    return k, w, reasons
# After calling reload(), run check_dash over every work and show the work
# button followed by its fix-up buttons for each work that has any.
# The cell's value is the number of works shown.
reload()
result = [
    idisplay(work_button(work), *buttons)
    for _key, work, buttons in map(check_dash, load_work_map_all_years())
    if buttons
]
len(result)
In [ ]: