In [4]:
import operator
import sys
from haystack.query import SearchQuerySet, EmptySearchQuerySet, SQ
from haystack.inputs import Raw, AutoQuery, Exact
# Sanity check: count the entries of one category in the search index and
# in the database so the two numbers can be compared by eye.
category = 'CO'

# Count according to the haystack index.
results = SearchQuerySet().models(Entry).filter(category=category)
# Alternative query kept for reference: everything except category 'TO'.
#results = SearchQuerySet().models(Entry).exclude(category='TO')
indexed_total = len(results)
print(indexed_total)

# Count according to the database; as the last expression of the cell the
# value is displayed as the cell output.
len(Entry.objects.filter(category=category))
Out[4]:
In [ ]:
import re
# Sample description used to exercise the three reference patterns below:
#   [n, m, ...]      numeric references (diagrams and sets)
#   [(REF) title]    primary sources
#   [<REF>title]     secondary literature
# The trailing backslashes continue the string literal, so the value contains
# no newlines.
des = '[(AGRI_1528) Bla. Bli. I, 33] The numbers [<WARD_2013>Wardhaugh 2013,96-97][6 ,8, 9, 12], the second tetraktys [22],\
are [10000] arranged [10003,10004] on a square [33,144]. This representation is a graph of four nodes in which all pairs of nodes\
are connected (kappa-4). The connections are labelled by the corresponding ratios. The natural representation of this configuration\
in three dimensions would be a tetrahedron. [(BOET_10th) Inst. Mus. I, 10] and here some already parsed [FOOBAR_1337][]'
def repl(matchobj):
    """Debug callback for ``re.sub``: print the complete matched text.

    Args:
        matchobj: the match object handed over by ``re.sub``.

    Returns:
        None. ``re.sub`` treats a ``None`` result as an empty replacement.
    """
    matched_text = matchobj.group(0)
    print(matched_text)
def replPrimSource(matchobj):
    """Debug callback for primary-source references of the form ``[(REF) title]``.

    Prints the whole match (via ``repl``), then the text between the first
    ``(`` and the first ``)``, then the remainder with surrounding blanks and
    the closing ``]`` stripped.

    Returns:
        None, so ``re.sub`` replaces the match with an empty string.
        NOTE(review): this removes the reference from the text; confirm that
        is intended before using the substituted result.
    """
    repl(matchobj)
    s = matchobj.group(0)
    ref_start = s.find("(") + 1
    ref_end = s.find(")")
    print(s[ref_start:ref_end])
    title = s[ref_end + 1:].strip(' ').strip(']')
    print(title)
def replSecSource(matchobj):
    """Debug callback for secondary-literature references of the form ``[<REF>title]``.

    Prints the whole match (via ``repl``), then the text between the first
    ``<`` and the first ``>``, then the remainder with surrounding blanks and
    the closing ``]`` stripped.

    Returns:
        None, so ``re.sub`` replaces the match with an empty string.
        NOTE(review): this removes the reference from the text; confirm that
        is intended before using the substituted result.
    """
    repl(matchobj)
    s = matchobj.group(0)
    ref_start = s.find("<") + 1
    ref_end = s.find(">")
    print(s[ref_start:ref_end])
    title = s[ref_end + 1:].strip(' ').strip(']')
    print(title)
# Diagrams and sets: numeric references such as [22] or [6 ,8, 9, 12].
# The substituted text is discarded here; only the printed matches matter.
print ("######################### Diagramme und Sets #########################")
numeric_refs = re.compile("\[(\d+)(\s*,?\s*\d+)*\]")
numeric_refs.sub(repl, des)

# Primary sources: [(REF) title]
print ("######################### Primäre Quellen #########################")
primary_sources = re.compile("\[\\((.*?)\\)(.*?)\]")
des = primary_sources.sub(replPrimSource, des)

# Secondary literature: [<REF>title]
print ("######################### Sekundär Literatur #########################")
secondary_sources = re.compile("\[\\<(.*?)\\>(.*?)\]")
des = secondary_sources.sub(replSecSource, des)
In [ ]:
# Give every entry that has no doc_id yet a consecutive number, starting at
# 5000 and following the alphabetical order of the file names.
next_doc_id = 5000
unnumbered = Entry.objects.filter(doc_id=None)
for entry in sorted(unnumbered, key=lambda item: item.filename):
    entry.doc_id = next_doc_id
    entry.save()
    next_doc_id += 1
In [ ]:
import sys
import re
# Sample description containing numeric reference lists such as [22,23].
des = '[22,23] Arithmetic triangle in the style of Boethius [1, 13] representing [133,134,135,77] numbers of the format 2j\xc2\xb43k. The diagram is used to explain the system of tone durations of binary and ternary division. Fludd has copied it from Torkesey [16,17]. The diagram occurs also on the Templum musicae (with two mistakes). As in Torkesey\xe2\x80\x99s diagram the diagonal direction top right bottom left is labelled Sesquitertiae (3:4), an essential mathematical property of the diagram [1,62,71] asdf.'
print(des)

# All referenced ids, in order of appearance (duplicates are kept).
ref_id = []


def repl(matchobj):
    """Turn one ``[a, b, c]`` reference list into ``[[a][],[b][],[c][]]``.

    Every number is also appended to the module-level ``ref_id`` list.

    Args:
        matchobj: match of the reference pattern; group 1 is the text between
            the square brackets.

    Returns:
        The replacement text. If any comma-separated token is not a plain
        number (e.g. ``[1, *]`` or ``[1,,2]``) the match is returned unchanged
        and ``ref_id`` is left untouched instead of failing half-way in
        ``int()``.
    """
    # Strip blanks so "[1, 13]" yields "[13][]" rather than "[ 13][]".
    tokens = [token.strip() for token in matchobj.group(1).split(',')]
    if not all(token.isdigit() for token in tokens):
        return matchobj.group(0)
    ref_id.extend(int(token) for token in tokens)
    return "[" + ','.join('[' + token + '][]' for token in tokens) + "]"


des = re.sub(r"\[([\d, *]+)\]", repl, des)
print(des)
print(ref_id)
In [ ]:
import operator
import sys
from haystack.query import SearchQuerySet, EmptySearchQuerySet, SQ
from haystack.inputs import Raw, AutoQuery, Exact
tags = ['senario', 'consonance']
#Keyword.objects.filter(name__in=tags)
#Entry.objects.filter(tags__name__in=tags).distinct()
entries = Entry.objects.all()

# Start from every indexed entry and narrow down one selected tag at a time,
# so the result must carry all selected tags.
results = SearchQuerySet().models(Entry).all()
print (results.count())
#print (results.models())
selected_tags = 'senario, consonance'
selected_tags = [t.strip() for t in selected_tags.split(',')]
for tag in selected_tags:
    results = results.filter(SQ(tags=tag))

# Primary keys of every tag that occurs on the remaining entries.
possible_tags = set(
    t.pk
    for r in results.all()
    for t in r.object.tags.all()
)
print (possible_tags)

# Describe each remaining tag and flag the ones whose slug was selected.
tags = Keyword.objects.filter(pk__in=possible_tags).order_by('name')
tag_objects = [
    {"name": t.name, "slug": t.slug, "selected": t.slug in selected_tags}
    for t in tags
]
print (tag_objects)
In [ ]:
# --- Database: filtering by tag name through the ORM ---
print('############# DB ##############')
e1 = Entry.objects.filter(tags__name__in=['consonance'])
e2 = Entry.objects.filter(tags__name__in=['senario'])
e3 = Entry.objects.filter(tags__name__in=['senario', 'consonance'])
e4 = Entry.objects.filter(tags__name__in=['senario', 'consonance']).distinct()
for db_result in (e1, e2, e3, e4):
    print(db_result.count())

# --- Search index: the same questions asked through haystack ---
print('########### SQ ##############')
indexed_entries = SearchQuerySet().models(Entry)
sqs1 = indexed_entries.all().filter(SQ(tags__exact='consonance'))
#sqs11= SearchQuerySet().models(Entry).filter(tags=Exact('consonance'))
sqs2 = indexed_entries.all().filter(SQ(tags='senario'))
sqs3 = indexed_entries.all().filter(SQ(tags='consonance') | SQ(tags='senario'))
sqs4 = indexed_entries.all().filter(SQ(tags='consonance') & SQ(tags='senario'))
# Chained filters, to compare with the explicit AND above.
sqs5 = indexed_entries.all().filter(SQ(tags='consonance'))
sqs5 = sqs5.filter(SQ(tags='senario'))
print(sqs1.count())
#print sqs11.count()
print(sqs2.count())
print("OR: %s" % sqs3.count())
print("AND: %s" % sqs4.count())
print("AND: %s" % sqs5.count())
print(list(sqs4.values_list('pk', flat=True)))

# --- Combined: take the primary keys from the index, filter tags in the DB ---
print('########### DB + SQ combined ##############')
tags = 'senario, consonance'
results = SearchQuerySet().models(Entry).all()
qs = Entry.objects.filter(pk__in=results.values_list('pk', flat=True))
#qs = qs.filter(tags__name__in=[x.strip() for x in tags.split(',')]).distinct()
tags = [t.strip() for t in tags.split(',')]
# One filter per tag, so an entry must carry every tag.
for tag in tags:
    qs = qs.filter(tags__name__in=[tag]).distinct()
print(list(qs.values_list('pk', flat=True)))

# Slugs of all tags found on the matching entries, de-duplicated and sorted.
ftags = []
for e in qs:
    ftags.extend(e.tags.slugs())
#ftags = sorted(ftags, key=str.lower)
ftags = sorted(set(ftags))
print(ftags)
print(qs.count())
In [ ]:
# Example of the structure used for search parameters: each key maps to a
# list of values.
search_items = dict(
    Term=['fludd', 'higgs'],
    Type=['author'],
)
# Second search term; shown as the cell output.
search_items['Term'][1]
In [ ]:
from haystack.query import SearchQuerySet, EmptySearchQuerySet
order_by = 'author'
#print ([e.pk for e in Entry.objects.all().order_by(order_by)[:20]])
#print ([int(e.pk) for e in SearchQuerySet().all().order_by(order_by)[:20]])

# Count the entries carrying one tag, first in the database and then in the
# search index.
tag = "chromatic scale"
db_count = Entry.objects.filter(tags__name__in=[tag]).count()
print(db_count)
print('######################')
# NOTE(review): the ORM-style lookup `tags__name__in` is passed to haystack
# unchanged -- confirm the index interprets it as intended.
index_count = SearchQuerySet().models(Entry).filter(tags__name__in=[tag]).count()
print(index_count)
In [ ]:
# Look up the author whose last name is exactly 'Newton' and show the related
# `museums_entries` as the cell output.
authors = Author.objects.all()
newton = authors.get(last_name='Newton')
newton.museums_entries.all()
In [ ]:
from django.utils.text import slugify
# Recompute the slug of every collection from its title (unicode characters
# are kept), print it and store it.
cols = Collection.objects.all()
for collection in cols:
    collection.slug = slugify(collection.title, allow_unicode=True)
    print(collection.slug)
    collection.save()
In [ ]:
import json
import sys
import os
import requests
import shutil
import operator
from collections import OrderedDict
from requests.auth import HTTPBasicAuth
import tempfile
from django.core.files.base import ContentFile
from django.core.files import File
# Connection settings for the Madek API and the collection to import.
# NOTE(review): `base` is plain http, so basic-auth credentials travel
# unencrypted -- switch to https if the server offers it.
base = 'http://medienarchiv.zhdk.ch'
collection_id = '73114b11-08de-42a9-ba36-864d9c1b5641'

# Credentials must not live in the notebook: they are taken from the
# environment (MADEK_USER / MADEK_PASSWORD); a missing password is asked for
# interactively. The password that used to be hardcoded here has been
# published with the notebook and should be rotated.
import base64
import getpass

user = os.environ.get('MADEK_USER', 'sound-colour-space')
password = os.environ.get('MADEK_PASSWORD') or getpass.getpass('Madek password for %s: ' % user)

# Raw header line equivalent to `auth`, derived instead of hardcoded.
auth_header = 'Authorization: Basic ' + base64.b64encode(
    ('%s:%s' % (user, password)).encode('utf-8')).decode('ascii')
auth = HTTPBasicAuth(user, password)
#if r.status_code == 200
def get_authors(relations):
    """Fetch every related author from the API and mirror it into ``Author``.

    Args:
        relations: mapping whose values are dicts with an ``href`` entry,
            relative to ``base``.

    Returns:
        List of ``Author`` objects, one per relation; authors that already
        exist (matched on ``remote_uuid``) are updated and included too.
    """
    copied_fields = ('first_name', 'last_name', 'pseudonym',
                     'date_of_birth', 'date_of_death')
    author_objs = []
    for link in relations.values():
        remote = requests.get(base + link.get('href'), auth=auth).json()
        defaults = dict((name, remote.get(name)) for name in copied_fields)
        # create or update the author
        obj, _created = Author.objects.update_or_create(
            remote_uuid=remote.get('id'), defaults=defaults)
        author_objs.append(obj)
    return author_objs
def get_licenses(relations):
    """Fetch every related license from the API and mirror it into ``License``.

    Args:
        relations: mapping whose values are dicts with an ``href`` entry,
            relative to ``base``.

    Returns:
        List of ``License`` objects, one per relation; licenses that already
        exist (matched on ``remote_uuid``) are updated and included too.
    """
    license_objs = []
    for link in relations.values():
        remote = requests.get(base + link.get('href'), auth=auth).json()
        defaults = {'remote_uuid': remote.get('id')}
        for name in ('label', 'url', 'usage'):
            defaults[name] = remote.get(name)
        # create or update the license
        obj, _created = License.objects.update_or_create(
            remote_uuid=remote.get('id'), defaults=defaults)
        license_objs.append(obj)
    return license_objs
def get_entry(key, href):
    """Import one media entry from the API into the local ``Entry`` model.

    Fetches the entry and its meta data, creates or updates the ``Entry``
    matched on ``remote_uuid``, downloads the media file into the entry's
    image field and finally sets the licenses and authors.

    Args:
        key: relation key of the entry in the listing; not used for the import.
        href: API path of the entry, relative to ``base``.

    Returns:
        The created or updated ``Entry``.
    """
    # Meta keys whose plain ``value`` is copied to the named entry field.
    value_fields = {
        'madek_core:title': 'title',
        'madek_core:subtitle': 'subtitle',
        'madek_core:portrayed_object_date': 'portrayed_object_date',
        'copyright:source': 'source',
        'madek_core:copyright_notice': 'copyright_notice',
    }

    entry = requests.get(base + href, auth=auth).json()
    print('Madek: %s' % entry.get('id'))

    meta_data = requests.get(base + href + '/meta-data/', auth=auth).json()

    new_entry = {}      # field values for update_or_create
    author_objs = []
    license_objs = []
    for m in meta_data.get('meta-data'):
        meta_key = m.get('meta_key_id')
        meta_url = base + '/api/meta-data/' + m.get('id')
        if meta_key in value_fields:
            datum = requests.get(meta_url, auth=auth).json()
            new_entry[value_fields[meta_key]] = datum.get('value')
        elif meta_key == 'madek_core:authors':
            datum = requests.get(meta_url, auth=auth).json()
            author_objs = get_authors(datum.get('_json-roa')['collection']['relations'])
        elif meta_key == 'copyright:license':
            datum = requests.get(meta_url, auth=auth).json()
            license_objs = get_licenses(datum.get('_json-roa')['collection']['relations'])
        # Any other key (e.g. 'copyright:copyright_usage') is not imported and
        # is not fetched either.

    # create or update the entry
    obj, _created = Entry.objects.update_or_create(
        remote_uuid=entry.get('id'), defaults=new_entry)

    # download the image
    image_href = entry['_json-roa']['relations']['media-file'].get('href')
    image = requests.get(base + image_href, auth=auth).json()
    image_data_request = requests.get(
        base + image['_json-roa']['relations']['data-stream']['href'],
        stream=True, auth=auth)
    if image_data_request.status_code == 200:
        image_data_request.raw.decode_content = True
        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            # Write through the handle tempfile gave us (no second, shadowing
            # open) and close it before the file is read back.
            with tmp:
                shutil.copyfileobj(image_data_request.raw, tmp)
            # Image data is binary: read it back in 'rb', not text mode.
            with open(tmp.name, 'rb') as image_file:
                print(image.get('filename'))
                obj.image.save(image.get('filename'), File(image_file), save=True)
        finally:
            # Remove the temporary file even when saving fails.
            os.unlink(tmp.name)

    # set licenses, authors
    print("Entry: %s" % obj)
    obj.license.set(license_objs)
    obj.author.set(author_objs)
    return obj
def get_collection(key, href):
    """Import one collection, its authors and all of its media entries.

    Creates or updates the ``Collection`` matched on ``remote_uuid``, sets its
    authors, then walks the paginated media-entry listing of the collection,
    imports every entry with ``get_entry`` and sets them on the collection.

    Args:
        key: relation key of the collection in the listing; only printed.
        href: API path of the collection, relative to ``base``.
    """
    collection = requests.get(base + href, auth=auth).json()
    print('Madek: [%s] %s' % (key, collection.get('id')))

    meta_data = requests.get(base + href + '/meta-data/', auth=auth).json()

    # Meta keys whose plain ``value`` is copied to the named collection field.
    value_fields = {
        'madek_core:title': 'title',
        'madek_core:subtitle': 'subtitle',
        'madek_core:description': 'description',
    }
    new_collection = {}
    author_objs = []
    for m in meta_data.get('meta-data'):
        meta_key = m.get('meta_key_id')
        if meta_key not in value_fields and meta_key != 'madek_core:authors':
            continue
        datum = requests.get(base + '/api/meta-data/' + m.get('id'), auth=auth).json()
        if meta_key in value_fields:
            new_collection[value_fields[meta_key]] = datum.get('value')
        else:
            author_objs = get_authors(datum.get('_json-roa')['collection']['relations'])
    for a in author_objs:
        print("Author: %s" % a.get_full_name())
    #print new_collection

    # create or update the collection
    obj, _created = Collection.objects.update_or_create(
        remote_uuid=collection.get('id'), defaults=new_collection)
    obj.author.set(author_objs)

    # Import the entries page by page until the listing has no 'next' link.
    entry_objs = []
    url = base + '/api/media-entries/?collection_id=' + collection.get('id')
    while True:
        page = requests.get(url, auth=auth).json()['_json-roa']['collection']
        for entry_key, link in page.get('relations').items():
            entry_objs.append(get_entry(entry_key, link.get('href')))
        if page.get('next') is None:
            break
        url = base + page['next']['href']
        print("page at: %s" % url)

    obj.entry.set(entry_objs)
    print("Collection: %s" % (obj))
# Walk the paginated listing of sub-collections and import each of them.
#url = base + '/api/media-entries/?collection_id=' + collection_id
url = base + '/api/collections/?collection_id=' + collection_id
while True:
    r = requests.get(url, auth=auth)
    collection_data = r.json()['_json-roa']['collection']
    for key, value in collection_data.get('relations').items():
        #get_entry(key, value.get('href'))
        get_collection(key, value.get('href'))
        print('########################')
        # sys.exit(0) # exit after first item (debugging)
    # The listing is finished once a page has no 'next' link.
    if collection_data.get('next') is None:
        break
    url = base + collection_data['next']['href']
    print("page at: %s" % url)
print('done')
In [ ]:
# Print every doc_id from 1 to 99 for which no entry exists.
for doc_id in range(1, 100):
    try:
        Entry.objects.get(doc_id=doc_id)
    except Entry.DoesNotExist:
        print(doc_id)
In [ ]:
import sys
import collections
import re
def _find_entry(doc_id):
    """Return the entry with the given doc_id, or None when there is none."""
    try:
        return Entry.objects.get(doc_id=doc_id)
    except Entry.DoesNotExist:
        return None


def _link_references(text, resolve):
    """Rewrite bare ``[<doc_id>]`` references as ``[[<doc_id>]](<doc_id> "<title>")``.

    Every occurrence is handled exactly once in a single pass. The previous
    ``str.replace`` approach replaced all occurrences per match, so a doc_id
    mentioned twice (or a second run of the cell) nested links inside links.

    Args:
        text: the description text.
        resolve: callable mapping a doc_id (int) to an object with a ``title``
            attribute, or to None when the id is unknown.

    Returns:
        ``(new_text, refs)`` where ``refs`` lists the resolved objects in order
        of appearance. Unknown ids are left untouched.
    """
    refs = []

    def _replace(match):
        number = int(match.group(1))
        ref = resolve(number)
        if ref is None:
            return match.group(0)
        refs.append(ref)
        return '[' + match.group(0) + '](' + str(number) + ' "' + ref.title + '")'

    # The lookarounds skip references already wrapped as [[n]](n "title"),
    # which makes re-running the cell safe.
    new_text = re.sub(r'(?<!\[)\[(\d+)\](?!\])', _replace, text)
    return new_text, refs


entries = Entry.objects.all()
#refs = {}
for e in entries:
    new_description, related_refs = _link_references(e.description or u'', _find_entry)
    if not related_refs:
        continue
    # update description text: [doc_id](doc_id "title")
    e.description = new_description
    # add related objects, then save once per entry
    e.related.add(*related_refs)
    e.save()
    #refs[e.doc_id] = matches

# Earlier variant, kept for reference (it was a bare string literal before,
# which the notebook echoed as the cell output):
#
# od = collections.OrderedDict(sorted(refs.items()))
# for id, refs in od.items():
#     # get related objects
#     related_objs = Entry.objects.filter(doc_id__in=refs)
#     e = Entry.objects.get(doc_id=id)
#     print e, '==>', related_objs
#     # set them (empty sets will clear related objs)
#     e.related.set(related_objs)
In [ ]:
import csv
import sys
# Python 2 only: make utf-8 the implicit encoding so the csv module can write
# unicode field values.
reload(sys)
sys.setdefaultencoding('utf-8')

# Export one row per entry: title, description, image, image size, author
# name and date.
# NOTE(review): the output path is an absolute path on one machine.
entries = Entry.objects.all()
with open('/home/stahl/icst/soundcolourspace/cards/cards.csv', 'wb') as csvfile:
    cardswriter = csv.writer(csvfile, delimiter='#', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    #for e in entries[0:10]:
    for e in entries:
        # Compute the author name afresh for every entry. Before, an entry
        # without authors (or with two empty name parts) silently reused the
        # previous entry's author, or failed on the very first row.
        first_authors = list(e.author.all()[:1])  # one query instead of three
        if first_authors:
            name_parts = [first_authors[0].first_name, first_authors[0].last_name]
            author = " ".join(part for part in name_parts if part)
        else:
            author = ""
        cardswriter.writerow([e.title, e.description, e.image, e.image.width, e.image.height, author, e.portrayed_object_date])
#with open('cards.csv') as csvfile:
#    reader = csv.reader(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
#    for row in reader:
#        print (row[0])
In [ ]:
from datetime import datetime
from django.utils import timezone

# Inspect one arbitrary entry: touch the image dimensions and print its
# date accuracy.
entries = Entry.objects.all()
e = entries[33]
e.image.width
e.image.height
print(e.date_accuracy)

# Check that a date far in the past can be parsed and made time-zone aware.
current_tz = timezone.get_current_timezone()
print(current_tz)
date_object = datetime.strptime('Jun 1 1436 1:33PM', '%b %d %Y %I:%M%p')
print(date_object)
date_object = timezone.make_aware(date_object, timezone.get_current_timezone())
print(date_object)
In [ ]:
import re
from datetime import datetime
from django.utils import timezone
def _year_to_aware_datetime(year_text):
    """Parse a four-digit year string into an aware datetime in the current time zone.

    ``strptime`` with only ``%Y`` yields 1 January, 00:00 of that year.
    """
    naive = datetime.strptime(year_text, '%Y')
    return timezone.make_aware(naive, timezone.get_current_timezone())


# Derive `date` and `date_accuracy` from the free-text `portrayed_object_date`.
entries = Entry.objects.all()
total = 0
for e in entries:
    # u'%s' instead of str(): str() raises on non-ASCII unicode under Python 2.
    date_text = u'%s' % (e.portrayed_object_date,)
    updated = False

    # match "c. XXXX" or "ca. XXXX" in portrayed_object_date
    circa = re.findall(r'c\w?.?\s?(\d{4})', date_text)
    if circa:
        date = _year_to_aware_datetime(circa[0])
        print(u"{} \t {} \t\t\t {} \t".format(e.portrayed_object_date, e.date, date))
        e.date = date
        e.date_accuracy = 3
        updated = True

    # match "XXth" (century); when both patterns match, this one wins, as before
    century = re.findall(r'(\d{2}th)', date_text)
    if century:
        # the 17th century starts in 1600
        year = (int(century[0].strip('th')) - 1) * 100
        date = _year_to_aware_datetime('{:04}'.format(year))
        print(u"{} \t {} \t\t\t {} \t".format(e.portrayed_object_date, e.date, date))
        e.date = date
        e.date_accuracy = 5
        updated = True

    if updated:
        # Save once per entry and count every updated entry; before, only the
        # "circa" matches were counted.
        e.save()
        total += 1
print("updated total: %s" % (total))
In [ ]:
import csv
import os
import re
# All entries; this name is rebound inside the import loop further down.
entries = Entry.objects.all()
#print( os.path.basename (entries[0].image.name) )
# CSV file with the annotations to import.
# NOTE(review): absolute path on one machine -- adjust before running elsewhere.
path = '/home/stahl/icst/soundcolourspace/annotations.csv'
class MyDialect(csv.Dialect):
    """CSV dialect of the annotations file: ';'-separated, double-quoted fields."""

    # field and record separators
    delimiter = ';'
    lineterminator = '\n'
    # quoting rules
    quotechar = '"'
    quoting = csv.QUOTE_ALL
    # parsing behaviour: ignore blanks after a delimiter, raise on malformed input
    skipinitialspace = True
    strict = True
# For every CSV row, look up the entry whose image path contains the first
# column and store the second column as its description. Rows that match no
# entry or several entries are only reported.
with open(path, 'rb') as csvfile:
    reader = csv.reader(csvfile, MyDialect())
    for row in reader:
        #print ("#%s: %s \n\n%s") % (row[2], row[0], row[1])
        entries = Entry.objects.filter(image__icontains=row[0])
        match_count = len(entries)  # evaluates and caches the queryset
        if match_count == 0:
            print("no entries for %s" % row[0])
        elif match_count > 1:
            print("multiple entries for %s" % row[0])
        else:
            entry = entries[0]
            # Python 2: decode the raw bytes, dropping undecodable characters.
            entry.description = unicode(row[1], errors='ignore')
            entry.save()
        #print ('####################################################################################')
In [ ]:
import json
import sys
import os
import requests
import shutil
import operator
from collections import OrderedDict
from requests.auth import HTTPBasicAuth
import tempfile
from django.core.files.base import ContentFile
from django.core.files import File
# Connection settings for the Madek API and the collection to import.
# NOTE(review): `base` is plain http, so basic-auth credentials travel
# unencrypted -- switch to https if the server offers it.
base = 'http://medienarchiv.zhdk.ch'
collection_id = '73114b11-08de-42a9-ba36-864d9c1b5641'

# Credentials must not live in the notebook: they are taken from the
# environment (MADEK_USER / MADEK_PASSWORD); a missing password is asked for
# interactively. The password that used to be hardcoded here has been
# published with the notebook and should be rotated.
import base64
import getpass

user = os.environ.get('MADEK_USER', 'sound-colour-space')
password = os.environ.get('MADEK_PASSWORD') or getpass.getpass('Madek password for %s: ' % user)

# Raw header line equivalent to `auth`, derived instead of hardcoded.
auth_header = 'Authorization: Basic ' + base64.b64encode(
    ('%s:%s' % (user, password)).encode('utf-8')).decode('ascii')
auth = HTTPBasicAuth(user, password)
#if r.status_code == 200
def get_authors(relations):
    """Fetch every related author from the API and mirror it into ``Author``.

    Args:
        relations: mapping whose values are dicts with an ``href`` entry,
            relative to ``base``.

    Returns:
        List of ``Author`` objects, one per relation; authors that already
        exist (matched on ``remote_uuid``) are updated and included too.
    """
    copied_fields = ('first_name', 'last_name', 'pseudonym',
                     'date_of_birth', 'date_of_death')
    author_objs = []
    for link in relations.values():
        remote = requests.get(base + link.get('href'), auth=auth).json()
        defaults = dict((name, remote.get(name)) for name in copied_fields)
        # create or update the author
        obj, _created = Author.objects.update_or_create(
            remote_uuid=remote.get('id'), defaults=defaults)
        author_objs.append(obj)
    return author_objs
def get_licenses(relations):
    """Fetch every related license from the API and mirror it into ``License``.

    Args:
        relations: mapping whose values are dicts with an ``href`` entry,
            relative to ``base``.

    Returns:
        List of ``License`` objects, one per relation; licenses that already
        exist (matched on ``remote_uuid``) are updated and included too.
    """
    license_objs = []
    for link in relations.values():
        remote = requests.get(base + link.get('href'), auth=auth).json()
        defaults = {'remote_uuid': remote.get('id')}
        for name in ('label', 'url', 'usage'):
            defaults[name] = remote.get(name)
        # create or update the license
        obj, _created = License.objects.update_or_create(
            remote_uuid=remote.get('id'), defaults=defaults)
        license_objs.append(obj)
    return license_objs
def get_entry(key, href):
    """Import one media entry from the API into the local ``Entry`` model.

    Fetches the entry and its meta data, creates or updates the ``Entry``
    matched on ``remote_uuid``, downloads the media file into the entry's
    image field and finally sets the licenses and authors.

    Args:
        key: relation key of the entry in the listing; not used for the import.
        href: API path of the entry, relative to ``base``.

    Returns:
        The created or updated ``Entry``. (Previously nothing was returned;
        callers that ignore the result are unaffected.)
    """
    # Meta keys whose plain ``value`` is copied to the named entry field.
    value_fields = {
        'madek_core:title': 'title',
        'madek_core:subtitle': 'subtitle',
        'madek_core:portrayed_object_date': 'portrayed_object_date',
        'copyright:source': 'source',
        'madek_core:copyright_notice': 'copyright_notice',
    }

    entry = requests.get(base + href, auth=auth).json()
    print('Madek: %s' % entry.get('id'))

    meta_data = requests.get(base + href + '/meta-data/', auth=auth).json()

    new_entry = {}      # field values for update_or_create
    author_objs = []
    license_objs = []
    for m in meta_data.get('meta-data'):
        meta_key = m.get('meta_key_id')
        meta_url = base + '/api/meta-data/' + m.get('id')
        if meta_key in value_fields:
            datum = requests.get(meta_url, auth=auth).json()
            new_entry[value_fields[meta_key]] = datum.get('value')
        elif meta_key == 'madek_core:authors':
            datum = requests.get(meta_url, auth=auth).json()
            author_objs = get_authors(datum.get('_json-roa')['collection']['relations'])
        elif meta_key == 'copyright:license':
            datum = requests.get(meta_url, auth=auth).json()
            license_objs = get_licenses(datum.get('_json-roa')['collection']['relations'])
        # Any other key (e.g. 'copyright:copyright_usage') is not imported and
        # is not fetched either.

    # create or update the entry
    obj, _created = Entry.objects.update_or_create(
        remote_uuid=entry.get('id'), defaults=new_entry)

    # download the image
    image_href = entry['_json-roa']['relations']['media-file'].get('href')
    image = requests.get(base + image_href, auth=auth).json()
    image_data_request = requests.get(
        base + image['_json-roa']['relations']['data-stream']['href'],
        stream=True, auth=auth)
    if image_data_request.status_code == 200:
        image_data_request.raw.decode_content = True
        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            # Write through the handle tempfile gave us (no second, shadowing
            # open) and close it before the file is read back.
            with tmp:
                shutil.copyfileobj(image_data_request.raw, tmp)
            # Image data is binary: read it back in 'rb', not text mode.
            with open(tmp.name, 'rb') as image_file:
                print(image.get('filename'))
                obj.image.save(image.get('filename'), File(image_file), save=True)
        finally:
            # Remove the temporary file even when saving fails.
            os.unlink(tmp.name)

    # set licenses, authors
    print("Entry: %s" % obj)
    obj.license.set(license_objs)
    obj.author.set(author_objs)
    return obj
# Walk the paginated media-entry listing of the collection and import every
# entry.
url = base + '/api/media-entries/?collection_id=' + collection_id
while True:
    r = requests.get(url, auth=auth)
    collection_data = r.json()['_json-roa']['collection']
    for key, value in collection_data.get('relations').items():
        get_entry(key, value.get('href'))
        print('########################')
        # sys.exit(0) # exit after first item (debugging)
    # The listing is finished once a page has no 'next' link.
    if collection_data.get('next') is None:
        break
    url = base + collection_data['next']['href']
    print("page at: %s" % url)
print('done')
In [ ]:
import re
# Dry run: count the titles that start with four digits plus one more
# character. The title is changed in memory only, because the save is
# commented out.
entries = Entry.objects.all()
total = 0
for e in entries:
    #e.title = e.title.replace('.gif', '')
    #e.save()
    # match 4 digits at start of title + 1 following character
    prefix_match = re.match('^(\d{4}.)', e.title)
    if prefix_match is None:
        continue
    total += 1
    #print e.title
    #print e.title.replace(prefix_match.group(1), '')
    # str.replace removes every occurrence of the prefix text, not only the leading one
    e.title = e.title.replace(prefix_match.group(1), '')
    #e.save()
print(total)
In [ ]:
from haystack.query import SearchQuerySet, EmptySearchQuerySet
from haystack.inputs import Raw
from itertools import chain
results = []
#query = '*mono* OR zarlino'
#query = 'newton'
#query = '1675'
query = 'Fludd OR newton'

# Each strategy starts out as an empty result set; uncomment a line to enable
# it. Only the raw full-text query is active.
author = EmptySearchQuerySet()
#author = SearchQuerySet().models(Entry).filter(author=Raw(query))
portrayed_object_date = EmptySearchQuerySet()
#portrayed_object_date = SearchQuerySet().models(Entry).filter(portrayed_object_date=Raw(query))
auto = EmptySearchQuerySet()
#auto = SearchQuerySet().models(Entry).auto_query(query)
combined = EmptySearchQuerySet()
#combined = SearchQuerySet().models(Entry).autocomplete(title_auto=query).filter(text=Raw(query))
raw_full = EmptySearchQuerySet()
raw_full = SearchQuerySet().models(Entry).filter(text=Raw(query))

# Concatenate the hits of all strategies in a fixed order.
strategies = (author, portrayed_object_date, auto, combined, raw_full)
results = list(chain(*strategies))
if results:
    print('Found %s.' % len(results))
    for r in results:
        # NOTE(review): %d needs a number -- confirm that the indexed
        # portrayed_object_date is numeric.
        print('%d %s %s %s' % (r.portrayed_object_date, r.author ,r.title, r.tags))
else:
    print('None found.')
In [ ]:
import os
from django.core.files import File
# Create one entry per .jpg/.gif file in the 'temp' directory, using the file
# name as title and the file as image.
for f in sorted(os.listdir('temp')):
    if f.endswith((".jpg", ".gif")):
        e = Entry(title=f)
        e.save()  # save so we have an uuid for the image path
        # Images are binary: open in 'rb', and close the handle when done
        # (it used to be opened in text mode and never closed).
        with open(os.path.join('temp', f), 'rb') as img:
            # image path will be overwritten by model; image.save() also
            # saves the entry, so no further e.save() is needed
            e.image.save(f, File(img))
In [ ]:
# Assign one author to every entry whose title contains a name fragment.
name_fragment = "thius"
entries = Entry.objects.filter(title__icontains=name_fragment)
# .get() raises unless exactly one author matches the fragment.
author = Author.objects.get(last_name__icontains=name_fragment)
print(author)
# NOTE(review): a bulk update() only works if `author` is a plain field or
# foreign key on Entry -- confirm against the current model.
entries.update(author=author)
print(entries)
In [ ]:
# Use the part of the title before the first '_' as portrayed_object_date
# whenever that part contains at least one digit.
up = 0
for e in Entry.objects.all():
    prefix = e.title.split('_')[0]
    if not any(char.isdigit() for char in prefix):
        continue
    e.portrayed_object_date = prefix
    e.save()
    up += 1
print('updated %d entries.' % up)