In [4]:
import operator
import sys
from haystack.query import SearchQuerySet, EmptySearchQuerySet, SQ
from haystack.inputs import Raw, AutoQuery, Exact

# Compare the number of entries of one category in the search index with the
# number in the database.
category = 'CO'
entry_index = SearchQuerySet().models(Entry)
results = entry_index.filter(category=category)
# alternative: results = entry_index.exclude(category='TO')

# count according to the search index
print(len(results))

# count according to the database (displayed as the cell's output value)
len(Entry.objects.filter(category=category))


94
Out[4]:
94

In [ ]:
import re

# Sample description containing the three kinds of bracketed references the
# functions below pick apart: "[(KEY) title]", "[<KEY>title]" and "[n, n, ...]".
# Adjacent string literals are concatenated without any separator.
des = (
    '[(AGRI_1528) Bla. Bli. I, 33] The numbers [<WARD_2013>Wardhaugh 2013,96-97][6 ,8, 9, 12], the second tetraktys [22],'
    'are [10000] arranged [10003,10004] on a square [33,144]. This representation is a graph of four nodes in which all pairs of nodes'
    'are connected (kappa-4). The connections are labelled by the corresponding ratios. The natural representation of this configuration'
    'in three dimensions would be a tetrahedron. [(BOET_10th) Inst. Mus. I, 10] and here some already parsed [FOOBAR_1337][]'
)

def repl(matchobj):
    """Print the complete text matched by ``matchobj``; returns None."""
    matched_text = matchobj.group(0)
    print(matched_text)

def replPrimSource(matchobj):
    """Print the parts of a primary-source reference of the form ``[(KEY) title]``.

    Prints the whole match (via ``repl``), then the text between the first
    "(" and the first ")", then the text after that ")" with spaces and
    afterwards "]" characters stripped from both ends. Returns None.
    """
    repl(matchobj)
    whole = matchobj.group(0)
    open_pos = whole.find("(")
    close_pos = whole.find(")")
    ref = whole[open_pos + 1:close_pos]
    print(ref)
    title = whole[close_pos + 1:].strip(' ').strip(']')
    print(title)
    
def replSecSource(matchobj):
    """Print the parts of a secondary-literature reference of the form ``[<KEY>title]``.

    Prints the whole match (via ``repl``), then the text between the first
    "<" and the first ">", then the text after that ">" with spaces and
    afterwards "]" characters stripped from both ends. Returns None.
    """
    repl(matchobj)
    whole = matchobj.group(0)
    open_pos = whole.find("<")
    close_pos = whole.find(">")
    ref = whole[open_pos + 1:close_pos]
    print(ref)
    title = whole[close_pos + 1:].strip(' ').strip(']')
    print(title)
    

# Regex patterns are raw strings so that sequences such as "\[" and "\d"
# reach the regex engine literally instead of being parsed as (invalid)
# string escapes.

# diagrams and sets
print("######################### Diagramme und Sets #########################")
# the return value is not kept, so `des` is unchanged by this pass
re.sub(r"\[(\d+)(\s*,?\s*\d+)*\]", repl, des)
# primary sources
print("######################### Primäre Quellen #########################")
# NOTE(review): the callback returns None, so re.sub presumably removes each
# matched reference from `des` -- confirm that this is intended
des = re.sub(r"\[\((.*?)\)(.*?)\]", replPrimSource, des)
# secondary literature
print("######################### Sekundär Literatur #########################")
des = re.sub(r"\[\<(.*?)\>(.*?)\]", replSecSource, des)

Assign doc_id to entries that have none


In [ ]:
# Number every entry that has no doc_id yet, in filename order, starting at 5000.
i = 5000
pending = sorted(Entry.objects.filter(doc_id=None), key=lambda entry: entry.filename)
for e in pending:
    e.doc_id = i
    i += 1
    e.save()

In [ ]:
import sys
import re

# Sample description with bracketed lists of numeric references.
des = '[22,23] Arithmetic triangle in the style of Boethius [1, 13] representing [133,134,135,77] numbers of the format 2j\xc2\xb43k. The diagram is used to explain the system of tone durations of binary and ternary division. Fludd has copied it from Torkesey [16,17]. The diagram occurs also on the Templum musicae (with two mistakes). As in Torkesey\xe2\x80\x99s diagram the diagonal direction top right bottom left is labelled Sesquitertiae (3:4), an essential mathematical property of the diagram [1,62,71] asdf.'

# print(...) with one argument behaves the same on Python 2 and 3 and matches
# the call style used in the other cells
print(des)

# collects every referenced number found by repl()
ref_id = []


def repl(matchobj):
    """Rewrite a bracketed number list such as ``[1, 13]`` as ``[[1][],[13][]]``.

    Every number found is also appended (as int) to the module-level
    ``ref_id`` list. Whitespace around the numbers is dropped and empty items
    (e.g. from ``[1,,2]``) are skipped. If the brackets contain nothing
    usable, or anything other than digits, the matched text is returned
    unchanged instead of raising ValueError.
    """
    matched = matchobj.group(0)
    tokens = [t.strip() for t in matched.replace('[', '').replace(']', '').split(',')]
    tokens = [t for t in tokens if t]
    if not tokens or not all(t.isdigit() for t in tokens):
        # not a pure list of numbers: leave the text as it is
        return matched

    ref_id.extend(int(t) for t in tokens)
    return "[" + ','.join('[' + t + '][]' for t in tokens) + "]"

# Rewrite every bracketed list of numbers in `des` through repl().
# Raw string: "\[" and "\d" are regex escapes, not string escapes.
des = re.sub(r"\[([\d, *]+)\]", repl, des)

print(des)

print(ref_id)

In [ ]:
import operator
import sys
from haystack.query import SearchQuerySet, EmptySearchQuerySet, SQ
from haystack.inputs import Raw, AutoQuery, Exact

# Narrow the indexed entries down by every selected tag, then collect the
# tags that occur on the remaining results.
tags = ['senario', 'consonance']

#Keyword.objects.filter(name__in=tags)
#Entry.objects.filter(tags__name__in=tags).distinct()

entries = Entry.objects.all()

results = SearchQuerySet().models(Entry).all()
print(results.count())

#print (results.models())

selected_tags = [name.strip() for name in 'senario, consonance'.split(',')]
for tag in selected_tags:
    # each pass filters the already filtered result set again
    results = results.filter(SQ(tags=tag))

# primary keys of all tags attached to the remaining entries
possible_tags = set()
for r in results.all():
    possible_tags.update(t.pk for t in r.object.tags.all())

print(possible_tags)

tags = Keyword.objects.filter(pk__in=possible_tags).order_by('name')
tag_objects = [
    {"name": t.name, "slug": t.slug, "selected": t.slug in selected_tags}
    for t in tags
]

print(tag_objects)

In [ ]:
# Count entries per tag combination in the database, in the search index and
# with a combination of both.
print('############# DB  ##############')
both_tags = ['senario', 'consonance']
e1 = Entry.objects.filter(tags__name__in=['consonance'])
e2 = Entry.objects.filter(tags__name__in=['senario'])
# same filter, once without and once with distinct()
e3 = Entry.objects.filter(tags__name__in=both_tags)
e4 = Entry.objects.filter(tags__name__in=both_tags).distinct()
for db_result in (e1, e2, e3, e4):
    print(db_result.count())


print('########### SQ ##############')
sqs1 = SearchQuerySet().models(Entry).all().filter(SQ(tags__exact='consonance'))
#sqs11= SearchQuerySet().models(Entry).filter(tags=Exact('consonance'))
sqs2 = SearchQuerySet().models(Entry).all().filter(SQ(tags='senario'))
sqs3 = SearchQuerySet().models(Entry).all().filter(SQ(tags='consonance') | SQ(tags='senario'))
sqs4 = SearchQuerySet().models(Entry).all().filter(SQ(tags='consonance') & SQ(tags='senario'))

# the two tag conditions applied as two chained filters
sqs5 = SearchQuerySet().models(Entry).all().filter(SQ(tags='consonance')).filter(SQ(tags='senario'))

print(sqs1.count())
#print sqs11.count()
print(sqs2.count())
print("OR: %s" % sqs3.count())
print("AND: %s" % sqs4.count())
print("AND: %s" % sqs5.count())
print(list(sqs4.values_list('pk', flat=True)))

print('########### DB + SQ combined ##############')
tags = 'senario, consonance'
results = SearchQuerySet().models(Entry).all()
qs = Entry.objects.filter(pk__in=results.values_list('pk', flat=True))
#qs = qs.filter(tags__name__in=[x.strip() for x in tags.split(',')]).distinct()
tags = [t.strip() for t in tags.split(',')]
for tag in tags:
    qs = qs.filter(tags__name__in=[tag]).distinct()

print(list(qs.values_list('pk', flat=True)))

# slugs of every tag on the remaining entries, de-duplicated and sorted
ftags = []
for e in qs:
    ftags.extend(e.tags.slugs())

#ftags = sorted(ftags, key=str.lower)
ftags = sorted(set(ftags))

print(ftags)
print(qs.count())

In [ ]:
# Example mapping of search criteria; the last line shows the second term.
search_items = dict(
    Term=['fludd', 'higgs'],
    Type=['author'],
)
search_items['Term'][1]

In [ ]:
from haystack.query import SearchQuerySet, EmptySearchQuerySet

order_by = 'author'
#print ([e.pk for e in Entry.objects.all().order_by(order_by)[:20]])
#print ([int(e.pk) for e in SearchQuerySet().all().order_by(order_by)[:20]])

# Count the entries carrying one tag, first via the database, then via the
# search index, using the same lookup for both.
tag = "chromatic scale"
tag_lookup = {'tags__name__in': [tag]}
print(Entry.objects.filter(**tag_lookup).count())
print('######################')
print(SearchQuerySet().models(Entry).filter(**tag_lookup).count())

In [ ]:
# Fetch the author whose last name is exactly 'Newton' and display what its
# `museums_entries` manager returns (the cell's output value).
a = Author.objects.all()
n = a.get(last_name='Newton')
n.museums_entries.all()

In [ ]:
from django.utils.text import slugify

# Regenerate the slug of every collection from its title and save it.
cols = Collection.objects.all()

for c in cols:
    c.slug = slug = slugify(c.title, allow_unicode=True)
    print(c.slug)
    c.save()

In [ ]:
import json
import sys
import os
import requests
import shutil
import operator
from collections import OrderedDict
from requests.auth import HTTPBasicAuth

import tempfile
from django.core.files.base import ContentFile
from django.core.files import File

import base64

# NOTE(review): plain http combined with HTTP basic auth sends the credentials
# unencrypted -- switch to https if the server supports it.
base = 'http://medienarchiv.zhdk.ch'
collection_id = '73114b11-08de-42a9-ba36-864d9c1b5641'

# Credentials are read from the environment so that no secret lives in the
# notebook. The password that used to be hard-coded here should be rotated.
user = os.environ.get('MADEK_USER', 'sound-colour-space')
password = os.environ['MADEK_PASSWORD']  # KeyError if the variable is not set
# same header as before, now derived from the credentials above
auth_header = 'Authorization: Basic ' + base64.b64encode(
    (user + ':' + password).encode('utf-8')).decode('ascii')

auth = HTTPBasicAuth(user, password)

#if r.status_code == 200

def get_authors(relations):
    """Fetch every related author from the remote API and mirror it locally.

    relations: mapping whose values are dicts with an 'href' entry (a path
        relative to ``base``); the keys are not used.
    Returns the list of Author objects that were created or updated.
    Raises requests.HTTPError if one of the requests is answered with an
    error status.
    """
    author_fields = ('first_name', 'last_name', 'pseudonym', 'date_of_birth', 'date_of_death')
    author_objs = []
    for values in relations.values():
        response = requests.get(base + values.get('href'), auth=auth)
        # fail loudly instead of storing an error payload as an author
        response.raise_for_status()
        a = response.json()
        new_author = {field: a.get(field) for field in author_fields}

        # create or update author
        obj, created = Author.objects.update_or_create(
            remote_uuid=a.get('id'), defaults=new_author)
        author_objs.append(obj)  # add to list for update even if existing

    return author_objs
        
def get_licenses(relations):
    """Fetch every related license from the remote API and mirror it locally.

    relations: mapping whose values are dicts with an 'href' entry (a path
        relative to ``base``); the keys are not used.
    Returns the list of License objects that were created or updated.
    Raises requests.HTTPError if one of the requests is answered with an
    error status.
    """
    license_objs = []
    for values in relations.values():
        response = requests.get(base + values.get('href'), auth=auth)
        # fail loudly instead of storing an error payload as a license
        response.raise_for_status()
        data = response.json()

        new_license = {
            'remote_uuid': data.get('id'),
            'label': data.get('label'),
            'url': data.get('url'),
            'usage': data.get('usage'),
        }
        # create or update license
        obj, created = License.objects.update_or_create(
            remote_uuid=data.get('id'), defaults=new_license)
        license_objs.append(obj)  # add to list for update even if existing

    return license_objs
        
        
def get_entry(key, href):
    """Import one media entry (meta data, authors, licenses, image) from the remote API.

    key: relation key of the entry in the listing; not used by the import.
    href: API path of the entry, relative to ``base``.
    Returns the created or updated Entry.
    Raises requests.HTTPError if the entry, its meta data or its media-file
    description cannot be fetched.
    """
    def fetch_json(path):
        # GET base + path with the shared credentials and decode the JSON body
        response = requests.get(base + path, auth=auth)
        response.raise_for_status()
        return response.json()

    # meta keys whose plain 'value' is copied into the Entry field named here
    value_fields = {
        'madek_core:title': 'title',
        'madek_core:subtitle': 'subtitle',
        'madek_core:portrayed_object_date': 'portrayed_object_date',
        'copyright:source': 'source',
        'madek_core:copyright_notice': 'copyright_notice',
    }

    # get single entry
    entry = fetch_json(href)
    print('Madek: %s' % entry.get('id'))

    new_entry = {}  # the updated entry as dict
    author_objs = []
    license_objs = []

    # iterate over meta-data
    for m in fetch_json(href + '/meta-data/').get('meta-data'):
        meta_key = m.get('meta_key_id')
        meta_path = '/api/meta-data/' + m.get('id')
        if meta_key in value_fields:
            new_entry[value_fields[meta_key]] = fetch_json(meta_path).get('value')
        elif meta_key == 'madek_core:authors':
            authors = fetch_json(meta_path).get('_json-roa')['collection']
            author_objs = get_authors(authors['relations'])
        elif meta_key == 'copyright:license':
            licenses = fetch_json(meta_path).get('_json-roa')['collection']
            license_objs = get_licenses(licenses['relations'])
        # all other meta keys (e.g. 'copyright:copyright_usage') are ignored

    # create or update entry
    obj, created = Entry.objects.update_or_create(
        remote_uuid=entry.get('id'), defaults=new_entry)

    # save image
    image = fetch_json(entry['_json-roa']['relations']['media-file'].get('href'))
    image_data_request = requests.get(
        base + image['_json-roa']['relations']['data-stream']['href'], stream=True, auth=auth)

    if image_data_request.status_code == 200:
        print(image.get('filename'))
        # Spool the download into a temporary file that is removed when the
        # block exits, even if saving fails; the handle stays in binary mode.
        with tempfile.NamedTemporaryFile() as tmp:
            image_data_request.raw.decode_content = True
            shutil.copyfileobj(image_data_request.raw, tmp)
            tmp.flush()
            tmp.seek(0)
            obj.image.save(image.get('filename'), File(tmp), save=True)

    # set licenses, authors
    print("Entry: %s" % obj)
    obj.license.set(license_objs)
    obj.author.set(author_objs)

    return obj
        

def get_collection(key, href):
    """Import one collection with its meta data, authors and all of its entries.

    key: relation key of the collection in the listing; only printed.
    href: API path of the collection, relative to ``base``.
    Returns None.
    Raises requests.HTTPError if a request made here is answered with an
    error status.
    """
    def fetch_json(path):
        # GET base + path with the shared credentials and decode the JSON body
        response = requests.get(base + path, auth=auth)
        response.raise_for_status()
        return response.json()

    # meta keys whose plain 'value' is copied into the Collection field named here
    value_fields = {
        'madek_core:title': 'title',
        'madek_core:subtitle': 'subtitle',
        'madek_core:description': 'description',
    }

    # get single collection
    collection = fetch_json(href)
    print('Madek: [%s] %s' % (key, collection.get('id')))

    new_collection = {}  # the updated collection as dict
    author_objs = []

    # iterate over meta-data
    for m in fetch_json(href + '/meta-data/').get('meta-data'):
        meta_key = m.get('meta_key_id')
        meta_path = '/api/meta-data/' + m.get('id')
        if meta_key in value_fields:
            new_collection[value_fields[meta_key]] = fetch_json(meta_path).get('value')
        elif meta_key == 'madek_core:authors':
            authors = fetch_json(meta_path).get('_json-roa')['collection']
            author_objs = get_authors(authors['relations'])
            for a in author_objs:
                print("Author: %s" % a.get_full_name())

    # create or update set
    obj, created = Collection.objects.update_or_create(
        remote_uuid=collection.get('id'), defaults=new_collection)

    obj.author.set(author_objs)

    entry_objs = []

    # get entries, following the 'next' link until the last page
    url = base + '/api/media-entries/?collection_id=' + collection.get('id')
    while True:
        response = requests.get(url, auth=auth)
        response.raise_for_status()
        entries_data = response.json()['_json-roa']['collection']

        # iterate page
        for entry_key, value in entries_data.get('relations').items():
            entry_objs.append(get_entry(entry_key, value.get('href')))

        next_page = entries_data.get('next')
        if next_page is None:
            break
        url = base + next_page['href']
        print("page at: %s" % url)

    obj.entry.set(entry_objs)

    print("Collection: %s" % (obj))
    
    
    
# Import every collection listed for `collection_id`, page by page.
#url = base + '/api/media-entries/?collection_id=' + collection_id
url = base + '/api/collections/?collection_id=' + collection_id

while True:
    r = requests.get(url, auth=auth)
    # stop on an error response instead of failing later on a missing key
    r.raise_for_status()

    collection_data = r.json()['_json-roa']['collection']

    relations = collection_data.get('relations')
    next_page = collection_data.get('next')

    # iterate page
    for key, value in relations.items():
        #get_entry(key, value.get('href'))
        get_collection(key, value.get('href'))

        print('########################')

        # sys.exit(0) # exit after first item (debugging)

    if next_page is None:
        break
    url = base + next_page['href']
    print("page at: %s" % url)

print('done')

Find missing by doc_id


In [ ]:
# Print every doc_id from 1 to 99 that no entry uses.
# A single query fetches the ids that exist, so the loop needs no per-id
# lookup and duplicate doc_ids cannot abort it.
CHECKED_IDS = list(range(1, 100))
existing_ids = set(
    Entry.objects.filter(doc_id__in=CHECKED_IDS).values_list('doc_id', flat=True))
for i in CHECKED_IDS:
    if i not in existing_ids:
        print(i)

Parse references


In [ ]:
import sys
import collections
import re
entries = Entry.objects.all()

# a bracketed number such as [12]
REFERENCE_RE = re.compile(r'\[(\d+)\]')

# Turn every "[n]" in a description into the link '[[n]](n "title")' when an
# entry with doc_id n exists, and register that entry as related.
# Each occurrence is rewritten exactly once by position, so repeated
# references are not wrapped twice, and references that are already wrapped
# as "[[n]](" are skipped, which makes the cell safe to re-run.
for e in entries:
    text = e.description or u''
    pieces = []
    related = []
    last = 0
    for m in REFERENCE_RE.finditer(text):
        already_linked = (text[m.start() - 1:m.start()] == '['
                          and text[m.end():m.end() + 2] == '](')
        if already_linked:
            continue
        number = int(m.group(1))
        try:
            ref = Entry.objects.get(doc_id=number)
        except Entry.DoesNotExist:
            # unknown reference: leave the text as it is
            continue
        pieces.append(text[last:m.start()])
        # update description text: [doc_id](doc_id "title")
        pieces.append('[' + m.group(0) + '](' + str(number) + ' "' + ref.title + '")')
        last = m.end()
        related.append(ref)

    if related:
        pieces.append(text[last:])
        e.description = ''.join(pieces)
        # add related objs
        e.related.add(*related)
        e.save()


# earlier approach, kept as an inert string literal
'''
od = collections.OrderedDict(sorted(refs.items()))

for id, refs in od.items():
    # get related objects
    related_objs = Entry.objects.filter(doc_id__in=refs)
    e = Entry.objects.get(doc_id=id)
    print e, '==>', related_objs
    # set them (empty sets will clear related objs)
    e.related.set(related_objs)
'''

Export cards to CSV


In [ ]:
import csv
import sys
# Python 2 only: change the interpreter's default encoding to UTF-8,
# presumably so that csv.writer can write non-ASCII field values.
reload(sys)
sys.setdefaultencoding('utf-8')
entries = Entry.objects.all()

# Write one '#'-separated row per entry: title, description, image, image
# size, first author and portrayed date.
with open('/home/stahl/icst/soundcolourspace/cards/cards.csv', 'wb') as csvfile:
    cardswriter = csv.writer(csvfile, delimiter='#', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    #for e in entries[0:10]:
    for e in entries:
        # only the first author is exported
        authors = list(e.author.all()[:1])
        if authors:
            name_parts = (authors[0].first_name, authors[0].last_name)
            # Join whichever parts are present. This yields "" when both are
            # empty instead of reusing the author of the previous row.
            author = " ".join(part for part in name_parts if part)
        else:
            author = ""

        # entries without an image get empty size columns instead of raising
        if e.image:
            width, height = e.image.width, e.image.height
        else:
            width, height = "", ""

        cardswriter.writerow([e.title, e.description, e.image, width, height, author, e.portrayed_object_date])


#with open('cards.csv') as csvfile:
#    reader = csv.reader(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
#    for row in reader:
#        print (row[0])

In [ ]:
from datetime import datetime
from django.utils import timezone

# Look at one entry: touch its image dimensions and print its date accuracy.
entries = Entry.objects.all()
e = entries[33]
e.image.width
e.image.height
print(e.date_accuracy)

current_tz = timezone.get_current_timezone()
print(current_tz)

# Parse a naive timestamp, then attach the current time zone to it.
date_object = datetime.strptime('Jun 1 1436  1:33PM', '%b %d %Y %I:%M%p')
print(date_object)
date_object = timezone.make_aware(date_object, current_tz)
print(date_object)

In [ ]:
import re
from datetime import datetime
from django.utils import timezone

date_object = datetime.strptime('Jun 1 2005  1:33PM', '%b %d %Y %I:%M%p')

# "c. XXXX", "ca. XXXX" or "circa XXXX". The word boundary and the escaped dot
# keep strings such as "Dec 1675" from being read as a circa date.
CIRCA_RE = re.compile(r'\b(?:circa|ca?)\.?\s*(\d{4})')
# "XXth" (two-digit century)
CENTURY_RE = re.compile(r'(\d{2})th')

# values written to Entry.date_accuracy -- NOTE(review): their meaning is
# defined by the model, confirm there
ACCURACY_CIRCA = 3
ACCURACY_CENTURY = 5

entries = Entry.objects.all()
total = 0
for e in entries:
    # unicode-safe text form; str() would fail on non-ASCII text under Python 2
    text = u'{}'.format(e.portrayed_object_date)

    parsed = None  # (year, accuracy)
    m = CIRCA_RE.search(text)
    if m:
        parsed = (int(m.group(1)), ACCURACY_CIRCA)
    m = CENTURY_RE.search(text)
    if m:
        # first year of the century, e.g. "15th" -> 1400; as before, a century
        # takes precedence when both forms are present
        parsed = ((int(m.group(1)) - 1) * 100, ACCURACY_CENTURY)

    if parsed is None:
        continue

    year, accuracy = parsed
    date = timezone.make_aware(datetime(year, 1, 1), timezone.get_current_timezone())

    print("{} \t {} \t\t\t {} \t".format(e.portrayed_object_date, e.date, date))

    e.date = date
    e.date_accuracy = accuracy
    e.save()
    # counts every updated entry, whichever pattern matched
    total += 1

print("updated total: %s" % total)

Import CSV annotations


In [ ]:
import csv
import os
import re

# all entries; this name is rebound for every CSV row further down
entries = Entry.objects.all()
#print( os.path.basename (entries[0].image.name) )
        
# CSV file that is opened and read below
path = '/home/stahl/icst/soundcolourspace/annotations.csv'

class MyDialect(csv.Dialect):
    """CSV dialect for the annotations file: ';'-separated, double-quoted fields."""
    # field layout
    delimiter = ';'
    quotechar = '"'
    quoting = csv.QUOTE_ALL
    lineterminator = '\n'
    # parsing behaviour
    skipinitialspace = True
    strict = True


# For each CSV row (image file name, annotation text, ...) store the
# annotation as the description of the single entry whose image path
# contains that file name.
with open(path, 'rb') as csvfile:
    r = csv.reader(csvfile, MyDialect())
    for i in r:
        #print ("#%s: %s \n\n%s") % (i[2], i[0], i[1])
        filename, annotation = i[0], i[1]

        # evaluate once, so the object that is modified is the one that is saved
        entries = list(Entry.objects.filter(image__icontains=filename))
        if not entries:
            print("no entries for %s" % filename)
        elif len(entries) > 1:
            print("multiple entries for %s" % filename)
        else:
            entry = entries[0]
            # NOTE(review): decodes with the interpreter's default encoding and
            # silently drops undecodable bytes -- confirm the file's encoding
            entry.description = unicode(annotation, errors='ignore')
            entry.save()
            
    
        #print ('####################################################################################')

MEDIENARCHIV


In [ ]:
import json
import sys
import os
import requests
import shutil
import operator
from collections import OrderedDict
from requests.auth import HTTPBasicAuth

import tempfile
from django.core.files.base import ContentFile
from django.core.files import File

import base64

# NOTE(review): plain http combined with HTTP basic auth sends the credentials
# unencrypted -- switch to https if the server supports it.
base = 'http://medienarchiv.zhdk.ch'
collection_id = '73114b11-08de-42a9-ba36-864d9c1b5641'

# Credentials are read from the environment so that no secret lives in the
# notebook. The password that used to be hard-coded here should be rotated.
user = os.environ.get('MADEK_USER', 'sound-colour-space')
password = os.environ['MADEK_PASSWORD']  # KeyError if the variable is not set
# same header as before, now derived from the credentials above
auth_header = 'Authorization: Basic ' + base64.b64encode(
    (user + ':' + password).encode('utf-8')).decode('ascii')

auth = HTTPBasicAuth(user, password)

#if r.status_code == 200

def get_authors(relations):
    """Fetch every related author from the remote API and mirror it locally.

    relations: mapping whose values are dicts with an 'href' entry (a path
        relative to ``base``); the keys are not used.
    Returns the list of Author objects that were created or updated.
    Raises requests.HTTPError if one of the requests is answered with an
    error status.
    """
    author_fields = ('first_name', 'last_name', 'pseudonym', 'date_of_birth', 'date_of_death')
    author_objs = []
    for values in relations.values():
        response = requests.get(base + values.get('href'), auth=auth)
        # fail loudly instead of storing an error payload as an author
        response.raise_for_status()
        a = response.json()
        new_author = {field: a.get(field) for field in author_fields}

        # create or update author
        obj, created = Author.objects.update_or_create(
            remote_uuid=a.get('id'), defaults=new_author)
        author_objs.append(obj)  # add to list for update even if existing

    return author_objs
        
def get_licenses(relations):
    """Fetch every related license from the remote API and mirror it locally.

    relations: mapping whose values are dicts with an 'href' entry (a path
        relative to ``base``); the keys are not used.
    Returns the list of License objects that were created or updated.
    Raises requests.HTTPError if one of the requests is answered with an
    error status.
    """
    license_objs = []
    for values in relations.values():
        response = requests.get(base + values.get('href'), auth=auth)
        # fail loudly instead of storing an error payload as a license
        response.raise_for_status()
        data = response.json()

        new_license = {
            'remote_uuid': data.get('id'),
            'label': data.get('label'),
            'url': data.get('url'),
            'usage': data.get('usage'),
        }
        # create or update license
        obj, created = License.objects.update_or_create(
            remote_uuid=data.get('id'), defaults=new_license)
        license_objs.append(obj)  # add to list for update even if existing

    return license_objs
        
        
def get_entry(key, href):
    """Import one media entry (meta data, authors, licenses, image) from the remote API.

    key: relation key of the entry in the listing; not used by the import.
    href: API path of the entry, relative to ``base``.
    Returns the created or updated Entry.
    Raises requests.HTTPError if the entry, its meta data or its media-file
    description cannot be fetched.
    """
    def fetch_json(path):
        # GET base + path with the shared credentials and decode the JSON body
        response = requests.get(base + path, auth=auth)
        response.raise_for_status()
        return response.json()

    # meta keys whose plain 'value' is copied into the Entry field named here
    value_fields = {
        'madek_core:title': 'title',
        'madek_core:subtitle': 'subtitle',
        'madek_core:portrayed_object_date': 'portrayed_object_date',
        'copyright:source': 'source',
        'madek_core:copyright_notice': 'copyright_notice',
    }

    # get single entry
    entry = fetch_json(href)
    print('Madek: %s' % entry.get('id'))

    new_entry = {}  # the updated entry as dict
    author_objs = []
    license_objs = []

    # iterate over meta-data
    for m in fetch_json(href + '/meta-data/').get('meta-data'):
        meta_key = m.get('meta_key_id')
        meta_path = '/api/meta-data/' + m.get('id')
        if meta_key in value_fields:
            new_entry[value_fields[meta_key]] = fetch_json(meta_path).get('value')
        elif meta_key == 'madek_core:authors':
            authors = fetch_json(meta_path).get('_json-roa')['collection']
            author_objs = get_authors(authors['relations'])
        elif meta_key == 'copyright:license':
            licenses = fetch_json(meta_path).get('_json-roa')['collection']
            license_objs = get_licenses(licenses['relations'])
        # all other meta keys (e.g. 'copyright:copyright_usage') are ignored

    # create or update entry
    obj, created = Entry.objects.update_or_create(
        remote_uuid=entry.get('id'), defaults=new_entry)

    # save image
    image = fetch_json(entry['_json-roa']['relations']['media-file'].get('href'))
    image_data_request = requests.get(
        base + image['_json-roa']['relations']['data-stream']['href'], stream=True, auth=auth)

    if image_data_request.status_code == 200:
        print(image.get('filename'))
        # Spool the download into a temporary file that is removed when the
        # block exits, even if saving fails; the handle stays in binary mode.
        with tempfile.NamedTemporaryFile() as tmp:
            image_data_request.raw.decode_content = True
            shutil.copyfileobj(image_data_request.raw, tmp)
            tmp.flush()
            tmp.seek(0)
            obj.image.save(image.get('filename'), File(tmp), save=True)

    # set licenses, authors
    print("Entry: %s" % obj)
    obj.license.set(license_objs)
    obj.author.set(author_objs)

    # returned so that callers can collect the imported entries
    return obj
        

# Import every media entry of the collection `collection_id`, page by page.
url = base + '/api/media-entries/?collection_id=' + collection_id

while True:
    r = requests.get(url, auth=auth)
    # stop on an error response instead of failing later on a missing key
    r.raise_for_status()

    collection_data = r.json()['_json-roa']['collection']

    relations = collection_data.get('relations')
    next_page = collection_data.get('next')

    # iterate page
    for key, value in relations.items():
        get_entry(key, value.get('href'))

        print('########################')

        # sys.exit(0) # exit after first item (debugging)

    if next_page is None:
        break
    url = base + next_page['href']
    print("page at: %s" % url)

print('done')

Import files from directory

Search & Replace


In [ ]:
import re
entries = Entry.objects.all()
total = 0
for e in entries:
    #e.title = e.title.replace('.gif', '')
    #e.save()

    # four digits at the start of the title plus the one character after them
    # (the pattern accepts any character there, not only whitespace)
    m = re.match(r'\d{4}.', e.title)
    if m:
        total += 1
        # cut off the leading match only; str.replace would also remove any
        # later repetition of the same text inside the title
        e.title = e.title[m.end():]
        # dry run: saving is disabled
        #e.save()
print(total)

SearchQuery


In [ ]:
from haystack.query import SearchQuerySet, EmptySearchQuerySet
from haystack.inputs import Raw
from itertools import chain
#query = '*mono* OR zarlino'
#query = 'newton'
#query = '1675'
query = 'Fludd OR newton'

# Each strategy starts out empty; remove the comment marker of a line to try it.
author = EmptySearchQuerySet()
#author = SearchQuerySet().models(Entry).filter(author=Raw(query))
portrayed_object_date = EmptySearchQuerySet()
#portrayed_object_date = SearchQuerySet().models(Entry).filter(portrayed_object_date=Raw(query))
auto = EmptySearchQuerySet()
#auto = SearchQuerySet().models(Entry).auto_query(query)
combined = EmptySearchQuerySet()
#combined = SearchQuerySet().models(Entry).autocomplete(title_auto=query).filter(text=Raw(query))
raw_full = SearchQuerySet().models(Entry).filter(text=Raw(query))

results = list(chain(author, portrayed_object_date, auto, combined, raw_full))

if not results:
    print('None found.')
else:
    print('Found %s.' % len(results))
    for r in results:
        # %s instead of %d: the portrayed date is handled as text elsewhere in
        # this notebook (e.g. "c. 1500"), and %d raises TypeError for text
        print('%s %s %s %s' % (r.portrayed_object_date, r.author, r.title, r.tags))

Local Import


In [ ]:
import os
from django.core.files import File
# Create one entry per .jpg/.gif file found in the 'temp' directory.
for f in sorted(os.listdir('temp')):
    if f.endswith((".jpg", ".gif")):
        # binary mode for image data; the handle is closed once the entry is stored
        with open(os.path.join('temp', f), 'rb') as img:
            e = Entry(title=f)
            e.save()  # save so we have an uuid for the image path
            e.image.save(f, File(img))  # image path will be overwritten by model
            e.save()

Set author


In [ ]:
# Make the author whose last name contains `a` the author of every entry whose
# title contains `a`.
a = "thius"
entries = Entry.objects.filter(title__icontains=a)
author = Author.objects.get(last_name__icontains=a)
print(author)
# `author` is used as a many-to-many manager elsewhere in this notebook
# (obj.author.set(...), e.author.all()), and QuerySet.update() cannot write
# such a relation, so it is set entry by entry.
for e in entries:
    e.author.set([author])
print(entries)

In [ ]:
# Copy the part of each title before the first "_" into portrayed_object_date
# whenever that part contains at least one digit.
up = 0
for e in Entry.objects.all():
    t = e.title
    y = t.partition('_')[0]
    if not any(ch.isdigit() for ch in y):
        continue
    e.portrayed_object_date = y
    e.save()
    up += 1
print('updated %d entries.' % up)