In [7]:
import csv, os, time, datetime
import lib.django.djff.models as dm
all_manual_tags_query = dm.ManualTag.objects.all()
researchers = list(dm.Researcher.objects.all())
experiments = list(dm.Experiment.objects.exclude(pk=11))
species = list(dm.Species.objects.all())

query_list = [all_manual_tags_query]

Example of Filtering Tags To Export

After running the cell above to set things up and load all of the tags into the all_manual_tags object, we need to filter them to select only the ones that you want to export.

Each one of these queries builds on the previous query. At each stage in the examples below, we are taking the last query on the query_list, filtering it, and appending the new query to the end of the query_list.

We start by excluding all tags generated by a researcher known to produce low-quality tags:


In [2]:
for researcher in researchers:
    if 'Wil' in researcher.name:
        print 'ID:{}, name:{}'.format(researcher.id, researcher.name)


ID:2, name:Wil "sorely tempted to take away Nikko's admin privileges" Langford

In [3]:
print 'pre-filter:', query_list[-1].count()
query_list.append(query_list[-1].exclude(researcher_id=2))
print 'post-filter:', query_list[-1].count()


pre-filter: 67874
post-filter: 67840

Next, let's only keep the pleco tags:


In [9]:
print 'pre-filter:', query_list[-1].count()
# For each tag, only keep it if the shortname of the species of the experiment of the image that
# this tag is attached to is "HP":
query_list.append(query_list[-1].filter(image__xp__species__shortname='HP'))
print 'post-filter:', query_list[-1].count()

print 'pre-filter:', query_list[-1].count()
# For each tag, only keep it if the name of the experiment of the image that
# this tag is attached to contains "Pleco":
query_list.append(query_list[-1].filter(image__xp__name__contains='Pleco'))
print 'post-filter:', query_list[-1].count()


pre-filter: 60358
post-filter: 60362
pre-filter: 60365
post-filter: 60365

Now let's keep only the tags from January of 2015:


In [10]:
print 'pre-filter:', query_list[-1].count()
query_list.append(query_list[-1].filter(timestamp__month=2, timestamp__year=2016))
print 'post-filter:', query_list[-1].count()


pre-filter: 60365
post-filter: 14606

Finally, we name the latest query export_these_tags and turn it into a Python list for further processing:


In [11]:
export_these_tags = list(query_list[-1])

Shortcut filtering

You can also just toss all of the filters above into a long query and it will probably work:


In [8]:
export_these_tags_test = list(
    query_list[0].exclude(
        researcher_id=2).filter(
        image__xp__species__shortname='HP').filter(
        image__xp__name__contains='Pleco').filter(
        timestamp__month=1, timestamp__year=2015)
)
print 'Same length:', len(export_these_tags) == len(export_these_tags_test)
print 'Same tags:', [tag.id for tag in export_these_tags] == [tag.id for tag in export_these_tags_test]


Same length: False
Same tags: False

Either way, you want to end up with an export_these_tags list (or query) at the end of the filtering stage.

Filter actual data here


In [ ]:


In [ ]:


In [ ]:


In [ ]:
# export_these_tags = query_list[-1]

Export Tags

Executing the cell below will export the tags in export_these_tags into the specified directory.


In [12]:
# set this to suit yourself - leaving it blank is perfectly valid
custom_prefix = ''

# These are probably comfortable defaults

export_directory = '/mnt/server_storage/exported_data'
timestamp_export_file = True

#
# First, we build the full pathname of the exported file:
#

if timestamp_export_file:
    prefix = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S-')
else:
    prefix = ''

export_filename = custom_prefix + prefix + 'fishface_exported_data.csv'
export_path = os.path.join(export_directory, export_filename)

cjr_start_cache = dict()

#
# Then we write the file from `export_these_tags`:
#

with open(export_path, 'wb') as csvfile:
    writer = csv.writer(csvfile, dialect=csv.excel)        

    # Write header row
    writer.writerow([
            'TagId',
            'TaggerID',
            'Species',
            'XPName',
            'ImageTimestamp',
            'ImageVoltage',
            'TagTimestamp',
            'TagStartX',
            'TagStartY',
            'Angle',
            'CJR_ID',
            'SecondsSinceCJRFirstCapture',
    ])
    
    species = export_these_tags[0].image.xp.species.shortname
    
    for tag in export_these_tags:
        cjr_id = tag.image.cjr_id
        
        if cjr_id not in cjr_start_cache:
            images_timestamps_for_cjr = [im.capture_timestamp for im in
                dm.CaptureJobRecord.objects.get(pk=cjr_id).image_set.all()]
            cjr_start_cache[cjr_id] = sorted(images_timestamps_for_cjr)[0]
        
        delta = (tag.image.capture_timestamp - cjr_start_cache[cjr_id])
        seconds_since_cjr_start = delta.seconds + (float(delta.microseconds) / 1000000)
        
        x, y = tag.int_start
        tag_image = tag.image
        
        writer.writerow([
                tag.id,
                tag.researcher_id,
                species,
                tag_image.xp.name,
                tag_image.capture_timestamp.strftime('%Y-%m-%d %H:%M:%S'),
                tag_image.voltage,
                tag.timestamp.strftime('%Y-%m-%d %H:%M:%S'),
                x,
                y,
                tag.degrees,
                cjr_id,
                seconds_since_cjr_start,
            ])

In [ ]: