Extract data from the full low-level JSON dump. In this case, the field of interest is the release year of each track.
In [57]:
    
from __future__ import print_function
from collections import defaultdict, OrderedDict
from datetime import date
import tarfile
import simplejson
import os
import re
# Histogram of release year -> number of tracks, filled in by the scan below.
years = defaultdict(int)
# Number of JSON documents for which no plausible release year was found.
missing_count = 0
# Raw string so that \d reaches the regex engine as a digit class instead of
# being treated as an (invalid) string escape sequence.
year_regex = re.compile(r'\d{4}')
archive_path = os.path.join('data', 'dump', 'acousticbrainz-lowlevel-json-20141119-json.tar')

# Plausibility window for a release year. The upper bound is computed once,
# not on every loop iteration.
earliest_year = 1890              # exclusive lower bound
latest_year = date.today().year   # inclusive upper bound


def _release_year(tags):
    """Return the release year stored in ``tags``, or None if there is none.

    Only the first entry of ``tags['date']`` is inspected, and it must start
    with four digits (``year_regex`` is applied with ``match``, i.e. anchored
    at the beginning). Years outside ``(earliest_year, latest_year]`` are
    rejected.

    tags -- mapping of tag names to values; presumably each value is a list
            of strings (TODO confirm against the dump format).
    """
    dates = tags.get('date')
    if not dates:
        # Tag absent or empty list: nothing to parse.
        return None
    found = year_regex.match(dates[0])
    if found is None:
        return None
    year = int(found.group())
    if earliest_year < year <= latest_year:
        return year
    return None


with tarfile.open(archive_path, mode='r') as tar:
    for member in tar:
        if not member.name.endswith('.json'):
            continue

        member_file = tar.extractfile(member)
        if member_file is None:
            # extractfile() returns None for members that are not regular
            # files or links (e.g. a directory whose name ends in '.json').
            continue
        try:
            document = simplejson.load(member_file)
        finally:
            member_file.close()

        # Documents without a 'metadata' or 'tags' section are counted as
        # missing instead of aborting the whole scan with a KeyError.
        tags = document.get('metadata', {}).get('tags', {})
        year = _release_year(tags)
        if year is None:
            missing_count += 1
        else:
            years[year] += 1
    
    
In [58]:
    
%matplotlib inline
import matplotlib.pyplot as plt
# Plot the number of tracks per release year, sorted chronologically.
ordered = OrderedDict(sorted(years.items()))
# Materialize as lists: on Python 3 keys()/values() are views, and plain
# sequences are the safest input for matplotlib.
keys = list(ordered.keys())
values = list(ordered.values())

if not keys:
    # min()/max() raise ValueError on empty input, so bail out early.
    print('No release years were collected; nothing to plot.')
else:
    plt.plot(keys, values)
    plt.title('Release year distribution')
    # Leave 20% headroom above the tallest point.
    plt.axis([min(keys), max(keys), 0, max(values) * 1.2])
    plt.xlabel('Years')
    plt.ylabel('Tracks')
    plt.show()