In [8]:
In [2]:
from django.db import connection

# Close the current DB connection before querying; Django opens a fresh one
# on the next query (presumably a guard against a connection that went stale
# while the kernel was idle).
connection.close()

# Resolve the Import session that shares its UUID with mass-import #46.
# The UUID is passed to the `uuid_key` lookup as a string.
massimport = Massimport.objects.get(id=46)
mi_uuid = massimport.uuid
imp_s = Import.objects.get(uuid_key=str(mi_uuid))

# All files of that session, plus the two status subsets the later cells
# iterate over (`qs_dup` and `qs_done`).
qs = imp_s.files.all()
qs_done = qs.filter(status=ImportFile.STATUS_DONE)
qs_dup = qs.filter(status=ImportFile.STATUS_DUPLICATE)

# Print the total number of files, then the number flagged as duplicate.
for file_qs in (qs, qs_dup):
    print(file_qs.count())
In [ ]:
In [46]:
from django.db import connection

# Close the current DB connection; Django reconnects on the next query.
connection.close()

# Media that have at least one relation whose URL contains this recording path.
m = Media.objects.filter(relations__url__contains='/recording/3901df29-5dda-4542-b7a7-f9a903c16480')
print(m)

# Indexing an empty queryset with [0] raises IndexError. Fetch at most one
# row instead (same LIMIT 1 query) and report the empty case explicitly so
# the cell never aborts a full notebook run.
first_match = list(m[:1])
if first_match:
    print(first_match[0].pk)
else:
    print('no matching media found')
In [16]:
# Does media #430517 have at least one relation whose URL contains
# 'musicbrainz.org/recording/'? The boolean is the cell's displayed result.
(
    Media.objects
    .get(pk=430517)
    .relations
    .filter(url__contains='musicbrainz.org/recording/')
    .exists()
)
In [ ]:
In [ ]:
In [18]:
import ntpath

# For every file flagged as duplicate, compare the title of the media it was
# matched to against the base name of the imported file. Typographic
# apostrophes are normalised to ASCII ones on both sides and the comparison
# is case-insensitive. Only pairs where the title does NOT occur in the file
# name are printed (original title, then original base name).
for item in qs_dup:
    m_name = item.media.name.replace(u"’", u"'")
    file_name = ntpath.basename(item.filename)
    m_orig = file_name.replace(u"’", u"'")

    # Title found inside the file name: nothing to report.
    if m_name.lower() in m_orig.lower():
        continue

    print(u'{}\t\t{}'.format(item.media.name, file_name))
In [ ]:
In [ ]:
In [19]:
import ntpath

from django.db import connection
from fprint_client.api_client import FprintAPIClient
from fprint_client.utils import fprint_from_path

# Close the current DB connection; Django reconnects on the next query.
connection.close()

# Minimum score for the fingerprint lookup sketched in the commented-out
# experiment further down (passed to `identify(min_score=...)` there).
FPRINT_MIN_SCORE = 0.08

# Two masters count as "same length" when their durations differ by at most
# this much, in the unit of `master_duration` (presumably seconds - confirm).
DURATION_TOLERANCE = 1.0

# search for exact duplicates by name (title & artist)
p_dupes = []
for item in qs_done:
    media = item.media

    # Media without a master file or without a duration cannot be compared.
    if not (media.master and media.master_duration):
        continue

    m_name = media.name
    a_name = media.artist.name
    d_range = (
        media.master_duration - DURATION_TOLERANCE,
        media.master_duration + DURATION_TOLERANCE,
    )

    # Other media with identical title and artist name whose master duration
    # lies within the tolerance window.
    dupe_qs = (
        Media.objects
        .exclude(pk=media.pk)
        .filter(name=m_name, artist__name=a_name)
        .filter(master_duration__range=d_range)
    )

    # Materialise the matches once so the emptiness check and the report loop
    # share a single query instead of running exists() plus a second fetch.
    dupes = list(dupe_qs)
    if not dupes:
        continue

    p_dupes.append(item)

    # First the imported item, then every candidate duplicate found for it.
    print(u'{}\t{} - {} - {}'.format(media.master_duration, m_name, a_name, media.release.name))
    for dupe in dupes:
        print(u'{}\t{} - {} - {}'.format(dupe.master_duration, dupe.name, dupe.artist.name, dupe.release.name))

    # testing fprint
    #print('- get matches via fprint')
    #fprint = fprint_from_path(item.media.master.path)
    #results = FprintAPIClient().identify(fprint=fprint, min_score=FPRINT_MIN_SCORE)
    #print('fprint num results: {}'.format(len(results)))
    #for result in results:
    #    print('score: {}'.format(result['score']))
    #print('--')

    # Separator after each reported group.
    print('**********************')

# Summary: number of successfully imported files, then how many of them have
# at least one exact-name duplicate.
print(qs_done.count())
print(len(p_dupes))