Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PICARD-2584: Load recording if AcoustId metadata is missing #2351

Merged
merged 8 commits into from
Jan 7, 2024
74 changes: 33 additions & 41 deletions picard/acoustid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@
from PyQt6 import QtCore

from picard import log
from picard.acoustid.json_helpers import (
max_source_count,
parse_recording,
)
from picard.acoustid.recordings import RecordingResolver
from picard.config import get_config
from picard.const import (
DEFAULT_FPCALC_THREADS,
Expand All @@ -49,6 +46,7 @@
find_executable,
win_prefix_longpath,
)
from picard.webservice.api_helpers import AcoustIdAPIHelper


def get_score(node):
Expand Down Expand Up @@ -76,7 +74,7 @@ def find_fpcalc():

class AcoustIDClient(QtCore.QObject):

def __init__(self, acoustid_api):
def __init__(self, acoustid_api: AcoustIdAPIHelper):
super().__init__()
self._queue = deque()
self._running = 0
Expand All @@ -93,7 +91,6 @@ def get_max_processes(self):
return config.setting['fpcalc_threads'] or DEFAULT_FPCALC_THREADS

def _on_lookup_finished(self, task, document, http, error):
doc = {}
if error:
mparms = {
'error': http.errorString(),
Expand All @@ -108,42 +105,16 @@ def _on_lookup_finished(self, task, document, http, error):
mparms,
echo=None
)
task.next_func({}, http, error)
else:
try:
recording_list = doc['recordings'] = []
status = document['status']
if status == 'ok':
results = document.get('results') or []
for result in results:
recordings = result.get('recordings') or []
max_sources = max_source_count(recordings)
result_score = get_score(result)
for recording in recordings:
parsed_recording = parse_recording(recording)
if parsed_recording is not None:
# Calculate a score based on result score and sources for this
# recording relative to other recordings in this result
score = min(recording.get('sources', 1) / max_sources, 1.0) * 100
parsed_recording['score'] = score * result_score
parsed_recording['acoustid'] = result['id']
recording_list.append(parsed_recording)

if results:
if not recording_list:
# Set AcoustID in tags if there was no matching recording
task.file.metadata['acoustid_id'] = results[0]['id']
task.file.update()
log.debug(
"AcoustID: Found no matching recordings for '%s',"
" setting acoustid_id tag to %r",
task.file.filename, results[0]['id']
)
else:
log.debug(
"AcoustID: Lookup successful for '%s' (recordings: %d)",
task.file.filename,
len(recording_list)
)
resolver = RecordingResolver(
self._acoustid_api.webservice,
document,
callback=partial(self._on_recording_resolve_finish, task, document, http))
resolver.resolve()
else:
mparms = {
'error': document['error']['message'],
Expand All @@ -157,11 +128,32 @@ def _on_lookup_finished(self, task, document, http, error):
mparms,
echo=None
)
task.next_func({}, http, error)
except (AttributeError, KeyError, TypeError) as e:
log.error("AcoustID: Error reading response", exc_info=True)
error = e

task.next_func(doc, http, error)
task.next_func({}, http, e)

def _on_recording_resolve_finish(self, task, document, http, result=None, error=None):
    """Finish an AcoustID lookup once the recordings have been resolved.

    ``result`` is the list of resolved recordings. When it is empty (or
    missing) but the AcoustID response contained results, the first
    result's AcoustID is written to the file's ``acoustid_id`` tag.
    In every case the task's ``next_func`` is called with the recordings,
    the ``http`` object and ``error``.
    """
    if result:
        log.debug(
            "AcoustID: Lookup successful for '%s' (recordings: %d)",
            task.file.filename,
            len(result)
        )
    else:
        lookup_results = document.get('results')
        if lookup_results:
            # Set AcoustID in tags if there was no matching recording
            first_acoustid = lookup_results[0].get('id')
            task.file.metadata['acoustid_id'] = first_acoustid
            task.file.update()
            log.debug(
                "AcoustID: Found no matching recordings for '%s',"
                " setting acoustid_id tag to %r",
                task.file.filename, first_acoustid
            )
    task.next_func({'recordings': result}, http, error)

def _lookup_fingerprint(self, task, result=None, error=None):
if task.file.state == File.REMOVED:
Expand Down
12 changes: 2 additions & 10 deletions picard/acoustid/json_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,5 @@ def parse_recording(recording):
return recording_mb


def max_source_count(recordings):
    """Return the highest ``sources`` value among the given recordings.

    Only recordings with a truthy ``title`` are considered. A recording
    without a ``sources`` entry counts as 1, and the result is never
    lower than 1.
    """
    counts = []
    for entry in recordings:
        if entry.get('title'):
            counts.append(entry.get('sources', 1))
    counts.append(1)
    return max(counts)
def recording_has_metadata(recording):
    """Tell whether a raw AcoustID recording has an ``id`` key and a non-None ``title``."""
    if 'id' not in recording:
        return False
    return recording.get('title') is not None
197 changes: 197 additions & 0 deletions picard/acoustid/recordings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
# -*- coding: utf-8 -*-
#
# Picard, the next-generation MusicBrainz tagger
#
# Copyright (C) 2023 Philipp Wolfer
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

from collections import (
defaultdict,
deque,
namedtuple,
)
from functools import partial
from typing import (
Dict,
List,
)

from PyQt6.QtNetwork import QNetworkReply

from picard.acoustid.json_helpers import (
parse_recording,
recording_has_metadata,
)
from picard.webservice import WebService
from picard.webservice.api_helpers import MBAPIHelper


# Only do an extra lookup for a recording without metadata if its number of
# sources, divided by the highest source count among the recordings of the
# same result, is strictly greater than this fraction.
SOURCE_THRESHOLD_NO_METADATA = 0.25

# Queue at most this number of recordings without metadata per AcoustID.
MAX_NO_METADATA_RECORDINGS = 3


class Recording:
    """A recording together with the values used to compute its score."""

    # Recording data as stored by the resolver
    recording: dict
    # Score of the AcoustID result this recording belongs to
    result_score: float
    # Number of sources reported for this recording
    sources: int

    def __init__(self, recording: dict, result_score: float = 1.0, sources: int = 1) -> None:
        self.sources = sources
        self.result_score = result_score
        self.recording = recording


# A recording for which the AcoustID result carried no metadata: only its
# MBID, the AcoustID it was listed under and the scoring inputs are known.
# The typename matches the bound name so that repr() output is not confused
# with the Recording class.
IncompleteRecording = namedtuple('IncompleteRecording', 'mbid acoustid result_score sources')


class RecordingResolver:
    """Given an AcoustID lookup result returns a list of MB recordings.
    The recordings are either directly taken from the AcoustID result or, if the
    results return only the MBID without metadata, loaded via the MB web service.

    Call `resolve()` once; the callback passed to the constructor is invoked
    with ``(recordings, error)`` when all recordings have been processed or a
    request failed.
    """

    # Maps AcoustID -> recording MBID -> Recording
    _recording_map: Dict[str, Dict[str, Recording]]

    def __init__(self, ws: WebService, doc: dict, callback: callable) -> None:
        self._mbapi = MBAPIHelper(ws)
        self._doc = doc
        self._callback = callback
        self._recording_map = defaultdict(dict)
        # Recording data by MBID, filled from parsed results and finished requests
        self._recording_cache = dict()
        # Queue of IncompleteRecording entries still to be loaded
        self._missing_metadata = deque()

    def resolve(self) -> None:
        """Collect recordings from the lookup document and load missing ones."""
        results = self._doc.get('results') or []
        incomplete_counts = defaultdict(int)
        for result in results:
            recordings = result.get('recordings') or []
            result_score = get_score(result)
            acoustid = result.get('id')
            max_sources = max_source_count_raw_recording(recordings)
            # Handle recordings with most sources first, so the per AcoustID
            # limit for incomplete recordings keeps the best supported ones.
            for recording in sorted(recordings, key=lambda r: r.get('sources', 1), reverse=True):
                mbid = recording.get('id')
                sources = recording.get('sources', 1)
                if recording_has_metadata(recording):
                    mb_recording = parse_recording(recording)
                    self._recording_cache[mbid] = mb_recording
                    self._recording_map[acoustid][mbid] = Recording(
                        recording=mb_recording,
                        result_score=result_score,
                        sources=sources,
                    )
                elif (mbid
                      and sources / max_sources > SOURCE_THRESHOLD_NO_METADATA
                      and incomplete_counts[acoustid] < MAX_NO_METADATA_RECORDINGS):
                    # Without an MBID there is nothing to look up, hence such
                    # entries are never queued.
                    self._missing_metadata.append(IncompleteRecording(
                        mbid=mbid,
                        acoustid=acoustid,
                        result_score=result_score,
                        sources=sources,
                    ))
                    incomplete_counts[acoustid] += 1

        self._load_recordings()

    def _load_recordings(self):
        """Process the next queued incomplete recording, or finish if none is left."""
        if not self._missing_metadata:
            self._send_results()
            return

        mbid = self._missing_metadata[0].mbid
        if mbid in self._recording_cache:
            # Already known, no request needed
            mb_recording = self._recording_cache[mbid]
            self._recording_request_finished(mbid, mb_recording, None, None)
        else:
            self._mbapi.get_track_by_id(
                mbid,
                partial(self._recording_request_finished, mbid),
                inc=('artists', 'release-groups', 'releases', 'media'),
            )

    def _recording_request_finished(self, original_mbid, mb_recording, http, error):
        """Store the recording loaded for the head of the queue and continue.

        ``original_mbid`` is the MBID that was requested; it can differ from
        the ``id`` of ``mb_recording``. On a "content not found" error the
        entry is dropped and processing continues, any other error ends the
        resolving and is passed to the callback.
        """
        recording = self._missing_metadata.popleft()
        if error:
            if error == QNetworkReply.NetworkError.ContentNotFoundError:
                # Recording does not exist, ignore and move on
                self._load_recordings()
            else:
                self._send_results(error)
            return

        mbid = mb_recording.get('id')
        recording_dict = self._recording_map[recording.acoustid]
        if mbid:
            self._recording_cache[mbid] = mb_recording
            # This was a redirect, cache the old MBID as well
            if original_mbid != mbid:
                self._recording_cache[original_mbid] = mb_recording
            if mbid not in recording_dict:
                recording_dict[mbid] = Recording(
                    recording=mb_recording,
                    result_score=recording.result_score,
                    sources=recording.sources,
                )
            else:
                # Same recording already listed for this AcoustID: merge sources
                recording_dict[mbid].sources += recording.sources
        self._load_recordings()

    def _send_results(self, error=None):
        """Invoke the callback with the list of scored recordings and ``error``."""
        self._callback(list(parse_recording_map(self._recording_map)), error)


def get_score(node):
    """Return the ``score`` entry of ``node`` as a float.

    Falls back to 1.0 if the entry is missing or cannot be converted.
    """
    try:
        score = float(node.get('score', 1.0))
    except (TypeError, ValueError):
        score = 1.0
    return score


def parse_recording_map(recording_map: Dict[str, Dict[str, 'Recording']]):
    """Yield a scored recording dict for each recording in ``recording_map``.

    ``recording_map`` maps AcoustID -> MBID -> Recording. Entries whose
    ``recording`` is None are skipped. Each yielded dict is a shallow copy of
    the stored recording with ``score``, ``acoustid`` and ``sources`` added.
    Yielding a copy keeps the results independent when the same recording
    dict is attached to more than one AcoustID, and leaves the stored data
    unmodified.
    """
    for acoustid, recordings in recording_map.items():
        entries = list(recordings.values())
        # Highest source count within this AcoustID, never lower than 1
        max_sources = max([1, *(entry.sources for entry in entries)])
        for entry in entries:
            if entry.recording is None:
                continue
            # Calculate a score based on result score and sources for this
            # recording relative to other recordings in this result
            relative_score = min(entry.sources / max_sources, 1.0) * 100
            parsed_recording = dict(entry.recording)
            parsed_recording['score'] = relative_score * entry.result_score
            parsed_recording['acoustid'] = acoustid
            parsed_recording['sources'] = entry.sources
            yield parsed_recording


def max_source_count(recordings: List['Recording']):
    """Return the highest ``sources`` value of the given recordings.

    All given recordings are considered. The result is never lower than 1,
    which is also returned for an empty input.
    """
    counts = set()
    for entry in recordings:
        counts.add(entry.sources)
    counts.add(1)
    return max(counts)


def max_source_count_raw_recording(recordings: List[dict]):
    """Return the highest ``sources`` value of the given raw recording dicts.

    All given recordings are considered, a missing ``sources`` entry counts
    as 1. The result is never lower than 1, which is also returned for an
    empty input.
    """
    counts = set()
    for raw_recording in recordings:
        counts.add(raw_recording.get('sources', 1))
    counts.add(1)
    return max(counts)
Loading
Loading