Source code for mirdata.datasets.fma_keys

"""
FMA Keys Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    FMA Keys is an expert-labeled dataset for the evaluation of key detection containing
    340 hours (5489 songs) of song-level key and mode annotations, spread across 17 genres.

    This dataset has been annotated by one annotator with perfect pitch and twenty years of
    music experience as a concert pianist. A sample of this dataset was cross-annotated
    by two annotators with high inter-annotator agreement.

    Dataset use

    The annotations are available for conducting non-commercial research
    related to audio analysis.

    About the dataset

    For each song, we provide annotations for:
    - FMA track id
    - Spotify URI (when available)
    - Key and mode

    The modes are provided both as strings and numbers:
     "Major" <-> 1, "minor" <-> 0

    Similarly, for the keys:
    "C" <-> 0, "C#" <-> 1, etc.

    We also provide easy access to the underlying audio data
    from the FMA dataset.

    We filtered the FMA dataset to a subset that exists in the Spotify API
    through fuzzy matching the artists, titles.
    Next, we compared song duration and discard results that are egregiously different.

    About the audio

    All the audio is collected in and distributed by the FMA dataset by Michael Defferrard,
    Kirell Benzi, Pierre Vandergheynst, and Xavier Bresson.

    The FMA metadata is made freely available for public use under a Creative Commons license.
    We do not hold the copyright on the audio and distribute it under the license chosen by the artist.
    The dataset is meant for research purposes.
"""

import csv
import os
import numpy as np
from math import floor
from smart_open import open

import librosa

from mirdata import download_utils, core, io

from typing import Optional, Tuple

BIBTEX = """
    @inproceedings{
        wong_fma_keys,
        title = {FMAK: A Dataset of Key and Mode Annotations for the Free Music Archive},
        author = {Wong, Stella and Hernandez, Gandalf},
        booktitle = {24th International Society for Music Information Retrieval Conference (ISMIR)},
        year = {2023}
    }
}
"""

LICENSE_INFO = "Creative Commons Attribution 4.0 International"

INDEXES = {
    "default": "1.0",
    "test": "sample",
    "1.0": core.Index(
        filename="fma_keys_index_1.0.json",
        url="https://zenodo.org/records/16757314/files/fma_keys_index_1.0.json?download=1",
        checksum="6c905f1c0d1caef11643b67cfe80ddf4",
    ),
    "sample": core.Index(filename="fma_keys_index_1.0_sample.json"),
}
REMOTES = {
    "metadata": download_utils.RemoteFileMetadata(
        filename="fma_keys_metadata.csv",
        url="https://zenodo.org/records/10719860/files/fma_keys_metadata.csv?download=1",
        checksum="d80a03bc8659edc60e335bd7f6bdf12a",
    ),
    "tracks-000-019": download_utils.RemoteFileMetadata(
        filename="000-019.zip",
        url="https://zenodo.org/records/10719860/files/000-019.zip?download=1",
        checksum="b86f6414820c1422b2c6cdf87be1ef3a",
    ),
    "tracks-020-039": download_utils.RemoteFileMetadata(
        filename="020-039.zip",
        url="https://zenodo.org/records/10719860/files/020-039.zip?download=1",
        checksum="a2da8377fdbc1d3a1f54dd60aa7b8f9b",
    ),
    "tracks-040-049": download_utils.RemoteFileMetadata(
        filename="040-049.zip",
        url="https://zenodo.org/records/10719860/files/040-049.zip?download=1",
        checksum="d70babe5f66bdf3e821c42a8b8aafb9b",
    ),
    "tracks-050-059": download_utils.RemoteFileMetadata(
        filename="050-059.zip",
        url="https://zenodo.org/records/10719860/files/050-059.zip?download=1",
        checksum="f53fcba704fce27e5c7f3ec2532dcb44",
    ),
    "tracks-060-069": download_utils.RemoteFileMetadata(
        filename="060-069.zip",
        url="https://zenodo.org/records/10719860/files/060-069.zip?download=1",
        checksum="1520f067d7caaf0813780ff69bc4ba85",
    ),
    "tracks-070-079": download_utils.RemoteFileMetadata(
        filename="070-079.zip",
        url="https://zenodo.org/records/10719860/files/070-079.zip?download=1",
        checksum="186643746fcb1f4722a28d3eb9c6b99c",
    ),
    "tracks-080-089": download_utils.RemoteFileMetadata(
        filename="080-089.zip",
        url="https://zenodo.org/records/10719860/files/080-089.zip?download=1",
        checksum="8cf882609fc2f301621c2e9f9da03214",
    ),
    "tracks-090-099": download_utils.RemoteFileMetadata(
        filename="090-099.zip",
        url="https://zenodo.org/records/10719860/files/090-099.zip?download=1",
        checksum="84f0f036e3778ffd97c10b591f803d06",
    ),
    "tracks-100-109": download_utils.RemoteFileMetadata(
        filename="100-109.zip",
        url="https://zenodo.org/records/10719860/files/100-109.zip?download=1",
        checksum="4a307f019d3354064814f05d1dffa1e2",
    ),
    "tracks-110-124": download_utils.RemoteFileMetadata(
        filename="110-124.zip",
        url="https://zenodo.org/records/10719860/files/110-124.zip?download=1",
        checksum="88d7dbcca82189ed75b7baa5aa132fc1",
    ),
}

KEY_MAP = {
    "C": 0,
    "C#": 1,
    "D": 2,
    "D#": 3,
    "E": 4,
    "F": 5,
    "F#": 6,
    "G": 7,
    "G#": 8,
    "A": 9,
    "Bb": 10,
    "B": 11,
}

MODE_MAP = {"minor": 0, "Major": 1}



[docs]
class Track(core.Track):
    """FMA Keys Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        spotify_uri (str): Spotify URI if available
        key (str): key of the track (C, C#, etc)
        mode (str): mode of the track (Major, minor)
        key_number (int): numeric key of the track (0-11)
        mode_number (int): numeric mode of the track (0 for minor, 1 for Major)
        audio_path (str): path to the track's audio file
        audio (ndarray): audio data
    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        self.audio_path = self.get_path("audio")

    @property
    def spotify_uri(self):
        return self._track_metadata.get("spotify_uri")

    @property
    def key(self):
        return self._track_metadata.get("key")

    @property
    def mode(self):
        return self._track_metadata.get("mode")

    @property
    def key_number(self):
        return self._track_metadata.get("key_number")

    @property
    def mode_number(self):
        return self._track_metadata.get("mode_number")

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """audio

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)




[docs]
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The FMA Keys dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="fma_keys",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    def _track_to_dict(self, t):
        key_and_mode = t["key_and_mode"].split(" ")

        return {
            "spotify_uri": t["spotify_uri"],
            "key": key_and_mode[0],
            "mode": key_and_mode[1],
            "key_number": KEY_MAP[key_and_mode[0]],
            "mode_number": MODE_MAP[key_and_mode[1]],
        }

    @core.cached_property
    def _metadata(self):
        metadata_path = os.path.join(self.data_home, "fma_keys_metadata.csv")

        metadata_index = {}
        try:
            with open(metadata_path) as f:
                metadata_index = {
                    t["track_id"]: self._track_to_dict(t) for t in csv.DictReader(f)
                }

        except FileNotFoundError:
            raise FileNotFoundError("Metadata not found. Did you run .download()?")

        return metadata_index



# no decorator here because of https://github.com/librosa/librosa/issues/1267

[docs]
def load_audio(path: str) -> Tuple[np.ndarray, float]:
    """Load fma keys audio

    Args:
        path(str): Path to audio file

    Returns:
        * np.ndarray - audio signal
        * float - sample rate

    """
    return librosa.load(path, sr=None, mono=True)