Source code for mirdata.datasets.compmusic_iamms

"""CompMusic IAMMS Dataset Loader

.. admonition:: Dataset Info
    :class: dropdown

    This dataset comprises audio excerpts and manually done annotations of the melodic phrases in Carnatic and Hindustani music.
    This dataset can be used to develop and evaluate approaches for computing melodic similarity between short-time melodic patterns in Indian art music.

    The dataset contains the following manual annotations referring to audio files:

    - Section annotations, both original and finetuned, stored as start and end timestamps together with the phrase ID of the section (similar melodic phrases have the same ID).
    - Nyas event annotations stored as start and end timestamps.
    - Audio features automatically extracted and stored: pitch and tonic.
    - The annotations are stored in files with song identifier as the filename and file extension:
        - Section annotations: `.anot` and `.anotEdit`
        - Nyas annotations: `.flatSegNyas`
        - Pitch annotations: `.pitch`, `.pitchSilIntrpPP`, `.tpe` and `.tpe5msSilIntrpPP`
        - Tonic: `.tonic` and `.tonicFine`

    The dataset contains a total of 32 tracks.

    The files of this dataset are shared with the following license:
    Creative Commons Attribution Non Commercial Share Alike 4.0 International

    Dataset compiled by: Gulati, S., Serrà, J., and Serra, X.

    For more information about the dataset as well as IAM and annotations, please refer to:
    https://zenodo.org/records/16631794, where a really detailed explanation of the data and annotations is published.

"""

import csv
import json

import librosa
import numpy as np

from mirdata import annotations, core, download_utils, io

# BibTeX entry for the ISMIR 2015 paper associated with this dataset.
# Handed to core.Dataset as the `bibtex` argument in Dataset.__init__.
BIBTEX = """
@inproceedings{gulati2015improving,
  author    = {Sankalp Gulati and Joan Serr{\\`a} and Xavier Serra},
  title     = {Improving melodic similarity in Indian art music using culture-specific melodic characteristics},
  booktitle = {Proceedings of the 16th International Society for Music Information Retrieval Conference (ISMIR)},
  pages     = {680--686},
  year      = {2015},
  address   = {Malaga, Spain}
}

"""

# Index files available for this dataset, keyed by version name.
# "default" and "test" hold the key of the entry to use ("1.0" and "sample").
# The "1.0" index carries a remote URL and checksum (presumably downloaded on
# demand by core.Index -- confirm); the "sample" index only names a file.
INDEXES = {
    "default": "1.0",
    "test": "sample",
    "1.0": core.Index(
        filename="compmusic_iamms_index_1.0.json",
        url="https://zenodo.org/records/17175092/files/compmusic_iamms_index_1.0.json?download=1",
        checksum="3c8843f87b0fea83715058c5d8a84c22",
    ),
    "sample": core.Index(filename="compmusic_iamms_index_1.0_sample.json"),
}

# Remote resources of the dataset: a single zip archive, described by its
# local filename, source URL and checksum. Handed to core.Dataset as `remotes`.
REMOTES = {
    "all": download_utils.RemoteFileMetadata(
        filename="compmusic_iamms.zip",
        url="https://zenodo.org/records/16631794/files/MelodicSimilarityDataset.zip?download=1",
        checksum="d02c3f329558f91de2fe3bd613f6f2f5",
    )
}

# License text handed to core.Dataset as the `license_info` argument.
LICENSE_INFO = (
    "Creative Commons Attribution Non Commercial Share Alike 4.0 International."
)


class Track(core.Track):
    """Single track of the IAM Melodic Similarity dataset.

    Args:
        track_id (str): track id of the track
        data_home (str): local path where the dataset is stored
        dataset_name (str): dataset name, forwarded unchanged to core.Track
        index: dataset index, forwarded unchanged to core.Track
        metadata: dataset metadata, forwarded unchanged to core.Track

    Attributes:
        audio_path (str): path to audio file
        sections_path (str): path to sections annotation file
        sections_finetuned_path (str): path to improved sections annotation file
        nyas_path (str): path to nyas annotation file
        pitch_path (str): path to pitch annotation file
        pitch_finetuned_path (str): path to improved pitch annotation file
        tonic_path (str): path to tonic data file
        tonic_finetuned_path (str): path to improved tonic data file

    Properties:
        audio (tuple): (audio signal as np.ndarray, sample rate); a plain
            property, so the file is loaded again on every access

    Cached Properties:
        sections (SectionData): section annotations
        sections_finetuned (SectionData): improved section annotations
        nyas (EventData): nyas annotations
        pitch (F0Data): pitch annotations
        pitch_finetuned (F0Data): improved pitch annotations
        tonic (float): tonic
        tonic_finetuned (float): tonic finetuned

    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        # Audio
        self.audio_path = self.get_path("audio")

        # Melodic phrase (section) annotations, original and finetuned
        self.sections_path = self.get_path("sections")
        self.sections_finetuned_path = self.get_path("sections-finetuned")

        # Nyas event annotations
        self.nyas_path = self.get_path("nyas")

        # Pitch tracks, original and finetuned
        self.pitch_path = self.get_path("pitch")
        self.pitch_finetuned_path = self.get_path("pitch-finetuned")

        # Tonic values, original and finetuned
        self.tonic_path = self.get_path("tonic")
        self.tonic_finetuned_path = self.get_path("tonic-finetuned")

    @property
    def audio(self):
        # Deliberately a plain property (not cached) in the original code.
        return load_audio(self.audio_path)

    @core.cached_property
    def sections(self):
        return load_sections(self.sections_path)

    @core.cached_property
    def sections_finetuned(self):
        return load_sections(self.sections_finetuned_path)

    @core.cached_property
    def nyas(self):
        return load_nyas(self.nyas_path)

    @core.cached_property
    def pitch(self):
        return load_pitch(self.pitch_path)

    @core.cached_property
    def pitch_finetuned(self):
        return load_pitch(self.pitch_finetuned_path)

    @core.cached_property
    def tonic(self):
        return load_tonic(self.tonic_path)

    @core.cached_property
    def tonic_finetuned(self):
        return load_tonic(self.tonic_finetuned_path)
def load_audio(audio_path):
    """Load the audio file of a track.

    Args:
        audio_path (str): path to audio file; may be None

    Returns:
        tuple or None: the result of ``librosa.load`` called with
        ``sr=44100`` and ``mono=False`` (audio signal as np.ndarray and the
        sample rate), or None when ``audio_path`` is None

    """
    if audio_path is not None:
        # mono=False keeps the file's channels instead of downmixing.
        return librosa.load(audio_path, sr=44100, mono=False)
    return None
@io.coerce_to_string_io
def load_nyas(fhandle):
    """Load the nyas annotations of a track.

    Args:
        fhandle (str or file-like): path to the annotation file, or an open
            text handle (the ``io.coerce_to_string_io`` decorator presumably
            accepts both -- confirm)

    Returns:
        EventData: nyas intervals in seconds, every event labelled "nyas"

    """
    rows = csv.reader(fhandle, delimiter="\t")
    # Only the first two tab-separated columns (start, end) are used.
    spans = [[float(row[0]), float(row[1])] for row in rows]
    tags = ["nyas"] * len(spans)
    return annotations.EventData(np.array(spans), "s", tags, "open")
@io.coerce_to_string_io
def load_sections(fhandle):
    """Load the section (melodic phrase) annotations of a track.

    Args:
        fhandle (str or file-like): path to the annotation file, or an open
            text handle (the ``io.coerce_to_string_io`` decorator presumably
            accepts both -- confirm)

    Returns:
        SectionData: section annotations with intervals (melodic phrases) in
        seconds and labels (phrase identifiers)

    """
    rows = csv.reader(fhandle, delimiter="\t")
    # Tab-separated columns: start time, end time, phrase identifier.
    parsed = [(float(row[0]), float(row[1]), row[2]) for row in rows]
    spans = [[begin, finish] for begin, finish, _ in parsed]
    phrase_ids = [phrase_id for _, _, phrase_id in parsed]
    return annotations.SectionData(np.array(spans), "s", phrase_ids, "open")
@io.coerce_to_string_io
def load_pitch(fhandle):
    """Load the pitch track of a track.

    Args:
        fhandle (str or file-like): path to the pitch file, or an open text
            handle (the ``io.coerce_to_string_io`` decorator presumably
            accepts both -- confirm)

    Returns:
        F0Data: pitch annotations (times in seconds, frequencies in Hz) with
        a binary voicing that is 1.0 where the frequency is above zero

    """
    # Peek at the first line to pick the column separator, then rewind so
    # the csv reader starts from the beginning of the file.
    header = fhandle.readline()
    fhandle.seek(0)
    separator = "\t" if "\t" in header else " "

    rows = csv.reader(fhandle, delimiter=separator)
    samples = [(float(row[0]), float(row[1])) for row in rows]

    times = np.array([stamp for stamp, _ in samples])
    freqs = np.array([value for _, value in samples])
    voicing = (freqs > 0).astype(float)
    return annotations.F0Data(times, "s", freqs, "hz", voicing, "binary")
@io.coerce_to_string_io
def load_tonic(fhandle):
    """Load the tonic of a track.

    Args:
        fhandle (str or file-like): path to the tonic file, or an open text
            handle (the ``io.coerce_to_string_io`` decorator presumably
            accepts both -- confirm)

    Returns:
        float: tonic frequency in Hz, taken from the first space-separated
        field of the first row of the file

    """
    rows = csv.reader(fhandle, delimiter=" ")
    first_row = next(rows)
    return float(first_row[0])
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The IAM Melodic Similarity dataset.

    This dataset contains Carnatic and Hindustani music recordings with
    annotations for sections, pitch, nyas, and tonic. It is designed to
    support research on melodic similarity with culturally relevant features.
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            name="compmusic_iamms",
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    # The methods below are thin wrappers that forward all arguments to the
    # module-level loader of the same name.

    def load_audio(self, *args, **kwargs):
        """Forward to the module-level ``load_audio``."""
        return load_audio(*args, **kwargs)

    def load_sections(self, *args, **kwargs):
        """Forward to the module-level ``load_sections``."""
        return load_sections(*args, **kwargs)

    def load_nyas(self, *args, **kwargs):
        """Forward to the module-level ``load_nyas``."""
        # Previously delegated to load_sections, which requires a third
        # (label) column and returns SectionData instead of EventData.
        return load_nyas(*args, **kwargs)

    def load_pitch(self, *args, **kwargs):
        """Forward to the module-level ``load_pitch``."""
        return load_pitch(*args, **kwargs)

    def load_tonic(self, *args, **kwargs):
        """Forward to the module-level ``load_tonic``."""
        # Previously delegated to load_pitch, which returns F0Data rather
        # than the tonic frequency as a float.
        return load_tonic(*args, **kwargs)