cleaning

a6c14277 · mathieu Lagrange · c68c528e · a6c14277
--- a/2. Audio Content Analysis/tp/mfcc and chroma of synthetic data.ipynb
+++ b/2. Audio Content Analysis/tp/mfcc and chroma of synthetic data.ipynb
@@ -418,7 +418,7 @@
   "id": "77fe4b95-da49-411b-bc8a-9d41351fb1da",
   "metadata": {},
   "source": [
-    "**Question**: Design a metric that account for sensitivity. For example, this metric could quantify the amount of variation, averaged over features. Use it to quantify the sensitivity of the 2 representations with respect to the 2 tones. Is "
+    "**Question**: Design a metric that accounts for sensitivity. For example, this metric could quantify the amount of variation, averaged over features. Use it to quantify the sensitivity of the 2 representations with respect to the 2 tones. Are the numbers in line with what is expected?"
   ]
  },
  {
@@ -476,12 +476,12 @@
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e68b473e-ee70-426c-8240-f44dcec3929c",
+   "cell_type": "markdown",
+   "id": "ba8e20a1-0f2d-4b8d-a3c2-76a766cf7aab",
   "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "**Answer**: here"
+   ]
  }
 ],
 "metadata": {
 %% Cell type:code id:505e07c4-7831-49b2-aa43-f4ce8b57f598 tags:
  
 ``` python
 # manage warnings
 import warnings
  
 # deal with matrices
 import numpy as np
  
 # signal processing
 import scipy.signal
  
 # deal with audio data
 import librosa
 from librosa.display import specshow
  
 # play audio
 import IPython.display as ipd
  
 # handle display
 %matplotlib inline
 from matplotlib import pyplot as plt
 from matplotlib import cm
 ```
  
 %% Cell type:markdown id:e89eb314-d731-4730-84d1-4f774888fb3a tags:
  
 ## Tone with increasing pitch
  
 %% Cell type:code id:0691a763-84ec-439c-a922-c80e19deadf7 tags:
  
 ``` python
 def synth_increasing_pitch(fmin=220, fmax=440, duration=10.0, n_partials=10, sr=22050):
    """Synthesize a harmonic tone whose pitch glides upward.

    The tone is the average of ``n_partials`` chirps. Partial number ``k``
    (k = 1 .. n_partials) sweeps from ``k * fmin`` to ``k * fmax``.

    Parameters
    ----------
    fmin : start frequency of the first partial.
    fmax : end frequency of the first partial.
    duration : length of the signal, forwarded to ``librosa.chirp``.
    n_partials : number of partials averaged together.
    sr : sampling rate, forwarded to ``librosa.chirp``.

    Returns
    -------
    The element-wise mean of the stacked partials.
    """
    # Arguments are passed by keyword: recent librosa releases made them
    # keyword-only, and older releases accept keywords as well.
    partials = [
        librosa.chirp(
            fmin=fmin * harmonic,
            fmax=fmax * harmonic,
            sr=sr,
            duration=duration)
        for harmonic in range(1, n_partials + 1)
    ]

    return np.mean(np.stack(partials), axis=0)
 ```
  
 %% Cell type:code id:9da792e6 tags:
  
 ``` python
 sr = 22050 # sampling rate
 # synthesize the tone with increasing pitch
 yp = synth_increasing_pitch(sr=sr)
 # `Audio` is not imported by name; go through the `ipd` alias of IPython.display
 ipd.display(ipd.Audio(yp, rate=sr))
 ```
  
 %% Output
  
  
 %% Cell type:markdown id:cb687d6f-d1a0-4f34-9114-2614b4f3908c tags:
  
 ## Tone with increasing brightness
  
 %% Cell type:code id:82997a56-1a25-48ed-b17b-09c5968e190e tags:
  
 ``` python
 # smoothing window function
 def planck_taper(N, epsilon=0.5):
    """Return a length-N smoothing window that rises from 0, plateaus at 1, and falls back to 0.

    Parameters
    ----------
    N : number of samples of the window.
    epsilon : fraction of the window used by each of the rising and the
        falling edge (0.5 means the window has no flat part in between).

    Returns
    -------
    A 1-D array ``w`` of length ``N`` with ``w[0] == w[-1] == 0``. The window
    is symmetric: ``w[i]`` equals ``w[N-1-i]`` up to rounding.
    """
    # Interior sample positions in (0, 1); x[k] is the position of sample k+1.
    x = np.linspace(0, 1, N)[1:-1]
    w = np.ones(N)
    t2 = int(np.round(epsilon * N))  # first sample after the rising edge
    t3 = N - t2                      # first sample of the falling edge
    with warnings.catch_warnings():
        # exp() overflows and divisions by zero are expected near the edge
        # boundaries; they resolve to weights of 0 or 1, so silence them.
        warnings.simplefilter("ignore")
        Z_plus = epsilon / x + epsilon / (x - epsilon)
        Z_minus = - epsilon / (x - (1-epsilon)) - epsilon / (x - 1)
        # w[i] must use x[i-1]. The rising edge therefore reads Z_plus from
        # index 0 (not 1); reading from 1 shifts the edge by one sample and
        # makes the last rising sample drop to ~0 right before the plateau.
        w[1:t2] = 1 / (np.exp(Z_plus[:max(t2 - 1, 0)]) + 1)
        w[t3:-1] = 1 / (np.exp(Z_minus[(t3-1):]) + 1)
    w[0] = 0
    w[-1] = 0
    return w
 ```
  
 %% Cell type:markdown id:8579a449-26eb-4b17-acde-91ac8c72de9a tags:
  
 **Question:** implement a function *synth_increasing_brightness* that generates an audio signal of a given f0, duration, and number of partials, such that the number of partials of the tone increases linearly with time. The spectral representation should look like this:
 ```
         -
        --
       ---
      ----
     -----
    ------
   -------
 ---------
 ```
  
 Hint: The function *planck_taper* can be used to gradually increase the amplitude of each incoming partial.
  
 %% Cell type:code id:52e0b3c1-bb6f-4454-8580-4ee999cee830 tags:
  
 ``` python
 def synth_increasing_brightness(f0=220, duration=10.0, n_partials=20, sr=22050):
    """Synthesize a harmonic tone whose partials enter one after the other.

    The fundamental ``f0`` sounds for the whole duration. Partial number
    ``partial_id`` (1 .. n_partials-1) has frequency ``(1 + partial_id) * f0``,
    stays silent for ``partial_id / n_partials`` of the duration, then fades in
    along the rising half of a ``planck_taper`` window and stays at full
    amplitude until the end.

    Parameters
    ----------
    f0 : frequency of the fundamental.
    duration : length of the signal, forwarded to ``librosa.tone``.
    n_partials : total number of partials, fundamental included.
    sr : sampling rate.

    Returns
    -------
    The element-wise mean of the stacked, amplitude-weighted partials.
    """
 # answer here
 ####
    # `sr` is passed by keyword: recent librosa releases made it keyword-only,
    # and older releases accept the keyword as well.
    partials = [librosa.tone(f0, sr=sr, duration=duration)]
    for partial_id in range(1, n_partials):
        # silence before the partial enters
        zeros_length = int(duration * sr * partial_id/n_partials)
        # fade-in length: one n_partials-th of the signal
        ramp_length = int(duration * sr/n_partials)
        # full amplitude for whatever remains
        ones_length = int(duration * sr - zeros_length - ramp_length)
        amplitude = np.concatenate((
            np.zeros((zeros_length,)),
            # rising half of a window twice as long as the ramp
            planck_taper(2*ramp_length)[:ramp_length],
            np.ones((ones_length,))
        ))
        frequency = (1+partial_id) * f0
        partial = librosa.tone(frequency, sr=sr, duration=duration)
        partials.append(amplitude*partial)
 ####
    return np.mean(np.stack(partials), axis=0)
 ```
  
 %% Cell type:code id:7c031e48-4477-49e0-a6e3-79cbcd0b7de7 tags:
  
 ``` python
 sr = 22050 # sampling rate
 # synthesize the tone with increasing brightness
 yb = synth_increasing_brightness(sr=sr)
 # `Audio` is not imported by name; go through the `ipd` alias of IPython.display
 ipd.display(ipd.Audio(yb, rate=sr))
 ```
  
 %% Output
  
  
 %% Cell type:markdown id:318f5ff5-3b61-4ed8-82bd-2407d4e95d4b tags:
  
 ## Display the spectrograms of the pitch increasing tone and the brightness increasing tone
  
 %% Cell type:code id:f3f02306 tags:
  
 ``` python
 # Both synthetic tones, gathered in one list so that the cells can loop over them.
 ys = [yp, yb]

 plt.figure(figsize=(12, 6))
 for fig_id, y in enumerate(ys):
    # one panel per tone, side by side
    plt.subplot(1, 2, fig_id + 1)

    # STFT magnitude, converted to decibels
    y_stft = librosa.stft(y)
    y_magnitude = np.abs(y_stft)
    y_spectrogram = librosa.amplitude_to_db(y_magnitude)

    specshow(y_spectrogram, x_axis="time", y_axis="hz", cmap="magma")
    plt.title("Short-term Fourier transform")
    plt.colorbar()
    # same color range in both panels
    plt.clim(-40, 30)
    plt.xlabel("Time (seconds)")
    plt.xlim(0, 10)
    plt.ylabel("Frequency (Hz)")
 ```
  
 %% Output
  

  
 %% Cell type:markdown id:988f1106-8d61-4e8c-9aeb-0d7891750a3f tags:
  
 ## Display the Constant Q Transform (CQT) spectrograms of the pitch increasing tone and the brightness increasing tone
  
 **Question:** use the *cqt* function of librosa to display the Constant Q Transform (CQT) spectrograms of the two tones.
  
 %% Cell type:code id:c8342664 tags:
  
 ``` python
  
 n_octaves = 5 # number of octaves

 # parameters shared by the CQT computation and its display
 cqt_params = {
    "bins_per_octave": 24, # number of bins per octave
    "fmin": 180, # minimal frequency
    "sr": sr # sampling rate
 }

 # CQT of each tone; must exist before the loop appends to it
 Ys = []

 plt.figure(figsize=(12, 6))
 for fig_id, y in enumerate(ys):
    plt.subplot(1, 2, 1+fig_id)
    # answer here
    ####
    # n_bins covers n_octaves octaves at the chosen resolution
    y_cqt = librosa.cqt(
        y, n_bins=cqt_params["bins_per_octave"] * n_octaves, **cqt_params)
    y_scalogram = librosa.amplitude_to_db(np.abs(y_cqt))
    ####
    specshow(
        y_scalogram, x_axis="time", y_axis="cqt_note",
        cmap="magma", **cqt_params)
    plt.title("Constant-Q Transform (CQT)")
    plt.xlabel("Time (seconds)")
    plt.xlim(0, 10)
    Ys.append(y_cqt)
 ```
  
 %% Output
  

  
 %% Cell type:markdown id:7aa23406-690a-4cda-94bd-84f3c21d488d tags:
  
 ## Sensitivity of MFCCs to pitch and brightness
  
 **Question**: compute and display the MFCCs (40 mel bands and 12 MFCC coefficients) of the two tones.
 - According to your understanding of MFCCs, to which type of variation should this representation be most sensitive?
 - Is the display in line with this?
  
 %% Cell type:code id:21516559 tags:
  
 ``` python
 # MFCC matrix of each tone, reused later by the sensitivity metric
 Ym = []

 plt.figure(figsize=(12, 6))
 for fig_id, y in enumerate(ys):
    plt.subplot(1, 2, 1+fig_id)
    # answer here
    ####
    # `y` and `sr` are passed by keyword: recent librosa releases made them
    # keyword-only, and older releases accept keywords as well.
    y_mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mels=40, n_mfcc=12)
    ####
    specshow(y_mfcc, x_axis="time", y_axis=None)
    plt.title("Mel-frequency cepstrum")
    plt.xlabel("Time (seconds)")
    plt.xlim(0, 10)
    plt.clim(-200, 200)
    # one tick per coefficient
    plt.gca().set_yticks(range(12))
    plt.ylabel("Quefrency index")
    Ym.append(y_mfcc)
 ```
  
 %% Output
  

  
 %% Cell type:markdown id:cb3e6127-158c-4c2f-9d10-eebd16f83644 tags:
  
 **Answer**: here
  
 %% Cell type:markdown id:371f0503-a351-48e0-9107-151975c219ae tags:
  
 ## Sensitivity of Chromas to pitch and brightness
  
 **Question**: compute and display the Chromas of the two tones.
 - According to your understanding of Chromas, to which type of variation should this representation be most sensitive ?
 - Is the display in line with this?
  
 %% Cell type:code id:bf581e62 tags:
  
 ``` python
 # chroma matrix of each tone, reused later by the sensitivity metric
 Yc = []

 plt.figure(figsize=(12, 6))
 for fig_id, y in enumerate(ys):
    plt.subplot(1, 2, 1+fig_id)
    # `y` is passed by keyword: recent librosa releases made it keyword-only,
    # and older releases accept the keyword as well.
    y_cqt = librosa.feature.chroma_cqt(
        y=y, n_octaves=n_octaves, n_chroma=12, **cqt_params)
    y_scalogram = librosa.amplitude_to_db(np.abs(y_cqt))
    # the display uses 12 bins per octave (one per chroma), not the CQT resolution
    specshow(
        y_scalogram, x_axis="time", y_axis="chroma",
        cmap="RdBu_r", bins_per_octave=12,
        fmin=cqt_params["fmin"], sr=cqt_params["sr"])
    plt.title("Constant-Q Transform (CQT)")
    plt.xlabel("Time (seconds)")
    plt.xlim(0, 10)
    Yc.append(y_cqt)
 ```
  
 %% Output
  

  
 %% Cell type:markdown id:7ab669be-ae51-4992-bfd0-f84cef860c79 tags:
  
 **Answer**: here
  
 %% Cell type:markdown id:77fe4b95-da49-411b-bc8a-9d41351fb1da tags:
  
-**Question**: Design a metric that account for sensitivity. For example, this metric could quantify the amount of variation, averaged over features. Use it to quantify the sensitivity of the 2 representations with respect to the 2 tones. Is
+**Question**: Design a metric that accounts for sensitivity. For example, this metric could quantify the amount of variation, averaged over features. Use it to quantify the sensitivity of the 2 representations with respect to the 2 tones. Are the numbers in line with what is expected?
  
 %% Cell type:code id:aaf9d492 tags:
  
 ``` python
 def sensitivity_metric(Y):
    """Return the mean absolute difference between consecutive columns of ``Y``.

    ``Y`` is a 2-D array. Differences are taken along axis 1 (in this notebook
    the callers pass librosa feature matrices, presumably laid out as
    features x frames), and the absolute differences are averaged over every
    row and every step. A constant ``Y`` therefore yields 0.
    """
 # answer here
 ####
    return np.mean(np.abs(np.diff(Y, axis=1)))
 ####
  
 # set width of bars
 barWidth = 0.25

 # set heights of bars: one value per tone (pitch, brightness) for each representation
 Sm = np.array([sensitivity_metric(Ym[0]), sensitivity_metric(Ym[1])])
 Sc = np.array([sensitivity_metric(Yc[0]), sensitivity_metric(Yc[1])])
 # normalize metrics so that the largest bar of each representation is 1
 Sm = Sm / Sm.max()
 Sc = Sc / Sc.max()

 # Set position of bars on X axis (the chroma bars sit right next to the MFCC bars)
 r1 = np.arange(len(Sm))
 r2 = r1 + barWidth

 # Plot the computed sensitivities (Sm / Sc)
 plt.bar(r1, Sm, color='#7f6d5f', width=barWidth, edgecolor='white', label='MFCCs')
 plt.bar(r2, Sc, color='#557f2d', width=barWidth, edgecolor='white', label='Chromas')

 # Add xticks on the middle of the group bars
 plt.xlabel('Tones', fontweight='bold')
 plt.ylabel('Sensitivity index', fontweight='bold')
 plt.xticks(r1 + barWidth/2, ['Pitch', 'Brightness'])

 # Create legend & Show graphic
 plt.legend()
 plt.show()
 ```
  
 %% Output
  

  
-%% Cell type:code id:e68b473e-ee70-426c-8240-f44dcec3929c tags:
+%% Cell type:markdown id:ba8e20a1-0f2d-4b8d-a3c2-76a766cf7aab tags:
  
-``` python
-```
+**Answer**: here