Source code for spectrographic.base

from pathlib import Path

import numpy as np
import simpleaudio as sa
import wavio
from PIL import Image

__author__ = "Levi Borodenko"
__copyright__ = "Levi Borodenko"
__license__ = "mit"


class SpectroGraphic(object):
    """Takes an image file and creates a sound that draws that image
    on a spectrogram.

    Arguments:
        path {Path} -- Path to the image file (e.g.: "./data/python.png")

    Keyword Arguments:
        height {int} -- y-resolution in the spectrogram (default: {100})
        duration {int} -- duration of the sound in seconds (default: {20})
        min_freq {int} -- minimal frequency used for the image (default: {1000})
        max_freq {int} -- maximal frequency used for the image (default: {8000})
        sample_rate {int} -- sample rate (default: {44100})
        num_tones {int} -- number of tones used to fill in each pixel (default: {3})
        contrast {float} -- contrast between loud and quiet pixels (default: {5})
        use_black_and_white {bool} -- threshold the image to pure black and
            white instead of grey-scale (default: {False})
    """

    def __init__(
        self,
        path: Path,
        height: int = 100,
        duration: int = 20,
        min_freq: int = 1000,
        max_freq: int = 8000,
        sample_rate: int = 44100,
        num_tones: int = 3,
        contrast: float = 5,
        use_black_and_white: bool = False,
    ):
        super(SpectroGraphic, self).__init__()

        self.PATH = Path(path)
        self.image = Image.open(self.PATH)
        self.HEIGHT = height
        self.DURATION = duration
        self.SAMPLE_RATE = sample_rate

        # Width after setting the height to self.HEIGHT,
        # preserving the aspect ratio.
        self.WIDTH = int(self.image.width * (self.HEIGHT / self.image.height))

        # duration per column
        self.DURATION_COL = self.DURATION / self.WIDTH

        # instance of ColumnToSound that will generate
        # the sounds for each column of the image
        self.col_to_sound = ColumnToSound(
            duration=self.DURATION_COL,
            sample_rate=sample_rate,
            min_freq=min_freq,
            max_freq=max_freq,
            y_resolution=height,
            num_tones=num_tones,
            contrast=contrast,
        )

        # flag whether we have processed the image yet
        self.is_processed = False

        # if True, the grey-scale image is thresholded so that every
        # pixel is either 0 or 1 (pure black and white)
        self.USE_BLACK_AND_WHITE = use_black_and_white

    def _resize(self):
        """Resizes the image to be exactly self.HEIGHT pixels tall.

        This limits the frequency resolution needed to draw the image
        on the spectrogram.
        """

        # resizing image
        self.image = self.image.resize(
            size=(self.WIDTH, self.HEIGHT), resample=Image.LANCZOS
        )

    def _preprocess(self):
        """Resizes the image, converts it to grey-scale and stores
        the columns as a np.ndarray.
        """

        # resize image
        self._resize()

        # convert to grey-scale
        self.image = self.image.convert(mode="L")

        # get the pixels as an array and normalise them to be
        # between 0 and 1
        self.image_array = np.array(self.image) / 255

        # transpose the image to get a list of columns
        self.columns = np.transpose(self.image_array)

        if self.USE_BLACK_AND_WHITE:
            self.columns[self.columns >= 0.5] = 1
            self.columns[self.columns < 0.5] = 0

        # invert so that dark pixels become loud and bright pixels quiet
        self.columns = 1 - self.columns

    def _process(self):
        """Preprocesses the image, then turns the columns into sounds
        and stacks them up to produce the resulting sound.
        """
        self._preprocess()

        audio_array = np.hstack(
            [self.col_to_sound.gen_soundwall(col) for col in self.columns]
        )

        # convert to 16-bit data
        audio_array *= 32767 / np.max(np.abs(audio_array))
        audio_array = audio_array.astype(np.int16)

        return audio_array

    @property
    def sound_array(self):
        if self.is_processed:
            return self._sound_array
        else:
            self.is_processed = True
            self._sound_array = self._process()
            return self._sound_array
    def play(self):
        """Plays the SpectroGraphic sound."""

        # get sound array
        audio = self.sound_array

        # play it using simpleaudio
        wave_object = sa.WaveObject(audio, 1, 2, self.SAMPLE_RATE)
        play_object = wave_object.play()
        play_object.wait_done()
    def save(self, wav_file: Path = "SpectroGraphic.wav"):
        """Saves the SpectroGraphic sound to a .wav file using the wavio module."""
        wavio.write(wav_file, self.sound_array, self.SAMPLE_RATE)
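# Illustrative usage sketch for the SpectroGraphic class above. The file
# name "example.png" and the parameter values are hypothetical; substitute
# a real image path. Wrapped in a helper function so it never runs on import.
def _example_spectrographic_usage():
    sg = SpectroGraphic(
        path="example.png",  # hypothetical image file
        height=100,          # y-resolution of the spectrogram
        duration=10,         # seconds of audio to generate
        min_freq=1000,
        max_freq=8000,
    )

    # Write the generated sound to disk; opening the .wav file in any
    # spectrogram viewer should reveal the image.
    sg.save(wav_file="example.wav")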
class ColumnToSound(object):
    """Turns grey-scale image columns into sound.

    It takes a numpy array of grey intensities (in the range 0 to 1)
    of length Y_RESOLUTION and turns it into a DURATION seconds long
    sound in the frequency range between MIN_FREQ and MAX_FREQ.

    Arguments:
        duration {float} -- duration of the sound in seconds

    Keyword Arguments:
        sample_rate {int} -- sample rate of the sound (default: {44100})
        min_freq {int} -- minimal frequency in the spectrogram (default: {10000})
        max_freq {int} -- maximal frequency in the spectrogram (default: {17000})
        y_resolution {int} -- number of pixels to plot (default: {1000})
        num_tones {int} -- number of tones used to fill out each pixel (default: {3})
        contrast {float} -- contrast between loud and quiet pixels (default: {5})
    """

    def __init__(
        self,
        duration: float,
        sample_rate: int = 44100,
        min_freq: int = 10000,
        max_freq: int = 17000,
        y_resolution: int = 1000,
        num_tones: int = 3,
        contrast: float = 5,
    ):
        super(ColumnToSound, self).__init__()

        # saving important parameters
        self.Y_RESOLUTION = y_resolution
        self.CONTRAST = contrast

        # sample rate; 44100 is a good default
        self.SAMPLE_RATE = sample_rate

        # frequency region in which to draw the pixel sound
        self.MIN_FREQ = min_freq
        self.MAX_FREQ = max_freq

        # number of tones used to fill the pixel sound
        self.NUM_TONES = num_tones

        # frequency window for each pixel sound
        self.HEIGHT = (max_freq - min_freq) / y_resolution

        # frequency delta between the tones that fill the pixel sound
        self.tone_delta = self.HEIGHT / num_tones

        # duration in seconds
        self.DURATION = duration

    def _get_wave(self, freq: float, intensity: float = 1, duration: float = 1):
        """Core method that takes a frequency, intensity and duration and
        returns an array representing the corresponding sound.

        Arguments:
            freq {float} -- frequency

        Keyword Arguments:
            intensity {float} -- between 0 and 1 (default: {1})
            duration {float} -- in seconds (default: {1})

        Returns:
            np.ndarray -- sound wave array
        """

        # get time steps
        t = np.linspace(
            start=0,
            stop=duration,
            num=int(duration * self.SAMPLE_RATE),
            endpoint=False,
        )

        # generate the corresponding sine wave;
        # this is the only place where CONTRAST acts
        sound_wave = (intensity ** self.CONTRAST) * np.cos(freq * t * 2 * np.pi)

        return sound_wave
    def pixel_to_sound(self, y: int, intensity: float = 1):
        """Takes the pixel at the y'th position from the top of an image
        column and turns it into a sound at the corresponding position
        in the spectrum.

        Arguments:
            y {int} -- position of the pixel in the column, counted from the top

        Keyword Arguments:
            intensity {float} -- grey intensity between 0 and 1 (default: {1})

        Returns:
            np.ndarray -- sound array

        Raises:
            ValueError
        """

        # The pixel position (counted from the top) must be
        # non-negative and not larger than the number of pixels
        # in the column.
        if y < 0 or y > self.Y_RESOLUTION:
            raise ValueError("y must be between 0 and Y_RESOLUTION.")

        # loudness should be between 0 and 1
        if not (0 <= intensity <= 1):
            raise ValueError("Intensity must be between 0 and 1.")

        # duration should not be negative
        if self.DURATION < 0:
            raise ValueError("Duration must be non-negative.")

        # calculating the base frequency for the pixel sound
        base_freq = (self.MAX_FREQ - self.MIN_FREQ) / self.Y_RESOLUTION * (
            self.Y_RESOLUTION - y
        ) + self.MIN_FREQ

        # get base wave
        wave = self._get_wave(base_freq, intensity, self.DURATION)

        # add further tones to fill up the pixel sound,
        # starting from the base frequency
        tone_freq = base_freq

        # iterating over the tones, adding up the sounds
        for _ in range(self.NUM_TONES):
            tone_freq += self.tone_delta
            wave += self._get_wave(tone_freq, intensity, self.DURATION)

        return wave
    def gen_soundwall(self, column: np.ndarray):
        """Takes a column of pixels and generates the corresponding sound wall.

        Arguments:
            column {np.ndarray} -- Y_RESOLUTION long column of pixels
                (values between 0 and 1)

        Returns:
            np.ndarray -- soundwall
        """

        # empty (silent) wave that we will add the individual
        # pixel sounds onto
        wave = self.pixel_to_sound(0, 0)

        # iterating over the column, adding the pixel sounds
        # of all pixels together to get the final wave
        for idx, intensity in enumerate(column):
            wave += self.pixel_to_sound(idx, intensity)

        return wave
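if __name__ == "__main__":
    # A minimal sketch of driving ColumnToSound directly, assuming a
    # synthetic 8-pixel column; all parameter values here are illustrative.
    demo_converter = ColumnToSound(
        duration=0.5,      # half a second of sound for this column
        sample_rate=44100,
        min_freq=1000,
        max_freq=8000,
        y_resolution=8,    # the column below has 8 pixels
        num_tones=3,
        contrast=5,
    )

    # a synthetic column whose intensity ramps from 0 (quiet) to 1 (loud)
    demo_column = np.linspace(0, 1, 8)

    # gen_soundwall sums the pixel sounds of the whole column into one
    # wave of duration * sample_rate samples
    wall = demo_converter.gen_soundwall(demo_column)
    print(wall.shape)  # (22050,) -> 0.5 s at 44100 Hz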