Hackster is hosting Hackster Holidays, Ep. 7: Livestream & Giveaway Drawing. Watch previous episodes or stream live on Friday! Stream Hackster Holidays, Ep. 7 on Friday!
Sam
Published © Apache-2.0

Encode Image In Sound With Python

You can hide pictures in your favourite sound by generating a custom sound spectrogram from an image in Python.

Intermediate · Work in progress · 2 hours · 11,117
Encode Image In Sound With Python

Things used in this project

Software apps and online services

Jupyter Notebook
Jupyter Notebook
I use this to organise the research and the trial and error

Story

Read more

Code

Code that actually works

Python
This is runnable code that takes as input a picture, a duration, and a path where the audio file will be written.
You can run it with Python 3 using this command:
python3 spectrogen.py path/to/your/image.jpg path/to/your/output.wav
This will write a 5-second sound to output.wav whose spectrogram represents image.jpg. More options are available; run python3 spectrogen.py -h to list them.
import wave, struct, math # To calculate the WAV file content
import numpy as np # To handle matrices
from PIL import Image # To open the input image and convert it to grayscale

import scipy.ndimage # To resample using nearest neighbour

def loadPicture(size, file, contrast=True, highpass=False, verbose=1):
    '''
    Load a picture and return it as a normalised, resampled intensity matrix.

    The picture is opened with PIL, converted to greyscale, flipped along
    axis 0 (so row 0 of the result is the BOTTOM row of the picture), inverted
    so that darker pixels get larger values, rescaled to the range [0, 1],
    optionally thresholded, and finally resized with nearest neighbour
    resampling.

    Parameters:
        size:     (rows, columns) wanted for the returned matrix. A 0 in
                  either position keeps the picture's own size on that axis.
        file:     whatever PIL's Image.open accepts.
        contrast: if true, invert with 1/(x + 10**15.2); otherwise invert
                  with 1 - x.
        highpass: if true, every value that is not above 0.5 is set to 0.
        verbose:  if true, print the sizes, the resampling factor and the
                  min/max intensity.

    Returns: the resized image as a float numpy matrix with values in [0, 1]
             (all zeros when the picture has a single uniform intensity).
    '''
    img = Image.open(file)
    img = img.convert("L")
    # Do NOT resize through PIL here (img.resize(size)): the original author
    # reports that it crashes the machine. Resampling is done by scipy below.

    # Work in float64 from the start: on the raw uint8 pixels, `1 - imgArr`
    # wraps around (1 - 2 == 255) instead of inverting the image.
    imgArr = np.asarray(img).astype(np.float64)
    imgArr = np.flip(imgArr, axis=0)
    if verbose:
        print("Image original size: ", imgArr.shape)

    # Invert the image (dark pixels -> high values)
    if contrast:
        # NOTE(review): the constant dwarfs the 0-255 pixel range, so once
        # rescaled below this looks like a near-linear inversion rather than
        # a real contrast boost -- confirm before tuning it.
        imgArr = 1/(imgArr+10**15.2)
    else:
        imgArr = 1 - imgArr

    # Scale between 0 and 1; a uniform picture has a zero range and is left
    # at all zeros instead of being turned into NaN by a 0/0 division.
    imgArr = imgArr - np.min(imgArr)
    peak = np.max(imgArr)
    if peak > 0:
        imgArr = imgArr/peak

    # Remove low pixel values (not a real highpass filter, just a threshold)
    if highpass:
        imgArr = np.where(imgArr > 0.5, imgArr, 0.0)

    # A requested dimension of 0 means "keep the picture's own dimension"
    targetRows = size[0] if size[0] != 0 else imgArr.shape[0]
    targetCols = size[1] if size[1] != 0 else imgArr.shape[1]
    resamplingFactor = targetRows/imgArr.shape[0], targetCols/imgArr.shape[1]

    # Order : 0=nearestNeighbour, 1:bilinear, 2:cubic etc...
    imgArr = scipy.ndimage.zoom(imgArr, resamplingFactor, order=0)

    if verbose:
        print("Resampling factor", resamplingFactor)
        print("Image resized :", imgArr.shape)
        print("Max intensity: ", np.max(imgArr))
        print("Min intensity: ", np.min(imgArr))
    return imgArr

def genSoundFromImage(file, output="sound.wav", duration=5.0, sampleRate=44100.0, intensityFactor=1, min_freq=0, max_freq=22000, invert=False, contrast=True, highpass=True, verbose=False):
    '''
    Write a mono 16-bit WAV file whose spectrogram draws the given picture.

    The band [min_freq, max_freq] is cut into 400 Hz steps and the picture is
    resampled (through loadPicture) to one row per step and one column per
    audio frame. Each frame's sample is the average, over the rows whose
    pixel is bright enough, of intensity * cos(2*pi*f*t), where f is the
    lower edge of the row's step.

    Parameters:
        file:            picture path, forwarded to loadPicture.
        output:          path of the WAV file to write.
        duration:        length of the sound in seconds.
        sampleRate:      frames per second of the WAV file.
        intensityFactor: multiplier applied to the picture's intensities.
        min_freq:        lowest frequency used, in Hz.
        max_freq:        highest frequency used, in Hz.
        invert:          if true, use 1 - intensity for every pixel.
        contrast:        forwarded to loadPicture.
        highpass:        forwarded to loadPicture.
        verbose:         if true, print the parameters and the output path.

    Returns: None. Progress is always printed to stdout.
    '''
    max_frame = int(duration * sampleRate)
    max_intensity = 32767 # Largest value of a signed 16-bit WAV sample
    min_sample = -32768   # Smallest value of a signed 16-bit WAV sample

    stepSize = 400 # Hz, each pixel's portion of the spectrum
    steppingSpectrum = int((max_freq-min_freq)/stepSize)

    # Load the picture BEFORE touching the output path, so that an unreadable
    # picture does not leave an empty WAV file behind.
    imgMat = loadPicture(size=(steppingSpectrum, max_frame), file=file, contrast=contrast, highpass=highpass, verbose=verbose)
    if invert:
        imgMat = 1 - imgMat
    imgMat *= intensityFactor # To lower/increase the image overall intensity
    imgMat *= max_intensity # To scale it to max WAV audio intensity
    if verbose:
        print("Input: ", file)
        print("Duration (in seconds): ", duration)
        print("Sample rate: ", sampleRate)
        print("Computing each soundframe sum value..")

    # The context manager closes the file (and fixes up the WAV header) even
    # when something below raises.
    with wave.open(output,'w') as wavef:
        wavef.setnchannels(1) # mono
        wavef.setsampwidth(2) # 2 bytes = 16-bit samples
        wavef.setframerate(sampleRate)

        for frame in range(max_frame):
            if frame % 60 == 0: # Only print once in a while
                print("Progress: ==> {:.2%}".format(frame/max_frame), end="\r")
            signalValue, count = 0, 0
            for step in range(steppingSpectrum):
                intensity = imgMat[step, frame]
                # NOTE(review): imgMat is already scaled by max_intensity at
                # this point, so this only skips near-zero pixels -- confirm
                # whether 0.1 of full scale was intended.
                if intensity < 0.1*intensityFactor:
                    continue
                # This row covers the band [currentFreq, nextFreq)
                currentFreq = (step * stepSize) + min_freq
                nextFreq = ((step+1) * stepSize) + min_freq
                if nextFreq > max_freq: # If we're at the end of the spectrum
                    nextFreq = max_freq
                # With a 400 Hz band and a 1000 Hz substep, this loop emits a
                # single tone per row, at currentFreq.
                for freq in range(currentFreq, nextFreq, 1000):
                    signalValue += intensity*math.cos(freq * 2 * math.pi * float(frame) / float(sampleRate))
                    count += 1
            if count == 0: count = 1
            signalValue /= count

            # Clamp to the 16-bit range: with intensityFactor > 1 the average
            # can exceed it and struct.pack('<h', ...) would raise.
            sample = max(min_sample, min(max_intensity, int(signalValue)))
            wavef.writeframesraw(struct.pack('<h', sample))

    print("\nProgress: ==> 100%")
    if verbose:
        print("Output: ", output)
import sys
import argparse

def main(argv=None):
    '''
    Command line entry point: parse the options and generate the sound file.

    Parameters:
        argv: list of command line arguments WITHOUT the program name.
              When None, argparse falls back to sys.argv[1:].

    A numeric option that is omitted or given as 0 falls back to its
    documented default (duration 5, minFreq 0, maxFreq 22000,
    samplerate 44100, intensityFactor 1).
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("inputImage", help="Input image in any PIL supported format (JPG, PNG (with and without alpha), BMP etc...)")
    parser.add_argument("outputFile", help="path where to output the soundfile in WAV format")
    parser.add_argument("-d", "--duration", help="Duration of the sound to output, in whole seconds, default: 5", type=int)
    parser.add_argument("-n", "--minFreq", help="Minimum frequency to use, in Hz, default: 0", type=int)
    parser.add_argument("-x", "--maxFreq", help="Maximum frequency to use, in Hz, default: 22000", type=int)
    parser.add_argument("-s", "--samplerate", help="Sample rate of the sound to output, in Hertz, default: 44100", type=int)
    parser.add_argument("-if", "--intensityFactor", help="Factory by which multiply the image intensity, in decimal, default: 1.0", type=float)
    parser.add_argument("-i", "--invert", help="Invert the image intensity, resulting in an inverted spectrum", action="store_true")
    parser.add_argument("-c", "--contrast", help="Increases image's contrast before converting it, can enhance the resulting spectrum", action="store_true")
    parser.add_argument("-hi", "--highintensity", help="Cut low intensity pixels, can enhance result", action="store_true")
    parser.add_argument("-v", "--verbose", help="Display verbose", action="store_true")
    # Parse the arguments we were handed, not whatever happens to be in
    # sys.argv, so main() can be driven programmatically.
    args = parser.parse_args(argv)

    genSoundFromImage(
            file=args.inputImage,
            output=args.outputFile,
            duration=args.duration or 5,
            sampleRate=args.samplerate or 44100,
            min_freq=args.minFreq or 0,
            max_freq=args.maxFreq or 22000,
            contrast=args.contrast,
            invert=args.invert,
            intensityFactor=args.intensityFactor or 1,
            highpass=args.highintensity, # Not a real highpass, but it cuts low intensities...
            verbose=args.verbose)

# Script entry point: hand the command line (minus the program name) to main().
if __name__ == "__main__":
    cliArguments = sys.argv[1:]
    main(cliArguments)

Detailed explanation of the code

HTML
This is the export of the Jupyter Notebook containing a detailed explanation of every bit of code, along with the result each bit produces
<!DOCTYPE html>
<html>
<head><meta charset="utf-8" />
<title>Spectro Gen V3</title><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>

<style type="text/css">
    /*!
*
* Twitter Bootstrap
*
*/
/*!
 * Bootstrap v3.3.7 (http://getbootstrap.com)
 * Copyright 2011-2016 Twitter, Inc.
 * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
 */
/*! normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css */
html {
  font-family: sans-serif;
  -ms-text-size-adjust: 100%;
  -webkit-text-size-adjust: 100%;
}
body {
  margin: 0;
}
article,
aside,
details,
figcaption,
figure,
footer,
header,
hgroup,
main,
menu,
nav,
section,
summary {
  display: block;
}
audio,
canvas,
progress,
video {
  display: inline-block;
  vertical-align: baseline;
}
audio:not([controls]) {
  display: none;
  height: 0;
}
[hidden],
template {
  display: none;
}
a {
  background-color: transparent;
}
a:active,
a:hover {
  outline: 0;
}
abbr[title] {
  border-bottom: 1px dotted;
}
b,
strong {
  font-weight: bold;
}
dfn {
  font-style: italic;
}
h1 {
  font-size: 2em;
  margin: 0.67em 0;
}
mark {
  background: #ff0;
  color: #000;
}
small {
  font-size: 80%;
}
sub,
sup {
  font-size: 75%;
  line-height: 0;
  position: relative;
  vertical-align: baseline;
}
sup {
  top: -0.5em;
}
sub {
  bottom: -0.25em;
}
img {
  border: 0;
}
svg:not(:root) {
  overflow: hidden;
}
figure {
  margin: 1em 40px;
}
hr {
  box-sizing: content-box;
  height: 0;
}
pre {
  overflow: auto;
}
code,
kbd,
pre,
samp {
  font-family: monospace, monospace;
  font-size: 1em;
}
button,
input,
optgroup,
select,
textarea {
  color: inherit;
  font: inherit;
  margin: 0;
}
button {
  overflow: visible;
}
button,
select {
  text-transform: none;
}
button,
html input[type="button"],
input[type="reset"],
input[type="submit"] {
  -webkit-appearance: button;
  cursor: pointer;
}
button[disabled],
html input[disabled] {
  cursor: default;
}
button::-moz-focus-inner,
input::-moz-focus-inner {
  border: 0;
  padding: 0;
}
input {
  line-height: normal;
}
input[type="checkbox"],
input[type="radio"] {
  box-sizing: border-box;
  padding: 0;
}
input[type="number"]::-webkit-inner-spin-button,
input[type="number"]::-webkit-outer-spin-button {
  height: auto;
}
input[type="search"] {
  -webkit-appearance: textfield;
  box-sizing: content-box;
}
input[type="search"]::-webkit-search-cancel-button,
input[type="search"]::-webkit-search-decoration {
  -webkit-appearance: none;
}
fieldset {
  border: 1px solid #c0c0c0;
  margin: 0 2px;
  padding: 0.35em 0.625em 0.75em;
}
legend {
  border: 0;
  padding: 0;
}
textarea {
  overflow: auto;
}
optgroup {
  font-weight: bold;
}
table {
  border-collapse: collapse;
  border-spacing: 0;
}
td,
th {
  padding: 0;
}
/*! Source: https://github.com/h5bp/html5-boilerplate/blob/master/src/css/main.css */
@media print {
  *,
  *:before,
  *:after {
    background: transparent !important;
    color: #000 !important;
    box-shadow: none !important;
    text-shadow: none !important;
  }
  a,
  a:visited {
    text-decoration: underline;
  }
  a[href]:after {
    content: " (" attr(href) ")";
  }
  abbr[title]:after {
    content: " (" attr(title) ")";
  }
  a[href^="#"]:after,
  a[href^="javascript:"]:after {
    content: "";
  }
  pre,
  blockquote {
    border: 1px solid #999;
    page-break-inside: avoid;
  }
  thead {
    display: table-header-group;
  }
  tr,
  img {
    page-break-inside: avoid;
  }
  img {
    max-width: 100% !important;
  }
  p,
  h2,
  h3 {
    orphans: 3;
    widows: 3;
  }
  h2,
  h3 {
    page-break-after: avoid;
  }
  .navbar {
    display: none;
  }
  .btn > .caret,
  .dropup > .btn > .caret {
    border-top-color: #000 !important;
  }
  .label {
    border: 1px solid #000;
  }
  .table {
    border-collapse: collapse !important;
  }
  .table td,
  .table th {
    background-color: #fff !important;
  }
  .table-bordered th,
  .table-bordered td {
    border: 1px solid #ddd !important;
  }
}
@font-face {
  font-family: 'Glyphicons Halflings';
  src: url('../components/bootstrap/fonts/glyphicons-halflings-regular.eot');
  src: url('../components/bootstrap/fonts/glyphicons-halflings-regular.eot?#iefix') format('embedded-opentype'), url('../components/bootstrap/fonts/glyphicons-halflings-regular.woff2') format('woff2'), url('../components/bootstrap/fonts/glyphicons-halflings-regular.woff') format('woff'), url('../components/bootstrap/fonts/glyphicons-halflings-regular.ttf') format('truetype'), url('../components/bootstrap/fonts/glyphicons-halflings-regular.svg#glyphicons_halflingsregular') format('svg');
}
.glyphicon {
  position: relative;
  top: 1px;
  display: inline-block;
  font-family: 'Glyphicons Halflings';
  font-style: normal;
  font-weight: normal;
  line-height: 1;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}
.glyphicon-asterisk:before {
  content: "\002a";
}
.glyphicon-plus:before {
  content: "\002b";
}
.glyphicon-euro:before,
.glyphicon-eur:before {
  content: "\20ac";
}
.glyphicon-minus:before {
  content: "\2212";
}
.glyphicon-cloud:before {
  content: "\2601";
}
.glyphicon-envelope:before {
  content: "\2709";
}
.glyphicon-pencil:before {
  content: "\270f";
}
.glyphicon-glass:before {
  content: "\e001";
}
.glyphicon-music:before {
  content: "\e002";
}
.glyphicon-search:before {
  content: "\e003";
}
.glyphicon-heart:before {
  content: "\e005";
}
.glyphicon-star:before {
  content: "\e006";
}
.glyphicon-star-empty:before {
  content: "\e007";
}
.glyphicon-user:before {
  content: "\e008";
}
.glyphicon-film:before {
  content: "\e009";
}
.glyphicon-th-large:before {
  content: "\e010";
}
.glyphicon-th:before {
  content: "\e011";
}
.glyphicon-th-list:before {
  content: "\e012";
}
.glyphicon-ok:before {
  content: "\e013";
}
.glyphicon-remove:before {
  content: "\e014";
}
.glyphicon-zoom-in:before {
  content: "\e015";
}
.glyphicon-zoom-out:before {
  content: "\e016";
}
.glyphicon-off:before {
  content: "\e017";
}
.glyphicon-signal:before {
  content: "\e018";
}
.glyphicon-cog:before {
  content: "\e019";
}
.glyphicon-trash:before {
  content: "\e020";
}
.glyphicon-home:before {
  content: "\e021";
}
.glyphicon-file:before {
  content: "\e022";
}
.glyphicon-time:before {
  content: "\e023";
}
.glyphicon-road:before {
  content: "\e024";
}
.glyphicon-download-alt:before {
  content: "\e025";
}
.glyphicon-download:before {
  content: "\e026";
}
.glyphicon-upload:before {
  content: "\e027";
}
.glyphicon-inbox:before {
  content: "\e028";
}
.glyphicon-play-circle:before {
  content: "\e029";
}
.glyphicon-repeat:before {
  content: "\e030";
}
.glyphicon-refresh:before {
  content: "\e031";
}
.glyphicon-list-alt:before {
  content: "\e032";
}
.glyphicon-lock:before {
  content: "\e033";
}
.glyphicon-flag:before {
  content: "\e034";
}
.glyphicon-headphones:before {
  content: "\e035";
}
.glyphicon-volume-off:before {
  content: "\e036";
}
.glyphicon-volume-down:before {
  content: "\e037";
}
.glyphicon-volume-up:before {
  content: "\e038";
}
.glyphicon-qrcode:before {
  content: "\e039";
}
.glyphicon-barcode:before {
  content: "\e040";
}
.glyphicon-tag:before {
  content: "\e041";
}
.glyphicon-tags:before {
  content: "\e042";
}
.glyphicon-book:before {
  content: "\e043";
}
.glyphicon-bookmark:before {
  content: "\e044";
}
.glyphicon-print:before {
  content: "\e045";
}
.glyphicon-camera:before {
  content: "\e046";
}
.glyphicon-font:before {
  content: "\e047";
}
.glyphicon-bold:before {
  content: "\e048";
}
.glyphicon-italic:before {
  content: "\e049";
}
.glyphicon-text-height:before {
  content: "\e050";
}
.glyphicon-text-width:before {
  content: "\e051";
}
.glyphicon-align-left:before {
  content: "\e052";
}
.glyphicon-align-center:before {
  content: "\e053";
}
.glyphicon-align-right:before {
  content: "\e054";
}
.glyphicon-align-justify:before {
  content: "\e055";
}
.glyphicon-list:before {
  content: "\e056";
}
.glyphicon-indent-left:before {
  content: "\e057";
}
.glyphicon-indent-right:before {
  content: "\e058";
}
.glyphicon-facetime-video:before {
  content: "\e059";
}
.glyphicon-picture:before {
  content: "\e060";
}
.glyphicon-map-marker:before {
  content: "\e062";
}
.glyphicon-adjust:before {
  content: "\e063";
}
.glyphicon-tint:before {
  content: "\e064";
}
.glyphicon-edit:before {
  content: "\e065";
}
.glyphicon-share:before {
  content: "\e066";
}
.glyphicon-check:before {
  content: "\e067";
}
.glyphicon-move:before {
  content: "\e068";
}
.glyphicon-step-backward:before {
  content: "\e069";
}
.glyphicon-fast-backward:before {
  content: "\e070";
}
.glyphicon-backward:before {
  content: "\e071";
}
.glyphicon-play:before {
  content: "\e072";
}
.glyphicon-pause:before {
  content: "\e073";
}
.glyphicon-stop:before {
  content: "\e074";
}
.glyphicon-forward:before {
  content: "\e075";
}
.glyphicon-fast-forward:before {
  content: "\e076";
}
.glyphicon-step-forward:before {
  content: "\e077";
}
.glyphicon-eject:before {
  content: "\e078";
}
.glyphicon-chevron-left:before {
  content: "\e079";
}
.glyphicon-chevron-right:before {
  content: "\e080";
}
.glyphicon-plus-sign:before {
  content: "\e081";
}
.glyphicon-minus-sign:before {
  content: "\e082";
}
.glyphicon-remove-sign:before {
  content: "\e083";
}
.glyphicon-ok-sign:before {
  content: "\e084";
}
.glyphicon-question-sign:before {
  content: "\e085";
}
.glyphicon-info-sign:before {
  content: "\e086";
}
.glyphicon-screenshot:before {
  content: "\e087";
}
.glyphicon-remove-circle:before {
  content: "\e088";
}
.glyphicon-ok-circle:before {
  content: "\e089";
}
.glyphicon-ban-circle:before {
  content: "\e090";
}
.glyphicon-arrow-left:before {
  content: "\e091";
}
.glyphicon-arrow-right:before {
  content: "\e092";
}
.glyphicon-arrow-up:before {
  content: "\e093";
}
.glyphicon-arrow-down:before {
  content: "\e094";
}
.glyphicon-share-alt:before {
  content: "\e095";
}
.glyphicon-resize-full:before {
  content: "\e096";
}
.glyphicon-resize-small:before {
  content: "\e097";
}
.glyphicon-exclamation-sign:before {
  content: "\e101";
}
.glyphicon-gift:before {
  content: "\e102";
}
.glyphicon-leaf:before {
  content: "\e103";
}
.glyphicon-fire:before {
  content: "\e104";
}
.glyphicon-eye-open:before {
  content: "\e105";
}
.glyphicon-eye-close:before {
  content: "\e106";
}
.glyphicon-warning-sign:before {
  content: "\e107";
}
.glyphicon-plane:before {
  content: "\e108";
}
.glyphicon-calendar:before {
  content: "\e109";
}
.glyphicon-random:before {
  content: "\e110";
}
.glyphicon-comment:before {
  content: "\e111";
}
.glyphicon-magnet:before {
  content: "\e112";
}
.glyphicon-chevron-up:before {
  content: "\e113";
}
.glyphicon-chevron-down:before {
  content: "\e114";
}
.glyphicon-retweet:before {
  content: "\e115";
}
.glyphicon-shopping-cart:before {
  content: "\e116";
}
.glyphicon-folder-close:before {
  content: "\e117";
}
.glyphicon-folder-open:before {
  content: "\e118";
}
.glyphicon-resize-vertical:before {
  content: "\e119";
}
.glyphicon-resize-horizontal:before {
  content: "\e120";
}
.glyphicon-hdd:before {
  content: "\e121";
}
.glyphicon-bullhorn:before {
  content: "\e122";
}
.glyphicon-bell:before {
  content: "\e123";
}
.glyphicon-certificate:before {
  content: "\e124";
}
.glyphicon-thumbs-up:before {
  content: "\e125";
}
.glyphicon-thumbs-down:before {
  content: "\e126";
}
.glyphicon-hand-right:before {
  content: "\e127";
}
.glyphicon-hand-left:before {
  content: "\e128";
}
.glyphicon-hand-up:before {
  content: "\e129";
}
.glyphicon-hand-down:before {
  content: "\e130";
}
.glyphicon-circle-arrow-right:before {
  content: "\e131";
}
.glyphicon-circle-arrow-left:before {
  content: "\e132";
}
.glyphicon-circle-arrow-up:before {
  content: "\e133";
}
.glyphicon-circle-arrow-down:before {
  content: "\e134";
}
.glyphicon-globe:before {
  content: "\e135";
}
.glyphicon-wrench:before {
  content: "\e136";
}
.glyphicon-tasks:before {
  content: "\e137";
}
.glyphicon-filter:before {
  content: "\e138";
}
.glyphicon-briefcase:before {
  content: "\e139";
}
.glyphicon-fullscreen:before {
  content: "\e140";
}
.glyphicon-dashboard:before {
  content: "\e141";
}
.glyphicon-paperclip:before {
  content: "\e142";
}
.glyphicon-heart-empty:before {
  content: "\e143";
}
.glyphicon-link:before {
  content: "\e144";
}
.glyphicon-phone:before {
  content: "\e145";
}
.glyphicon-pushpin:before {
  content: "\e146";
}
.glyphicon-usd:before {
  content: "\e148";
}
.glyphicon-gbp:before {
  content: "\e149";
}
.glyphicon-sort:before {
  content: "\e150";
}
.glyphicon-sort-by-alphabet:before {
  content: "\e151";
}
.glyphicon-sort-by-alphabet-alt:before {
  content: "\e152";
}
.glyphicon-sort-by-order:before {
  content: "\e153";
}
.glyphicon-sort-by-order-alt:before {
  content: "\e154";
}
.glyphicon-sort-by-attributes:before {
  content: "\e155";
}
.glyphicon-sort-by-attributes-alt:before {
  content: "\e156";
}
.glyphicon-unchecked:before {
  content: "\e157";
}
.glyphicon-expand:before {
  content: "\e158";
}
.glyphicon-collapse-down:before {
  content: "\e159";
}
.glyphicon-collapse-up:before {
  content: "\e160";
}
.glyphicon-log-in:before {
  content: "\e161";
}
.glyphicon-flash:before {
  content: "\e162";
}
.glyphicon-log-out:before {
  content: "\e163";
}
.glyphicon-new-window:before {
  content: "\e164";
}
.glyphicon-record:before {
  content: "\e165";
}
.glyphicon-save:before {
  content: "\e166";
}
.glyphicon-open:before {
  content: "\e167";
}
.glyphicon-saved:before {
  content: "\e168";
}
.glyphicon-import:before {
  content: "\e169";
}
.glyphicon-export:before {
  content: "\e170";
}
.glyphicon-send:before {
  content: "\e171";
}
.glyphicon-floppy-disk:before {
  content: "\e172";
}
.glyphicon-floppy-saved:before {
  content: "\e173";
}
.glyphicon-floppy-remove:before {
  content: "\e174";
}
.glyphicon-floppy-save:before {
  content: "\e175";
}
.glyphicon-floppy-open:before {
  content: "\e176";
}
.glyphicon-credit-card:before {
  content: "\e177";
}
.glyphicon-transfer:before {
  content: "\e178";
}
.glyphicon-cutlery:before {
  content: "\e179";
}
.glyphicon-header:before {
  content: "\e180";
}
.glyphicon-compressed:before {
  content: "\e181";
}
.glyphicon-earphone:before {
  content: "\e182";
}
.glyphicon-phone-alt:before {
  content: "\e183";
}
.glyphicon-tower:before {
  content: "\e184";
}
.glyphicon-stats:before {
  content: "\e185";
}
.glyphicon-sd-video:before {
  content: "\e186";
}
.glyphicon-hd-video:before {
  content: "\e187";
}
.glyphicon-subtitles:before {
  content: "\e188";
}
.glyphicon-sound-stereo:before {
  content: "\e189";
}
.glyphicon-sound-dolby:before {
  content: "\e190";
}
.glyphicon-sound-5-1:before {
  content: "\e191";
}
.glyphicon-sound-6-1:before {
  content: "\e192";
}
.glyphicon-sound-7-1:before {
  content: "\e193";
}
.glyphicon-copyright-mark:before {
  content: "\e194";
}
.glyphicon-registration-mark:before {
  content: "\e195";
}
.glyphicon-cloud-download:before {
  content: "\e197";
}
.glyphicon-cloud-upload:before {
  content: "\e198";
}
.glyphicon-tree-conifer:before {
  content: "\e199";
}
.glyphicon-tree-deciduous:before {
  content: "\e200";
}
.glyphicon-cd:before {
  content: "\e201";
}
.glyphicon-save-file:before {
  content: "\e202";
}
.glyphicon-open-file:before {
  content: "\e203";
}
.glyphicon-level-up:before {
  content: "\e204";
}
.glyphicon-copy:before {
  content: "\e205";
}
.glyphicon-paste:before {
  content: "\e206";
}
.glyphicon-alert:before {
  content: "\e209";
}
.glyphicon-equalizer:before {
  content: "\e210";
}
.glyphicon-king:before {
  content: "\e211";
}
.glyphicon-queen:before {
  content: "\e212";
}
.glyphicon-pawn:before {
  content: "\e213";
}
.glyphicon-bishop:before {
  content: "\e214";
}
.glyphicon-knight:before {
  content: "\e215";
}
.glyphicon-baby-formula:before {
  content: "\e216";
}
.glyphicon-tent:before {
  content: "\26fa";
}
.glyphicon-blackboard:before {
  content: "\e218";
}
.glyphicon-bed:before {
  content: "\e219";
}
.glyphicon-apple:before {
  content: "\f8ff";
}
.glyphicon-erase:before {
  content: "\e221";
}
.glyphicon-hourglass:before {
  content: "\231b";
}
.glyphicon-lamp:before {
  content: "\e223";
}
.glyphicon-duplicate:before {
  content: "\e224";
}
.glyphicon-piggy-bank:before {
  content: "\e225";
}
.glyphicon-scissors:before {
  content: "\e226";
}
.glyphicon-bitcoin:before {
  content: "\e227";
}
.glyphicon-btc:before {
  content: "\e227";
}
.glyphicon-xbt:before {
  content: "\e227";
}
.glyphicon-yen:before {
  content: "\00a5";
}
.glyphicon-jpy:before {
  content: "\00a5";
}
.glyphicon-ruble:before {
  content: "\20bd";
}
.glyphicon-rub:before {
  content: "\20bd";
}
.glyphicon-scale:before {
  content: "\e230";
}
.glyphicon-ice-lolly:before {
  content: "\e231";
}
.glyphicon-ice-lolly-tasted:before {
  content: "\e232";
}
.glyphicon-education:before {
  content: "\e233";
}
.glyphicon-option-horizontal:before {
  content: "\e234";
}
.glyphicon-option-vertical:before {
  content: "\e235";
}
.glyphicon-menu-hamburger:before {
  content: "\e236";
}
.glyphicon-modal-window:before {
  content: "\e237";
}
...

This file has been truncated, please download it to see its full contents.

Full Jupyter notebook

XML
The full jupyter notebook which you can locally run if you so desire
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Spectrogram Generator\n",
    "_By Sam_\n",
    "\n",
    "This project is about creating a sound that represents an image. As you may have guessed, an image is a 3D (or 5D if you do RGB) media : there is width, height and pixel intensity (greyscale or Red, Green and Blue), while sound is a 2D media: there is time and points.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 250,
   "metadata": {},
   "outputs": [],
   "source": [
    "import wave, struct, math # To calculate the WAV file content\n",
    "import numpy as np # To handle matrices\n",
    "from PIL import Image # To open the input image and convert it to grayscale\n",
    "\n",
    "import scipy                     # To plot the spectrogram\n",
    "import matplotlib.pyplot as plt  # To plot the spectrogram\n",
    "import scipy.io.wavfile          # To plot the spectrogram\n",
    "\n",
    "import scipy.ndimage # To resample using nearest neighbour\n",
    "import IPython.display  # Jupyter notebook ..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plotting\n",
    "These two functions allows me to plot data\n",
    "\n",
    " - `plotSpectrogram` \n",
    " \n",
    " Plots the spectrogram (frequencies and their intensity along time axis) of a given WAVE file. Also display its name and sample rate\n",
    " \n",
    " \n",
    " - `plotMat` \n",
    " \n",
    " Plots a matrix (2D tensor) in the common representation (i=0,j=0 on the top left corner). The values are represented by colour intensity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 251,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plotSpectrogram(file=\"sound.wav\"):\n",
    "    sample_rate, X = scipy.io.wavfile.read(file)\n",
    "    plt.specgram(X, Fs=sample_rate, xextent=(0,60))\n",
    "    print(\"File: \", file)\n",
    "    print(\"Sample rate (Hz): \",sample_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 252,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plotMat(mat):\n",
    "    mat = np.flip(mat,0)\n",
    "    X, Y = np.meshgrid(range(mat.shape[0]), range(mat.shape[1]))\n",
    "    Z = mat[X,Y]\n",
    "\n",
    "    plt.pcolormesh(Y,X,Z)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Picture\n",
    "The `loadPicture` function first load and converts the picture to grayscale using PIL. Then, it converts the picture into a NumPy array and perform multiple operations on its values to increase the contrast.\n",
    "\n",
    "Then, the function performs a nearest neighbour resampling of the image.\n",
    "\n",
    "\n",
    "Example of resampling:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 253,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original array\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[0, 1, 2],\n",
       "       [3, 4, 5],\n",
       "       [6, 7, 8]])"
      ]
     },
     "execution_count": 253,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.arange(9).reshape(3,3)\n",
    "print(\"Original array\")\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "After resampling by factor of 2 along both axis, using nearest neighbour\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[0, 0, 1, 1, 2, 2],\n",
       "       [0, 0, 1, 1, 2, 2],\n",
       "       [3, 3, 4, 4, 5, 5],\n",
       "       [3, 3, 4, 4, 5, 5],\n",
       "       [6, 6, 7, 7, 8, 8],\n",
       "       [6, 6, 7, 7, 8, 8]])"
      ]
     },
     "execution_count": 254,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"After resampling by factor of 2 along both axis, using nearest neighbour\")\n",
    "scipy.ndimage.zoom(x, 2, order=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The resampling is conducted using a factor computed such that the end size of the matrix is equal to the `size` parameter. The resampling factor can be a floating point number.\n",
    "\n",
    "Finally, `loadPicture` returns the loaded and resampled image as a NumPy array of size `size`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 255,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "    Loads a picture, converts it to greyscale, then to numpy array, normalise it so that the max value is 1 \n",
    "    the min is 0, increase the contrast a bit, remove every pixel which intensity is lower that 0.5, \n",
    "    then resize the picture using nearest neighbour resampling and outputs the numpy matrix.\n",
    "    \n",
    "    FYI: imgArr[0,0] is the top left corner of the image, cheers matrix indexing\n",
    "    \n",
    "    Returns: the resized image as a high contrast, normalised between 0 and 1, numpy matrix\n",
    "'''\n",
    "def loadPicture(size, file, verbose=1):\n",
    "    img = Image.open(file)\n",
    "    img = img.convert(\"L\")\n",
    "    #img = img.resize(size) # DO NOT DO THAT OR THE PC WILL CRASH\n",
    "    \n",
    "    imgArr = np.array(img)\n",
    "    if verbose:\n",
    "        print(\"Image original size: \", imgArr.shape)\n",
    "        \n",
    "    # Increase the contrast of the image\n",
    "    imgArr = imgArr/np.max(imgArr)\n",
    "    imgArr = 1/(imgArr+10**15.2)\n",
    "    \n",
    "    # Scale between 0 and 1\n",
    "    imgArr -= np.min(imgArr)\n",
    "    imgArr = imgArr/np.max(imgArr)\n",
    "    \n",
    "    # Remove low pixel values\n",
    "    removeLowValues = np.vectorize(lambda x: x if x > 0.5 else 0, otypes=[np.float])\n",
    "    imgArr = removeLowValues(imgArr)\n",
    "    \n",
    "    if size[0] == 0:\n",
    "        size = imgArr.shape[0], size[1]\n",
    "    if size[1] == 0:\n",
    "        size = size[0], imgArr.shape[1]\n",
    "    resamplingFactor = size[0]/imgArr.shape[0], size[1]/imgArr.shape[1]\n",
    "    if resamplingFactor[0] == 0:\n",
    "        resamplingFactor = 1, resamplingFactor[1]\n",
    "    if resamplingFactor[1] == 0:\n",
    "        resamplingFactor = resamplingFactor[0], 1\n",
    "    \n",
    "    # Order : 0=nearestNeighbour, 1:bilinear, 2:cubic etc...\n",
    "    imgArr = scipy.ndimage.zoom(imgArr, resamplingFactor, order=0)\n",
    "    \n",
    "    if verbose:\n",
    "        print(\"Resampling factor\", resamplingFactor)\n",
    "        print(\"Image resized :\", imgArr.shape)\n",
    "        print(\"Max intensity: \", np.max(imgArr))\n",
    "        print(\"Min intensity: \", np.min(imgArr))\n",
    "        plotMat(imgArr)\n",
    "    return imgArr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For this demonstration we're gonna convert this painting named _Wanderer above the Sea of Fog_ by Caspar David Friedrich:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 274,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "image/jpeg": "\n",
      "text/plain": [
       "<IPython.core.display.Image object>"
      ]
     },
     "execution_count": 274,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IPython.display.Image(\"/home/sam1902/Pictures/WandererAboveTheSeaOfFogResized.jpg\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 256,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Image original size:  (1399, 1100)\n",
      "Resampling factor (2.073624017155111, 2.090909090909091)\n",
      "Image resized : (2901, 2300)\n",
      "Max intensity:  1.0\n",
      "Min intensity:  0.0\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f88cfb604a8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "imgMat = loadPicture(size=(2901,2300), file=\"/home/sam1902/Pictures/WandererAboveTheSeaOfFog.jpg\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate spectrogram from image\n",
    "The spectrogram is a graph representing the frequencies and their intensity relative to the progress of the sound. It can be obtained by computing the Fourier Transform of the sound at each soundframe.\n",
    "\n",
    "Therefore, to craft a sound that'll render the desired spectrogram we have to play a set of different sound which each individual frequency is related to the position of the pixel in the original image, and of which each intensity is related to the pixel intensity at this point.\n",
    "\n",
    "To do so, we first load the image and resize it such that each column of pixel correspond to a frame of which we've got `max_frame` amount, and each row correspond to a frequency in the `steppingSpectrum`, which is essentially the maximum frequency allowed, divided by the step between each frequency band. We can further tune the frequency range if we want our custom spectrogram to appear more up or down the frequency spectrum, but for now lets just use the full spectrum.\n",
    "\n",
    "Then, we multiply each pixel value (which was previously scaled between 0 and 1) by the `max_intensity` so that each pixel value is now the intensity of the pure sound to play.\n",
    "\n",
    "Finally, we compute each soundframe by iterating through each column of the image and, for each pixel in it (which correspond each to a distinct band of frequencies) we add to the value of the sound value of that frame the sound of the pure wave corresponding to this range of frequencies, but we walk across these frequencies in substep of 1000 Hz. Strangely, the higher is this number the less \"spaced\" the drawing appears on the spectrogram. You can try lowering it to see what I mean.\n",
    "\n",
    "Then, after iterating though each pixel we divide the sum of their values by the amount of individual values summed (divide once to avoid high computational cost). To conclude, we write this value as the frame's value and go to the next frame.\n",
    "\n",
    "By doing that for every column of pixel, we can recreate the image in the spectrogram. Tadaaaa.\n",
    "\n",
    "---\n",
    "\n",
    "###Addendum\n",
    "\n",
    "Except as it turns out, we should theoretically run into a pretty big problem which has a sweet name: the uncertainty principle. Except for some reason we don't, which is strange and I haven't found out why yet.\n",
    "\n",
    "The fact is that spectrogram is computed by decomposing the sum of frequency that make a soundframe, but if that sum of frequency variates a lot between the current soundframe and the next, it's hard to tell what were the frequency in the current frame. This is called the uncertainty principle: if you can observe a wave for a very short amount of time, you cannot accurately tell which frequencies it was made of (uncertain) and when you observe it for long enough (a.k.a. stays the same for long enough) you can tell more accurately.\n",
    "\n",
    "Therefore, to \"solve\" the problem that the uncertainty principle creates, we have to extend the periode during which we keep the same value so that the Fourier transform knows - and is certain - about which frequencies were in the sound and the spectrogram comes out crisps and clear.\n",
    "\n",
    "To do that we simply should insert the same frame repeatedly, except when I try it doesn't do as expected, which is strange \\\\\\_()_/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 262,
   "metadata": {},
   "outputs": [],
   "source": [
    "def genSoundFromImage(file, output=\"sound.wav\", duration=5.0, sampleRate=44100.0):\n",
    "    wavef = wave.open(output,'w')\n",
    "    wavef.setnchannels(1) # mono\n",
    "    wavef.setsampwidth(2) \n",
    "    wavef.setframerate(sampleRate)\n",
    "    \n",
    "    max_frame = int(duration * sampleRate)\n",
    "    max_freq = 22000 # Hz\n",
    "    max_intensity = 32767\n",
    "    \n",
    "    stepSize = 400 # Hz\n",
    "    steppingSpectrum = int(max_freq/stepSize)\n",
    "    \n",
    "    imgMat = loadPicture((steppingSpectrum, max_frame), file, verbose=0)\n",
    "    imgMat *= max_intensity\n",
    "    print(\"Input: \", file)\n",
    "    print(\"Duration (in seconds): \", duration)\n",
    "    print(\"Sample rate: \", sampleRate)\n",
    "    print(\"Computing each soundframe sum value..\")\n",
    "    for frame in range(max_frame):\n",
    "        if frame % 60 == 0: # Only print once in a while\n",
    "            IPython.display.clear_output(wait=True)\n",
    "            print(\"Progress: ==> {:.2%}\".format(frame/max_frame), end=\"\\r\")\n",
    "        signalValue, count = 0, 0\n",
    "        for step in range(steppingSpectrum):\n",
    "            intensity = imgMat[step, frame]\n",
    "            if intensity == 0:\n",
    "                continue\n",
    "            # nextFreq is less than currentFreq\n",
    "            currentFreq = max_freq - step * stepSize\n",
    "            nextFreq = max_freq - (step+1) * stepSize\n",
    "            if nextFreq < 0: # If we're at the end of the spectrum\n",
    "                nextFreq = 0\n",
    "            for freq in range(nextFreq, currentFreq, 1000): # substep of 1000 Hz is good\n",
    "                signalValue += intensity*math.cos(freq * 2 * math.pi * float(frame) / float(sampleRate))\n",
    "                count += 1\n",
    "        if count == 0: count = 1\n",
    "        signalValue /= count\n",
    "        \n",
    "        data = struct.pack('<h', int(signalValue))\n",
    "        wavef.writeframesraw( data )\n",
    "        \n",
    "    wavef.writeframes(''.encode())\n",
    "    wavef.close()\n",
    "    print(\"\\nProgress: ==> 100%\")\n",
    "    print(\"Output: \", output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 260,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Progress: ==> 99.97%\n",
      "Progress: ==> 100%\n",
      "Output:  sound.wav\n"
     ]
    }
   ],
   "source": [
    "genSoundFromImage(file=\"/home/sam1902/Pictures/WandererAboveTheSeaOfFog.jpg\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 259,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File:  sound.wav\n",
      "Sample rate (Hz):  44100\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/matplotlib/axes/_axes.py:7176: RuntimeWarning: divide by zero encountered in log10\n",
      "  Z = 10. * np.log10(spec)\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f88cd1269e8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plotSpectrogram()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Thanks for reading !"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

Github Repo

This is the up-to-date code along with its JavaScript counterpart

Credits

Sam
1 project • 2 followers

Comments