Skip to content

Commit bbebf0e

Browse files
committed
Implement image-to-audio conversion
1 parent 941cc23 commit bbebf0e

File tree

2 files changed

+159 −52 lines changed

2 files changed

+159 −52 lines changed

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"meyda": "^5.6.3",
2424
"mime": "^4.1.0",
2525
"pdftoimg-js": "^0.2.5",
26-
"vite-plugin-static-copy": "^3.1.6"
26+
"vite-plugin-static-copy": "^3.1.6",
27+
"wavefile": "^11.0.0"
2728
}
2829
}

src/handlers/meyda.ts

Lines changed: 157 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { FileData, FileFormat, FormatHandler } from "../FormatHandler.ts";
22

33
import Meyda from "meyda";
4+
import { WaveFile } from "wavefile";
45

56
class meydaHandler implements FormatHandler {
67

@@ -11,27 +12,27 @@ class meydaHandler implements FormatHandler {
1112
format: "png",
1213
extension: "png",
1314
mime: "image/png",
14-
from: false,
15+
from: true,
1516
to: true,
16-
internal: "png"
17+
internal: "image"
1718
},
1819
{
1920
name: "Joint Photographic Experts Group JFIF",
2021
format: "jpeg",
2122
extension: "jpg",
2223
mime: "image/jpeg",
23-
from: false,
24+
from: true,
2425
to: true,
25-
internal: "jpeg"
26+
internal: "image"
2627
},
2728
{
2829
name: "WebP",
2930
format: "webp",
3031
extension: "webp",
3132
mime: "image/webp",
32-
from: false,
33+
from: true,
3334
to: true,
34-
internal: "webp"
35+
internal: "image"
3536
}
3637
];
3738
public ready: boolean = false;
@@ -43,14 +44,14 @@ class meydaHandler implements FormatHandler {
4344
async init () {
4445

4546
const dummy = document.createElement("audio");
46-
if (dummy.canPlayType("audio/wav")) this.supportedFormats.push({
47+
this.supportedFormats.push({
4748
name: "Waveform Audio File Format",
4849
format: "wav",
4950
extension: "wav",
5051
mime: "audio/wav",
51-
from: true,
52-
to: false,
53-
internal: "wav"
52+
from: dummy.canPlayType("audio/wav") !== "",
53+
to: true,
54+
internal: "audio"
5455
});
5556
if (dummy.canPlayType("audio/mpeg")) this.supportedFormats.push({
5657
name: "MP3 Audio",
@@ -59,7 +60,7 @@ class meydaHandler implements FormatHandler {
5960
mime: "audio/mpeg",
6061
from: true,
6162
to: false,
62-
internal: "mp3"
63+
internal: "audio"
6364
});
6465
if (dummy.canPlayType("audio/ogg")) this.supportedFormats.push({
6566
name: "Ogg Audio",
@@ -68,7 +69,7 @@ class meydaHandler implements FormatHandler {
6869
mime: "audio/ogg",
6970
from: true,
7071
to: false,
71-
internal: "ogg"
72+
internal: "audio"
7273
});
7374
if (dummy.canPlayType("audio/flac")) this.supportedFormats.push({
7475
name: "Free Lossless Audio Codec",
@@ -77,11 +78,13 @@ class meydaHandler implements FormatHandler {
7778
mime: "audio/flac",
7879
from: true,
7980
to: false,
80-
internal: "flac"
81+
internal: "audio"
8182
});
8283
dummy.remove();
8384

84-
this.#audioContext = new AudioContext();
85+
this.#audioContext = new AudioContext({
86+
sampleRate: 44100
87+
});
8588

8689
this.#canvas = document.createElement("canvas");
8790
const ctx = this.#canvas.getContext("2d");
@@ -94,7 +97,7 @@ class meydaHandler implements FormatHandler {
9497

9598
async doConvert (
9699
inputFiles: FileData[],
97-
_inputFormat: FileFormat,
100+
inputFormat: FileFormat,
98101
outputFormat: FileFormat
99102
): Promise<FileData[]> {
100103
if (
@@ -107,48 +110,151 @@ class meydaHandler implements FormatHandler {
107110
}
108111
const outputFiles: FileData[] = [];
109112

110-
for (const inputFile of inputFiles) {
111-
112-
const inputBytes = new Uint8Array(inputFile.bytes);
113-
const audioData = await this.#audioContext.decodeAudioData(inputBytes.buffer);
114-
115-
Meyda.bufferSize = 2048;
116-
Meyda.sampleRate = audioData.sampleRate;
117-
const samples = audioData.getChannelData(0);
118-
const imageWidth = Math.floor(samples.length / Meyda.bufferSize);
119-
const imageHeight = Meyda.bufferSize / 2;
120-
121-
this.#canvas.width = imageWidth;
122-
this.#canvas.height = imageHeight;
123-
124-
for (let i = 0; i < imageWidth; i ++) {
125-
const frame = samples.slice(i * Meyda.bufferSize, (i + 1) * Meyda.bufferSize);
126-
const filtered = Meyda.windowing(frame, "hanning");
127-
const spectrum = Meyda.extract("amplitudeSpectrum", filtered);
128-
if (!(spectrum instanceof Float32Array)) throw "Failed to extract audio features!";
129-
const pixels = new Uint8ClampedArray(spectrum.length * 4);
130-
for (let i = 0; i < spectrum.length; i ++) {
131-
const int = Math.floor(spectrum[i] * 16777215);
132-
pixels[i * 4] = int & 0xFF;
133-
pixels[i * 4 + 1] = (int >> 8) & 0xFF;
134-
pixels[i * 4 + 2] = (int >> 16) & 0xFF;
135-
pixels[i * 4 + 3] = 0xFF;
113+
const inputIsImage = (inputFormat.internal === "image");
114+
const outputIsImage = (outputFormat.internal === "image");
115+
116+
const bufferSize = 2048;
117+
118+
if (inputIsImage === outputIsImage) {
119+
throw "Invalid input/output format.";
120+
}
121+
122+
if (inputIsImage) {
123+
for (const inputFile of inputFiles) {
124+
125+
this.#ctx.clearRect(0, 0, this.#canvas.width, this.#canvas.width);
126+
127+
const blob = new Blob([inputFile.bytes as BlobPart], { type: inputFormat.mime });
128+
const url = URL.createObjectURL(blob);
129+
130+
const image = new Image();
131+
await new Promise((resolve, reject) => {
132+
image.addEventListener("load", resolve);
133+
image.addEventListener("error", reject);
134+
image.src = url;
135+
});
136+
137+
const imageWidth = image.naturalWidth;
138+
const imageHeight = image.naturalHeight;
139+
140+
this.#canvas.width = imageWidth;
141+
this.#canvas.height = imageHeight;
142+
this.#ctx.drawImage(image, 0, 0);
143+
144+
const imageData = this.#ctx.getImageData(0, 0, imageWidth, imageHeight);
145+
const pixelBuffer = imageData.data as Uint8ClampedArray;
146+
147+
const sampleRate = this.#audioContext.sampleRate;
148+
149+
const audioData = new Float32Array(imageWidth * bufferSize);
150+
151+
// Precompute sine and cosine waves for each frequency
152+
const sineWaves = new Float32Array(imageHeight * bufferSize);
153+
const cosineWaves = new Float32Array(imageHeight * bufferSize);
154+
for (let y = 0; y < imageHeight; y ++) {
155+
const frequency = (y / imageHeight) * (sampleRate / 2);
156+
for (let s = 0; s < bufferSize; s ++) {
157+
const timeInSeconds = s / sampleRate;
158+
const angle = 2 * Math.PI * frequency * timeInSeconds;
159+
sineWaves[y * bufferSize + s] = Math.sin(angle);
160+
cosineWaves[y * bufferSize + s] = Math.cos(angle);
161+
}
136162
}
137-
const imageData = new ImageData(pixels as ImageDataArray, 1, imageHeight);
138-
this.#ctx.putImageData(imageData, i, 0);
163+
164+
for (let x = 0; x < imageWidth; x ++) {
165+
for (let y = 0; y < imageHeight; y ++) {
166+
const pixelIndex = (x + y * imageWidth) * 4;
167+
168+
// Extract amplitude from R and G channels
169+
const magInt = pixelBuffer[pixelIndex] + (pixelBuffer[pixelIndex + 1] << 8);
170+
const amplitude = magInt / 65535;
171+
// Extract phase from B channel
172+
const phase = (pixelBuffer[pixelIndex + 2] / 255) * (2 * Math.PI) - Math.PI;
173+
174+
for (let s = 0; s < bufferSize; s ++) {
175+
const timeIndex = x * bufferSize + s;
176+
audioData[timeIndex] += amplitude * (
177+
cosineWaves[y * bufferSize + s] * Math.cos(phase)
178+
- sineWaves[y * bufferSize + s] * Math.sin(phase)
179+
);
180+
}
181+
}
182+
}
183+
184+
// Normalize output
185+
let max = 0;
186+
for (let i = 0; i < imageWidth * bufferSize; i ++) {
187+
const magnitude = Math.abs(audioData[i]);
188+
if (magnitude > max) max = magnitude;
189+
}
190+
for (let i = 0; i < imageWidth * bufferSize; i ++) {
191+
audioData[i] /= max;
192+
}
193+
194+
const wav = new WaveFile();
195+
wav.fromScratch(1, sampleRate, "32f", audioData);
196+
197+
const bytes = wav.toBuffer();
198+
const name = inputFile.name.split(".")[0] + "." + outputFormat.extension;
199+
outputFiles.push({ bytes, name });
200+
139201
}
202+
} else {
203+
for (const inputFile of inputFiles) {
140204

141-
const bytes: Uint8Array = await new Promise((resolve, reject) => {
142-
this.#canvas!.toBlob((blob) => {
143-
if (!blob) return reject("Canvas output failed.");
144-
blob.arrayBuffer().then(buf => resolve(new Uint8Array(buf)));
145-
}, outputFormat.mime);
146-
});
147-
const name = inputFile.name.split(".")[0] + "." + outputFormat.extension;
148-
outputFiles.push({ bytes, name });
205+
const inputBytes = new Uint8Array(inputFile.bytes);
206+
const audioData = await this.#audioContext.decodeAudioData(inputBytes.buffer);
149207

208+
Meyda.bufferSize = bufferSize;
209+
Meyda.sampleRate = audioData.sampleRate;
210+
const samples = audioData.getChannelData(0);
211+
const imageWidth = Math.floor(samples.length / Meyda.bufferSize);
212+
const imageHeight = Meyda.bufferSize / 2;
213+
214+
this.#canvas.width = imageWidth;
215+
this.#canvas.height = imageHeight;
216+
217+
for (let i = 0; i < imageWidth; i ++) {
218+
219+
const frame = samples.slice(i * Meyda.bufferSize, (i + 1) * Meyda.bufferSize);
220+
const spectrum = Meyda.extract("complexSpectrum", frame);
221+
if (!spectrum || !("real" in spectrum) || !("imag" in spectrum)) {
222+
throw "Failed to extract audio features!";
223+
}
224+
const real = spectrum.real as Float32Array;
225+
const imaginary = spectrum.imag as Float32Array;
226+
227+
const pixels = new Uint8ClampedArray(imageHeight * 4);
228+
for (let j = 0; j < imageHeight; j ++) {
229+
const magnitude = Math.sqrt(real[j] * real[j] + imaginary[j] * imaginary[j]);
230+
const phase = Math.atan2(imaginary[j], real[j]);
231+
// Encode magnitude in R, G channels
232+
const magInt = Math.floor(Math.min(magnitude * 65535, 65535));
233+
pixels[j * 4] = magInt & 0xFF;
234+
pixels[j * 4 + 1] = (magInt >> 8) & 0xFF;
235+
// Encode phase in B channel
236+
const phaseNormalized = Math.floor(((phase + Math.PI) / (2 * Math.PI)) * 255);
237+
pixels[j * 4 + 2] = phaseNormalized;
238+
pixels[j * 4 + 3] = 0xFF;
239+
}
240+
const imageData = new ImageData(pixels as ImageDataArray, 1, imageHeight);
241+
this.#ctx.putImageData(imageData, i, 0);
242+
243+
}
244+
245+
const bytes: Uint8Array = await new Promise((resolve, reject) => {
246+
this.#canvas!.toBlob((blob) => {
247+
if (!blob) return reject("Canvas output failed.");
248+
blob.arrayBuffer().then(buf => resolve(new Uint8Array(buf)));
249+
}, outputFormat.mime);
250+
});
251+
const name = inputFile.name.split(".")[0] + "." + outputFormat.extension;
252+
outputFiles.push({ bytes, name });
253+
254+
}
150255
}
151256

257+
152258
return outputFiles;
153259
}
154260

0 commit comments

Comments
 (0)