Skip to content

Commit 509fc30

Browse files
committed
implement overlap-add when reconstructing audio from image
1 parent bbebf0e commit 509fc30

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

src/handlers/meyda.ts

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ class meydaHandler implements FormatHandler {
114114
const outputIsImage = (outputFormat.internal === "image");
115115

116116
const bufferSize = 2048;
117+
const hopSize = bufferSize / 2;
117118

118119
if (inputIsImage === outputIsImage) {
119120
throw "Invalid input/output format.";
@@ -146,7 +147,13 @@ class meydaHandler implements FormatHandler {
146147

147148
const sampleRate = this.#audioContext.sampleRate;
148149

149-
const audioData = new Float32Array(imageWidth * bufferSize);
150+
const audioData = new Float32Array(imageWidth * hopSize + bufferSize);
151+
const window = new Float32Array(bufferSize);
152+
153+
// Generate Hanning window
154+
for (let i = 0; i < bufferSize; i++) {
155+
window[i] = 0.5 * (1 - Math.cos(2 * Math.PI * i / bufferSize));
156+
}
150157

151158
// Precompute sine and cosine waves for each frequency
152159
const sineWaves = new Float32Array(imageHeight * bufferSize);
@@ -162,6 +169,8 @@ class meydaHandler implements FormatHandler {
162169
}
163170

164171
for (let x = 0; x < imageWidth; x ++) {
172+
const frameData = new Float32Array(bufferSize);
173+
165174
for (let y = 0; y < imageHeight; y ++) {
166175
const pixelIndex = (x + y * imageWidth) * 4;
167176

@@ -172,13 +181,18 @@ class meydaHandler implements FormatHandler {
172181
const phase = (pixelBuffer[pixelIndex + 2] / 255) * (2 * Math.PI) - Math.PI;
173182

174183
for (let s = 0; s < bufferSize; s ++) {
175-
const timeIndex = x * bufferSize + s;
176-
audioData[timeIndex] += amplitude * (
184+
frameData[s] += amplitude * (
177185
cosineWaves[y * bufferSize + s] * Math.cos(phase)
178186
- sineWaves[y * bufferSize + s] * Math.sin(phase)
179187
);
180188
}
181189
}
190+
191+
// Apply window and overlap-add
192+
const outputOffset = x * hopSize;
193+
for (let s = 0; s < bufferSize; s ++) {
194+
audioData[outputOffset + s] += frameData[s] * window[s];
195+
}
182196
}
183197

184198
// Normalize output
@@ -187,7 +201,7 @@ class meydaHandler implements FormatHandler {
187201
const magnitude = Math.abs(audioData[i]);
188202
if (magnitude > max) max = magnitude;
189203
}
190-
for (let i = 0; i < imageWidth * bufferSize; i ++) {
204+
for (let i = 0; i < audioData.length; i ++) {
191205
audioData[i] /= max;
192206
}
193207

@@ -208,16 +222,20 @@ class meydaHandler implements FormatHandler {
208222
Meyda.bufferSize = bufferSize;
209223
Meyda.sampleRate = audioData.sampleRate;
210224
const samples = audioData.getChannelData(0);
211-
const imageWidth = Math.floor(samples.length / Meyda.bufferSize);
225+
const imageWidth = Math.max(1, Math.ceil((samples.length - bufferSize) / hopSize) + 1);
212226
const imageHeight = Meyda.bufferSize / 2;
213227

214228
this.#canvas.width = imageWidth;
215229
this.#canvas.height = imageHeight;
216230

231+
const frameBuffer = new Float32Array(bufferSize);
232+
217233
for (let i = 0; i < imageWidth; i ++) {
218234

219-
const frame = samples.slice(i * Meyda.bufferSize, (i + 1) * Meyda.bufferSize);
220-
const spectrum = Meyda.extract("complexSpectrum", frame);
235+
const start = i * hopSize;
236+
frameBuffer.fill(0);
237+
frameBuffer.set(samples.subarray(start, Math.min(start + bufferSize, samples.length)));
238+
const spectrum = Meyda.extract("complexSpectrum", frameBuffer);
221239
if (!spectrum || !("real" in spectrum) || !("imag" in spectrum)) {
222240
throw "Failed to extract audio features!";
223241
}

0 commit comments

Comments
 (0)