Replace FFTW3 with a friendlier license FFT library about pitch-detection HOT 4 CLOSED

sevagh commented on May 27, 2024

Replace FFTW3 with a friendlier license FFT library

from pitch-detection.

Comments (4)

CrushedPixel commented on May 27, 2024

Here's my MPMPitchDetector template class, based on your implementation, and using the more friendly-licensed FFTS instead of FFTW.

Usage:

MPMPitchDetector<4096> detector(sampleRate, 80);
detector.estimate(samples); // samples is a vector<double> of size 4096

Source:

// MPMPitchDetector.h

#pragma once

#include <vector>
#include <ffts/ffts.h>
#include <climits>

#define MPM_CUTOFF 0.93
#define MPM_SMALL_CUTOFF 0.5

static inline std::pair<double, double> parabolicInterpolation(const std::vector<double> &array, double x) {
	int x_adjusted;

	if (x < 1) {
		x_adjusted = (array[x] <= array[x + 1]) ? x : x + 1;
	} else if (x > signed(array.size()) - 1) {
		x_adjusted = (array[x] <= array[x - 1]) ? x : x - 1;
	} else {
		double den = array[x + 1] + array[x - 1] - 2 * array[x];
		double delta = array[x - 1] - array[x + 1];
		return (!den) ? std::make_pair(x, array[x])
		              : std::make_pair(x + delta / (2 * den),
		                               array[x] - delta * delta / (8 * den));
	}
	return std::make_pair(x_adjusted, array[x_adjusted]);
}

template <size_t SIZE>
class MPMPitchDetector {
public:
	MPMPitchDetector(double sampleRate, double minFreq) :
			sampleRate(sampleRate), minFreq(minFreq) {
		assert(SIZE && ((SIZE & (SIZE - 1)) == 0)); // (optional) assert SIZE is a power of 2
		fftForward = ffts_init_1d(SIZE * 2, false);
		fftBackward = ffts_init_1d(SIZE * 2, false);
	}

	~MPMPitchDetector() {
		ffts_free(fftForward);
		ffts_free(fftBackward);
	}

	// estimates the base frequency for the samples provided
	// using the McLeod Pitch Method.
	double estimate(const std::vector<double> &samples) {
		assert(samples.size() == SIZE);

		std::vector<double> nsdf = normalizedSquareDifference(samples);
		std::vector<int> max_positions = pickPeaks(nsdf);
		std::vector<std::pair<double, double>> estimates;

		double highest_amplitude = -DBL_MAX;

		for (int i : max_positions) {
			highest_amplitude = std::max(highest_amplitude, nsdf[i]);
			if (nsdf[i] > MPM_SMALL_CUTOFF) {
				auto x = parabolicInterpolation(nsdf, i);
				estimates.push_back(x);
				highest_amplitude = std::max(highest_amplitude, x.second);
			}
		}

		if (estimates.empty())
			return -1;

		double actual_cutoff = MPM_CUTOFF * highest_amplitude;
		double period = 0;

		for (auto i : estimates) {
			if (i.second >= actual_cutoff) {
				period = i.first;
				break;
			}
		}

		double pitch_estimate = sampleRate / period;
		return pitch_estimate > minFreq ? pitch_estimate : -1;
	}

private:
	const double sampleRate;
	const double minFreq;

	ffts_plan_t *fftForward, *fftBackward;

	std::vector<double> normalizedSquareDifference(const std::vector<double> &samples) {
		std::complex<float> acf_complex[SIZE * 2];
		std::vector<double> acf_real(SIZE);
		std::vector<double> acf_real_2(SIZE);

		autocorrelation(samples, acf_complex);

		for (size_t i = 0; i < SIZE; ++i)
			acf_real[i] = (acf_complex[i + SIZE]).real() / acf_complex[SIZE].real();

		for (size_t i = SIZE; i < SIZE * 2; i++) {
			acf_real_2.push_back(acf_complex[i].real() / acf_complex[SIZE].real());
		}

		return acf_real;
	}

	void autocorrelation(const std::vector<double> &data, std::complex<float> *result) {
		std::complex<float> signala_ext[SIZE * 2];
		std::complex<float> signalb_ext[SIZE * 2];

		// zero padding of input data
		for (int i = 0; i < SIZE; i++) {
			signala_ext[SIZE + i] = data[i];
			signalb_ext[i] = data[i];
		}

		std::complex<float> outa[SIZE * 2];
		std::complex<float> outb[SIZE * 2];
		std::complex<float> out[SIZE * 2];

		ffts_execute(fftForward, signala_ext, outa);
		ffts_execute(fftForward, signalb_ext, outb);

		for (int i = 0; i < SIZE * 2; ++i) {
			out[i] = outa[i] * std::conj(outb[i]);
		}

		ffts_execute(fftBackward, out, result);
	}

	std::vector<int> pickPeaks(const std::vector<double> &nsdf) {
		std::vector<int> max_positions{};
		int pos = 0;
		int cur_max_pos = 0;
		ssize_t size = nsdf.size();

		while (pos < (size - 1) / 3 && nsdf[pos] > 0)
			pos++;
		while (pos < size - 1 && nsdf[pos] <= 0.0)
			pos++;

		if (pos == 0)
			pos = 1;

		while (pos < size - 1) {
			if (nsdf[pos] > nsdf[pos - 1] && nsdf[pos] >= nsdf[pos + 1]) {
				if (cur_max_pos == 0) {
					cur_max_pos = pos;
				} else if (nsdf[pos] > nsdf[cur_max_pos]) {
					cur_max_pos = pos;
				}
			}
			pos++;
			if (pos < size - 1 && nsdf[pos] <= 0) {
				if (cur_max_pos > 0) {
					max_positions.push_back(cur_max_pos);
					cur_max_pos = 0;
				}
				while (pos < size - 1 && nsdf[pos] <= 0.0) {
					pos++;
				}
			}
		}
		if (cur_max_pos > 0) {
			max_positions.push_back(cur_max_pos);
		}
		return max_positions;
	}
};

from pitch-detection.

sevagh commented on May 27, 2024

FYI:

		std::vector<double> acf_real(SIZE);
		std::vector<double> acf_real_2(SIZE);

		autocorrelation(samples, acf_complex);

		for (size_t i = 0; i < SIZE; ++i)
			acf_real[i] = (acf_complex[i + SIZE]).real() / acf_complex[SIZE].real();

		for (size_t i = SIZE; i < SIZE * 2; i++) {
			acf_real_2.push_back(acf_complex[i].real() / acf_complex[SIZE].real());
		}

I think acf_real_2 here is a leftover of some debugging experiment I committed by accident. If you still have it in your code you can get rid of it.

I'm experimenting with FFTS right now.

from pitch-detection.

sevagh commented on May 27, 2024

#31

from pitch-detection.

CrushedPixel commented on May 27, 2024

Looks good to me! Thank you!

from pitch-detection.

Replace FFTW3 with a friendlier license FFT library about pitch-detection HOT 4 CLOSED

Comments (4)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent