NAudio pitch (note) detection - C#
Problem statement:
Hello, I have a question. I have code built on the NAudio library that captures data from the sound card and processes it; at the moment it simply records it to a WAV file. There are also two classes, an autocorrelator and an FFT (I do not yet know which one to use), for finding the frequency (the note) in the audio stream. The trouble is that I cannot work out how to apply these classes to detect the pitch. The code is attached; please help me figure this out.
The code itself (the recorder first, then the autocorrelation class, then the Bernsee FFT):
P.S. My apologies if anything is unclear or badly formatted. I would be grateful for any help or pointers.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using NAudio.Wave;
using System.IO;
using Microsoft.Win32;
namespace Recorder
{
class AudioProc
{
private enum RecordingState
{
Stopped,
Recording
}
private string FileName;
private WaveFileWriter writer;
private RecordingState recordingState;
private WaveIn waveIn;
public float Pitch { get; set; }
public void waveInStart(int device)
{
FileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString() + ".wav");
waveIn = new WaveIn();
waveIn.DeviceNumber = device;
waveIn.DataAvailable += waveIn_DataAvailable;
waveIn.WaveFormat = new WaveFormat(44100, 1);
writer = new WaveFileWriter(FileName, waveIn.WaveFormat);
waveIn.StartRecording();
recordingState = RecordingState.Recording;
}
private void waveIn_DataAvailable(object sender, WaveInEventArgs e)
{
byte[] buffer = e.Buffer;
int bytesRecorded = e.BytesRecorded;
WriteToFile(buffer, bytesRecorded);
for (int index = 0; index < e.BytesRecorded; index += 2)
{
short sample = (short)((e.Buffer[index + 1] << 8) | e.Buffer[index + 0]);
float sample32 = sample / 32768f;
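// sample32 is the normalized sample a pitch detector would consume;
// it is currently discarded (see the solution section below for one way to wire it up)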
}
}
private void WriteToFile(byte[] buffer, int bytesRecorded)
{
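// cap the temporary file at 60 seconds of audio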
long maxFileLength = waveIn.WaveFormat.AverageBytesPerSecond * 60;
if (recordingState == RecordingState.Recording)
{
int towrite = (int)Math.Min(maxFileLength - writer.Length, bytesRecorded);
if (towrite > 0)
{
writer.WriteData(buffer, 0, towrite); // write only up to the cap computed above
}
else
{
waveInStop();
}
}
}
public void waveInStop()
{
waveIn.StopRecording();
recordingState = RecordingState.Stopped;
writer.Dispose();
}
public void Save()
{
SaveFileDialog saveFileDialog = new SaveFileDialog();
saveFileDialog.Filter = "WAV file (.wav)|*.wav";
saveFileDialog.DefaultExt = ".wav";
bool? result = saveFileDialog.ShowDialog();
if (result.HasValue && result.Value)
{
SaveAs(saveFileDialog.FileName);
}
}
private void SaveAs(string fileName)
{
if (File.Exists(fileName)) File.Delete(fileName);
File.Copy(FileName,fileName);
}
}
}

Autocorrelation:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Recorder
{
class AutoCorrelator: IPitchDetect
{
private float[] prevBuffer;
private int minOffset;
private int maxOffset;
private float sampleRate;
public AutoCorrelator(int sampleRate)
{
this.sampleRate = (float)sampleRate;
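// search range 85-255 Hz covers typical voice fundamentals; the lag bounds are derived from it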
int minFreq = 85;
int maxFreq = 255;
this.maxOffset = sampleRate / minFreq;
this.minOffset = sampleRate / maxFreq;
}
public float DetectPitch(float[] buffer, int frames)
{
if (prevBuffer == null)
{
prevBuffer = new float[frames];
}
float secCor = 0;
int secLag = 0;
float maxCorr = 0;
int maxLag = 0;
// starting with low frequencies, working to higher
for (int lag = maxOffset; lag >= minOffset; lag--)
{
float corr = 0; // sum of products of the signal with its lag-delayed copy
for (int i = 0; i < frames; i++)
{
int oldIndex = i - lag;
float sample = ((oldIndex < 0) ? prevBuffer[frames + oldIndex] : buffer[oldIndex]);
corr += (sample * buffer[i]);
}
if (corr > maxCorr)
{
maxCorr = corr;
maxLag = lag;
}
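// also remember a strong secondary peak; it can track a sung pitch better (see the commented-out return below)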
if (corr >= 0.9 * maxCorr)
{
secCor = corr;
secLag = lag;
}
}
for (int n = 0; n < frames; n++)
{
prevBuffer[n] = buffer[n];
}
float noiseThreshold = frames / 1000f;
//Debug.WriteLine(String.Format("Max Corr: {0} ({1}), Sec Corr: {2} ({3})", this.sampleRate / maxLag, maxCorr, this.sampleRate / secLag, secCor));
if (maxCorr < noiseThreshold || maxLag == 0) return 0.0f;
//return 44100.0f / secLag; //--works better for singing
return this.sampleRate / maxLag;
}
}
}

Bernsee FFT:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Recorder
{
public interface IPitchDetect
{
float DetectPitch(float[] buffer, int frames);
}
class PitchDetect: IPitchDetect
{
private float sampleRate;
public PitchDetect(float sampleRate)
{
this.sampleRate = sampleRate;
}
public const double PI_VAL = 3.14159265358979323846;
/*
FFT routine, (C)1996 S.M.Bernsee. Sign = -1 is FFT, 1 is iFFT (inverse)
Fills fftBuffer[0...2*fftFrameSize-1] with the Fourier transform of the
time domain data in fftBuffer[0...2*fftFrameSize-1]. The FFT array takes
and returns the cosine and sine parts in an interleaved manner, ie.
fftBuffer[0] = cosPart[0], fftBuffer[1] = sinPart[0], asf. fftFrameSize
must be a power of 2. It expects a complex input signal (see footnote 2),
ie. when working with 'common' audio signals our input signal has to be
passed as {in[0],0.,in[1],0.,in[2],0.,...} asf. In that case, the transform
of the frequencies of interest is in fftBuffer[0...fftFrameSize].
*/
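// Example for fftFrameSize = 4: fftBuffer holds 8 floats,
// { re0, im0, re1, im1, re2, im2, re3, im3 }; a real signal x[0..3]
// is therefore packed as { x[0], 0, x[1], 0, x[2], 0, x[3], 0 }.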
private void smbFft(float[] fftBuffer, int fftFrameSize, int sign)
{
float wr, wi, arg, temp;
int p1, p2; // MRH: were float*
float tr, ti, ur, ui;
int p1r, p1i, p2r, p2i; // MRH: were float*
int i, bitm, j, le, le2, k;
int fftFrameSize2 = fftFrameSize * 2;
for (i = 2; i < fftFrameSize2 - 2; i += 2)
{
for (bitm = 2, j = 0; bitm < fftFrameSize2; bitm <<= 1)
{
if ((i & bitm) != 0) j++;
j <<= 1;
}
if (i < j)
{
p1 = i; p2 = j;
temp = fftBuffer[p1];
fftBuffer[p1++] = fftBuffer[p2];
fftBuffer[p2++] = temp;
temp = fftBuffer[p1];
fftBuffer[p1] = fftBuffer[p2];
fftBuffer[p2] = temp;
}
}
int kmax = (int)(Math.Log(fftFrameSize) / Math.Log(2.0) + 0.5);
for (k = 0, le = 2; k < kmax; k++)
{
le <<= 1;
le2 = le >> 1;
ur = 1.0f;
ui = 0.0f;
arg = (float)(PI_VAL / (le2 >> 1));
wr = (float)Math.Cos(arg);
wi = (float)(sign * Math.Sin(arg));
for (j = 0; j < le2; j += 2)
{
p1r = j; p1i = p1r + 1;
p2r = p1r + le2; p2i = p2r + 1;
for (i = j; i < fftFrameSize2; i += le)
{
float p2rVal = fftBuffer[p2r];
float p2iVal = fftBuffer[p2i];
tr = p2rVal * ur - p2iVal * ui;
ti = p2rVal * ui + p2iVal * ur;
fftBuffer[p2r] = fftBuffer[p1r] - tr;
fftBuffer[p2i] = fftBuffer[p1i] - ti;
fftBuffer[p1r] += tr;
fftBuffer[p1i] += ti;
p1r += le;
p1i += le;
p2r += le;
p2i += le;
}
tr = ur * wr - ui * wi;
ui = ur * wi + ui * wr;
ur = tr;
}
}
}
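// Hamming window: tapers the edges of the analysis frame to reduce spectral leakage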
private float HammingWindow(int n, int N)
{
return 0.54f - 0.46f * (float)Math.Cos((2 * Math.PI * n) / (N - 1));
}
private float[] fftBuffer;
private float[] prevBuffer;
public float DetectPitch(float[] buffer, int inFrames)
{
Func<int, int, float> window = HammingWindow;
if (prevBuffer == null)
{
prevBuffer = new float[inFrames];
}
// double frames since we are combining present and previous buffers
int frames = inFrames * 2;
if (fftBuffer == null)
{
fftBuffer = new float[frames * 2]; // times 2 because it is complex input
}
for (int n = 0; n < frames; n++)
{
if (n < inFrames)
{
fftBuffer[n * 2] = prevBuffer[n] * window(n, frames);
fftBuffer[n * 2 + 1] = 0; // need to clear out as fft modifies buffer
}
else
{
fftBuffer[n * 2] = buffer[n-inFrames] * window(n, frames);
fftBuffer[n * 2 + 1] = 0; // need to clear out as fft modifies buffer
}
}
// assuming frames is a power of 2
smbFft(fftBuffer, frames, -1);
float binSize = sampleRate / frames;
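// restrict the peak search to the expected voice range (85-300 Hz)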
int minBin = (int)(85 / binSize);
int maxBin = (int)(300 / binSize);
float maxIntensity = 0f;
int maxBinIndex = 0;
for (int bin = minBin; bin <= maxBin; bin++)
{
float real = fftBuffer[bin * 2];
float imaginary = fftBuffer[bin * 2 + 1];
float intensity = real * real + imaginary * imaginary;
if (intensity > maxIntensity)
{
maxIntensity = intensity;
maxBinIndex = bin;
}
}
Array.Copy(buffer, prevBuffer, inFrames); // keep this frame for the next call's overlap; without this, prevBuffer stays all zeros
return binSize * maxBinIndex;
}
}
}

Solution: "NAudio pitch (note) detection"
Program listing: the corrected inner correlation loop of AutoCorrelator.DetectPitch. The empty catch block that swallowed IndexOutOfRangeException is removed; with prevBuffer sized to the frame length, the indices stay in range:
for (int i = 0; i < frames; i++)
{
    int oldIndex = i - lag;
    // reach back into the previous frame while the lag extends past the start
    float sample = (oldIndex < 0) ? prevBuffer[frames + oldIndex] : prevBuffer[oldIndex];
    corr += sample * source_speech[i];
}
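To answer the original question of how to apply the classes: the bytes decoded in waveIn_DataAvailable can be collected into a float block and handed to either detector through IPitchDetect.DetectPitch. Below is a minimal sketch, not a definitive implementation. PitchRecorder, analysisBuffer, samplesCollected and FrequencyToNoteName are illustrative names, not part of NAudio, and the block size of 2048 samples (about 46 ms at 44.1 kHz, and a power of two as the FFT detector requires) is an assumption.

using System;
using NAudio.Wave;

namespace Recorder
{
    class PitchRecorder
    {
        private readonly IPitchDetect detector = new AutoCorrelator(44100);
        private readonly float[] analysisBuffer = new float[2048]; // ~46 ms at 44.1 kHz
        private int samplesCollected;

        // Subscribe this handler to waveIn.DataAvailable, or call it from
        // AudioProc's handler in place of the loop that discards sample32.
        public void OnDataAvailable(object sender, WaveInEventArgs e)
        {
            for (int index = 0; index < e.BytesRecorded; index += 2)
            {
                // 16-bit little-endian PCM to normalized float, as in AudioProc
                short sample = (short)((e.Buffer[index + 1] << 8) | e.Buffer[index]);
                analysisBuffer[samplesCollected++] = sample / 32768f;
                if (samplesCollected == analysisBuffer.Length)
                {
                    float frequency = detector.DetectPitch(analysisBuffer, analysisBuffer.Length);
                    if (frequency > 0) // 0 means "no pitch found" (below the noise threshold)
                        Console.WriteLine("{0:F1} Hz = {1}", frequency, FrequencyToNoteName(frequency));
                    samplesCollected = 0;
                }
            }
        }

        // Standard conversion: MIDI note number = 69 + 12 * log2(f / 440 Hz)
        private static string FrequencyToNoteName(float frequency)
        {
            string[] names = { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" };
            int midi = (int)Math.Round(69 + 12 * Math.Log(frequency / 440.0, 2));
            return names[midi % 12] + (midi / 12 - 1); // e.g. 440 Hz -> "A4"
        }
    }
}

Swapping new AutoCorrelator(44100) for new PitchDetect(44100) switches to the FFT detector without touching anything else, since both implement IPitchDetect; instead of printing, the detected frequency could also be stored in AudioProc's Pitch property.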