NAudio pitch (note) detection - C#
Problem statement:
Hello! I have a question. I have code, built with the NAudio library, that takes data from the sound card and processes it; at the moment it simply records it to a WAV file. There are also two classes, autocorrelation and FFT (I don't know yet which one to use), for finding that frequency (note) in the audio stream. The trouble is that I cannot figure out how to apply these classes to detect the pitch. The code is attached; please help me work this out, I really want to understand it.
The code is below: first the recorder itself, then the autocorrelation class, then Bernsee's FFT.
P.S. My apologies if anything is unclear or poorly formatted; I really need to get to the bottom of this and would be grateful for any help or pointers.
The recorder:

using System;
using System.IO;
using Microsoft.Win32;
using NAudio.Wave;

namespace Recorder
{
    class AudioProc
    {
        private enum RecordingState { Stopped, Recording }

        private string FileName;
        private WaveFileWriter writer;
        private RecordingState recordingState;
        private WaveIn waveIn;
        private float pitch;

        public float Pitch
        {
            get { return pitch; }
            set { pitch = value; }
        }

        public void waveInStart(int device)
        {
            FileName = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString() + ".wav");
            waveIn = new WaveIn();
            waveIn.DeviceNumber = device;
            waveIn.DataAvailable += waveIn_DataAvailable;
            waveIn.WaveFormat = new WaveFormat(44100, 1); // 44.1 kHz, mono
            writer = new WaveFileWriter(FileName, waveIn.WaveFormat);
            waveIn.StartRecording();
            recordingState = RecordingState.Recording;
        }

        private void waveIn_DataAvailable(object sender, WaveInEventArgs e)
        {
            WriteToFile(e.Buffer, e.BytesRecorded);

            // Convert the raw 16-bit little-endian PCM bytes to floats in [-1, 1];
            // this is the point where samples should be handed to a pitch detector.
            for (int index = 0; index < e.BytesRecorded; index += 2)
            {
                short sample = (short)((e.Buffer[index + 1] << 8) | e.Buffer[index + 0]);
                float sample32 = sample / 32768f;
            }
        }

        private void WriteToFile(byte[] buffer, int bytesRecorded)
        {
            long maxFileLength = waveIn.WaveFormat.AverageBytesPerSecond * 60; // cap at one minute
            if (recordingState == RecordingState.Recording)
            {
                int toWrite = (int)Math.Min(maxFileLength - writer.Length, bytesRecorded);
                if (toWrite > 0)
                {
                    writer.WriteData(buffer, 0, toWrite); // was bytesRecorded, which would overshoot the cap
                }
                else
                {
                    waveInStop();
                }
            }
        }

        public void waveInStop()
        {
            waveIn.StopRecording();
            recordingState = RecordingState.Stopped;
            writer.Dispose();
        }

        public void Save()
        {
            SaveFileDialog saveFileDialog = new SaveFileDialog();
            saveFileDialog.Filter = "WAV file (.wav)|*.wav";
            saveFileDialog.DefaultExt = ".wav";
            bool? result = saveFileDialog.ShowDialog();
            if (result.HasValue && result.Value)
            {
                SaveAs(saveFileDialog.FileName);
            }
        }

        private void SaveAs(string fileName)
        {
            if (File.Exists(fileName))
                File.Delete(fileName);
            File.Copy(FileName, fileName);
        }
    }
}
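To actually detect the pitch, the float samples produced in waveIn_DataAvailable need to be collected into a frame and passed to one of the detector classes. A minimal sketch of that wiring; the detector, sampleBuffer, and sampleCount members are my own additions, not part of the original code:

// Sketch: members to add to AudioProc, initialised in waveInStart,
// e.g. detector = new AutoCorrelator(44100);
private IPitchDetect detector;
private float[] sampleBuffer = new float[4096];
private int sampleCount;

private void waveIn_DataAvailable(object sender, WaveInEventArgs e)
{
    WriteToFile(e.Buffer, e.BytesRecorded);

    for (int index = 0; index < e.BytesRecorded; index += 2)
    {
        short sample = (short)((e.Buffer[index + 1] << 8) | e.Buffer[index]);
        sampleBuffer[sampleCount++] = sample / 32768f;

        // Once a full frame has accumulated, run the detector on it.
        if (sampleCount == sampleBuffer.Length)
        {
            Pitch = detector.DetectPitch(sampleBuffer, sampleCount);
            sampleCount = 0;
        }
    }
}

4096 samples at 44.1 kHz is about 93 ms of audio; it is also a power of two, so the same frame size works for the FFT-based detector below.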
Autocorrelation:

using System;

namespace Recorder
{
    class AutoCorrelator : IPitchDetect
    {
        private float[] prevBuffer;
        private int minOffset;
        private int maxOffset;
        private float sampleRate;

        public AutoCorrelator(int sampleRate)
        {
            this.sampleRate = (float)sampleRate;
            int minFreq = 85;  // search range roughly matches the human voice
            int maxFreq = 255;
            this.maxOffset = sampleRate / minFreq; // longest lag = lowest frequency
            this.minOffset = sampleRate / maxFreq; // shortest lag = highest frequency
        }

        public float DetectPitch(float[] buffer, int frames)
        {
            if (prevBuffer == null)
            {
                prevBuffer = new float[frames];
            }
            float secCor = 0;
            int secLag = 0;
            float maxCorr = 0;
            int maxLag = 0;

            // starting with low frequencies, working to higher
            for (int lag = maxOffset; lag >= minOffset; lag--)
            {
                float corr = 0;
                // correlation of the signal with a lagged copy of itself;
                // negative indices fall back to the previous buffer
                for (int i = 0; i < frames; i++)
                {
                    int oldIndex = i - lag;
                    float sample = ((oldIndex < 0) ? prevBuffer[frames + oldIndex] : buffer[oldIndex]);
                    corr += (sample * buffer[i]);
                }
                if (corr > maxCorr)
                {
                    maxCorr = corr;
                    maxLag = lag;
                }
                if (corr >= 0.9 * maxCorr)
                {
                    secCor = corr;
                    secLag = lag;
                }
            }
            for (int n = 0; n < frames; n++)
            {
                prevBuffer[n] = buffer[n];
            }

            float noiseThreshold = frames / 1000f;
            //Debug.WriteLine(String.Format("Max Corr: {0} ({1}), Sec Corr: {2} ({3})", this.sampleRate / maxLag, maxCorr, this.sampleRate / secLag, secCor));
            if (maxCorr < noiseThreshold || maxLag == 0)
                return 0.0f;
            //return 44100.0f / secLag; //--works better for singing
            return this.sampleRate / maxLag;
        }
    }
}
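DetectPitch returns a frequency in hertz, not a note name. To get the note, the standard conversion through MIDI note numbers can be used: n = 69 + 12·log2(f/440). This helper is my own addition, not part of NAudio or the original code:

// Hypothetical helper: map a frequency in Hz to the nearest note name.
static string FrequencyToNote(float frequency)
{
    if (frequency <= 0) return "-";
    string[] names = { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" };
    // MIDI note number: A4 = 440 Hz is note 69, 12 semitones per octave.
    int midi = (int)Math.Round(69 + 12 * Math.Log(frequency / 440.0, 2));
    if (midi < 0) return "-";
    return names[midi % 12] + (midi / 12 - 1); // e.g. 110 Hz -> "A2"
}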
Bernsee's FFT:

using System;

namespace Recorder
{
    public interface IPitchDetect
    {
        float DetectPitch(float[] buffer, int frames);
    }

    class PitchDetect : IPitchDetect
    {
        private float sampleRate;

        public PitchDetect(float sampleRate)
        {
            this.sampleRate = sampleRate;
        }

        public const double PI_VAL = 3.14159265358979323846;

        /*
            FFT routine, (C)1996 S.M.Bernsee. Sign = -1 is FFT, 1 is iFFT (inverse)
            Fills fftBuffer[0...2*fftFrameSize-1] with the Fourier transform of the
            time domain data in fftBuffer[0...2*fftFrameSize-1]. The FFT array takes
            and returns the cosine and sine parts in an interleaved manner, ie.
            fftBuffer[0] = cosPart[0], fftBuffer[1] = sinPart[0], asf. fftFrameSize
            must be a power of 2. It expects a complex input signal (see footnote 2),
            ie. when working with 'common' audio signals our input signal has to be
            passed as {in[0],0.,in[1],0.,in[2],0.,...} asf. In that case, the
            transform of the frequencies of interest is in fftBuffer[0...fftFrameSize].
        */
        private void smbFft(float[] fftBuffer, int fftFrameSize, int sign)
        {
            float wr, wi, arg, temp;
            int p1, p2; // MRH: were float*
            float tr, ti, ur, ui;
            int p1r, p1i, p2r, p2i; // MRH: were float*
            int i, bitm, j, le, le2, k;
            int fftFrameSize2 = fftFrameSize * 2;

            // bit-reversal reordering of the interleaved complex input
            for (i = 2; i < fftFrameSize2 - 2; i += 2)
            {
                for (bitm = 2, j = 0; bitm < fftFrameSize2; bitm <<= 1)
                {
                    if ((i & bitm) != 0) j++;
                    j <<= 1;
                }
                if (i < j)
                {
                    p1 = i; p2 = j;
                    temp = fftBuffer[p1];
                    fftBuffer[p1++] = fftBuffer[p2];
                    fftBuffer[p2++] = temp;
                    temp = fftBuffer[p1];
                    fftBuffer[p1] = fftBuffer[p2];
                    fftBuffer[p2] = temp;
                }
            }

            // butterfly passes
            int kmax = (int)(Math.Log(fftFrameSize) / Math.Log(2.0) + 0.5);
            for (k = 0, le = 2; k < kmax; k++)
            {
                le <<= 1;
                le2 = le >> 1;
                ur = 1.0f;
                ui = 0.0f;
                arg = (float)(PI_VAL / (le2 >> 1));
                wr = (float)Math.Cos(arg);
                wi = (float)(sign * Math.Sin(arg));
                for (j = 0; j < le2; j += 2)
                {
                    p1r = j; p1i = p1r + 1;
                    p2r = p1r + le2; p2i = p2r + 1;
                    for (i = j; i < fftFrameSize2; i += le)
                    {
                        float p2rVal = fftBuffer[p2r];
                        float p2iVal = fftBuffer[p2i];
                        tr = p2rVal * ur - p2iVal * ui;
                        ti = p2rVal * ui + p2iVal * ur;
                        fftBuffer[p2r] = fftBuffer[p1r] - tr;
                        fftBuffer[p2i] = fftBuffer[p1i] - ti;
                        fftBuffer[p1r] += tr;
                        fftBuffer[p1i] += ti;
                        p1r += le; p1i += le;
                        p2r += le; p2i += le;
                    }
                    tr = ur * wr - ui * wi;
                    ui = ur * wi + ui * wr;
                    ur = tr;
                }
            }
        }

        private float HammingWindow(int n, int N)
        {
            return 0.54f - 0.46f * (float)Math.Cos((2 * Math.PI * n) / (N - 1));
        }

        private float[] fftBuffer;
        private float[] prevBuffer;

        public float DetectPitch(float[] buffer, int inFrames)
        {
            Func<int, int, float> window = HammingWindow;
            if (prevBuffer == null)
            {
                prevBuffer = new float[inFrames];
            }
            // double frames since we are combining present and previous buffers
            int frames = inFrames * 2;
            if (fftBuffer == null)
            {
                fftBuffer = new float[frames * 2]; // times 2 because it is complex input
            }
            for (int n = 0; n < frames; n++)
            {
                if (n < inFrames)
                {
                    fftBuffer[n * 2] = prevBuffer[n] * window(n, frames);
                    fftBuffer[n * 2 + 1] = 0; // need to clear out as fft modifies buffer
                }
                else
                {
                    fftBuffer[n * 2] = buffer[n - inFrames] * window(n, frames);
                    fftBuffer[n * 2 + 1] = 0; // need to clear out as fft modifies buffer
                }
            }

            // assuming frames is a power of 2
            smbFft(fftBuffer, frames, -1);

            // pick the strongest bin between 85 and 300 Hz
            float binSize = sampleRate / frames;
            int minBin = (int)(85 / binSize);
            int maxBin = (int)(300 / binSize);
            float maxIntensity = 0f;
            int maxBinIndex = 0;
            for (int bin = minBin; bin <= maxBin; bin++)
            {
                float real = fftBuffer[bin * 2];
                float imaginary = fftBuffer[bin * 2 + 1];
                float intensity = real * real + imaginary * imaginary;
                if (intensity > maxIntensity)
                {
                    maxIntensity = intensity;
                    maxBinIndex = bin;
                }
            }

            // remember the current frame for the next call; this line appears
            // to be missing from the original listing, without it prevBuffer
            // stays all zeros forever
            Array.Copy(buffer, prevBuffer, inFrames);

            return binSize * maxBinIndex;
        }
    }
}
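Since smbFft needs a power-of-two size and DetectPitch doubles the input frame count, the input frames themselves should be a power of two. A small sketch of using the class directly, assuming the 44100 Hz mono format from AudioProc:

// Sketch: the FFT detector combines the current and previous frames,
// so 2048 input samples produce a 4096-point FFT.
IPitchDetect fftDetector = new PitchDetect(44100f);
float binSize = 44100f / 4096;   // about 10.8 Hz of frequency resolution per bin

float[] frame = new float[2048]; // fill with samples from DataAvailable
float pitch = fftDetector.DetectPitch(frame, frame.Length);

Around 110 Hz a 10.8 Hz bin is coarser than a semitone, and the bin-picking approach only returns multiples of binSize, so for low notes the autocorrelator usually gives finer pitch estimates.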
Solution to the problem "NAudio pitch (note) detection"
Program listing:
for (int i = 0; i < frames; i++)
{
    int oldIndex = i - lag;
    // negative indices wrap into the previous buffer, so the read is always
    // in bounds and the empty catch for IndexOutOfRangeException that was
    // here is unnecessary (silently swallowing exceptions also hides bugs)
    float sample = (oldIndex < 0) ? prevBuffer[frames + oldIndex]
                                  : prevBuffer[oldIndex];
    corr += sample * source_speech[i]; // source_speech: the current input frame
}
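To verify the detector end to end, the AutoCorrelator can be fed a synthetic tone inside its 85-255 Hz search range. A minimal smoke test; the 110 Hz tone and 4096-sample frame are arbitrary choices of mine:

// A pure 110 Hz sine at 44.1 kHz should be detected as roughly 110 Hz (A2).
var detector = new AutoCorrelator(44100);
int frames = 4096;
float[] buffer = new float[frames];
for (int i = 0; i < frames; i++)
    buffer[i] = (float)Math.Sin(2 * Math.PI * 110.0 * i / 44100.0);

detector.DetectPitch(buffer, frames);            // first call: prevBuffer is still empty
float pitch = detector.DetectPitch(buffer, frames);
Console.WriteLine(pitch);                        // expect a value close to 110

Combined with the FrequencyToNote helper sketched earlier, the result maps to "A2".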