It has been a long time since my last article. This is a first, simple introduction to silence detection (voice activity detection).
1. Capturing audio with PortAudio: the capture class interface
class DeviceAudio:public c_thread
{
private:
TSoundInfo * _info = NULL;
static vector<TDeviceInfo> g_AudioDevice; //element type name assumed; the template argument was lost in the original post
protected:
static int GetDeviceList(vector<TDeviceInfo> &deviceList);
public:
DeviceAudio(){
}
~DeviceAudio() {
#if(USE_FAAC)
if (_aPacket != NULL)
delete _aPacket;
#endif
/*if(_pkt!=NULL)
av_packet_free(&_pkt);*/
}
//int _channel = 1;
int Encode(uint8_t * inputBuffer, int audioSize,int channel);
func_send _ptr = NULL;
int GetChannel()
{
if (_info != NULL)
return _info->channels;
return -1;
}
static int Init();
static int UnInit();
static int GetDeviceDefault(int &in, string &inname, int &out, string &outname);
static int GetDeviceCount();
static const char *GetDeviceName(int index);
static const char *GetDeviceCapacity(int index,int &samplerate, int &channel);
//initialize the audio device and the audio-encoder parameters when starting
//int Start();
int Open(TSoundInfo *_info);
void Stop();
void Run();
//int isStopped();
//int _stop = 0;
};
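The TSoundInfo structure, the func_send callback type, and the element type stored in g_AudioDevice are not shown in the post. Judging from the members the code dereferences, they presumably look roughly like the sketch below; every name and type here is an assumption, and in the USE_AAC build the callback receives an AVPacket* rather than a raw buffer.

//Sketch only: inferred from how the class uses these types, not taken from the post.
#include <cstdint>
#include <string>

//raw-PCM callback variant (the USE_AAC build hands over an AVPacket* instead)
typedef void (*func_send)(uint8_t *data, int size);

struct TSoundInfo {
    int       deviceindex;  //-1 = use the default input device
    int       sample_rate;  //e.g. 16000 or 44100
    int       nb_samples;   //frames per PortAudio callback, e.g. 1024
    int       channels;     //capture channel count
    func_send callback;     //receives each captured buffer
};

//element type of g_AudioDevice, filled by GetDeviceList()
struct TDeviceInfo {
    std::string name;
    int         channel;
    int         samplerate;
};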
The implementation of the class follows. If FFmpeg encoding is needed, enable the corresponding compile switch (USE_AAC).
#include "DeviceAudio.h"
extern "C" {
#include "portaudio/portaudio.h"
}
#include <vector>
using namespace std;
vector<TDeviceInfo> DeviceAudio::g_AudioDevice;
static int AudioRecordCallback(const void *inputBuffer, void *outputBuffer,
unsigned long framesPerBuffer,
const PaStreamCallbackTimeInfo* timeInfo,
PaStreamCallbackFlags statusFlags,
void *userData)
{
DeviceAudio *process = (DeviceAudio*)userData;
//const short *rptr = (const short*)inputBuffer;
int finished = paContinue;
//note: the device may be capturing 2 channels
int channel = process->GetChannel();
int audioSize = (framesPerBuffer * sizeof(short) * channel);
(void)outputBuffer; /* Prevent unused variable warnings. */
(void)timeInfo;
(void)statusFlags;
if (process->IsStop() ==1) {
finished = paComplete;
return 0;
}
if (process->_ptr != NULL)
{
#ifdef USE_AAC //note: this branch encodes with FFmpeg
AVPacket *pkt = av_packet_alloc();
av_init_packet(pkt);
if (process->GetEncoder()->Encode((uint8_t*)inputBuffer, audioSize, channel, pkt) != NULL)
process->_ptr(pkt);
else
av_packet_free(&pkt);
#else
//no FFmpeg: hand the raw buffer to Encode(), which forwards it to the callback
process->Encode((uint8_t*)inputBuffer, audioSize,channel);
#endif
}
return finished;
}
int DeviceAudio::Encode(uint8_t * inputBuffer, int audioSize,int channel)
{
if (_ptr == NULL)
return -1;
_ptr(inputBuffer,audioSize);
return 0;
}
int DeviceAudio::GetDeviceDefault(int &in, string &inname, int &out, string &outname)
{
in = Pa_GetDefaultInputDevice();
const PaDeviceInfo *pInfo = Pa_GetDeviceInfo(in);
inname = pInfo->name;
out = Pa_GetDefaultOutputDevice();
pInfo = Pa_GetDeviceInfo(out);
outname = pInfo->name;
return 2;
}
int DeviceAudio::GetDeviceList(vector<TDeviceInfo> &deviceList)
{
    PaDeviceIndex num = Pa_GetDeviceCount();
    //the loop below is reconstructed; part of it was swallowed by the "<" in the original post
    for (int i = 0; i < num; i++)
    {
        const PaDeviceInfo *pInfo = Pa_GetDeviceInfo(i);
        TDeviceInfo cp;
        cp.name = pInfo->name;
        cp.channel = pInfo->maxInputChannels;
        cp.samplerate = (int)pInfo->defaultSampleRate;
        deviceList.push_back(cp);
    }
    if (deviceList.size() > 0)
        return 0;
    return -1;
}
PaError DeviceAudio::Init()
{
int err = Pa_Initialize();
if (err != paNoError) goto done;
return 0;
done:
Pa_Terminate();
if (err != paNoError)
{
fprintf(stderr, "An error occured while using the portaudio stream\n");
fprintf(stderr, "Error number: %d\n", err);
fprintf(stderr, "Error message: %s\n", Pa_GetErrorText(err));
err = 1; /* Always return 0 or 1, but no other return codes. */
}
return err;
}
//buffer size in bytes = framesPerBuffer * sizeof(short) * channel count (paInt16 samples)
//returns 0 on success, non-zero on failure. author: qianbo
int DeviceAudio::Open(TSoundInfo *info)
{
_info = info;
_ptr = info->callback;
return 0;
}
//C = A + B - (A * B >> 0x10)
//if (C > 32767) C = 32767;
//else if (C < -32768) C = -32768;
void DeviceAudio::Stop()
{
c_thread::Stop();
Notify();
Join();
}
//int DeviceAudio::isStopped()
//{
// //fflush(stdout);
// return Pa_IsStreamActive(_recordStream);
//}
int DeviceAudio::UnInit()
{
return Pa_Terminate();
}
//get the number of audio devices
int DeviceAudio::GetDeviceCount()
{
g_AudioDevice.clear();
DeviceAudio::GetDeviceList(g_AudioDevice);
int ret = (int)g_AudioDevice.size();
return ret;
}
const char *DeviceAudio::GetDeviceName(int index)
{
if (index >= 0 && index < (int)g_AudioDevice.size())
{
return g_AudioDevice[index].name.c_str();
}
return NULL;
}
const char *DeviceAudio::GetDeviceCapacity(int index ,int &samplerate, int &channel)
{
if (index >= 0 && index < (int)g_AudioDevice.size())
{
samplerate = g_AudioDevice[index].samplerate;
channel = g_AudioDevice[index].channel;
return g_AudioDevice[index].name.c_str();
}
return NULL;
}
void DeviceAudio::Run()
{
PaError err = paNoError;
PaStreamParameters inputParameters;
PaStream* stream;
if (_info->deviceindex == -1)/* default input device */
inputParameters.device = Pa_GetDefaultInputDevice();
else
inputParameters.device = _info->deviceindex;
if (inputParameters.device == paNoDevice) {
fprintf(stderr, "Error: No default input device.\n");
return;
//goto done;
}
const PaDeviceInfo * pInfo = Pa_GetDeviceInfo(inputParameters.device);
//we could simply take the device's maximum channel count
//int channelNum = pInfo->maxInputChannels ;
//if (channelNum > 2)
//channelNum = 1;
inputParameters.channelCount = 1;
inputParameters.sampleFormat = paInt16;
inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;
inputParameters.hostApiSpecificStreamInfo = NULL;
err = Pa_OpenStream(
&stream,
&inputParameters,
NULL, /* &outputParameters, */
_info->sample_rate,
//_info->sample_rate, //44100,
_info->nb_samples, //1024,
paClipOff, /* we won't output out of range samples so don't bother clipping them */
AudioRecordCallback,
(void*)this);
if (err != paNoError)
return ;
Pa_StartStream(stream);
WaitForSignal(); //block until Stop() signals us
if (stream!= NULL)
{
Pa_AbortStream(stream);
Pa_CloseStream(stream);
}
}
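A minimal driver sketch for the class above. The c_thread base class is not shown in the post; this assumes it exposes a Start() that runs Run() on a worker thread, so the exact call names are assumptions.

#include "DeviceAudio.h"
//stub consumer for the captured PCM; the VAD section below shows a real body
static void on_audio(uint8_t *data, int size)
{
    (void)data; (void)size;
}
int capture_demo()
{
    if (DeviceAudio::Init() != 0)
        return -1;
    TSoundInfo info;
    info.deviceindex = -1;     //default input device
    info.sample_rate = 16000;
    info.nb_samples = 1024;    //frames per callback -> 2048 bytes of mono int16
    info.channels = 1;
    info.callback = on_audio;
    DeviceAudio dev;
    dev.Open(&info);
    dev.Start();               //assumed c_thread API: launches Run() on a thread
    //... record for a while ...
    dev.Stop();                //signals Run() to close the stream and joins
    return DeviceAudio::UnInit();
}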
2. The WebRTC silence-detection (VAD) interface
First of all, take the time to sit down and study the following WebRTC VAD functions carefully.
#ifdef __cplusplus
extern "C" {
#endif
// Creates an instance to the VAD structure.
//
// - handle [o] : Pointer to the VAD instance that should be created.
//
// returns : 0 - (OK), -1 - (Error)
int WebRtcVad_Create(VadInst** handle);
// Frees the dynamic memory of a specified VAD instance.
//
// - handle [i] : Pointer to VAD instance that should be freed.
//
// returns : 0 - (OK), -1 - (NULL pointer in)
int WebRtcVad_Free(VadInst* handle);
// Initializes a VAD instance.
//
// - handle [i/o] : Instance that should be initialized.
//
// returns : 0 - (OK),
// -1 - (NULL pointer or Default mode could not be set).
int WebRtcVad_Init(VadInst* handle);
// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
// restrictive in reporting speech. Put in other words the probability of being
// speech when the VAD returns 1 is increased with increasing mode. As a
// consequence also the missed detection rate goes up.
//
// - handle [i/o] : VAD instance.
// - mode [i] : Aggressiveness mode (0, 1, 2, or 3).
//
// returns : 0 - (OK),
// -1 - (NULL pointer, mode could not be set or the VAD instance
// has not been initialized).
int WebRtcVad_set_mode(VadInst* handle, int mode);
// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
// and frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
//
// - handle [i/o] : VAD Instance. Needs to be initialized by
// WebRtcVad_Init() before call.
// - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000
// - audio_frame [i] : Audio frame buffer.
// - frame_length [i] : Length of audio frame buffer in number of samples.
//
// returns : 1 - (Active Voice),
// 0 - (Non-active Voice),
// -1 - (Error)
int WebRtcVad_Process(VadInst* handle, int fs, int16_t* audio_frame,
int frame_length);
// Checks for valid combinations of |rate| and |frame_length|. We support 10,
// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
//
// - rate [i] : Sampling frequency (Hz).
// - frame_length [i] : Speech frame buffer length in number of samples.
//
// returns : 0 - (valid combination), -1 - (invalid combination)
int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length);
#ifdef __cplusplus
}
#endif
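Before wrapping these calls in a class, here is a minimal end-to-end sketch of the API above: create, init, set the mode, validate the rate/frame-length pair, process one 30 ms frame at 16 kHz, and free. Only the functions listed above are used; the header name may differ in your WebRTC source tree.

#include "webrtc_vad.h" //header name may vary across WebRTC checkouts
//returns 1 for speech, 0 for silence, -1 on error; pcm must hold at least 480 samples
int vad_demo(int16_t *pcm)
{
    VadInst *vad = NULL;
    int voiced = -1;
    if (WebRtcVad_Create(&vad) != 0)
        return -1;
    if (WebRtcVad_Init(vad) == 0 &&
        WebRtcVad_set_mode(vad, 2) == 0 &&
        WebRtcVad_ValidRateAndFrameLength(16000, 480) == 0)
    {
        //480 samples = 30 ms at 16 kHz
        voiced = WebRtcVad_Process(vad, 16000, pcm, 480);
    }
    WebRtcVad_Free(vad);
    return voiced;
}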
3. How to wrap and call it
We do the initialization inside a class (the c_vad_check class used later):
int init()
{
if (_init == -1)
{
status = WebRtcVad_Create(&handle);
if (status != 0)
{
printf("Init is error\n");
return -1;
}
status = WebRtcVad_Init(handle);
if (status != 0)
{
return -1;
}
//aggressiveness mode 2 here; adjustable between 0 and 3
status = WebRtcVad_set_mode(handle, 2);
if (status != 0)
{
printf("Set mode is error\n");
return -1;
}
_init = 0;
return 0;
}
return 0;
}
The detect function that performs the actual check; fs is 16000 here:
int detect(short *data, int length)
{
status = WebRtcVad_Process(handle, fs, data, length);
if (status == -1)
{
printf("WebRtcVad_Process is error\n");
return -1;
}
return status;
//return 0;
}
The wrapping class follows. Since WebRtcVad_Process at 16 kHz only accepts frames of 160, 320, or 480 samples (10, 20, or 30 ms), vad_check0 slices the incoming buffer into 480-sample chunks:
class c_audio_util
{
    c_vad_check _vadcheck;
public:
    int vad_check0(uint8_t *data, int len)
    {
        //e.g. len = 2048 bytes -> 1024 int16 samples -> chunks 0-479, 480-959, plus a remainder
        int lenl = len / sizeof(short);
        short* start = (short*)data;
        short* end = start + lenl;
        int a[5] = { 0,0,0,0,0 };
        int i = 0;
        //the original post is cut off here; the rest of the loop is a reconstruction:
        //feed 480-sample (30 ms @ 16 kHz) chunks to the VAD and tally the results
        while (start + 480 <= end && i < 5)
        {
            a[i++] = _vadcheck.detect(start, 480);
            start += 480;
        }
        //report speech if any chunk was voiced
        for (int k = 0; k < i; k++)
            if (a[k] == 1)
                return 1;
        return 0;
    }
};
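Tying the two halves together: the on_audio stub from the capture sketch in section 1 can simply forward each captured buffer to vad_check0. The glue below is my own sketch (it also assumes c_vad_check::init() has already been called, e.g. from the constructor of c_audio_util):

//hypothetical glue between the PortAudio callback and the VAD wrapper
static c_audio_util g_audio_util;
static void on_audio(uint8_t *data, int size)
{
    //with nb_samples = 1024 mono int16 frames, size is 2048 bytes,
    //i.e. two full 480-sample VAD chunks plus a remainder
    int voiced = g_audio_util.vad_check0(data, size);
    if (voiced == 1)
    {
        //speech: pass the buffer on to the encoder or the network
    }
    else if (voiced == 0)
    {
        //silence: e.g. skip encoding / sending this buffer
    }
}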