关于frame中nb_samples字段的解释:
nb_samples表示一帧音频数据中采样的数量(次数)(每通道),nb_samples与具体的码流类型和编码级别有关。nb_samples和AVCodecContext中的frame_size相同。
采样率44100Hz(44.1kHz),采样格式s16,双声道时:
采样率是针对每个声道而言的:每个声道一秒有44100个采样点,两个声道合计88200个采样值;一个采样值2字节(16位),一秒的总数据量为44100 x 2 x 2 = 176400字节;一帧中每个声道有nb_samples次采样,数据量为nb_samples x 2 x 2字节;一秒有176400/(nb_samples x 4) = 44100/nb_samples帧音频。
| 编码类型 | NB_SAMPLES |
| --- | --- |
| MP3 | 1152 |
| AAC LC | 1024 |
| AAC HE V2 | 2048 |

数据格式AV_SAMPLE_FMT_S16和AV_SAMPLE_FMT_FLTP:
枚举中末尾带P表示存储模式为Planar,否则为Packed。
/*
 * Audio sample formats (excerpt quoted from FFmpeg's libavutil).
 * Names ending in 'P' are planar (one buffer per channel); the others are
 * packed (all channels interleaved in a single buffer).
 * Each enumerator is on its own line so that the trailing "///<" comments
 * no longer swallow the rest of the declaration.
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,   ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,  ///< signed 16 bits
    AV_SAMPLE_FMT_S32,  ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,  ///< float
    AV_SAMPLE_FMT_DBL,  ///< double

    AV_SAMPLE_FMT_U8P,  ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP, ///< float, planar
    AV_SAMPLE_FMT_DBLP, ///< double, planar
    AV_SAMPLE_FMT_S64,  ///< signed 64 bits
    AV_SAMPLE_FMT_S64P, ///< signed 64 bits, planar

    AV_SAMPLE_FMT_NB    ///< Number of sample formats. DO NOT USE if linking dynamically
};
存储模式会影响到AVFrame数据的读写。对于Packed的数据,frame->data[]只有一个维度,即所有数据都是在frame->data[0]中。而对于Planar的数据,frame->data[]有多维,每个维度(frame->data[i])存放不同声道的数据。
PCM数据的格式为AV_SAMPLE_FMT_S16,AAC解码输出的数据为浮点型的 AV_SAMPLE_FMT_FLTP 格式。
audio_encode.c 代码
#include #include #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" // 只编码,不重采样 void audio_encode(const char *pcmfile, const char *aacfile) { FILE *sfp = fopen(pcmfile, "r"); if (sfp == NULL) { printf("fail to open pcmile\n"); goto _Error; } FILE *dfp = fopen(aacfile, "w+"); if (dfp == NULL) { printf("failed to open aacfile\n"); goto _Error; } // 1. 初始化pcm_frame,保存每帧pcm数据 AVFrame *pcm_frame; pcm_frame = av_frame_alloc(); pcm_frame->format = AV_SAMPLE_FMT_S16; pcm_frame->channel_layout = AV_CH_LAYOUT_STEREO; pcm_frame->sample_rate = 44100; pcm_frame->channels = 2; // nb_samples的值与具体的编码协议有关 pcm_frame->nb_samples = 2048; // 一帧中有多少个采样点 av_frame_get_buffer(pcm_frame, 0); // 2. 找到合适的编码器 AVCodec *cod = avcodec_find_encoder_by_name("libfdk_aac"); if (cod == NULL) { av_log(NULL, AV_LOG_ERROR, "fail to find codec\n"); goto _Error; } // 3. 设置其编码选项 AVCodecContext *cod_ctx = avcodec_alloc_context3(cod); cod_ctx->profile = FF_PROFILE_AAC_HE_V2; // 编码协议 cod_ctx->codec_type = AVMEDIA_TYPE_AUDIO; // 音频编码 cod_ctx->sample_fmt = pcm_frame->format; cod_ctx->channel_layout = pcm_frame->channel_layout; cod_ctx->channels = pcm_frame->channels; cod_ctx->sample_rate = pcm_frame->sample_rate; // 采样率 // 4. 打开编码器 if (avcodec_open2(cod_ctx, cod, NULL) < 0) { av_log(NULL, AV_LOG_ERROR, "fail to open codec\n"); goto _Error; } printf("frame_size:%d\n", cod_ctx->frame_size); // 5. 初始化packet int count = 0; AVPacket *packet = av_packet_alloc(); int frame_size = pcm_frame->nb_samples * \ av_get_bytes_per_sample(pcm_frame->format) * pcm_frame->channels; while (1) { // AV_SAMPLE_FMT_S16是packed格式的, 声道数据LRLRLRLRLR... 
// 所以pcm_frame->data[]是一维的 int ret = fread(pcm_frame->data[0], 1, frame_size, sfp); if (ret < 0) { printf("fail to read raw data\n"); goto _Error; } else if (ret == 0) { break; } pcm_frame->pts = count; // 原始数据发送到编码器 if (avcodec_send_frame(cod_ctx, pcm_frame) < 0) { printf("fail to send frame\n"); goto _Error; } // 获取编码数据 if (avcodec_receive_packet(cod_ctx, packet) >= 0) { fwrite(packet->data, 1, packet->size, dfp); count++; av_packet_unref(packet); printf("receive %d frame\n", count); } } _Error: if (sfp) { fclose(sfp); } if (dfp) { fclose(dfp); } if (cod_ctx) { avcodec_close(cod_ctx); avcodec_free_context(&cod_ctx); } if (pcm_frame) { av_frame_free(&pcm_frame); } if (packet) { av_packet_free(&packet); } } int main(int argc, char const* argv[]) { audio_encode("output.pcm", "output.aac"); return 0; }
编译测试
./myrun.sh audio_encode.c && ./a.out
ffplay output.aac -nodisp
参考:
PCM音频数据
ffmpeg音频小结
转自:ffmpeg音视频编码入门:音频编码(pcm编码aac)_zhou jiabo的博客-CSDN博客_ffmpeg 音频编码