当前位置：网站首页>Ffmpeg Visual Studio development (IV): audio decoding

Ffmpeg Visual Studio development (IV): audio decoding

2022-07-04 14:52:00 【PYJTLK】

In the previous article we learned how to decode video frames with FFmpeg. In this article we will learn how to decode audio frames. The article first explains the steps of audio decoding section by section, then lists the complete code, and finally tests it.

preparation

Before you start, prepare a folder (Res) and put a video file (video.mp4) and an audio file (audio.mp3) in it, each about 1 minute long.
Insert picture description here
Of course, if you have not yet learned FFprobe and FFplay, it is strongly recommended that you learn their common commands first. These two tools can be used to inspect and test our audio and video files.

The steps of audio decoding are as follows:
1. Open the file to obtain the multimedia file (format) context
2. Find the audio stream
3. Open the audio decoder
4. Read packets in a loop
4.1. Decode audio frames
4.2. Output the raw PCM data

Get multimedia file context

	...
	AVFormatContext *avFormatContext = NULL;

	// Open the input file and read its header information
	int ret = avformat_open_input(&avFormatContext, inputFilePath, NULL, NULL);

	if (ret < 0){
    // Opening the file failed
		char buff[1024];
		// Write the error description for ret into buff
		av_strerror(ret, buff, sizeof(buff)-1);
		cout << "can't open file" << endl;
		cout << buff << endl;
		// Release the memory held by the AVFormatContext
		avformat_close_input(&avFormatContext);
		return -1;
	}
	...

Get the audio stream

The audio stream is obtained in the same way as the video stream: by its index in the stream array.

	...
	// Ask FFmpeg for the index of the best audio stream in the file
	int audioIndex = av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
	if (audioIndex < 0){
    
		// No usable audio stream: the only output is the media type name
		cout << av_get_media_type_string(AVMEDIA_TYPE_AUDIO) << endl;
		// Release the memory held by the AVFormatContext
		avformat_close_input(&avFormatContext);
		return -1;
	}

	// Use audioIndex to fetch the audio stream from the stream array
	AVStream *audioStream = avFormatContext->streams[audioIndex];
	...

Open the audio decoder

static int openAudioCodec(int audioStreamIndex,
	AVCodecContext **avCodecContext, AVFormatContext *avFormatContext,char *outputFilePath){
    
	
	AVStream *avStream;
	AVCodec *avCodec = NULL;
	AVDictionary *opts = NULL;
	
	if (av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0) < 0) {
    
		cout << "can't find audio stream" << endl;
		return -1;
	}
	else {
    
		avStream = avFormatContext->streams[audioStreamIndex];

		// Looking for an audio decoder 
		avCodec = avcodec_find_decoder(avStream->codecpar->codec_id);
		if (avCodec == NULL) {
    
			cout << "can't find audio codec" << endl;
			return -1;
		}

		// Get audio decoder context 
		*avCodecContext = avcodec_alloc_context3(avCodec);

		if (*avCodecContext == NULL) {
    
			cout << "can't alloc audio codec context" << endl;
			return -1;
		}

		// Fill in the audio decoder context according to the audio input stream 
		if (avcodec_parameters_to_context(*avCodecContext, avStream->codecpar) < 0) {
    
			cout << "can't copy input stream params" << endl;
			return -1;
		}

		// Turn on the audio decoder 
		if (avcodec_open2(*avCodecContext, avCodec, &opts) < 0) {
    
			cout << "can't open audio codec" << endl;
			return -1;
		}
	}

	return 0;
}

Reading packets in a loop

The program reads the packets one at a time in a loop, decoding each packet as soon as it has been read.

	...
	// Frame that receives the decoder output for every packet
	AVFrame *avFrame = av_frame_alloc();

	while (1){
    
		// Read the next packet from the input
		ret = av_read_frame(avFormatContext, avPacket);

		// Any negative return ends the loop; end-of-file and read errors
		// are not distinguished here
		if (ret < 0){
    
			cout << "finished" << endl;
			break;
		}

		// Only packets that belong to the audio stream are decoded
		if (avPacket->stream_index == audioIndex){
    
			cout << "=============packet=============" << endl;
			cout << "pos:" << avPacket->pos << endl;
			cout << "handle audio packet" << endl;
			// Decode the audio packet (the return value is ignored)
			decodeAudioPacket(avCodecContext, avPacket, avFrame,outputFile);
			cout << "=============packet=============" << endl;
		}
	
		// Release the packet's data so it can hold the next packet
		av_packet_unref(avPacket);
	}
	...

Decoding audio frames

// Sends one packet to the decoder and handles every frame it produces.
// Returns -1 if the packet cannot be submitted, 0 when the decoder reports
// EAGAIN or EOF, and the negative decoder error code otherwise.
static int decodeAudioPacket(AVCodecContext *avCodecContext, const AVPacket *avPacket,AVFrame *avFrame,FILE *outputFile)
{
    
	int ret = 0;

	// Send the packet to the audio decoder
	if (avcodec_send_packet(avCodecContext, avPacket) < 0) {
    
		cout << "error on submit packet" << endl;
		return -1;
	}

	// Keep receiving frames until the decoder has none left for this packet
	while (ret >= 0) {
    
		ret = avcodec_receive_frame(avCodecContext, avFrame);
		if (ret < 0) {
    
			// EAGAIN / EOF: no more frames available right now, not an error
			if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
				return 0;

			cout << "error while decodeing" << endl;
			return ret;
		}

		// Size in bytes of one audio sample in the frame's format
		int sampleSize = av_get_bytes_per_sample((AVSampleFormat)avFrame->format);

		cout << "--------------frame--------------" << endl;
		cout << "sample rate:" << avFrame->sample_rate << endl;
		cout << "channel num:" << avFrame->channels << endl;
		cout << "format:" << avFrame->format << endl;
		cout << "sample nums:" << avFrame->nb_samples << endl;
		cout << "sampleSize:" << sampleSize << endl;
		cout << "--------------frame--------------" << endl;

		...

		// Drop the frame's buffers so avFrame can be reused
		av_frame_unref(avFrame);
		// NOTE(review): negative ret already returned above, so this check
		// looks unreachable unless the elided code changes ret
		if (ret < 0)
			return ret;
	}

	return 0;
}

Output pcm Raw data

After decoding an audio frame we have the raw audio data; here we write it to a file.

PCM data is stored as follows. For mono audio, the samples are simply stored one after another in order. For two-channel audio, the samples of the two channels are stored alternately (interleaved): one sample of channel 1, then one sample of channel 2, and so on.
Insert picture description here

		...
		// This method will output single channel pcm
		//fwrite(avFrame->extended_data[0], 1, avFrame->nb_samples * av_get_bytes_per_sample((AVSampleFormat)avFrame->format), outputFile);
		
		// This method will output dual channels pcm
		for (int i = 0; i < avFrame->nb_samples;i++){
    
			// passageway 1 Raw data 
			fwrite(avFrame->extended_data[0] + sampleSize * i, 1, sampleSize, outputFile);
			// passageway 2 Raw data 
			fwrite(avFrame->extended_data[1] + sampleSize * i, 1, sampleSize, outputFile);
		}
		...

Complete code

The complete code is as follows .

#include "stdafx.h"
#include <iostream>

extern "C"
{
    
#include "libavformat/avformat.h"
};

using namespace std;

/**
 * Decode one packet and append the resulting samples to outputFile as
 * interleaved (packed) PCM.
 *
 * Planar frames are interleaved sample by sample across all channels; packed
 * frames are written as they are. This works for any channel count, not only
 * stereo.
 *
 * @param avCodecContext opened audio decoder context
 * @param avPacket       packet to decode; NULL switches the decoder to
 *                       draining mode so the remaining frames can be fetched
 * @param avFrame        caller-allocated frame used as scratch space; it is
 *                       unreferenced again before returning
 * @param outputFile     destination stream opened for binary writing
 * @return 0 on success (including "needs more input" and "fully drained"),
 *         a negative value on error
 */
static int decodeAudioPacket(AVCodecContext *avCodecContext, const AVPacket *avPacket,AVFrame *avFrame,FILE *outputFile)
{
	// Send the packet to the audio decoder
	if (avcodec_send_packet(avCodecContext, avPacket) < 0) {
		cout << "error on submit packet" << endl;
		return -1;
	}

	// One packet may yield zero, one or several frames
	for (;;) {
		int ret = avcodec_receive_frame(avCodecContext, avFrame);

		// EAGAIN: decoder wants more input; EOF: decoder is fully drained
		if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
			return 0;

		if (ret < 0) {
			cout << "error while decodeing" << endl;
			return ret;
		}

		AVSampleFormat sampleFormat = (AVSampleFormat)avFrame->format;
		// Size in bytes of one sample of one channel
		int sampleSize = av_get_bytes_per_sample(sampleFormat);
		int channelCount = avFrame->channels;

		cout << "--------------frame--------------" << endl;
		cout << "sample rate:" << avFrame->sample_rate << endl;
		cout << "channel num:" << channelCount << endl;
		cout << "format:" << avFrame->format << endl;
		cout << "sample nums:" << avFrame->nb_samples << endl;
		cout << "sampleSize:" << sampleSize << endl;
		cout << "--------------frame--------------" << endl;

		bool written = sampleSize > 0 && channelCount > 0;

		if (written && av_sample_fmt_is_planar(sampleFormat)) {
			// Planar: each channel lives in its own plane, so emit one sample
			// from every channel in turn to build interleaved output
			for (int i = 0; written && i < avFrame->nb_samples; i++) {
				for (int ch = 0; written && ch < channelCount; ch++) {
					written = fwrite(avFrame->extended_data[ch] + sampleSize * i,
						1, sampleSize, outputFile) == (size_t)sampleSize;
				}
			}
		}
		else if (written) {
			// Packed: plane 0 already contains all channels interleaved
			size_t byteCount = (size_t)avFrame->nb_samples * channelCount * sampleSize;
			written = fwrite(avFrame->extended_data[0], 1, byteCount, outputFile) == byteCount;
		}

		// Always release the frame, even when writing failed
		av_frame_unref(avFrame);

		if (!written) {
			cout << "error while writing pcm" << endl;
			return -1;
		}
	}
}

static int openAudioCodec(int audioStreamIndex,
	AVCodecContext **avCodecContext, AVFormatContext *avFormatContext,char *outputFilePath){
    
	
	AVStream *avStream;
	AVCodec *avCodec = NULL;
	AVDictionary *opts = NULL;
	
	if (av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0) < 0) {
    
		cout << "can't find audio stream" << endl;
		return -1;
	}
	else {
    
		avStream = avFormatContext->streams[audioStreamIndex];

		// Looking for an audio decoder 
		avCodec = avcodec_find_decoder(avStream->codecpar->codec_id);
		if (avCodec == NULL) {
    
			cout << "can't find audio codec" << endl;
			return -1;
		}

		// Get audio decoder context 
		*avCodecContext = avcodec_alloc_context3(avCodec);

		if (*avCodecContext == NULL) {
    
			cout << "can't alloc audio codec context" << endl;
			return -1;
		}

		// Fill in the audio decoder context according to the audio input stream 
		if (avcodec_parameters_to_context(*avCodecContext, avStream->codecpar) < 0) {
    
			cout << "can't copy input stream params" << endl;
			return -1;
		}

		// Turn on the audio decoder 
		if (avcodec_open2(*avCodecContext, avCodec, &opts) < 0) {
    
			cout << "can't open audio codec" << endl;
			return -1;
		}
	}

	return 0;
}

int _tmain(int argc, _TCHAR* argv[])
{
    

	char inputFilePath[100];
	cout << "inputfile path: ";
	// Input file name , Such as C://WorkZone//Res//audio.mp3
	cin >> inputFilePath;

	char outputFilePath[100];
	cout << "outputfile path:";
	// Input file name , Such as C://WorkZone//Res//out.pcm
	cin >> outputFilePath;

	FILE *outputFile = NULL;

	fopen_s(&outputFile, outputFilePath, "wb");

	if (outputFile == NULL){
    
		cout << "can't open output file" << endl;
		return -1;
	}

	AVFormatContext *avFormatContext = NULL;

	// Open the file stream , Read header information 
	int ret = avformat_open_input(&avFormatContext, inputFilePath, NULL, NULL);

	if (ret < 0){
    // File opening failure 
		char buff[1024];
		// Write the specific error information buff
		av_strerror(ret, buff, sizeof(buff)-1);
		cout << "can't open file" << endl;
		cout << buff << endl;
		// Release AVFormatContext Of memory 
		avformat_close_input(&avFormatContext);
		return -1;
	}

	// Read stream information 
	ret = avformat_find_stream_info(avFormatContext, NULL);

	if (ret < 0){
    // Failed to read stream information 
		char buff[1024];
		// Write the specific error information buff
		av_strerror(ret, buff, sizeof(buff)-1);
		cout << "can't open stream" << endl;
		cout << buff << endl;
		// Release AVFormatContext Of memory 
		avformat_close_input(&avFormatContext);
		return -1;
	}

	// Print format information 
	av_dump_format(avFormatContext, 0, inputFilePath, 0);
	cout << "stream num:" << avFormatContext->nb_streams << endl;
	cout << "duration:" << avFormatContext->duration << endl << endl;

	// Get the audio stream 
	int audioIndex = av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
	if (audioIndex < 0){
    
		cout << av_get_media_type_string(AVMEDIA_TYPE_AUDIO) << endl;
		// Release AVFormatContext Of memory 
		avformat_close_input(&avFormatContext);
		return -1;
	}

	// according to audioIndex Get the audio stream 
	AVStream *audioStream = avFormatContext->streams[audioIndex];
	cout << "duration:" << audioStream->duration << endl;

	// Extract code stream 
	AVPacket *avPacket = av_packet_alloc();
	cout << "start read frames" << endl;

	AVCodecContext *avCodecContext;

	if (openAudioCodec(audioIndex, &avCodecContext, avFormatContext,outputFilePath) < 0){
    
		return -1;
	}

	AVFrame *avFrame = av_frame_alloc();

	while (1){
    
		ret = av_read_frame(avFormatContext, avPacket);

		if (ret < 0){
    
			cout << "finished" << endl;
			break;
		}

		if (avPacket->stream_index == audioIndex){
    
			cout << "=============packet=============" << endl;
			cout << "pos:" << avPacket->pos << endl;
			cout << "handle audio packet" << endl;
			decodeAudioPacket(avCodecContext, avPacket, avFrame,outputFile);
			cout << "=============packet=============" << endl;
		}
	
		// hold AVPacket Empty the data in , In order to load the information of the next frame 
		av_packet_unref(avPacket);
	}

	cout << "decode finish" << endl;

	// Release AVFormatContext Of memory 
	avformat_close_input(&avFormatContext);

	// Free the memory of dynamic application 
	av_free(avPacket);
	av_free(avFrame);

	fclose(outputFile);
	return 0;
}

test

Run the program, then enter the path of the audio file in the Res folder (audio.mp3) and the output file path. The program writes the pcm file.
Insert picture description here
Use FFprobe to view the information about audio.mp3.

ffprobe -i audio.mp3

As shown in the picture below, this audio file has 2 channels, a sampling rate of 44100 Hz, and a floating-point (32-bit) sample format.
Insert picture description here
Then we use ffplay to play the output pcm file. -ar specifies the sampling rate, -ac the number of channels, and -f the sample format; f32le means 32-bit floating point, little-endian. If the audio sounds strange during playback, the sampling rate, the channel count or the sample format is probably wrong.

ffplay -ar 44100 -ac 2 -f f32le -i out.pcm

Last

This article described how to implement audio decoding with FFmpeg.

Interested readers can also visit my Gitee repository to see the complete project code.

Reference article

《FFmpeg Detailed explanation of audio processing 》

原网站

版权声明
本文为[PYJTLK]所创，转载请带上原文链接，感谢
https://yzsam.com/2022/02/202202141241083720.html