当前位置:网站首页>Solve the problem that ffmpeg obtains aac audio files with incorrect duration

Solve the problem that ffmpeg obtains aac audio files with incorrect duration

2022-06-11 06:23:00 Tinghua_ M

A bug was recently reported during testing: the duration that ijkplayer reports for an AAC file is wrong. I got hold of the file and checked, and it really is wrong. In AE or with the system MediaPlayer the duration is 3m48s (the exact value is `MMParserExtractor: ADTS: duration = 228010580us`, see the picture below), whereas ijkplayer reports 2m54s, and during playback the stream ends at 2m54s. The compiled ffmpeg command-line tool reports `Duration: 00:03:13.07`, while VLC shows 3m53s. This file really is peculiar! Leaving the other players aside for now, the goal here is simply to make the durations reported by MMParserExtractor and IJKPlayer agree.

1、 To analyze problems

Let's start analyzing the problem. Inspecting the file from the command line, ffmpeg reports a duration of 3m13s.

Look carefully at the line marked by the red arrow: it says that the duration was estimated from the bit rate and may be inaccurate. When audio/video stream information is wrong like this, the analysis can generally start from the `avformat_find_stream_info` function.

Here we start directly from the log: the warning is emitted in libavformat/utils.c.

/**
 * Estimate the duration of every stream that has none from the total bit
 * rate and the size of the file.
 *
 * If ic->bit_rate is not set, it is first computed as the sum of the
 * per-stream bit rates. The estimate is duration = 8 * payload_bytes /
 * bit_rate, which is only as accurate as the bit rate itself: fine for
 * constant-bit-rate files, wrong for variable-bit-rate ones.
 *
 * The av_log() calls marked "debug trace" are instrumentation added for the
 * investigation described in this article; they are not part of upstream
 * FFmpeg.
 *
 * @param ic format context whose streams are updated in place
 */
static void estimate_timings_from_bit_rate(AVFormatContext *ic)
{
    int64_t filesize, duration;
    int i, show_warning = 0;
    AVStream *st;

    /* debug trace: for the file analysed here no bit rate is known yet (0).
     * 64-bit values are printed with PRId64, not %lld, so that the format
     * always matches the argument type. */
    av_log(ic, AV_LOG_WARNING, "-->ic->bit_rate:%"PRId64"\n", (int64_t)ic->bit_rate);
    /* if bit_rate is already set, we believe it */
    if (ic->bit_rate <= 0) {
        int64_t bit_rate = 0;
        for (i = 0; i < ic->nb_streams; i++) {
            st = ic->streams[i];

            /* fall back to the bit rate stored in the internal codec context */
            if (st->codecpar->bit_rate <= 0 && st->internal->avctx->bit_rate > 0)
                st->codecpar->bit_rate = st->internal->avctx->bit_rate;
            if (st->codecpar->bit_rate > 0) {
                /* give up on the sum rather than overflow int64_t */
                if (INT64_MAX - st->codecpar->bit_rate < bit_rate) {
                    bit_rate = 0;
                    break;
                }
                bit_rate += st->codecpar->bit_rate;
            } else if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && st->codec_info_nb_frames > 1) {
                // If we have a videostream with packets but without a bitrate
                // then consider the sum not known
                bit_rate = 0;
                break;
            }
        }
        ic->bit_rate = bit_rate;
        /* debug trace: at this point a bit rate has been derived */
        av_log(ic, AV_LOG_WARNING, "-->ic->bit_rate:%"PRId64"\n", (int64_t)ic->bit_rate);
    }

    /* if duration is already set, we believe it */
    /* debug trace: the log shows that no duration is known here either */
    av_log(ic, AV_LOG_WARNING, "-->ic->duration:%"PRId64"\n", (int64_t)ic->duration);
    if (ic->duration == AV_NOPTS_VALUE &&
        ic->bit_rate != 0) {
        filesize = ic->pb ? avio_size(ic->pb) : 0;
        /* debug trace */
        av_log(ic, AV_LOG_WARNING, "-->ic->filesize:%"PRId64"\n", filesize);
        if (filesize > ic->internal->data_offset) {
            /* only the payload after data_offset counts */
            filesize -= ic->internal->data_offset;
            for (i = 0; i < ic->nb_streams; i++) {
                st      = ic->streams[i];
                if (   st->time_base.num <= INT64_MAX / ic->bit_rate
                    && st->duration == AV_NOPTS_VALUE) {
                    /* duration = payload bits / bit rate, expressed in the
                     * stream time base. Correct for CBR; for VBR (varying
                     * rate) the result is not accurate. */
                    duration = av_rescale(8 * filesize, st->time_base.den,
                                          ic->bit_rate *
                                          (int64_t) st->time_base.num);
                    st->duration = duration;
                    show_warning = 1;
                }
            }
        }
    }
    if (show_warning)
        av_log(ic, AV_LOG_WARNING,
               "Estimating duration from bitrate, this may be inaccurate\n");
}

The function above is called from the following function, also in libavformat/utils.c:

/**
 * Fill in start time and duration of the format context and its streams,
 * choosing the estimation method from what is available:
 *  - "mpeg"/"mpegts" input with a known size on seekable I/O: from the PTSes;
 *  - otherwise, if has_duration() reports timing information: from the
 *    stream timings;
 *  - otherwise: from the bit rate (least precise).
 *
 * The av_log() calls marked "debug trace" are instrumentation added for the
 * investigation described in this article; they are not part of upstream
 * FFmpeg.
 *
 * @param ic         format context to update
 * @param old_offset passed through to estimate_timings_from_pts()
 */
static void estimate_timings(AVFormatContext *ic, int64_t old_offset)
{
    int64_t file_size;

    /* get the file size, if possible */
    if (ic->iformat->flags & AVFMT_NOFILE) {
        file_size = 0;
    } else {
        file_size = avio_size(ic->pb);
        file_size = FFMAX(0, file_size);
    }
    /* debug trace; file_size is 64-bit, so it is printed with PRId64 */
    av_log(ic, AV_LOG_WARNING, "->ic->iformat->name:%s\n", ic->iformat->name);
    av_log(ic, AV_LOG_WARNING, "->file_size:%"PRId64"\n", file_size);
    /* The AVFMT_NOFILE branch above deliberately avoids touching ic->pb, and
     * the condition below only reads it after checking file_size, so the
     * trace must not dereference it unconditionally either. */
    if (ic->pb)
        av_log(ic, AV_LOG_WARNING, "->ic->pb->seekable:%d\n", ic->pb->seekable);

    if ((!strcmp(ic->iformat->name, "mpeg") ||
         !strcmp(ic->iformat->name, "mpegts")) &&
        file_size && (ic->pb->seekable & AVIO_SEEKABLE_NORMAL)) {
        /* get accurate estimate from the PTSes */
        estimate_timings_from_pts(ic, old_offset);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_PTS;
    } else if (has_duration(ic)) {
        /* the demuxer already obtained a duration */
        /* at least one component has timings - we use them for all
         * the components */
        fill_all_stream_timings(ic);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_STREAM;
    } else {
        /* no duration was obtained for the file analysed here, so this
         * branch is the one taken */
        /* less precise: use bitrate info */
        estimate_timings_from_bit_rate(ic);
        ic->duration_estimation_method = AVFMT_DURATION_FROM_BITRATE;
    }
    update_stream_timings(ic);

    {
        int i;
        AVStream av_unused *st;
        for (i = 0; i < ic->nb_streams; i++) {
            st = ic->streams[i];
            av_log(ic, AV_LOG_TRACE, "stream %d: start_time: %0.3f duration: %0.3f\n", i,
                   (double) st->start_time * av_q2d(st->time_base),
                   (double) st->duration   * av_q2d(st->time_base));
        }
        av_log(ic, AV_LOG_TRACE,
                "format: start_time: %0.3f duration: %0.3f bitrate=%"PRId64" kb/s\n",
                (double) ic->start_time / AV_TIME_BASE,
                (double) ic->duration   / AV_TIME_BASE,
                (int64_t)ic->bit_rate / 1000);
    }
}

That function is in turn called from `avformat_find_stream_info` in libavformat/utils.c.

2、 Solution exploration

The reason is known , So how can we solve this problem ?

aac Of duration How can I get it ?

Let's take a look at how AACExtractor is implemented in the libstagefright framework of the Android system:

// Builds the extractor for an ADTS AAC data source.
//
// If no sniffing result is passed in (_meta == NULL), SniffAAC() is run on
// the source to obtain one. The first frame header is then parsed for
// profile, sampling-frequency index and channel configuration, and the whole
// source is walked frame by frame to record every frame offset and to derive
// the total duration as (number of frames) x (duration of one frame).
//
// On any failure the constructor returns early and mInitCheck keeps its
// initial NO_INIT value; it is set to OK only at the very end.
AACExtractor::AACExtractor(
        const sp<DataSource> &source, const sp<AMessage> &_meta)
    : mDataSource(source),
      mInitCheck(NO_INIT),
      mFrameDurationUs(0) {
    sp<AMessage> meta = _meta;

    if (meta == NULL) {
        String8 mimeType;
        float confidence;
        // (an unused local that shadowed the _meta parameter was removed here)

        if (!SniffAAC(mDataSource, &mimeType, &confidence, &meta)) {
            return;
        }
    }

    // Offset of the first frame, as recorded in the sniffing metadata.
    int64_t offset;
    CHECK(meta->findInt64("offset", &offset));

    // Bytes 2 and 3 of the header carry profile, sampling index and channels.
    uint8_t profile, sf_index, channel, header[2];
    if (mDataSource->readAt(offset + 2, &header, 2) < 2) {
        return;
    }

    // Profile: top two bits of byte 2.
    profile = (header[0] >> 6) & 0x3;
    // Sampling-frequency index: the next four bits.
    sf_index = (header[0] >> 2) & 0xf;
    // Translate the index into a sample rate; 0 means "not supported".
    uint32_t sr = get_sample_rate(sf_index);
    if (sr == 0) {
        return;
    }
    // Channel configuration: lowest bit of byte 2 plus top two bits of byte 3.
    channel = (header[0] & 0x1) << 2 | (header[1] >> 6);

    mMeta = MakeAACCodecSpecificData(profile, sf_index, channel);

    off64_t streamSize, numFrames = 0;
    size_t frameSize = 0;
    int64_t duration = 0;

    // The duration can only be computed when the source size is known.
    if (mDataSource->getSize(&streamSize) == OK) {
         while (offset < streamSize) {
            // Length of the ADTS frame at this offset; 0 means the header
            // could not be read or parsed, which aborts initialisation.
            if ((frameSize = getAdtsFrameLength(source, offset, NULL)) == 0) {
                return;
            }

            mOffsetVector.push(offset);

            offset += frameSize;  // advance to the next frame
            numFrames ++;         // count this frame
        }

        // Key step: one frame is taken to hold 1024 samples, so its duration
        // is 1024 / sample_rate seconds; expressed in microseconds and
        // rounded up.
        mFrameDurationUs = (1024 * 1000000ll + (sr - 1)) / sr;
        // Total duration = number of frames x duration of one frame.
        duration = numFrames * mFrameDurationUs;
        mMeta->setInt64(kKeyDuration, duration);
    }

    mInitCheck = OK;
}

Now let's look at the `getAdtsFrameLength` function in libstagefright's AACExtractor.cpp. It computes the size of each frame from that frame's ADTS header.

static size_t getAdtsFrameLength(const sp<DataSource> &source, off64_t offset, size_t* headerSize) {
//CRC
    const size_t kAdtsHeaderLengthNoCrc = 7;
    const size_t kAdtsHeaderLengthWithCrc = 9;

    size_t frameSize = 0;
// Sync word 
    uint8_t syncword[2];
    if (source->readAt(offset, &syncword, 2) != 2) {
        return 0;
    }
    if ((syncword[0] != 0xff) || ((syncword[1] & 0xf6) != 0xf0)) {
        return 0;
    }
//0 No, crc,1 Yes crc
    uint8_t protectionAbsent;
	
    if (source->readAt(offset + 1, &protectionAbsent, 1) < 1) {
        return 0;
    }
    protectionAbsent &= 0x1;

    uint8_t header[3];
    if (source->readAt(offset + 3, &header, 3) < 3) {
        return 0;
    }
// obtain framesize Size 
    frameSize = (header[0] & 0x3) << 11 | header[1] << 3 | header[2] >> 5;

    // protectionAbsent is 0 if there is CRC
    size_t headSize = protectionAbsent ? kAdtsHeaderLengthNoCrc : kAdtsHeaderLengthWithCrc;
    if (headSize > frameSize) {
        return 0;
    }
    if (headerSize != NULL) {
        *headerSize = headSize;
    }

    return frameSize;
}

The implementation above relies on the fact that one raw AAC frame contains 1024 samples (plus related data). Playback time of one AAC frame = number of samples in the frame / sampling rate. Therefore the total duration of an AAC audio file is t = total number of frames × playback time of one AAC frame.

Now let's look at the AAC demuxer in libavformat/aacdec.c. It turns out that it does not even process an ADIF header; let's set that aside for now.

AAC Format Introduction

The first thing to understand is that AAC files come in two formats, ADIF and ADTS. ADIF (Audio Data Interchange Format) has a single, well-defined start: decoding must begin there and cannot start in the middle of the data. ADTS (Audio Data Transport Stream) is the opposite: it is a stream with sync words, so decoding can start anywhere in the stream. As its name suggests, it is a format similar in spirit to a TS stream.

In the ADTS format every frame has its own header, which gives it streaming characteristics and makes it suitable for network transmission and processing, whereas ADIF has only one header for the whole file. The header layouts of the two formats also differ. ADTS is the format in mainstream use today.

ADTS AAC The file format is as follows

ADTS_header

AAC ES

ADTS_header

AAC ES

ADTS_header

AAC ES

  The details of the AAC Refer to this article for the format

AAC File format and audio file length calculation

Getting the duration of each frame: ffmpeg can correctly read each frame's nb_samples and the overall sample_rate; dividing the former by the latter gives the duration of each frame.

AAC: with a frame size of 1024 samples and a sampling rate of 44100 Hz, the playback duration of one frame is 1024 / 44100 = 0.02322 s = 23.22 ms.

So how can we get the exact duration? Walk the ADTS frame headers to count the total number of frames, and use (total number of frames × duration of one frame) as the duration.

3、 solve the problem

So let's look at the FFmpeg demuxer for this format. The file is a raw ADTS AAC stream, so the relevant code is in libavformat/aacdec.c. First add the following two helper functions:

/**
 * Parse the ADTS header located at absolute position @p offset and return
 * the length of that frame.
 *
 * The first six header bytes are fetched with a single seek and read instead
 * of three separate ones. On success the I/O position is left six bytes past
 * @p offset, exactly where the last read ends; the caller is responsible for
 * restoring whatever position it needs afterwards.
 *
 * @param s          demuxer context whose s->pb is read
 * @param offset     absolute byte position of the frame header
 * @param headerSize if not NULL, receives the header length: 7 bytes without
 *                   CRC, 9 bytes with CRC
 * @return frame length in bytes (header included), or 0 if seeking or
 *         reading fails, the sync pattern is missing, or the declared length
 *         is smaller than the header itself
 */
static int getAdtsFrameLength(AVFormatContext *s,int64_t offset,int* headerSize)
{
    uint8_t hdr[6];
    int frame_size, head_size;

    if (avio_seek(s->pb, offset, SEEK_SET) < 0)
        return 0;
    if (avio_read(s->pb, hdr, sizeof(hdr)) != (int)sizeof(hdr))
        return 0;

    /* Sync pattern: first byte 0xff, and 1111 x00 x in the second byte (the
     * 0xf6 mask ignores bit 3 and the protection_absent bit 0). */
    if (hdr[0] != 0xff || (hdr[1] & 0xf6) != 0xf0)
        return 0;

    /* Frame length: low 2 bits of byte 3, byte 4, top 3 bits of byte 5. */
    frame_size = (hdr[3] & 0x3) << 11 | hdr[4] << 3 | hdr[5] >> 5;

    /* protection_absent (bit 0 of byte 1) is 0 if there is a CRC. */
    head_size = (hdr[1] & 0x1) ? 7 : 9;
    if (head_size > frame_size)
        return 0;

    if (headerSize != NULL)
        *headerSize = head_size;

    return frame_size;
}
/**
 * Map an ADTS sampling-frequency index to a sample rate in Hz.
 *
 * Indices 0..12 are the rates defined for MPEG-4 audio; index 12 (7350 Hz)
 * was missing from the table and is now included.
 *
 * @param sf_index 4-bit sampling_frequency_index taken from the ADTS header
 * @return the sample rate in Hz, or 0 if the index has no entry in the table
 */
static uint32_t get_sample_rate(const uint8_t sf_index)
{
    static const uint32_t sample_rates[] =
    {
        96000, 88200, 64000, 48000, 44100, 32000,
        24000, 22050, 16000, 12000, 11025, 8000,
        7350
    };

    if (sf_index < sizeof(sample_rates) / sizeof(sample_rates[0])) {
        return sample_rates[sf_index];
    }

    return 0;
}

//add end

modify adts_aac_read_header function

/**
 * Read the header of a raw ADTS AAC file.
 *
 * Creates the single audio stream, reads trailing ID3v1/APE tags and skips
 * forward to the first ADTS sync word. On seekable input it then:
 *  - parses profile, sample rate and channel configuration from the header
 *    of that first frame,
 *  - uses 1/sample_rate as the stream time base, and
 *  - walks the file frame by frame to count the frames, so that an exact
 *    duration can be set instead of the bit-rate based estimate.
 *
 * Differences from the first version of this patch:
 *  - the header is read at the position of the first ADTS frame found by
 *    the sync scan, not at absolute offset 0, and the frame walk starts
 *    there too;
 *  - when the header cannot be used (read failure, unknown sample rate) or
 *    the input is not seekable, the function falls back to the stock time
 *    base instead of returning with no time base set;
 *  - the I/O position is restored to the first frame on every path;
 *  - the duration is computed as an exact sample count in the 1/sample_rate
 *    time base rather than from a per-frame microsecond value rounded up;
 *  - diagnostics go to the demuxer's log context at debug level, with
 *    format specifiers that match the 64-bit arguments.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int adts_aac_read_header(AVFormatContext *s)
{
    AVStream *st;
    uint16_t state;
    uint32_t sr = 0;

    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);

    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    st->codecpar->codec_id   = s->iformat->raw_codec_id;
    st->need_parsing         = AVSTREAM_PARSE_FULL_RAW;

    ff_id3v1_read(s);
    if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
        !av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
        int64_t cur = avio_tell(s->pb);
        ff_ape_parse_tag(s);
        avio_seek(s->pb, cur, SEEK_SET);
    }

    // skip data until the first ADTS frame is found
    state = avio_r8(s->pb);
    while (!avio_feof(s->pb) && avio_tell(s->pb) < s->probesize) {
        state = (state << 8) | avio_r8(s->pb);
        if ((state >> 4) != 0xFFF)
            continue;
        avio_seek(s->pb, -2, SEEK_CUR);
        break;
    }
    if ((state >> 4) != 0xFFF)
        return AVERROR_INVALIDDATA;

    /* Counting frames needs random access over the whole file, so it is only
     * attempted on seekable input. */
    if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
        /* the scan above left the stream positioned on the first frame */
        const int64_t first_frame_pos = avio_tell(s->pb);
        uint8_t header[2];
        int64_t ret;

        /* Bytes 2 and 3 of the frame header hold profile (2 bits), sampling
         * frequency index (4 bits) and channel configuration (3 bits). */
        if (avio_seek(s->pb, first_frame_pos + 2, SEEK_SET) >= 0 &&
            avio_read(s->pb, header, 2) == 2)
            sr = get_sample_rate((header[0] >> 2) & 0xf);

        if (sr != 0) {
            const int channel = (header[0] & 0x1) << 2 | (header[1] >> 6);
            const int64_t stream_size = avio_size(s->pb);
            int64_t offset     = first_frame_pos;
            int64_t num_frames = 0;

            st->codecpar->profile     = (header[0] >> 6) & 0x3;
            st->codecpar->sample_rate = sr;
            /* A channel configuration of 0 carries no channel count, so the
             * field is left untouched in that case. */
            if (channel != 0)
                st->codecpar->channels = channel;

            avpriv_set_pts_info(st, 64, 1, sr);

            /* Walk the frames. A frame that cannot be parsed (for example a
             * tag at the end of the file) ends the count but is not an
             * error: the frames counted so far are still used. A negative
             * stream_size (size unknown) skips the loop entirely. */
            while (offset < stream_size) {
                const int frame_size = getAdtsFrameLength(s, offset, NULL);
                if (frame_size == 0)
                    break;
                offset += frame_size;
                num_frames++;
            }

            /* Each frame is taken to hold 1024 samples; with a time base of
             * 1/sample_rate the duration is simply the sample count.
             * NOTE(review): this assumes one raw data block per ADTS frame,
             * as the original patch did. */
            if (num_frames > 0)
                st->duration = num_frames * 1024;

            av_log(s, AV_LOG_DEBUG,
                   "ADTS scan: stream size %lld, %lld frames, duration %lld\n",
                   (long long)stream_size, (long long)num_frames,
                   (long long)(num_frames * 1024));
        }

        /* Put the stream back on the first frame for the packet reader. */
        ret = avio_seek(s->pb, first_frame_pos, SEEK_SET);
        if (ret < 0)
            return (int)ret;
    }

    if (sr == 0) {
        // LCM of all possible ADTS sample rates
        avpriv_set_pts_info(st, 64, 1, 28224000);
    }

    return 0;
}

Originally, following the blog post referenced below, the frame-counting loop simply did `return 0` when a frame could not be parsed (shown below). Testing showed that some AAC files could then not be played, so the code above was changed to jump out of the loop instead of returning:

if ((frameSize = getAdtsFrameLength(s, offset, NULL)) == 0) {
				return 0;
			}

With this change no problems have shown up in testing so far: seeking and playback both work normally!

Reference link :ffmpeg series - solve ffmpeg obtain aac Audio file duration forbid _ A peach blossom pressed Begonia blog -CSDN Blog _ffmpeg Audio duration

原网站

版权声明
本文为[Tinghua_ M]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/162/202206110620188004.html