LibMedia: Allow FFmpegDemuxer to grab samples from multiple streams

In order to do so, we create a AVFormatContext for each stream we want to demux.
2025-12-06 00:19:53 +01:00 · 2025-10-06 16:59:07 -05:00 · 2025-10-06 16:59:07 -05:00 · fcde0f66c8
commit fcde0f66c8
parent 9c960c3307
3 changed files with 109 additions and 49 deletions
--- a/Libraries/LibMedia/FFmpeg/FFmpegDemuxer.cpp
+++ b/Libraries/LibMedia/FFmpeg/FFmpegDemuxer.cpp
@ -6,6 +6,7 @@
 */

 #include <AK/Math.h>
+#include <AK/MemoryStream.h>
 #include <AK/Stream.h>
 #include <AK/Time.h>
 #include <LibMedia/FFmpeg/FFmpegDemuxer.h>
@ -13,46 +14,63 @@

 namespace Media::FFmpeg {

-FFmpegDemuxer::FFmpegDemuxer(NonnullOwnPtr<SeekableStream> stream, NonnullOwnPtr<Media::FFmpeg::FFmpegIOContext> io_context)
-    : m_stream(move(stream))
+FFmpegDemuxer::FFmpegDemuxer(ReadonlyBytes data, NonnullOwnPtr<SeekableStream>&& stream, NonnullOwnPtr<Media::FFmpeg::FFmpegIOContext>&& io_context)
+    : m_data(data)
+    , m_stream(move(stream))
    , m_io_context(move(io_context))
 {
 }

 FFmpegDemuxer::~FFmpegDemuxer()
 {
-    if (m_packet != nullptr)
-        av_packet_free(&m_packet);
-    if (m_codec_context != nullptr)
-        avcodec_free_context(&m_codec_context);
    if (m_format_context != nullptr)
        avformat_close_input(&m_format_context);
 }

-ErrorOr<NonnullRefPtr<FFmpegDemuxer>> FFmpegDemuxer::create(NonnullOwnPtr<SeekableStream> stream)
+static DecoderErrorOr<void> initialize_format_context(AVFormatContext*& format_context, AVIOContext& io_context)
 {
-    auto io_context = TRY(Media::FFmpeg::FFmpegIOContext::create(*stream));
-    auto demuxer = make_ref_counted<FFmpegDemuxer>(move(stream), move(io_context));
-
-    // Open the container
-    demuxer->m_format_context = avformat_alloc_context();
-    if (demuxer->m_format_context == nullptr)
-        return Error::from_string_literal("Failed to allocate format context");
-    demuxer->m_format_context->pb = demuxer->m_io_context->avio_context();
-    if (avformat_open_input(&demuxer->m_format_context, nullptr, nullptr, nullptr) < 0)
-        return Error::from_string_literal("Failed to open input for format parsing");
+    format_context = avformat_alloc_context();
+    if (format_context == nullptr)
+        return DecoderError::with_description(DecoderErrorCategory::Memory, "Failed to allocate format context"sv);
+    format_context->pb = &io_context;
+    if (avformat_open_input(&format_context, nullptr, nullptr, nullptr) < 0)
+        return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Failed to open input for format parsing"sv);

    // Read stream info; doing this is required for headerless formats like MPEG
-    if (avformat_find_stream_info(demuxer->m_format_context, nullptr) < 0)
-        return Error::from_string_literal("Failed to find stream info");
+    if (avformat_find_stream_info(format_context, nullptr) < 0)
+        return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Failed to find stream info"sv);

-    demuxer->m_packet = av_packet_alloc();
-    if (demuxer->m_packet == nullptr)
-        return Error::from_string_literal("Failed to allocate packet");
+    return {};
+}
+
+DecoderErrorOr<NonnullRefPtr<FFmpegDemuxer>> FFmpegDemuxer::from_data(ReadonlyBytes data)
+{
+    auto stream = DECODER_TRY_ALLOC(try_make<FixedMemoryStream>(data));
+    auto io_context = DECODER_TRY_ALLOC(Media::FFmpeg::FFmpegIOContext::create(*stream));
+    auto demuxer = DECODER_TRY_ALLOC(adopt_nonnull_ref_or_enomem(new (nothrow) FFmpegDemuxer(data, move(stream), move(io_context))));
+
+    TRY(initialize_format_context(demuxer->m_format_context, *demuxer->m_io_context->avio_context()));

    return demuxer;
 }

+FFmpegDemuxer::TrackContext& FFmpegDemuxer::get_track_context(Track const& track)
+{
+    return *m_track_contexts.ensure(track, [&] {
+        auto stream = MUST(try_make<FixedMemoryStream>(m_data));
+        auto io_context = MUST(Media::FFmpeg::FFmpegIOContext::create(*stream));
+
+        auto track_context = make<TrackContext>(move(stream), move(io_context));
+
+        // We've already initialized a format context, so the only way this can fail is OOM.
+        MUST(initialize_format_context(track_context->format_context, *track_context->io_context->avio_context()));
+
+        track_context->packet = av_packet_alloc();
+        VERIFY(track_context->packet != nullptr);
+        return track_context;
+    });
+}
+
 static inline AK::Duration time_units_to_duration(i64 time_units, int numerator, int denominator)
 {
    VERIFY(numerator != 0);
@ -160,12 +178,23 @@ DecoderErrorOr<Optional<Track>> FFmpegDemuxer::get_preferred_track_for_type(Trac

 DecoderErrorOr<DemuxerSeekResult> FFmpegDemuxer::seek_to_most_recent_keyframe(Track const& track, AK::Duration timestamp, DemuxerSeekOptions)
 {
-    VERIFY(track.identifier() < m_format_context->nb_streams);
-    auto* stream = m_format_context->streams[track.identifier()];
-    auto target_pts = duration_to_time_units(timestamp, stream->time_base);
+    auto& track_context = get_track_context(track);
+    auto& format_context = *track_context.format_context;

-    if (av_seek_frame(m_format_context, stream->index, target_pts, AVSEEK_FLAG_BACKWARD) < 0)
-        return DecoderError::format(DecoderErrorCategory::Unknown, "Failed to seek");
+    VERIFY(format_context.nb_streams == m_format_context->nb_streams);
+    VERIFY(track.identifier() < format_context.nb_streams);
+    auto& stream = *format_context.streams[track.identifier()];
+    auto av_timestamp = duration_to_time_units(timestamp, stream.time_base);
+
+    auto seek_succeeded = false;
+    if (track_context.is_seekable && av_seek_frame(&format_context, stream.index, av_timestamp, AVSEEK_FLAG_BACKWARD) >= 0)
+        seek_succeeded = true;
+    if (!seek_succeeded) {
+        track_context.is_seekable = false;
+        auto av_base_timestamp = duration_to_time_units(timestamp, AV_TIME_BASE_Q);
+        if (av_seek_frame(&format_context, -1, av_base_timestamp, AVSEEK_FLAG_BACKWARD) < 0)
+            return DecoderError::format(DecoderErrorCategory::Corrupted, "Failed to seek");
+    }

    return DemuxerSeekResult::MovedPosition;
 }
@ -186,29 +215,34 @@ DecoderErrorOr<ReadonlyBytes> FFmpegDemuxer::get_codec_initialization_data_for_t

 DecoderErrorOr<CodedFrame> FFmpegDemuxer::get_next_sample_for_track(Track const& track)
 {
-    VERIFY(track.identifier() < m_format_context->nb_streams);
-    auto* stream = m_format_context->streams[track.identifier()];
+    auto& track_context = get_track_context(track);
+    auto& format_context = *track_context.format_context;
+    auto& packet = *track_context.packet;
+
+    VERIFY(format_context.nb_streams == m_format_context->nb_streams);
+    VERIFY(track.identifier() < format_context.nb_streams);
+    auto& stream = *format_context.streams[track.identifier()];

    for (;;) {
-        auto read_frame_error = av_read_frame(m_format_context, m_packet);
+        auto read_frame_error = av_read_frame(&format_context, &packet);
        if (read_frame_error < 0) {
            if (read_frame_error == AVERROR_EOF)
                return DecoderError::format(DecoderErrorCategory::EndOfStream, "End of stream");

            return DecoderError::format(DecoderErrorCategory::Unknown, "Failed to read frame");
        }
-        if (m_packet->stream_index != stream->index) {
-            av_packet_unref(m_packet);
+        if (packet.stream_index != stream.index) {
+            av_packet_unref(&packet);
            continue;
        }

        auto auxiliary_data = [&]() -> CodedFrame::AuxiliaryData {
            if (track.type() == TrackType::Video) {
-                auto color_primaries = static_cast<ColorPrimaries>(stream->codecpar->color_primaries);
-                auto transfer_characteristics = static_cast<TransferCharacteristics>(stream->codecpar->color_trc);
-                auto matrix_coefficients = static_cast<MatrixCoefficients>(stream->codecpar->color_space);
+                auto color_primaries = static_cast<ColorPrimaries>(stream.codecpar->color_primaries);
+                auto transfer_characteristics = static_cast<TransferCharacteristics>(stream.codecpar->color_trc);
+                auto matrix_coefficients = static_cast<MatrixCoefficients>(stream.codecpar->color_space);
                auto color_range = [stream] {
-                    switch (stream->codecpar->color_range) {
+                    switch (stream.codecpar->color_range) {
                    case AVColorRange::AVCOL_RANGE_MPEG:
                        return VideoFullRangeFlag::Studio;
                    case AVColorRange::AVCOL_RANGE_JPEG:
@ -227,19 +261,25 @@ DecoderErrorOr<CodedFrame> FFmpegDemuxer::get_next_sample_for_track(Track const&

        // Copy the packet data so that we have a permanent reference to it whilst the Sample is alive, which allows us
        // to wipe the packet afterwards.
-        auto packet_data = DECODER_TRY_ALLOC(ByteBuffer::copy(m_packet->data, m_packet->size));
+        auto packet_data = DECODER_TRY_ALLOC(ByteBuffer::copy(packet.data, packet.size));

-        auto flags = (m_packet->flags & AV_PKT_FLAG_KEY) != 0 ? FrameFlags::Keyframe : FrameFlags::None;
+        auto flags = (packet.flags & AV_PKT_FLAG_KEY) != 0 ? FrameFlags::Keyframe : FrameFlags::None;
        auto sample = CodedFrame(
-            time_units_to_duration(m_packet->pts, stream->time_base),
+            time_units_to_duration(packet.pts, stream.time_base),
            flags,
            move(packet_data),
            auxiliary_data);

        // Wipe the packet now that the data is safe.
-        av_packet_unref(m_packet);
+        av_packet_unref(&packet);
        return sample;
    }
 }

+FFmpegDemuxer::TrackContext::~TrackContext()
+{
+    av_packet_free(&packet);
+    avformat_free_context(format_context);
+}
+
 }
--- a/Libraries/LibMedia/FFmpeg/FFmpegDemuxer.h
+++ b/Libraries/LibMedia/FFmpeg/FFmpegDemuxer.h
@ -7,7 +7,8 @@

 #pragma once

-#include <AK/Error.h>
+#include <AK/Forward.h>
+#include <AK/HashMap.h>
 #include <AK/NonnullOwnPtr.h>
 #include <LibMedia/Demuxer.h>
 #include <LibMedia/FFmpeg/FFmpegIOContext.h>
@ -21,9 +22,7 @@ namespace Media::FFmpeg {

 class FFmpegDemuxer : public Demuxer {
 public:
-    static ErrorOr<NonnullRefPtr<FFmpegDemuxer>> create(NonnullOwnPtr<SeekableStream> stream);
-
-    FFmpegDemuxer(NonnullOwnPtr<SeekableStream> stream, NonnullOwnPtr<Media::FFmpeg::FFmpegIOContext>);
+    static DecoderErrorOr<NonnullRefPtr<FFmpegDemuxer>> from_data(ReadonlyBytes data);
    virtual ~FFmpegDemuxer() override;

    virtual DecoderErrorOr<Vector<Track>> get_tracks_for_type(TrackType type) override;
@ -41,13 +40,34 @@ public:
    virtual DecoderErrorOr<CodedFrame> get_next_sample_for_track(Track const& track) override;

 private:
+    struct TrackContext {
+        TrackContext(NonnullOwnPtr<SeekableStream>&& stream, NonnullOwnPtr<FFmpegIOContext>&& io_context)
+            : stream(move(stream))
+            , io_context(move(io_context))
+        {
+        }
+        ~TrackContext();
+        TrackContext(TrackContext&&) = default;
+
+        NonnullOwnPtr<SeekableStream> stream;
+        NonnullOwnPtr<FFmpegIOContext> io_context;
+        AVFormatContext* format_context { nullptr };
+        AVPacket* packet { nullptr };
+        bool is_seekable { true };
+        bool peeked_packet_already { false };
+    };
+
+    FFmpegDemuxer(ReadonlyBytes, NonnullOwnPtr<SeekableStream>&&, NonnullOwnPtr<Media::FFmpeg::FFmpegIOContext>&&);
+
+    TrackContext& get_track_context(Track const&);
    DecoderErrorOr<Track> get_track_for_stream_index(u32 stream_index);

+    ReadonlyBytes m_data;
    NonnullOwnPtr<SeekableStream> m_stream;
-    AVCodecContext* m_codec_context { nullptr };
-    AVFormatContext* m_format_context { nullptr };
-    NonnullOwnPtr<Media::FFmpeg::FFmpegIOContext> m_io_context;
-    AVPacket* m_packet { nullptr };
+    NonnullOwnPtr<FFmpegIOContext> m_io_context;
+    AVFormatContext* m_format_context;
+
+    HashMap<Track, NonnullOwnPtr<TrackContext>> m_track_contexts;
 };

 }
--- a/Libraries/LibMedia/PlaybackManager.cpp
+++ b/Libraries/LibMedia/PlaybackManager.cpp
@ -26,7 +26,7 @@ DecoderErrorOr<NonnullRefPtr<PlaybackManager>> PlaybackManager::try_create(Reado
        auto matroska_result = Matroska::MatroskaDemuxer::from_data(data);
        if (!matroska_result.is_error())
            return matroska_result.release_value();
-        return DECODER_TRY_ALLOC(FFmpeg::FFmpegDemuxer::create(make<FixedMemoryStream>(data)));
+        return TRY(FFmpeg::FFmpegDemuxer::from_data(data));
    }());
    auto demuxer = DECODER_TRY_ALLOC(try_make_ref_counted<MutexedDemuxer>(inner_demuxer));