summarylogtreecommitdiffstats
path: root/python-pytorch-ffmpeg6.patch
blob: 4ed46501daea7526618cdac4108e4e13d5f9eee1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
From: Gard Spreemann <gspr@nonempty.org>
Date: Fri, 11 Feb 2022 20:59:35 +0100
Subject: FFMPEG 5.0 support

---
 caffe2/video/video_decoder.cc | 77 ++++++++++++++++++++++++++++++-------------
 caffe2/video/video_decoder.h  |  1 +
 2 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/caffe2/video/video_decoder.cc b/caffe2/video/video_decoder.cc
index d40bc05..9ab7271 100644
--- a/caffe2/video/video_decoder.cc
+++ b/caffe2/video/video_decoder.cc
@@ -12,8 +12,10 @@ VideoDecoder::VideoDecoder() {
   static std::mutex gMutex;
   std::unique_lock<std::mutex> lock(gMutex);
   if (!gInitialized) {
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100)
     av_register_all();
     avcodec_register_all();
+#endif
     avformat_network_init();
     gInitialized = true;
   }
@@ -27,8 +29,20 @@ void VideoDecoder::getAudioSample(
     Callback& callback,
     const Params& params) {
   int frame_finished = 0;
-  auto result = avcodec_decode_audio4(
-      audioCodecContext_, audioStreamFrame_, &frame_finished, &packet);
+
+  int result = avcodec_send_packet(audioCodecContext_, &packet);
+  if (result < 0) {
+    LOG(ERROR) << "Failed to decode audio packet: " << ffmpegErrorStr(result);
+    return;
+  }
+  result = avcodec_receive_frame(audioCodecContext_, audioStreamFrame_);
+  if (result == 0) {
+    frame_finished = 1;
+  }
+  else if (result < 0 && result != AVERROR(EAGAIN) && result != AVERROR_EOF) {
+    LOG(ERROR) << "Failed to decode audio packet: " << ffmpegErrorStr(result);
+    return;
+  }
 
   if (frame_finished) {
     // from
@@ -185,12 +199,12 @@ void VideoDecoder::decodeLoop(
     if (params.streamIndex_ == -1) {
       for (int i = 0; i < inputContext->nb_streams; i++) {
         auto stream = inputContext->streams[i];
-        if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+        if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
             videoStreamIndex_ == -1) {
           videoStreamIndex_ = i;
           videoStream_ = stream;
         } else if (
-            stream->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
+            stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
             audioStreamIndex_ == -1) {
           audioStreamIndex_ = i;
         }
@@ -207,7 +221,11 @@ void VideoDecoder::decodeLoop(
 
     // Initialize codec
     AVDictionary* opts = nullptr;
-    videoCodecContext_ = videoStream_->codec;
+    ret = avcodec_parameters_to_context(videoCodecContext_, videoStream_->codecpar);
+    if (ret < 0) {
+      LOG(ERROR) << "Cannot get codec context from parameters";
+      return;
+    }
     try {
       ret = avcodec_open2(
           videoCodecContext_,
@@ -226,7 +244,11 @@ void VideoDecoder::decodeLoop(
 
     if (params.getAudio_ && audioStreamIndex_ >= 0) {
       // see e.g. ridge/decoder/StreamDecoder.cpp
-      audioCodecContext_ = inputContext->streams[audioStreamIndex_]->codec;
+      ret = avcodec_parameters_to_context(audioCodecContext_, inputContext->streams[audioStreamIndex_]->codecpar);
+      if (ret < 0) {
+        LOG(ERROR) << "Failed to get codec parameters: " << ffmpegErrorStr(ret);
+        return;
+      }
       ret = avcodec_open2(
           audioCodecContext_,
           avcodec_find_decoder(audioCodecContext_->codec_id),
@@ -452,12 +474,12 @@ void VideoDecoder::decodeLoop(
           ret = av_read_frame(inputContext, &packet);
           if (ret == AVERROR_EOF) {
             eof = 1;
-            av_free_packet(&packet);
+            av_packet_unref(&packet);
             packet.data = nullptr;
             packet.size = 0;
             // stay in the while loop to flush frames
           } else if (ret == AVERROR(EAGAIN)) {
-            av_free_packet(&packet);
+            av_packet_unref(&packet);
             continue;
           } else if (ret < 0) {
             LOG(ERROR) << "Error reading packet : " << ffmpegErrorStr(ret);
@@ -483,13 +505,17 @@ void VideoDecoder::decodeLoop(
           }
 
           if (si != videoStreamIndex_) {
-            av_free_packet(&packet);
+            av_packet_unref(&packet);
             continue;
           }
         }
 
-        ret = avcodec_decode_video2(
-            videoCodecContext_, videoStreamFrame_, &gotPicture, &packet);
+        ret = avcodec_send_packet(videoCodecContext_, &packet);
+        if (ret < 0) {
+          LOG(ERROR) << "Error decoding video frame : " << ffmpegErrorStr(ret);
+          return;
+        }
+        ret = avcodec_receive_frame(videoCodecContext_, videoStreamFrame_);
         if (ret < 0) {
           LOG(ERROR) << "Error decoding video frame : " << ffmpegErrorStr(ret);
           return;
@@ -497,13 +523,12 @@ void VideoDecoder::decodeLoop(
         try {
           // Nothing to do without a picture
           if (!gotPicture) {
-            av_free_packet(&packet);
+            av_packet_unref(&packet);
             continue;
           }
           frameIndex++;
 
-          long int frame_ts =
-              av_frame_get_best_effort_timestamp(videoStreamFrame_);
+          long int frame_ts = videoStreamFrame_->best_effort_timestamp;
           timestamp = frame_ts * av_q2d(videoStream_->time_base);
           if ((frame_ts >= start_ts && !mustDecodeAll) || mustDecodeAll) {
             /* process current frame if:
@@ -534,7 +559,7 @@ void VideoDecoder::decodeLoop(
             if (!keyFrame) {
               // if fps == SpecialFps::SAMPLE_NO_FRAME (0), don't sample at all
               if (currFps == SpecialFps::SAMPLE_NO_FRAME) {
-                av_free_packet(&packet);
+                av_packet_unref(&packet);
                 continue;
               }
 
@@ -560,7 +585,7 @@ void VideoDecoder::decodeLoop(
                    timestamp >= lastFrameTimestamp + (1 / currFps));
 
               if (!fpsReached) {
-                av_free_packet(&packet);
+                av_packet_unref(&packet);
                 continue;
               }
             }
@@ -571,7 +596,7 @@ void VideoDecoder::decodeLoop(
             if (params.maximumOutputFrames_ != -1 &&
                 outputFrameIndex >= params.maximumOutputFrames_) {
               // enough frames
-              av_free_packet(&packet);
+              av_packet_unref(&packet);
               break;
             }
 
@@ -583,16 +608,22 @@ void VideoDecoder::decodeLoop(
 
             try {
               // Determine required buffer size and allocate buffer
-              int numBytes = avpicture_get_size(pixFormat, outWidth, outHeight);
+              int numBytes = av_image_get_buffer_size(pixFormat, outWidth, outHeight, 1);
               DecodedFrame::AvDataPtr buffer(
                   (uint8_t*)av_malloc(numBytes * sizeof(uint8_t)));
 
-              int size = avpicture_fill(
-                  (AVPicture*)rgbFrame,
+              int size = av_image_fill_arrays(
+                  rgbFrame->data,
+                  rgbFrame->linesize,
                   buffer.get(),
                   pixFormat,
                   outWidth,
-                  outHeight);
+                  outHeight,
+                  1);
+              if (size < 0) {
+                LOG(ERROR) << "Failed to fill frame";
+                return;
+              }
 
               sws_scale(
                   scaleContext_,
@@ -628,9 +659,9 @@ void VideoDecoder::decodeLoop(
           av_frame_unref(audioStreamFrame_);
         }
 
-        av_free_packet(&packet);
+        av_packet_unref(&packet);
       } catch (const std::exception&) {
-        av_free_packet(&packet);
+        av_packet_unref(&packet);
       }
     } // of while loop
     callback.videoDecodingEnded(timestamp);
diff --git a/caffe2/video/video_decoder.h b/caffe2/video/video_decoder.h
index a091142..8c061fd 100644
--- a/caffe2/video/video_decoder.h
+++ b/caffe2/video/video_decoder.h
@@ -11,6 +11,7 @@ extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 #include <libavformat/avio.h>
+#include <libavutil/imgutils.h>
 #include <libavutil/log.h>
 #include <libavutil/motion_vector.h>
 #include <libswresample/swresample.h>