Pass lower resolution image to libaom av1 encoder.
The implementation of VideoFrameBuffer might have a more efficient way to scale images.
Therefore it is beneficial to first scale the image down to the top active layer resolution.
Bug: chromium:397485312
Change-Id: I782bc12d78e5bcab75345dbb145bce68100ba570
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/379680
Reviewed-by: Erik Språng <[email protected]>
Commit-Queue: Ilya Nikolaevskiy <[email protected]>
Cr-Commit-Position: refs/heads/main@{#44531}
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
index d0c513e..339a508 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@@ -13,6 +13,7 @@
#include <stdint.h>
#include <memory>
+#include <numeric>
#include <optional>
#include <utility>
#include <vector>
@@ -124,10 +125,22 @@
void SetSvcRefFrameConfig(
const ScalableVideoController::LayerFrameConfig& layer_frame);
// If pixel format doesn't match, then reallocate.
- void MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt);
+ void MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt,
+ unsigned int width,
+ unsigned int height);
+
+ // Adjust sclaing factors assuming that the top active SVC layer
+ // will be the input resolution.
+ void AdjustScalingFactorsForTopActiveLayer();
std::unique_ptr<ScalableVideoController> svc_controller_;
std::optional<ScalabilityMode> scalability_mode_;
+ // Original scaling factors for all configured layers active and inactive.
+ // `svc_params_` stores factors ignoring top inactive layers.
+ std::vector<int> scaling_factors_num_;
+ std::vector<int> scaling_factors_den_;
+ int last_active_layer_ = 0;
+
bool inited_;
bool rates_configured_;
std::optional<aom_svc_params_t> svc_params_;
@@ -469,9 +482,19 @@
1 << (svc_config.num_temporal_layers - tid - 1);
}
+ scaling_factors_den_.resize(svc_config.num_spatial_layers);
+ scaling_factors_num_.resize(svc_config.num_spatial_layers);
for (int sid = 0; sid < svc_config.num_spatial_layers; ++sid) {
+ scaling_factors_num_[sid] = svc_config.scaling_factor_num[sid];
svc_params.scaling_factor_num[sid] = svc_config.scaling_factor_num[sid];
+ scaling_factors_den_[sid] = svc_config.scaling_factor_den[sid];
svc_params.scaling_factor_den[sid] = svc_config.scaling_factor_den[sid];
+ encoder_settings_.spatialLayers[sid].width = encoder_settings_.width *
+ scaling_factors_num_[sid] /
+ scaling_factors_den_[sid];
+ encoder_settings_.spatialLayers[sid].height = encoder_settings_.height *
+ scaling_factors_num_[sid] /
+ scaling_factors_den_[sid];
}
// svc_params.layer_target_bitrate is set in SetRates() before svc_params is
@@ -540,21 +563,61 @@
return WEBRTC_VIDEO_CODEC_OK;
}
-void LibaomAv1Encoder::MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt) {
+void LibaomAv1Encoder::MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt,
+ unsigned int width,
+ unsigned int height) {
if (!frame_for_encode_) {
- frame_for_encode_ =
- aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr);
-
- } else if (frame_for_encode_->fmt != fmt) {
+ RTC_LOG(LS_INFO) << "Configuring AV1 encoder pixel format to "
+ << (fmt == AOM_IMG_FMT_NV12 ? "NV12" : "I420") << " "
+ << width << "x" << height;
+ frame_for_encode_ = aom_img_wrap(nullptr, fmt, width, height, 1, nullptr);
+ } else if (frame_for_encode_->fmt != fmt || frame_for_encode_->d_w != width ||
+ frame_for_encode_->d_h != height) {
RTC_LOG(LS_INFO) << "Switching AV1 encoder pixel format to "
- << (fmt == AOM_IMG_FMT_NV12 ? "NV12" : "I420");
+ << (fmt == AOM_IMG_FMT_NV12 ? "NV12" : "I420") << " "
+ << width << "x" << height;
aom_img_free(frame_for_encode_);
- frame_for_encode_ =
- aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr);
+ frame_for_encode_ = aom_img_wrap(nullptr, fmt, width, height, 1, nullptr);
}
// else no-op since the image is already in the right format.
}
+void LibaomAv1Encoder::AdjustScalingFactorsForTopActiveLayer() {
+ if (!SvcEnabled())
+ return;
+ last_active_layer_ = svc_params_->number_spatial_layers - 1;
+ for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
+ for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) {
+ int layer_index = sid * svc_params_->number_temporal_layers + tid;
+ if (svc_params_->layer_target_bitrate[layer_index] > 0) {
+ last_active_layer_ = sid;
+ }
+ }
+ }
+ if (static_cast<int>(cfg_.g_w) ==
+ encoder_settings_.spatialLayers[last_active_layer_].width) {
+ return;
+ }
+
+ cfg_.g_w = encoder_settings_.spatialLayers[last_active_layer_].width;
+ cfg_.g_h = encoder_settings_.spatialLayers[last_active_layer_].height;
+
+ // Recalculate scaling factors ignoring top inactive layers.
+ // Divide all by scaling factor of the last active layer.
+ for (int i = 0; i <= last_active_layer_; ++i) {
+ int n = scaling_factors_num_[i] * scaling_factors_den_[last_active_layer_];
+ int d = scaling_factors_den_[i] * scaling_factors_num_[last_active_layer_];
+ int gcd = std::gcd(n, d);
+ svc_params_->scaling_factor_num[i] = n / gcd;
+ svc_params_->scaling_factor_den[i] = d / gcd;
+ }
+ for (int i = last_active_layer_ + 1; i < svc_params_->number_spatial_layers;
+ ++i) {
+ svc_params_->scaling_factor_num[i] = 1;
+ svc_params_->scaling_factor_den[i] = 1;
+ }
+}
+
int32_t LibaomAv1Encoder::Encode(
const VideoFrame& frame,
const std::vector<VideoFrameType>* frame_types) {
@@ -578,13 +641,24 @@
absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
supported_formats = {VideoFrameBuffer::Type::kI420,
VideoFrameBuffer::Type::kNV12};
+
+ scoped_refptr<VideoFrameBuffer> scaled_image;
+ if (!SvcEnabled() ||
+ last_active_layer_ + 1 == svc_params_->number_spatial_layers) {
+ scaled_image = buffer;
+ } else {
+ scaled_image = buffer->Scale(
+ encoder_settings_.spatialLayers[last_active_layer_].width,
+ encoder_settings_.spatialLayers[last_active_layer_].height);
+ }
+
scoped_refptr<VideoFrameBuffer> mapped_buffer;
- if (buffer->type() != VideoFrameBuffer::Type::kNative) {
+ if (scaled_image->type() != VideoFrameBuffer::Type::kNative) {
// `buffer` is already mapped.
- mapped_buffer = buffer;
+ mapped_buffer = scaled_image;
} else {
// Attempt to map to one of the supported formats.
- mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
+ mapped_buffer = scaled_image->GetMappedFrameBuffer(supported_formats);
}
// Convert input frame to I420, if needed.
@@ -610,7 +684,8 @@
case VideoFrameBuffer::Type::kI420:
case VideoFrameBuffer::Type::kI420A: {
// Set frame_for_encode_ data pointers and strides.
- MaybeRewrapImgWithFormat(AOM_IMG_FMT_I420);
+ MaybeRewrapImgWithFormat(AOM_IMG_FMT_I420, mapped_buffer->width(),
+ mapped_buffer->height());
auto i420_buffer = mapped_buffer->GetI420();
RTC_DCHECK(i420_buffer);
RTC_CHECK_EQ(i420_buffer->width(), frame_for_encode_->d_w);
@@ -627,7 +702,8 @@
break;
}
case VideoFrameBuffer::Type::kNV12: {
- MaybeRewrapImgWithFormat(AOM_IMG_FMT_NV12);
+ MaybeRewrapImgWithFormat(AOM_IMG_FMT_NV12, mapped_buffer->width(),
+ mapped_buffer->height());
const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
RTC_DCHECK(nv12_buffer);
RTC_CHECK_EQ(nv12_buffer->width(), frame_for_encode_->d_w);
@@ -725,10 +801,10 @@
// If encoded image width/height info are added to aom_codec_cx_pkt_t,
// use those values in lieu of the values in frame.
if (svc_params_) {
- int n = svc_params_->scaling_factor_num[layer_frame->SpatialId()];
- int d = svc_params_->scaling_factor_den[layer_frame->SpatialId()];
- encoded_image._encodedWidth = cfg_.g_w * n / d;
- encoded_image._encodedHeight = cfg_.g_h * n / d;
+ int n = scaling_factors_num_[layer_frame->SpatialId()];
+ int d = scaling_factors_den_[layer_frame->SpatialId()];
+ encoded_image._encodedWidth = encoder_settings_.width * n / d;
+ encoded_image._encodedHeight = encoder_settings_.height * n / d;
encoded_image.SetSpatialIndex(layer_frame->SpatialId());
encoded_image.SetTemporalIndex(layer_frame->TemporalId());
} else {
@@ -762,10 +838,11 @@
if (SvcEnabled()) {
resolutions.resize(svc_params_->number_spatial_layers);
for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
- int n = svc_params_->scaling_factor_num[sid];
- int d = svc_params_->scaling_factor_den[sid];
+ int n = scaling_factors_num_[sid];
+ int d = scaling_factors_den_[sid];
resolutions[sid] =
- RenderResolution(cfg_.g_w * n / d, cfg_.g_h * n / d);
+ RenderResolution(encoder_settings_.width * n / d,
+ encoder_settings_.height * n / d);
}
} else {
resolutions = {RenderResolution(cfg_.g_w, cfg_.g_h)};
@@ -807,11 +884,6 @@
// total target bitrate is not updated first a division by zero could happen.
svc_controller_->OnRatesUpdated(parameters.bitrate);
cfg_.rc_target_bitrate = parameters.bitrate.get_sum_kbps();
- aom_codec_err_t error_code = aom_codec_enc_config_set(&ctx_, &cfg_);
- if (error_code != AOM_CODEC_OK) {
- RTC_LOG(LS_WARNING) << "Error configuring encoder, error code: "
- << error_code;
- }
if (SvcEnabled()) {
for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
@@ -824,9 +896,17 @@
parameters.bitrate.GetTemporalLayerSum(sid, tid) / 1000;
}
}
+ AdjustScalingFactorsForTopActiveLayer();
SetEncoderControlParameters(AV1E_SET_SVC_PARAMS, &*svc_params_);
}
+ // AdjustScalingFactorsForTopActiveLayer() may update `cfg_`.
+ aom_codec_err_t error_code = aom_codec_enc_config_set(&ctx_, &cfg_);
+ if (error_code != AOM_CODEC_OK) {
+ RTC_LOG(LS_WARNING) << "Error configuring encoder, error code: "
+ << error_code;
+ }
+
framerate_fps_ = parameters.framerate_fps;
rates_configured_ = true;
diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc
index 33b750c..0bbf6a2 100644
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc
@@ -37,6 +37,7 @@
#include "api/video_codecs/video_codec.h"
#include "api/video_codecs/video_encoder.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "modules/video_coding/codecs/av1/av1_svc_config.h"
#include "modules/video_coding/codecs/test/encoded_video_frame_producer.h"
#include "modules/video_coding/include/video_error_codes.h"
#include "rtc_base/checks.h"
@@ -68,6 +69,18 @@
return codec_settings;
}
+VideoCodec HDCodecSettings() {
+ VideoCodec codec_settings;
+ codec_settings.codecType = kVideoCodecAV1;
+ codec_settings.width = 1280;
+ codec_settings.height = 720;
+ codec_settings.maxFramerate = 30;
+ codec_settings.startBitrate = 2048;
+ codec_settings.maxBitrate = 2048;
+ codec_settings.qpMax = 63;
+ return codec_settings;
+}
+
VideoEncoder::Settings DefaultEncoderSettings() {
return VideoEncoder::Settings(
VideoEncoder::Capabilities(/*loss_notification=*/false),
@@ -592,5 +605,87 @@
RTC_CHECK_EQ(callback.frames_encoded(), 1);
}
+TEST(LibaomAv1EncoderTest, EnableDisableSpatialLayersWithSvcController) {
+ constexpr int kNumSpatialLayers = 3;
+ constexpr int kNumTemporalLayers = 1;
+ constexpr size_t kWidth = 1280;
+ constexpr size_t kHeight = 720;
+
+ // Configure encoder to produce 3 spatial layers. Encode frames of layer 0
+ // then enable layer 1 and encode more frames and so on.
+ // Then disable layers one by one in the same way.
+ // Note: bit rate allocation is high to avoid frame dropping due to rate
+ // control, the encoder should always produce a frame. A dropped
+ // frame indicates a problem and the test will fail.
+ std::unique_ptr<VideoEncoder> encoder =
+ CreateLibaomAv1Encoder(CreateEnvironment());
+ VideoCodec codec_settings = HDCodecSettings();
+ SetAv1SvcConfig(codec_settings, kNumTemporalLayers, kNumSpatialLayers);
+ codec_settings.SetFrameDropEnabled(true);
+ EXPECT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()),
+ WEBRTC_VIDEO_CODEC_OK);
+
+ EncodedVideoFrameProducer producer(*encoder);
+ producer.SetResolution({kWidth, kHeight});
+
+ VideoBitrateAllocation bitrate_allocation;
+
+ // Set all layers active for initial allocation.
+ for (size_t sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ }
+
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ // Encode a key frame to validate all other frames are delta frames.
+ std::vector<EncodedVideoFrameProducer::EncodedFrame> frames =
+ producer.SetNumInputFrames(1).Encode();
+ ASSERT_THAT(frames, Not(IsEmpty()));
+ EXPECT_TRUE(frames[0].codec_specific_info.template_structure);
+
+ constexpr size_t kNumFramesToEncode = 5;
+
+ // Disable layers one by one.
+ for (int sl_idx = kNumSpatialLayers - 1; sl_idx > 0; --sl_idx) {
+ bitrate_allocation.SetBitrate(sl_idx, 0, 0);
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(kNumFramesToEncode).Encode();
+ // With `sl_idx` spatial layer disabled, there are `sl_idx` spatial layers
+ // left.
+ ASSERT_THAT(frames, SizeIs(kNumFramesToEncode * sl_idx));
+ for (size_t i = 0; i < frames.size(); ++i) {
+ EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info);
+ EXPECT_FALSE(frames[i].codec_specific_info.template_structure);
+ }
+ }
+
+ // Enable layers back one by one.
+ for (size_t sl_idx = 1; sl_idx < kNumSpatialLayers; ++sl_idx) {
+ // Allocate high bit rate to avoid frame dropping due to rate control.
+ bitrate_allocation.SetBitrate(
+ sl_idx, 0,
+ codec_settings.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+ encoder->SetRates(VideoEncoder::RateControlParameters(
+ bitrate_allocation, codec_settings.maxFramerate));
+
+ frames = producer.SetNumInputFrames(kNumFramesToEncode).Encode();
+ // With (sl_idx+1) spatial layers expect (sl_idx+1) frames per input frame.
+ ASSERT_THAT(frames, SizeIs(kNumFramesToEncode * (sl_idx + 1)));
+ // Only the first frame after enabling the layer must be a keyframe.
+ EXPECT_TRUE(frames[0].codec_specific_info.generic_frame_info);
+ EXPECT_TRUE(frames[0].codec_specific_info.template_structure);
+ for (size_t i = 1; i < frames.size(); ++i) {
+ EXPECT_TRUE(frames[i].codec_specific_info.generic_frame_info);
+ EXPECT_FALSE(frames[i].codec_specific_info.template_structure);
+ }
+ }
+}
+
} // namespace
} // namespace webrtc