// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "remoting/protocol/webrtc_video_encoder_wrapper.h"
#include <stdint.h>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "base/cxx17_backports.h"
#include "base/functional/bind.h"
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/task/bind_post_task.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/single_thread_task_runner.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "remoting/base/constants.h"
#include "remoting/base/session_options.h"
#include "remoting/codec/webrtc_video_encoder_av1.h"
#include "remoting/codec/webrtc_video_encoder_vpx.h"
#include "remoting/protocol/video_stream_event_router.h"
#include "remoting/protocol/webrtc_video_frame_adapter.h"
#include "third_party/webrtc/api/video_codecs/av1_profile.h"
#include "third_party/webrtc/api/video_codecs/sdp_video_format.h"
#include "third_party/webrtc/api/video_codecs/vp9_profile.h"
#include "third_party/webrtc/modules/desktop_capture/desktop_frame.h"
#include "third_party/webrtc/modules/video_coding/include/video_codec_interface.h"
#include "third_party/webrtc/modules/video_coding/include/video_error_codes.h"
#if defined(USE_H264_ENCODER)
#include "remoting/codec/webrtc_video_encoder_gpu.h"
#endif
namespace remoting::protocol {
namespace {
// Maximum quantizer at which to encode frames. Lowering this value will
// improve image quality (in cases of low-bandwidth or large frames) at the
// cost of latency. Increasing the value will improve latency (in these cases)
// at the cost of image quality, resulting in longer top-off times.
const int kMaxQuantizer = 50;
// Minimum quantizer at which to encode frames. The value is chosen such that
// sending higher-quality (lower quantizer) frames would use up bandwidth
// without any appreciable gain in image quality.
const int kMinQuantizer = 10;
// Number of pixels in one megapixel. Used to scale
// kEstimatedBytesPerMegapixel when estimating the encoded size of a frame.
const int64_t kPixelsPerMegapixel = 1000000;
// Threshold, in number of updated pixels, used to detect "big" frames. These
// frames update a significant portion of the screen compared to the
// preceding frames. For these frames, the minimum quantizer may need to be
// adjusted in order to ensure that they get delivered to the client as soon
// as possible, in exchange for a lower-quality image.
const int kBigFrameThresholdPixels = 300000;
// Estimated size (in bytes per megapixel) of an encoded frame at the target
// quantizer value (see kTargetQuantizerForTopOff). The compression ratio
// varies depending on the image, so this is just a rough estimate. It's used
// to predict when an encoded "big" frame may be too large to be delivered to
// the client quickly.
const int kEstimatedBytesPerMegapixel = 100000;
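// Worked example using these constants (illustrative numbers only): a frame
// updating 300,000 pixels (kBigFrameThresholdPixels) is estimated at
// 0.3 megapixels * 100,000 bytes/megapixel = 30,000 bytes, i.e. 240,000 bits.
// At 2,000 kbps that takes ~120 ms to send, far longer than the 16.7 ms frame
// interval at 60 fps, so such a frame would be encoded at reduced quality
// (see ShouldDropQualityForLargeFrame()).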
// Minimum interval between frames needed to keep the connection alive. The
// client will request a key-frame if it does not receive any frames for a
// 3-second period. This is effectively a minimum frame-rate, so the value
// should not be too small; otherwise the client may waste CPU cycles
// processing and rendering lots of identical frames.
constexpr base::TimeDelta kKeepAliveInterval = base::Seconds(2);
// SDP format parameter name used to set the maximum framerate for an encoder.
constexpr char kMaxFramerateKey[] = "max-fr";
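// For example, a format parameter of "max-fr=30" caps this encoder at 30
// frames per second (parsed and clamped in GetFrameRateFromSdpFormatParam()
// below).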
std::string EncodeResultToString(WebrtcVideoEncoder::EncodeResult result) {
using EncodeResult = WebrtcVideoEncoder::EncodeResult;
switch (result) {
case EncodeResult::SUCCEEDED:
return "Succeeded";
case EncodeResult::FRAME_SIZE_EXCEEDS_CAPABILITY:
return "Frame size exceeds capability";
case EncodeResult::UNKNOWN_ERROR:
return "Unknown error";
}
NOTREACHED();
return "";
}
int GetFrameRateFromSdpFormatParam(const std::string& param_value) {
int conversion_result;
if (!base::StringToInt(param_value, &conversion_result)) {
LOG(ERROR) << "Failed to convert max-fr value to an int: " << param_value;
return kTargetFrameRate;
}
// Clamp the range to prevent a bad experience in case of a client bug.
// 1000 is the maximum allowable frame rate, as capturing at a higher rate
// would cause problems in several components which expect at least 1
// millisecond between frames. In practice, very few applications update
// their windows faster than the monitor refresh rate (typically 60-144 Hz),
// so values above ~150 provide little benefit.
// A lower bound of 1 is needed because the framerate is used as the
// denominator when calculating the period between frames, so 0 would lead to
// divide-by-zero bugs.
return base::clamp<int>(conversion_result, 1, 1000);
}
} // namespace
WebrtcVideoEncoderWrapper::WebrtcVideoEncoderWrapper(
const webrtc::SdpVideoFormat& format,
const SessionOptions& session_options,
scoped_refptr<base::SingleThreadTaskRunner> main_task_runner,
scoped_refptr<base::SingleThreadTaskRunner> encode_task_runner,
base::WeakPtr<VideoStreamEventRouter> video_stream_event_router)
: main_task_runner_(main_task_runner),
encode_task_runner_(encode_task_runner),
video_stream_event_router_(video_stream_event_router) {
codec_type_ = webrtc::PayloadStringToCodecType(format.name);
switch (codec_type_) {
case webrtc::kVideoCodecVP8:
VLOG(0) << "Creating VP8 encoder.";
encoder_ = WebrtcVideoEncoderVpx::CreateForVP8();
break;
case webrtc::kVideoCodecVP9: {
absl::optional<webrtc::VP9Profile> profile =
webrtc::ParseSdpForVP9Profile(format.parameters);
bool lossless_color = profile.has_value() &&
profile.value() == webrtc::VP9Profile::kProfile1;
VLOG(0) << "Creating VP9 encoder, lossless_color="
<< (lossless_color ? "true" : "false");
encoder_ = WebrtcVideoEncoderVpx::CreateForVP9();
encoder_->SetLosslessColor(lossless_color);
absl::optional<int> encoder_speed =
session_options.GetInt("Vp9-Encoder-Speed");
if (encoder_speed) {
VLOG(0) << "Setting VP9 encoder speed to " << encoder_speed.value();
encoder_->SetEncoderSpeed(encoder_speed.value());
}
break;
}
case webrtc::kVideoCodecAV1: {
absl::optional<webrtc::AV1Profile> profile =
webrtc::ParseSdpForAV1Profile(format.parameters);
bool lossless_color = profile.has_value() &&
profile.value() == webrtc::AV1Profile::kProfile1;
VLOG(0) << "Creating AV1 encoder, lossless_color="
<< (lossless_color ? "true" : "false");
encoder_ = std::make_unique<WebrtcVideoEncoderAV1>();
encoder_->SetLosslessColor(lossless_color);
break;
}
case webrtc::kVideoCodecH264:
#if defined(USE_H264_ENCODER)
VLOG(0) << "Creating H264 encoder.";
encoder_ = WebrtcVideoEncoderGpu::CreateForH264();
#else
NOTIMPLEMENTED();
#endif
break;
default:
LOG(FATAL) << "Unknown codec type: " << codec_type_;
}
auto iter = format.parameters.find(kMaxFramerateKey);
if (iter != format.parameters.end()) {
target_frame_rate_ = GetFrameRateFromSdpFormatParam(iter->second);
}
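// E.g. a 30 fps target yields a frame interval of ~33 ms.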
target_frame_interval_ = base::Hertz(target_frame_rate_);
}
WebrtcVideoEncoderWrapper::~WebrtcVideoEncoderWrapper() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (encode_pending_) {
// If an encode is still in-flight, delete the encoder on
// |encode_task_runner_| since it may still be in use there; it is not
// sequence-bound and will no longer be called on this sequence.
encode_task_runner_->DeleteSoon(FROM_HERE, encoder_.release());
}
}
void WebrtcVideoEncoderWrapper::SetEncoderForTest(
std::unique_ptr<WebrtcVideoEncoder> encoder) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
encoder_ = std::move(encoder);
}
int32_t WebrtcVideoEncoderWrapper::InitEncode(
const webrtc::VideoCodec* codec_settings,
const webrtc::VideoEncoder::Settings& settings) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(codec_settings);
DCHECK_EQ(codec_settings->codecType, codec_type_);
// Validate that the request is for a single stream.
DCHECK_EQ(1, codec_settings->numberOfSimulcastStreams);
if (codec_type_ == webrtc::kVideoCodecVP9) {
// SVC is not supported.
DCHECK_EQ(1, codec_settings->VP9().numberOfSpatialLayers);
}
return WEBRTC_VIDEO_CODEC_OK;
}
int32_t WebrtcVideoEncoderWrapper::RegisterEncodeCompleteCallback(
webrtc::EncodedImageCallback* callback) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
encoded_callback_ = callback;
return WEBRTC_VIDEO_CODEC_OK;
}
int32_t WebrtcVideoEncoderWrapper::Release() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
encoded_callback_ = nullptr;
return WEBRTC_VIDEO_CODEC_OK;
}
int32_t WebrtcVideoEncoderWrapper::Encode(
const webrtc::VideoFrame& frame,
const std::vector<webrtc::VideoFrameType>* frame_types) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
auto now = base::TimeTicks::Now();
// Simulcast is unsupported, so only the first vector element is needed.
bool key_frame_requested =
(frame_types && !frame_types->empty() &&
((*frame_types)[0] == webrtc::VideoFrameType::kVideoFrameKey));
if (key_frame_requested) {
pending_key_frame_request_ = true;
}
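// WebRTC assigns incrementing ids to the frames it submits for encoding. A
// gap between the expected and received id means WebRTC dropped one or more
// frames, so their updated areas must be folded into the next encoded frame
// (see the update-region handling below).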
bool webrtc_dropped_frame = false;
if (next_frame_id_ != frame.id()) {
webrtc_dropped_frame = true;
next_frame_id_ = frame.id();
}
next_frame_id_++;
// WebRTC calls Encode() after each successful capture. If we dropped the
// frame immediately when the encoder is busy, instead of storing the frame
// data, the encoder would have to wait until the next capture request
// succeeds before it could encode another frame; this gap can be several
// milliseconds or more. To reduce this latency, we store the new frame while
// the encoder is busy so it can be encoded immediately after the encoder
// finishes the current frame.
if (encode_pending_) {
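// A queued frame is about to be replaced by a newer capture. Merge its
// update-rect into the accumulator so its changed pixels are still encoded
// later, and notify WebRTC that a frame was dropped.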
if (pending_frame_) {
accumulated_update_rect_.Union(pending_frame_->update_rect());
base::SequencedTaskRunner::GetCurrentDefault()->PostTask(
FROM_HERE,
base::BindOnce(&WebrtcVideoEncoderWrapper::NotifyFrameDropped,
weak_factory_.GetWeakPtr()));
}
pending_frame_ = std::make_unique<webrtc::VideoFrame>(frame);
return WEBRTC_VIDEO_CODEC_OK;
}
// Frames of type kNative are expected to have the adapter that was used to
// wrap the DesktopFrame, so the downcast should be safe.
if (frame.video_frame_buffer()->type() !=
webrtc::VideoFrameBuffer::Type::kNative) {
LOG(ERROR) << "Only kNative frames are supported.";
return WEBRTC_VIDEO_CODEC_ERROR;
}
auto* video_frame_adapter =
static_cast<WebrtcVideoFrameAdapter*>(frame.video_frame_buffer().get());
// Store RTP timestamp and FrameStats so they can be added to the
// EncodedImage and EncodedFrame when encoding is complete.
rtp_timestamp_ = frame.timestamp();
frame_stats_ = video_frame_adapter->TakeFrameStats();
if (!frame_stats_) {
// This could happen if WebRTC tried to encode the same frame twice.
// Taking the frame-stats twice from the same frame-adapter would return
// nullptr the second time.
LOG(ERROR) << "Frame provided with missing frame-stats.";
return WEBRTC_VIDEO_CODEC_ERROR;
}
if (!screen_id_.has_value()) {
screen_id_ = frame_stats_->screen_id;
// Now that we know which screen id this encoder is associated with, we can
// let that video stream know if a non-default framerate has been requested.
if (target_frame_rate_ != kTargetFrameRate) {
main_task_runner_->PostTask(
FROM_HERE,
base::BindOnce(&VideoStreamEventRouter::OnTargetFramerateChanged,
video_stream_event_router_, *screen_id_,
target_frame_rate_));
}
}
frame_stats_->encode_started_time = now;
auto desktop_frame = video_frame_adapter->TakeDesktopFrame();
// If any frames were dropped by WebRTC or by this class, the original
// DesktopFrame's updated-region should not be used as-is, because that
// region only describes the difference from the immediately preceding
// capture, which the encoder never saw since it was dropped. In this case,
// the DesktopFrame's update-region should be set to the union of all the
// dropped frames' update-rectangles.
bool this_class_dropped_frame = !accumulated_update_rect_.IsEmpty();
if (webrtc_dropped_frame || this_class_dropped_frame) {
// Get the update-rect that WebRTC provides, which will include any
// accumulated updates from frames that WebRTC dropped.
auto update_rect = frame.update_rect();
// Combine it with any updates from frames dropped by this class.
update_rect.Union(accumulated_update_rect_);
// In case the new frame has a different resolution, ensure the update-rect
// is constrained by the frame's bounds. On the first frame with a new
// resolution, WebRTC sets the update-rect to the full area of the frame, so
// this line will give the correct result in that case. If the resolution
// did not change (for this frame or any prior dropped frames), the
// update-region will already be constrained by the resolution, so this line
// will be a no-op.
update_rect.Intersect(
webrtc::VideoFrame::UpdateRect{0, 0, frame.width(), frame.height()});
desktop_frame->mutable_updated_region()->SetRect(
webrtc::DesktopRect::MakeXYWH(update_rect.offset_x,
update_rect.offset_y, update_rect.width,
update_rect.height));
// The update-region has now been applied to the desktop_frame which is
// being sent to the encoder, so empty it here.
accumulated_update_rect_.MakeEmptyUpdate();
}
// Limit the encoding and sending of empty frames to |kKeepAliveInterval|.
// This is done to save on network bandwidth and CPU usage.
if (desktop_frame->updated_region().is_empty() && !top_off_active_ &&
!pending_key_frame_request_ &&
(now - latest_frame_encode_start_time_ < kKeepAliveInterval)) {
// Drop the frame. There is no need to track the update-rect as the
// frame being dropped is empty.
base::SequencedTaskRunner::GetCurrentDefault()->PostTask(
FROM_HERE,
base::BindOnce(&WebrtcVideoEncoderWrapper::NotifyFrameDropped,
weak_factory_.GetWeakPtr()));
return WEBRTC_VIDEO_CODEC_OK;
}
latest_frame_encode_start_time_ = now;
WebrtcVideoEncoder::FrameParams frame_params;
// SetRates() must be called prior to Encode(), with a non-zero bitrate.
DCHECK_NE(0, bitrate_kbps_);
frame_params.bitrate_kbps = bitrate_kbps_;
frame_params.duration = target_frame_interval_;
// TODO(crbug.com/1192865): Copy the FPS estimator from the scheduler,
// instead of hard-coding this value here.
frame_params.fps = target_frame_rate_;
frame_params.vpx_min_quantizer =
ShouldDropQualityForLargeFrame(*desktop_frame) ? kMaxQuantizer
: kMinQuantizer;
frame_params.vpx_max_quantizer = kMaxQuantizer;
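// When top-off is not active, the encoder can clear its active map, since no
// previously sent regions still need quality refinement; during top-off the
// map is kept so that low-quality blocks from earlier frames are re-encoded.
// (The exact behavior is up to the codec implementation.)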
frame_params.clear_active_map = !top_off_active_;
frame_params.key_frame = pending_key_frame_request_;
pending_key_frame_request_ = false;
encode_pending_ = true;
auto encode_callback = base::BindPostTask(
base::SequencedTaskRunner::GetCurrentDefault(),
base::BindOnce(&WebrtcVideoEncoderWrapper::OnFrameEncoded,
weak_factory_.GetWeakPtr()));
encode_task_runner_->PostTask(
FROM_HERE,
base::BindOnce(&WebrtcVideoEncoder::Encode,
base::Unretained(encoder_.get()), std::move(desktop_frame),
frame_params, std::move(encode_callback)));
return WEBRTC_VIDEO_CODEC_OK;
}
void WebrtcVideoEncoderWrapper::SetRates(
const RateControlParameters& parameters) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
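// |parameters.bitrate| allocates bits across spatial and temporal layers.
// Simulcast and SVC are not used (see InitEncode()), so the sum is simply
// the target bitrate of the single encoded stream.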
bitrate_kbps_ = parameters.bitrate.get_sum_kbps();
}
void WebrtcVideoEncoderWrapper::OnRttUpdate(int64_t rtt_ms) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
rtt_estimate_ = base::Milliseconds(rtt_ms);
}
webrtc::VideoEncoder::EncoderInfo WebrtcVideoEncoderWrapper::GetEncoderInfo()
const {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
return EncoderInfo();
}
webrtc::EncodedImageCallback::Result
WebrtcVideoEncoderWrapper::ReturnEncodedFrame(
const WebrtcVideoEncoder::EncodedFrame& frame) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Non-null, because WebRTC registers a callback before calling Encode().
DCHECK(encoded_callback_);
webrtc::EncodedImage encoded_image;
encoded_image.SetEncodedData(frame.data);
encoded_image._encodedWidth = frame.dimensions.width();
encoded_image._encodedHeight = frame.dimensions.height();
encoded_image._frameType = frame.key_frame
? webrtc::VideoFrameType::kVideoFrameKey
: webrtc::VideoFrameType::kVideoFrameDelta;
encoded_image.SetTimestamp(frame.rtp_timestamp);
encoded_image.playout_delay_.min_ms = 0;
encoded_image.playout_delay_.max_ms = 0;
encoded_image.content_type_ = webrtc::VideoContentType::SCREENSHARE;
webrtc::CodecSpecificInfo codec_specific_info;
codec_specific_info.codecType = frame.codec;
if (frame.codec == webrtc::kVideoCodecVP8) {
webrtc::CodecSpecificInfoVP8* vp8_info =
&codec_specific_info.codecSpecific.VP8;
vp8_info->temporalIdx = webrtc::kNoTemporalIdx;
} else if (frame.codec == webrtc::kVideoCodecVP9) {
webrtc::CodecSpecificInfoVP9* vp9_info =
&codec_specific_info.codecSpecific.VP9;
vp9_info->inter_pic_predicted = !frame.key_frame;
vp9_info->ss_data_available = frame.key_frame;
vp9_info->spatial_layer_resolution_present = frame.key_frame;
if (frame.key_frame) {
vp9_info->width[0] = frame.dimensions.width();
vp9_info->height[0] = frame.dimensions.height();
}
vp9_info->num_spatial_layers = 1;
vp9_info->gof_idx = webrtc::kNoGofIdx;
vp9_info->temporal_idx = webrtc::kNoTemporalIdx;
vp9_info->flexible_mode = false;
vp9_info->temporal_up_switch = true;
vp9_info->inter_layer_predicted = false;
vp9_info->first_frame_in_picture = true;
} else if (frame.codec == webrtc::kVideoCodecH264) {
#if defined(USE_H264_ENCODER)
webrtc::CodecSpecificInfoH264* h264_info =
&codec_specific_info.codecSpecific.H264;
h264_info->packetization_mode =
webrtc::H264PacketizationMode::NonInterleaved;
#else
NOTREACHED();
#endif
} else if (frame.codec == webrtc::kVideoCodecAV1) {
// TODO(joedow): Set codec specific params for AV1 here.
} else {
NOTREACHED();
}
return encoded_callback_->OnEncodedImage(encoded_image, &codec_specific_info);
}
void WebrtcVideoEncoderWrapper::OnFrameEncoded(
WebrtcVideoEncoder::EncodeResult encode_result,
std::unique_ptr<WebrtcVideoEncoder::EncodedFrame> encoded_frame) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Keep |encoded_frame| alive until frame-encoded/frame-sent notifications
// have executed on |main_task_runner_|.
std::unique_ptr<WebrtcVideoEncoder::EncodedFrame, base::OnTaskRunnerDeleter>
frame(encoded_frame.release(),
base::OnTaskRunnerDeleter(main_task_runner_));
DCHECK(encode_pending_);
encode_pending_ = false;
// Transfer the cached frame stats into the encoded frame.
if (frame) {
// This is non-null because the |encode_pending_| flag ensures that
// frame-encodings are serialized. So there cannot be 2 consecutive calls to
// this method without an intervening call to Encode() which sets
// |frame_stats_| to non-null.
DCHECK(frame_stats_);
frame_stats_->encode_ended_time = base::TimeTicks::Now();
frame_stats_->rtt_estimate = rtt_estimate_;
frame_stats_->bandwidth_estimate_kbps = bitrate_kbps_;
// WebrtcFrameSchedulerConstantRate cannot estimate this delay. Set it to 0
// so the client can still calculate the derived stats.
frame_stats_->send_pending_delay = base::TimeDelta();
frame->stats = std::move(frame_stats_);
frame->rtp_timestamp = rtp_timestamp_;
}
if (encode_result != WebrtcVideoEncoder::EncodeResult::SUCCEEDED) {
// TODO(crbug.com/1192865): Store this error and communicate it to WebRTC
// via the next call to Encode(). The VPX encoders are never expected to
// return an error, but hardware encoders such as H264 may fail.
LOG(ERROR) << "Video encoder returned error "
<< EncodeResultToString(encode_result);
NotifyFrameDropped();
DropPendingFrame();
return;
}
if (!frame || !frame->data || !frame->data->size()) {
top_off_active_ = false;
NotifyFrameDropped();
DropPendingFrame();
return;
}
// Top-off until the best quantizer value is reached.
top_off_active_ = (frame->quantizer > kMinQuantizer);
// If there was a successful capture while the encoder was working, then
// there will be a frame waiting to be encoded. Send it to the encoder now
// that it's no longer busy and we've copied the frame stats for the current
// frame.
// Note: SchedulePendingFrame() is called here rather than at the end of this
// method because doing so saves a few hundred microseconds per frame. It can
// be moved if the need ever arises, but be sure to profile the per-frame
// cost.
SchedulePendingFrame();
// WARNING: No frame-specific class members should be accessed after this
// point as they may be updated in Encode() when the pending frame is sent to
// the encoder.
webrtc::EncodedImageCallback::Result send_result = ReturnEncodedFrame(*frame);
// std::ref() is used here because base::BindOnce() would otherwise try to
// copy the referenced frame object, which is move-only. This is safe because
// base::OnTaskRunnerDeleter posts the frame-deleter task to run after this
// task has executed.
main_task_runner_->PostTask(
FROM_HERE, base::BindOnce(&VideoStreamEventRouter::OnEncodedFrameSent,
video_stream_event_router_, *screen_id_,
send_result, std::ref(*frame)));
}
void WebrtcVideoEncoderWrapper::NotifyFrameDropped() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(encoded_callback_);
encoded_callback_->OnDroppedFrame(
webrtc::EncodedImageCallback::DropReason::kDroppedByEncoder);
}
bool WebrtcVideoEncoderWrapper::ShouldDropQualityForLargeFrame(
const webrtc::DesktopFrame& frame) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (codec_type_ != webrtc::kVideoCodecVP8) {
return false;
}
int64_t updated_area = 0;
for (webrtc::DesktopRegion::Iterator r(frame.updated_region()); !r.IsAtEnd();
r.Advance()) {
updated_area += r.rect().width() * r.rect().height();
}
bool should_drop_quality = false;
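// Compare the updated area against the maximum recorded over recent frames
// (see |updated_region_area_|), so quality is only reduced for frames that
// are substantially larger than what has lately been encoded.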
if (updated_area - updated_region_area_.Max() > kBigFrameThresholdPixels) {
int expected_frame_size =
updated_area * kEstimatedBytesPerMegapixel / kPixelsPerMegapixel;
base::TimeDelta expected_send_delay =
base::Seconds(expected_frame_size * 8 / (bitrate_kbps_ * 1000.0));
if (expected_send_delay > target_frame_interval_) {
should_drop_quality = true;
}
}
updated_region_area_.Record(updated_area);
return should_drop_quality;
}
void WebrtcVideoEncoderWrapper::SchedulePendingFrame() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (pending_frame_) {
auto pending_frame = std::move(pending_frame_);
Encode(*pending_frame, nullptr);
}
}
void WebrtcVideoEncoderWrapper::DropPendingFrame() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (pending_frame_) {
pending_frame_.reset();
NotifyFrameDropped();
}
}
} // namespace remoting::protocol