content/renderer/accessibility/ax_image_annotator.cc - chromium/src - Git at Google

 // Copyright 2019 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "content/renderer/accessibility/ax_image_annotator.h"

 #include <utility>
 #include <vector>

 #include "base/base64.h"
 #include "base/containers/contains.h"
 #include "base/containers/fixed_flat_map.h"
 #include "base/i18n/char_iterator.h"
 #include "base/metrics/histogram_functions.h"
 #include "base/strings/string_number_conversions.h"
 #include "base/strings/string_split.h"
 #include "base/strings/string_util.h"
 #include "base/strings/utf_string_conversions.h"
 #include "build/build_config.h"
 #include "content/public/common/content_client.h"
 #include "content/public/renderer/render_frame.h"
 #include "content/renderer/accessibility/ax_image_stopwords.h"
 #include "crypto/sha2.h"
 #include "services/metrics/public/cpp/mojo_ukm_recorder.h"
 #include "services/metrics/public/cpp/ukm_builders.h"
 #include "third_party/blink/public/strings/grit/blink_accessibility_strings.h"
 #include "third_party/blink/public/web/web_ax_object.h"
 #include "third_party/blink/public/web/web_document.h"
 #include "third_party/blink/public/web/web_element.h"
 #include "third_party/blink/public/web/web_local_frame.h"
 #include "third_party/blink/public/web/web_node.h"
 #include "ui/accessibility/accessibility_features.h"
 #include "ui/accessibility/ax_role_properties.h"
 #include "ui/gfx/geometry/size.h"
 #include "ui/gfx/geometry/transform.h"
 #include "url/gurl.h"

 namespace content {

 namespace {

 int GetMessageIdForIconEnum(const std::string& icon_type) {
   static constexpr auto kIconTypeToMessageIdMap =
       base::MakeFixedFlatMap<base::StringPiece, int>({
           {"ICON_PLUS", IDS_AX_IMAGE_ANNOTATION_ICON_PLUS},
           {"ICON_ARROW_BACKWARD", IDS_AX_IMAGE_ANNOTATION_ICON_ARROW_BACKWARD},
           {"ICON_ARROW_FORWARD", IDS_AX_IMAGE_ANNOTATION_ICON_ARROW_FORWARD},
           {"ICON_CALL", IDS_AX_IMAGE_ANNOTATION_ICON_CALL},
           {"ICON_CHAT", IDS_AX_IMAGE_ANNOTATION_ICON_CHAT},
           {"ICON_CHECK", IDS_AX_IMAGE_ANNOTATION_ICON_CHECK},
           {"ICON_X", IDS_AX_IMAGE_ANNOTATION_ICON_X},
           {"ICON_DELETE", IDS_AX_IMAGE_ANNOTATION_ICON_DELETE},
           {"ICON_EDIT", IDS_AX_IMAGE_ANNOTATION_ICON_EDIT},
           {"ICON_EMOJI", IDS_AX_IMAGE_ANNOTATION_ICON_EMOJI},
           {"ICON_END_CALL", IDS_AX_IMAGE_ANNOTATION_ICON_END_CALL},
           {"ICON_V_DOWNWARD", IDS_AX_IMAGE_ANNOTATION_ICON_V_DOWNWARD},
           {"ICON_HEART", IDS_AX_IMAGE_ANNOTATION_ICON_HEART},
           {"ICON_HOME", IDS_AX_IMAGE_ANNOTATION_ICON_HOME},
           {"ICON_INFO", IDS_AX_IMAGE_ANNOTATION_ICON_INFO},
           {"ICON_LAUNCH_APPS", IDS_AX_IMAGE_ANNOTATION_ICON_LAUNCH_APPS},
           {"ICON_THUMBS_UP", IDS_AX_IMAGE_ANNOTATION_ICON_THUMBS_UP},
           {"ICON_THREE_BARS", IDS_AX_IMAGE_ANNOTATION_ICON_THREE_BARS},
           {"ICON_THREE_DOTS", IDS_AX_IMAGE_ANNOTATION_ICON_THREE_DOTS},
           {"ICON_NOTIFICATIONS", IDS_AX_IMAGE_ANNOTATION_ICON_NOTIFICATIONS},
           {"ICON_PAUSE", IDS_AX_IMAGE_ANNOTATION_ICON_PAUSE},
           {"ICON_PLAY", IDS_AX_IMAGE_ANNOTATION_ICON_PLAY},
           {"ICON_REFRESH", IDS_AX_IMAGE_ANNOTATION_ICON_REFRESH},
           {"ICON_MAGNIFYING_GLASS",
            IDS_AX_IMAGE_ANNOTATION_ICON_MAGNIFYING_GLASS},
           {"ICON_SEND", IDS_AX_IMAGE_ANNOTATION_ICON_SEND},
           {"ICON_SETTINGS", IDS_AX_IMAGE_ANNOTATION_ICON_SETTINGS},
           {"ICON_SHARE", IDS_AX_IMAGE_ANNOTATION_ICON_SHARE},
           {"ICON_STAR", IDS_AX_IMAGE_ANNOTATION_ICON_STAR},
           {"ICON_TAKE_PHOTO", IDS_AX_IMAGE_ANNOTATION_ICON_TAKE_PHOTO},
           {"ICON_TIME", IDS_AX_IMAGE_ANNOTATION_ICON_TIME},
           {"ICON_VIDEOCAM", IDS_AX_IMAGE_ANNOTATION_ICON_VIDEOCAM},
           {"ICON_EXPAND", IDS_AX_IMAGE_ANNOTATION_ICON_EXPAND},
           {"ICON_CONTRACT", IDS_AX_IMAGE_ANNOTATION_ICON_CONTRACT},
           {"ICON_GOOGLE", IDS_AX_IMAGE_ANNOTATION_ICON_GOOGLE},
           {"ICON_TWITTER", IDS_AX_IMAGE_ANNOTATION_ICON_TWITTER},
           {"ICON_FACEBOOK", IDS_AX_IMAGE_ANNOTATION_ICON_FACEBOOK},
           {"ICON_ASSISTANT", IDS_AX_IMAGE_ANNOTATION_ICON_ASSISTANT},
           {"ICON_WEATHER", IDS_AX_IMAGE_ANNOTATION_ICON_WEATHER},
           {"ICON_SHOPPING_CART", IDS_AX_IMAGE_ANNOTATION_ICON_SHOPPING_CART},
           {"ICON_UPLOAD", IDS_AX_IMAGE_ANNOTATION_ICON_UPLOAD},
           {"ICON_QUESTION", IDS_AX_IMAGE_ANNOTATION_ICON_QUESTION},
           {"ICON_MIC", IDS_AX_IMAGE_ANNOTATION_ICON_MIC},
           {"ICON_MIC_MUTE", IDS_AX_IMAGE_ANNOTATION_ICON_MIC_MUTE},
           {"ICON_GALLERY", IDS_AX_IMAGE_ANNOTATION_ICON_GALLERY},
           {"ICON_COMPASS", IDS_AX_IMAGE_ANNOTATION_ICON_COMPASS},
           {"ICON_PEOPLE", IDS_AX_IMAGE_ANNOTATION_ICON_PEOPLE},
           {"ICON_ARROW_UPWARD", IDS_AX_IMAGE_ANNOTATION_ICON_ARROW_UPWARD},
           {"ICON_ENVELOPE", IDS_AX_IMAGE_ANNOTATION_ICON_ENVELOPE},
           {"ICON_EMOJI_FACE", IDS_AX_IMAGE_ANNOTATION_ICON_EMOJI_FACE},
           {"ICON_PAPERCLIP", IDS_AX_IMAGE_ANNOTATION_ICON_PAPERCLIP},
           {"ICON_CAST", IDS_AX_IMAGE_ANNOTATION_ICON_CAST},
           {"ICON_VOLUME_UP", IDS_AX_IMAGE_ANNOTATION_ICON_VOLUME_UP},
           {"ICON_VOLUME_DOWN", IDS_AX_IMAGE_ANNOTATION_ICON_VOLUME_DOWN},
           {"ICON_VOLUME_STATE", IDS_AX_IMAGE_ANNOTATION_ICON_VOLUME_STATE},
           {"ICON_VOLUME_MUTE", IDS_AX_IMAGE_ANNOTATION_ICON_VOLUME_MUTE},
           {"ICON_STOP", IDS_AX_IMAGE_ANNOTATION_ICON_STOP},
           {"ICON_SHOPPING_BAG", IDS_AX_IMAGE_ANNOTATION_ICON_SHOPPING_BAG},
           {"ICON_LIST", IDS_AX_IMAGE_ANNOTATION_ICON_LIST},
           {"ICON_LOCATION", IDS_AX_IMAGE_ANNOTATION_ICON_LOCATION},
           {"ICON_CALENDAR", IDS_AX_IMAGE_ANNOTATION_ICON_CALENDAR},
           {"ICON_THUMBS_DOWN", IDS_AX_IMAGE_ANNOTATION_ICON_THUMBS_DOWN},
           {"ICON_HEADSET", IDS_AX_IMAGE_ANNOTATION_ICON_HEADSET},
           {"ICON_REDO", IDS_AX_IMAGE_ANNOTATION_ICON_REDO},
           {"ICON_UNDO", IDS_AX_IMAGE_ANNOTATION_ICON_UNDO},
           {"ICON_DOWNLOAD", IDS_AX_IMAGE_ANNOTATION_ICON_DOWNLOAD},
           {"ICON_ARROW_DOWNWARD", IDS_AX_IMAGE_ANNOTATION_ICON_ARROW_DOWNWARD},
           {"ICON_V_UPWARD", IDS_AX_IMAGE_ANNOTATION_ICON_V_UPWARD},
           {"ICON_V_FORWARD", IDS_AX_IMAGE_ANNOTATION_ICON_V_FORWARD},
           {"ICON_V_BACKWARD", IDS_AX_IMAGE_ANNOTATION_ICON_V_BACKWARD},
           {"ICON_HISTORY", IDS_AX_IMAGE_ANNOTATION_ICON_HISTORY},
           {"ICON_PERSON", IDS_AX_IMAGE_ANNOTATION_ICON_PERSON},
           {"ICON_HAPPY_FACE", IDS_AX_IMAGE_ANNOTATION_ICON_HAPPY_FACE},
           {"ICON_SAD_FACE", IDS_AX_IMAGE_ANNOTATION_ICON_SAD_FACE},
           {"ICON_MOON", IDS_AX_IMAGE_ANNOTATION_ICON_MOON},
           {"ICON_CLOUD", IDS_AX_IMAGE_ANNOTATION_ICON_CLOUD},
           {"ICON_SUN", IDS_AX_IMAGE_ANNOTATION_ICON_SUN},
       });

   auto* iter = kIconTypeToMessageIdMap.find(icon_type);
   if (iter == kIconTypeToMessageIdMap.end())
     return 0;

   return iter->second;
 }

 }  // namespace

 AXImageAnnotator::AXImageAnnotator(
     RenderAccessibilityImpl* const render_accessibility,
     mojo::PendingRemote<image_annotation::mojom::Annotator> annotator)
     : render_accessibility_(render_accessibility),
       annotator_(std::move(annotator)) {
   DCHECK(render_accessibility_);
 }

 AXImageAnnotator::~AXImageAnnotator() {}

 void AXImageAnnotator::Destroy() {
   MarkAllImagesDirty();
 }

 std::string AXImageAnnotator::GetImageAnnotation(
     blink::WebAXObject& image) const {
   DCHECK(!image.IsDetached());
   const auto lookup = image_annotations_.find(image.AxID());
   if (lookup != image_annotations_.end())
     return lookup->second.annotation();
   return std::string();
 }

 ax::mojom::ImageAnnotationStatus AXImageAnnotator::GetImageAnnotationStatus(
     blink::WebAXObject& image) const {
   DCHECK(!image.IsDetached());
   const auto lookup = image_annotations_.find(image.AxID());
   if (lookup != image_annotations_.end())
     return lookup->second.status();
   return ax::mojom::ImageAnnotationStatus::kNone;
 }

 bool AXImageAnnotator::HasAnnotationInCache(blink::WebAXObject& image) const {
   DCHECK(!image.IsDetached());
   if (!HasImageInCache(image))
     return false;
   return image_annotations_.at(image.AxID()).HasAnnotation();
 }

 bool AXImageAnnotator::HasImageInCache(const blink::WebAXObject& image) const {
   DCHECK(!image.IsDetached());
   return base::Contains(image_annotations_, image.AxID());
 }

 void AXImageAnnotator::OnImageAdded(blink::WebAXObject& image) {
   DCHECK(!image.IsDetached());
   DCHECK(!base::Contains(image_annotations_, image.AxID()));
   const std::string image_id = GenerateImageSourceId(image);
   if (image_id.empty())
     return;

   image_annotations_.emplace(image.AxID(), image);
   ImageInfo& image_info = image_annotations_.at(image.AxID());
   // Fetch image annotation.
   annotator_->AnnotateImage(image_id, render_accessibility_->GetLanguage(),
                             image_info.GetImageProcessor(),
                             base::BindOnce(&AXImageAnnotator::OnImageAnnotated,
                                            weak_factory_.GetWeakPtr(), image));
   VLOG(1) << "Requesting annotation for " << image_id << " with language '"
           << render_accessibility_->GetLanguage() << "' from page "
           << GetDocumentUrl();
 }

 void AXImageAnnotator::OnImageUpdated(blink::WebAXObject& image) {
   DCHECK(!image.IsDetached());
   DCHECK(base::Contains(image_annotations_, image.AxID()));
   const std::string image_id = GenerateImageSourceId(image);
   if (image_id.empty())
     return;

   ImageInfo& image_info = image_annotations_.at(image.AxID());
   // Update annotation.
   annotator_->AnnotateImage(image_id, render_accessibility_->GetLanguage(),
                             image_info.GetImageProcessor(),
                             base::BindOnce(&AXImageAnnotator::OnImageAnnotated,
                                            weak_factory_.GetWeakPtr(), image));
 }

 void AXImageAnnotator::OnImageRemoved(blink::WebAXObject& image) {
   DCHECK(!image.IsDetached());
   DCHECK(base::Contains(image_annotations_, image.AxID()));
   image_annotations_.erase(image.AxID());
 }

 // static
 int AXImageAnnotator::GetLengthAfterRemovingStopwords(
     const std::string& image_name) {
   // Split the image name into words by splitting on all whitespace and
   // punctuation. Reject any words that are classified as stopwords.
   // Return the number of remaining codepoints.
   const char* separators = "0123456789`~!@#$%^&*()[]{}\\|;:'\",.<>?/-_=+ ";
   std::vector<std::string> words = base::SplitString(
       image_name, separators, base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
   int remaining_codepoints = 0;
   for (const std::string& word : words) {
     if (AXImageStopwords::GetInstance().IsImageStopword(word.c_str())) {
       continue;
     }

     for (base::i18n::UTF8CharIterator iter(word); !iter.end(); iter.Advance()) {
       remaining_codepoints++;
     }
   }

   return remaining_codepoints;
 }

 // static
 bool AXImageAnnotator::ImageNameHasMostlyStopwords(
     const std::string& image_name) {
   // Compute how many characters remain after removing stopwords.
   int remaining_codepoints = GetLengthAfterRemovingStopwords(image_name);

   // If there are 3 or fewer unicode codepoints remaining, classify
   // the string as "mostly stopwords".
   //
   // More details and analysis in this (Google-internal) design doc:
   // http://goto.google.com/augment-existing-image-descriptions
   return (remaining_codepoints <= 3);
 }

 #if defined(CONTENT_IMPLEMENTATION)
 ContentClient* AXImageAnnotator::GetContentClient() const {
   return content::GetContentClient();
 }
 #else
 ContentClient* AXImageAnnotator::GetContentClient() const {
   return nullptr;
 }
 #endif  // defined(CONTENT_IMPLEMENTATION)

 std::string AXImageAnnotator::GenerateImageSourceId(
     const blink::WebAXObject& image) const {
   DCHECK(render_accessibility_);
   DCHECK(!image.IsDetached());
   const std::string document_url =
       render_accessibility_->GetMainDocument().Url().GetString().Utf8();
   const std::string image_src = image.Url().GetString().Utf8();
   if (document_url.empty() || image_src.empty())
     return std::string();

   // The |image_src| might be a URL that is relative to the document's URL.
   const GURL image_url = GURL(document_url).Resolve(image_src);
   if (!image_url.is_valid())
     return std::string();

   // If |image_url| appears to be publicly reachable, return the URL as the
   // image source ID.
   if (image_url.SchemeIsHTTPOrHTTPS())
     return image_url.spec();

   // If |image_url| is not publicly reachable, return a hash of |image_url|.
   // Scheme could be "data", "javascript", "ftp", "file", etc.
   const std::string& content = image_url.GetContent();
   if (content.empty())
     return std::string();
   std::string source_id;
   base::Base64Encode(crypto::SHA256HashString(content), &source_id);
   return source_id;
 }

 void AXImageAnnotator::MarkAllImagesDirty() {
   for (auto& key_value : image_annotations_) {
     blink::WebAXObject image = blink::WebAXObject::FromWebDocumentByID(
         render_accessibility_->GetMainDocument(), key_value.first);
     if (!image.IsDetached())
       MarkDirty(image);
   }
   image_annotations_.clear();
 }

 void AXImageAnnotator::MarkDirty(const blink::WebAXObject& image) const {
   render_accessibility_->MarkWebAXObjectDirty(image, false /* subtree */);

   // Check two unignored parents. If either of them is a link or root web area,
   // mark it dirty too, because we want a link or document containing exactly
   // a single image and nothing more to get annotated directly, too.
   //
   // TODO(dmazzoni): Expose ParentObjectUnignored in WebAXObject to
   // make this simpler.
   blink::WebAXObject parent = image.ParentObject();
   for (int ancestor_count = 0; !parent.IsDetached() && ancestor_count < 2;
        parent = parent.ParentObject()) {
     if (!parent.AccessibilityIsIgnored()) {
       ++ancestor_count;
       if (ui::IsLink(parent.Role()) || ui::IsPlatformDocument(parent.Role())) {
         render_accessibility_->MarkWebAXObjectDirty(parent,
                                                     false /* subtree */);
         return;
       }
     }
   }
 }

 AXImageAnnotator::ImageInfo::ImageInfo(const blink::WebAXObject& image)
     : image_processor_(
           base::BindRepeating(&AXImageAnnotator::GetImageData, image)),
       status_(ax::mojom::ImageAnnotationStatus::kAnnotationPending),
       annotation_(absl::nullopt) {}

 AXImageAnnotator::ImageInfo::~ImageInfo() = default;

 mojo::PendingRemote<image_annotation::mojom::ImageProcessor>
 AXImageAnnotator::ImageInfo::GetImageProcessor() {
   return image_processor_.GetPendingRemote();
 }

 bool AXImageAnnotator::ImageInfo::HasAnnotation() const {
   switch (status_) {
     case ax::mojom::ImageAnnotationStatus::kNone:
     case ax::mojom::ImageAnnotationStatus::kWillNotAnnotateDueToScheme:
     case ax::mojom::ImageAnnotationStatus::kIneligibleForAnnotation:
     // The user hasn't requested an annotation yet, or a previously pending
     // annotation request had been cancelled.
     case ax::mojom::ImageAnnotationStatus::kEligibleForAnnotation:
     case ax::mojom::ImageAnnotationStatus::kSilentlyEligibleForAnnotation:
     case ax::mojom::ImageAnnotationStatus::kAnnotationPending:
       return false;
     case ax::mojom::ImageAnnotationStatus::kAnnotationSucceeded:
       DCHECK(annotation_.has_value());
       return true;
     case ax::mojom::ImageAnnotationStatus::kAnnotationEmpty:
     // Image has been classified as adult content.
     case ax::mojom::ImageAnnotationStatus::kAnnotationAdult:
     case ax::mojom::ImageAnnotationStatus::kAnnotationProcessFailed:
       DCHECK(!annotation_.has_value());
       return true;
   }
 }

 // static
 SkBitmap AXImageAnnotator::GetImageData(const blink::WebAXObject& image) {
   if (image.IsDetached())
     return SkBitmap();
   blink::WebNode node = image.GetNode();
   if (node.IsNull() || !node.IsElementNode())
     return SkBitmap();
   blink::WebElement element = node.To<blink::WebElement>();
   VLOG(1) << "Uploading pixels for " << element.ImageContents().width() << " x "
           << element.ImageContents().height() << " image";
   return element.ImageContents();
 }

 void AXImageAnnotator::OnImageAnnotated(
     const blink::WebAXObject& image,
     image_annotation::mojom::AnnotateImageResultPtr result) {
   DCHECK(render_accessibility_->GetAXContext());
   render_accessibility_->GetAXContext()->UpdateAXForAllDocuments();

   if (!base::Contains(image_annotations_, image.AxID()))
     return;

   if (image.IsDetached()) {
     image_annotations_.at(image.AxID())
         .set_status(ax::mojom::ImageAnnotationStatus::kIneligibleForAnnotation);
     // We should not mark dirty a detached object.
     return;
   }

   if (features::IsAugmentExistingImageLabelsEnabled()) {
     // Get the image size as minimum and maximum dimension.
     blink::WebAXObject offset_container;
     gfx::RectF bounds;
     gfx::Transform container_transform;
     bool clips_children = false;
     image.GetRelativeBounds(offset_container, bounds, container_transform,
                             &clips_children);
     int min_dimension =
         static_cast<int>(std::min(bounds.width(), bounds.height()));
     int max_dimension =
         static_cast<int>(std::max(bounds.width(), bounds.height()));

     // Collect some histograms on the number of characters in the
     // image name, and also the image name after removing stopwords,
     // and also the minimum and maximum dimension,
     // as a function of whether the retrieved image label was
     // a success, an error, or empty.
     ax::mojom::NameFrom name_from;
     blink::WebVector<blink::WebAXObject> name_objects;
     blink::WebString web_name = image.GetName(name_from, name_objects);
     int non_stop_length = GetLengthAfterRemovingStopwords(web_name.Utf8());

     if (result->is_error_code()) {
       base::UmaHistogramCounts100("Accessibility.ImageLabels.ErrorByNameLength",
                                   web_name.length());
       base::UmaHistogramCounts100(
           "Accessibility.ImageLabels.ErrorByNonStopNameLength",
           non_stop_length);
       base::UmaHistogramCounts1000(
           "Accessibility.ImageLabels.ErrorByMaxDimension", max_dimension);
       base::UmaHistogramCounts1000(
           "Accessibility.ImageLabels.ErrorByMinDimension", min_dimension);
     } else if (!result->is_annotations()) {
       base::UmaHistogramCounts100("Accessibility.ImageLabels.EmptyByNameLength",
                                   web_name.length());
       base::UmaHistogramCounts100(
           "Accessibility.ImageLabels.EmptyByNonStopNameLength",
           non_stop_length);
       base::UmaHistogramCounts1000(
           "Accessibility.ImageLabels.EmptyByMaxDimension", max_dimension);
       base::UmaHistogramCounts1000(
           "Accessibility.ImageLabels.EmptyByMinDimension", min_dimension);
     } else {
       base::UmaHistogramCounts100(
           "Accessibility.ImageLabels.SuccessByNameLength", web_name.length());
       base::UmaHistogramCounts100(
           "Accessibility.ImageLabels.SuccessByNonStopNameLength",
           non_stop_length);
       base::UmaHistogramCounts1000(
           "Accessibility.ImageLabels.SuccessByMaxDimension", max_dimension);
       base::UmaHistogramCounts1000(
           "Accessibility.ImageLabels.SuccessByMinDimension", min_dimension);
     }
   }

   if (result->is_error_code()) {
     DLOG(WARNING) << "Image annotation error.";
     switch (result->get_error_code()) {
       case image_annotation::mojom::AnnotateImageError::kCanceled:
         // By marking the image as having an annotation status of
         // kSilentlyEligibleForAnnotation and not one of
         // kEligibleForAnnotation:, the user will not be asked to visit the
         // context menu to turn on automatic image labels, because there is no
         // way to repeat the operation from that menu yet.
         image_annotations_.at(image.AxID())
             .set_status(ax::mojom::ImageAnnotationStatus::
                             kSilentlyEligibleForAnnotation);
         break;
       case image_annotation::mojom::AnnotateImageError::kFailure:
         image_annotations_.at(image.AxID())
             .set_status(
                 ax::mojom::ImageAnnotationStatus::kAnnotationProcessFailed);
         break;
       case image_annotation::mojom::AnnotateImageError::kAdult:
         image_annotations_.at(image.AxID())
             .set_status(ax::mojom::ImageAnnotationStatus::kAnnotationAdult);
         break;
     }
     MarkDirty(image);
     return;
   }

   if (!result->is_annotations()) {
     DLOG(WARNING) << "No image annotation results.";
     image_annotations_.at(image.AxID())
         .set_status(ax::mojom::ImageAnnotationStatus::kAnnotationEmpty);
     MarkDirty(image);
     return;
   }

   bool has_ocr = false;
   bool has_description = false;
   std::vector<std::string> contextualized_strings;
   for (const mojo::InlinedStructPtr<image_annotation::mojom::Annotation>&
            annotation : result->get_annotations()) {
     int message_id = 0;
     switch (annotation->type) {
       case image_annotation::mojom::AnnotationType::kOcr:
         has_ocr = true;
         message_id = IDS_AX_IMAGE_ANNOTATION_OCR_CONTEXT;
         break;
       case image_annotation::mojom::AnnotationType::kCaption:
       case image_annotation::mojom::AnnotationType::kLabel:
         has_description = true;
         message_id = IDS_AX_IMAGE_ANNOTATION_DESCRIPTION_CONTEXT;
         break;
       case image_annotation::mojom::AnnotationType::kIcon: {
         int icon_message_id = GetMessageIdForIconEnum(annotation->text);

         // Skip unrecognized icon annotation enum.
         if (icon_message_id == 0)
           continue;

         DCHECK(GetContentClient());
         contextualized_strings.push_back(base::UTF16ToUTF8(
             GetContentClient()->GetLocalizedString(icon_message_id)));
         break;
       }
     }

     // Skip unrecognized annotation types.
     if (message_id == 0)
       continue;

     int last_meaningful_char = annotation->text.length() - 1;
     while (last_meaningful_char >= 0) {
       bool is_whitespace_or_punct =
           base::IsAsciiWhitespace(annotation->text[last_meaningful_char]) ||
           base::IsAsciiPunctuation(annotation->text[last_meaningful_char]);
       if (!is_whitespace_or_punct) {
         break;
       }
       last_meaningful_char--;
     }

     if (annotation->text.empty() || last_meaningful_char < 0)
       continue;

     std::string text = annotation->text.substr(0, last_meaningful_char + 1);
     if (GetContentClient()) {
       contextualized_strings.push_back(
           base::UTF16ToUTF8(GetContentClient()->GetLocalizedString(
               message_id, base::UTF8ToUTF16(text))));
     } else {
       contextualized_strings.push_back(text);
     }
   }

   if (contextualized_strings.empty()) {
     image_annotations_.at(image.AxID())
         .set_status(ax::mojom::ImageAnnotationStatus::kAnnotationEmpty);
     MarkDirty(image);
     return;
   }

   ax::mojom::NameFrom name_from;
   blink::WebVector<blink::WebAXObject> name_objects;
   blink::WebString name = image.GetName(name_from, name_objects);
   bool has_existing_label = !name.IsEmpty();

   ukm::builders::Accessibility_ImageDescriptions(
       render_accessibility_->GetMainDocument().GetUkmSourceId())
       .SetOCR(has_ocr)
       .SetDescription(has_description)
       .SetImageAlreadyHasLabel(has_existing_label)
       .Record(render_accessibility_->ukm_recorder());

   image_annotations_.at(image.AxID())
       .set_status(ax::mojom::ImageAnnotationStatus::kAnnotationSucceeded);
   // TODO(accessibility): join two sentences together in a more i18n-friendly
   // way. Since this is intended for a screen reader, though, a period
   // probably works in almost all languages.
   std::string contextualized_string =
       base::JoinString(contextualized_strings, ". ");
   image_annotations_.at(image.AxID()).set_annotation(contextualized_string);
   MarkDirty(image);

   VLOG(1) << "Annotating image on page " << GetDocumentUrl() << " - "
           << contextualized_string;
 }

 std::string AXImageAnnotator::GetDocumentUrl() const {
   const blink::WebLocalFrame* frame =
       render_accessibility_->render_frame()->GetWebFrame();
   return frame->GetDocument().Url().GetString().Utf8();
 }

 }  // namespace content