// chrome/browser/spellcheck_unittest.cc - chromium/src.git - Git at Google

 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "webkit/glue/webkit_glue.h"

 #include "base/file_util.h"
 #include "base/message_loop.h"
 #include "base/path_service.h"
 #include "chrome/browser/spellchecker.h"
 #include "chrome/common/chrome_paths.h"
 #include "testing/gtest/include/gtest/gtest.h"

 namespace {
 // File name of the temporary custom dictionary used by the AddToDictionary
 // tests in this file; those tests delete the file when they finish.
 const FilePath::CharType kTempCustomDictionaryFile[] =
     FILE_PATH_LITERAL("temp_custom_dictionary.txt");
 }  // namespace

 // Test fixture shared by every TEST_F in this file. Its only state is a
 // MessageLoop that lives exactly as long as the fixture object.
 // NOTE(review): presumably SpellChecker needs a MessageLoop on the current
 // thread -- confirm against chrome/browser/spellchecker.h.
 class SpellCheckTest : public testing::Test {
  private:
   MessageLoop message_loop_;
 };

 // Declares a special initialization function intended only for the unit tests
 // in this file. It takes two FILE* handles, named for the Hunspell affix
 // (.aff) and dictionary (.dic) files.
 // NOTE(review): nothing in the visible part of this file calls it -- confirm
 // that the declaration is still needed.
 extern void InitHunspellWithFiles(FILE* file_aff_hunspell,
                                   FILE* file_dic_hunspell);

 // Returns <source root>/chrome/third_party/hunspell/dictionaries, or an empty
 // FilePath when PathService cannot resolve base::DIR_SOURCE_ROOT.
 FilePath GetHunspellDirectory() {
   FilePath source_root;
   if (!PathService::Get(base::DIR_SOURCE_ROOT, &source_root))
     return FilePath();

   // Each AppendASCII() call yields a new path with one more component.
   return source_root.AppendASCII("chrome")
                     .AppendASCII("third_party")
                     .AppendASCII("hunspell")
                     .AppendASCII("dictionaries");
 }

 // Operates unit tests for the webkit_glue::SpellCheckWord() function
 // with the US English dictionary.
 // The unit tests in this function consist of:
 //   * Tests for the function with empty strings;
 //   * Tests for the function with a valid English word;
 //   * Tests for the function with a valid non-English word;
 //   * Tests for the function with a valid English word with a preceding
 //     space character;
 //   * Tests for the function with a valid English word with a preceding
 //     non-English word;
 //   * Tests for the function with a valid English word with a following
 //     space character;
 //   * Tests for the function with a valid English word with a following
 //     non-English word;
 //   * Tests for the function with two valid English words concatenated
 //     with space characters or non-English words;
 //   * Tests for the function with an invalid English word;
 //   * Tests for the function with an invalid English word with a preceding
 //     space character;
 //   * Tests for the function with an invalid English word with a preceding
 //     non-English word;
 //   * Tests for the function with an invalid English word with a following
 //     space character;
 //   * Tests for the function with an invalid English word with a following
 //     non-English word, and;
 //   * Tests for the function with two invalid English words concatenated
 //     with space characters or non-English words.
 // A test marked "[ROBUSTNESS]" is a robustness test that uses a
 // grammatically incorrect string.
 // TODO(hbono): Please feel free to add more tests.
 TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) {
   // Table of inputs for SpellChecker::SpellCheckWord() together with the
   // verdict and the first-misspelling range each input is expected to yield.
   static const struct {
     // A string to be tested.
     const wchar_t* input;
     // An expected result for this test case.
     //   * true: the input string does not have any invalid words.
     //   * false: the input string has one or more invalid words.
     bool expected_result;
     // The position and the length of the first invalid word.
     int misspelling_start;
     int misspelling_length;
   } kTestCases[] = {
     // Empty strings.
     {NULL, true, 0, 0},
     {L"", true, 0, 0},
     {L" ", true, 0, 0},
     {L"\xA0", true, 0, 0},
     {L"\x3000", true, 0, 0},

     // A valid English word "hello".
     {L"hello", true, 0, 0},
     // A valid Chinese word (meaning "hello") consisting of two CJKV
     // ideographs
     {L"\x4F60\x597D", true, 0, 0},
     // A valid Korean word (meaning "hello") consisting of five hangul
     // syllables
     {L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0},
     // A valid Japanese word (meaning "hello") consisting of five Hiragana
     // letters
     {L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0},
     // A valid Hindi word (meaning ?) consisting of six Devanagari letters
     // (This word is copied from "http://b/issue?id=857583".)
     {L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0},
     // A valid English word "affix" using a Latin ligature 'ffi'
     {L"a\xFB03x", true, 0, 0},
     // A valid English word "hello" (fullwidth version)
     {L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F", true, 0, 0},
     // Two valid Greek words (meaning "hello") consisting of seven Greek
     // letters
     {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0},
     // A valid Russian word (meaning "hello") consisting of twelve Cyrillic
     // letters
     {L"\x0437\x0434\x0440\x0430\x0432\x0441"
      L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0},
     // A valid English contraction
     {L"isn't", true, 0, 0},
     // A valid English word enclosed with underscores.
     {L"_hello_", true, 0, 0},

     // A valid English word with a preceding whitespace
     {L" " L"hello", true, 0, 0},
     // A valid English word with a preceding no-break space
     {L"\xA0" L"hello", true, 0, 0},
     // A valid English word with a preceding ideographic space
     {L"\x3000" L"hello", true, 0, 0},
     // A valid English word with a preceding Chinese word
     {L"\x4F60\x597D" L"hello", true, 0, 0},
     // [ROBUSTNESS] A valid English word with a preceding Korean word
     {L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0},
     // A valid English word with a preceding Japanese word
     {L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0},
     // [ROBUSTNESS] A valid English word with a preceding Hindi word
     {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello", true, 0, 0},
     // [ROBUSTNESS] A valid English word with two preceding Greek words
     {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
      L"hello", true, 0, 0},
     // [ROBUSTNESS] A valid English word with a preceding Russian word
     {L"\x0437\x0434\x0440\x0430\x0432\x0441"
      L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0},

     // A valid English word with a following whitespace
     {L"hello" L" ", true, 0, 0},
     // A valid English word with a following no-break space
     {L"hello" L"\xA0", true, 0, 0},
     // A valid English word with a following ideographic space
     {L"hello" L"\x3000", true, 0, 0},
     // A valid English word with a following Chinese word
     {L"hello" L"\x4F60\x597D", true, 0, 0},
     // [ROBUSTNESS] A valid English word with a following Korean word
     {L"hello" L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0},
     // A valid English word with a following Japanese word
     {L"hello" L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0},
     // [ROBUSTNESS] A valid English word with a following Hindi word
     {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0},
     // [ROBUSTNESS] A valid English word with two following Greek words
     {L"hello"
      L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0},
     // [ROBUSTNESS] A valid English word with a following Russian word
     {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441"
      L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0},

     // Two valid English words concatenated with a whitespace
     {L"hello" L" " L"hello", true, 0, 0},
     // Two valid English words concatenated with a no-break space
     {L"hello" L"\xA0" L"hello", true, 0, 0},
     // Two valid English words concatenated with an ideographic space
     {L"hello" L"\x3000" L"hello", true, 0, 0},
     // Two valid English words concatenated with a Chinese word
     {L"hello" L"\x4F60\x597D" L"hello", true, 0, 0},
     // [ROBUSTNESS] Two valid English words concatenated with a Korean word
     {L"hello" L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0},
     // Two valid English words concatenated with a Japanese word
     {L"hello" L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0},
     // [ROBUSTNESS] Two valid English words concatenated with a Hindi word
     {L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello" , true, 0, 0},
     // [ROBUSTNESS] Two valid English words concatenated with two Greek words
     {L"hello" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
      L"hello", true, 0, 0},
     // [ROBUSTNESS] Two valid English words concatenated with a Russian word
     {L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441"
      L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0},
     // [ROBUSTNESS] Two valid English words concatenated with a contraction
     // character.
     {L"hello:hello", true, 0, 0},

     // An invalid English word
     {L"ifmmp", false, 0, 5},
     // An invalid English word "bffly" containing a Latin ligature 'ffl'
     {L"b\xFB04y", false, 0, 3},
     // An invalid English word "ifmmp" (fullwidth version)
     {L"\xFF29\xFF46\xFF4D\xFF4D\xFF50", false, 0, 5},
     // An invalid English contraction
     {L"jtm'u", false, 0, 5},
     // An invalid English word enclosed with underscores.
     {L"_ifmmp_", false, 1, 5},

     // An invalid English word with a preceding whitespace
     {L" " L"ifmmp", false, 1, 5},
     // An invalid English word with a preceding no-break space
     {L"\xA0" L"ifmmp", false, 1, 5},
     // An invalid English word with a preceding ideographic space
     {L"\x3000" L"ifmmp", false, 1, 5},
     // An invalid English word with a preceding Chinese word
     {L"\x4F60\x597D" L"ifmmp", false, 2, 5},
     // [ROBUSTNESS] An invalid English word with a preceding Korean word
     {L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 5, 5},
     // An invalid English word with a preceding Japanese word
     {L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 5, 5},
     // [ROBUSTNESS] An invalid English word with a preceding Hindi word
     {L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp", false, 6, 5},
     // [ROBUSTNESS] An invalid English word with two preceding Greek words
     {L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
      L"ifmmp", false, 8, 5},
     // [ROBUSTNESS] An invalid English word with a preceding Russian word
     {L"\x0437\x0434\x0440\x0430\x0432\x0441"
      L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 12, 5},

     // An invalid English word with a following whitespace
     {L"ifmmp" L" ", false, 0, 5},
     // An invalid English word with a following no-break space
     {L"ifmmp" L"\xA0", false, 0, 5},
     // An invalid English word with a following ideographic space
     {L"ifmmp" L"\x3000", false, 0, 5},
     // An invalid English word with a following Chinese word
     {L"ifmmp" L"\x4F60\x597D", false, 0, 5},
     // [ROBUSTNESS] An invalid English word with a following Korean word
     {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694", false, 0, 5},
     // An invalid English word with a following Japanese word
     {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F", false, 0, 5},
     // [ROBUSTNESS] An invalid English word with a following Hindi word
     {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928", false, 0, 5},
     // [ROBUSTNESS] An invalid English word with two following Greek words
     {L"ifmmp"
      L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", false, 0, 5},
     // [ROBUSTNESS] An invalid English word with a following Russian word
     {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441"
      L"\x0442\x0432\x0443\x0439\x0442\x0435", false, 0, 5},

     // Two invalid English words concatenated with a whitespace
     {L"ifmmp" L" " L"ifmmp", false, 0, 5},
     // Two invalid English words concatenated with a no-break space
     {L"ifmmp" L"\xA0" L"ifmmp", false, 0, 5},
     // Two invalid English words concatenated with an ideographic space
     {L"ifmmp" L"\x3000" L"ifmmp", false, 0, 5},
     // Two invalid English words concatenated with a Chinese word
     {L"ifmmp" L"\x4F60\x597D" L"ifmmp", false, 0, 5},
     // [ROBUSTNESS] Two invalid English words concatenated with a Korean word
     {L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 0, 5},
     // Two invalid English words concatenated with a Japanese word
     {L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 0, 5},
     // [ROBUSTNESS] Two invalid English words concatenated with a Hindi word
     {L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp" , false, 0, 5},
     // [ROBUSTNESS] Two invalid English words concatenated with two Greek words
     {L"ifmmp" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
      L"ifmmp", false, 0, 5},
     // [ROBUSTNESS] Two invalid English words concatenated with a Russian word
     {L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441"
      L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 0, 5},
     // [ROBUSTNESS] Two invalid English words concatenated with a contraction
     // character.
     {L"ifmmp:ifmmp", false, 0, 11},
   };

   FilePath hunspell_directory = GetHunspellDirectory();
   ASSERT_FALSE(hunspell_directory.empty());

   scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
       hunspell_directory, "en-US", NULL, FilePath()));

   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
     // A NULL input is passed through with a length of zero.
     size_t input_length = 0;
     if (kTestCases[i].input != NULL) {
       input_length = wcslen(kTestCases[i].input);
     }
     // Start the out-parameters at zero so the comparisons below never read
     // indeterminate values if SpellCheckWord() returns without writing them.
     int misspelling_start = 0;
     int misspelling_length = 0;
     bool result = spell_checker->SpellCheckWord(kTestCases[i].input,
                                                 static_cast<int>(input_length),
                                                 &misspelling_start,
                                                 &misspelling_length, NULL);

     // The table is long, so report which row failed.
     EXPECT_EQ(kTestCases[i].expected_result, result)
         << "test case #" << i;
     EXPECT_EQ(kTestCases[i].misspelling_start, misspelling_start)
         << "test case #" << i;
     EXPECT_EQ(kTestCases[i].misspelling_length, misspelling_length)
         << "test case #" << i;
   }
 }


 // Checks that SpellChecker::SpellCheckWord() gives the expected verdict for
 // each input and that a specific word appears among the suggestions it
 // returns.
 TEST_F(SpellCheckTest, SpellCheckSuggestions_EN_US) {
   static const struct {
     // A string to be tested.
     const wchar_t* input;
     // An expected result for this test case.
     //   * true: the input string does not have any invalid words.
     //   * false: the input string has one or more invalid words.
     bool expected_result;
     // The position and the length of the first invalid word. (These two
     // fields are not compared by this test.)
     int misspelling_start;
     int misspelling_length;

     // A suggested word that should occur.
     const wchar_t* suggested_word;
   } kTestCases[] = {
     // Each input is paired with one word that must be among the suggestions.
     {L"ello", false, 0, 0, L"hello"},
     {L"ello", false, 0, 0, L"cello"},
     {L"wate", false, 0, 0, L"water"},
     {L"wate", false, 0, 0, L"waste"},
     {L"wate", false, 0, 0, L"sate"},
     {L"wate", false, 0, 0, L"rate"},
     {L"jum", false, 0, 0, L"jump"},
     {L"jum", false, 0, 0, L"rum"},
     {L"jum", false, 0, 0, L"sum"},
     {L"jum", false, 0, 0, L"tum"},
     // TODO (Sidchat): add many more examples.
   };

   FilePath hunspell_directory = GetHunspellDirectory();
   ASSERT_FALSE(hunspell_directory.empty());

   scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
       hunspell_directory, "en-US", NULL, FilePath()));

   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
     const wchar_t* const word = kTestCases[i].input;
     const size_t word_length = (word != NULL) ? wcslen(word) : 0;

     std::vector<std::wstring> suggestions;
     int misspelling_start;
     int misspelling_length;
     bool result = spell_checker->SpellCheckWord(
         word, static_cast<int>(word_length),
         &misspelling_start, &misspelling_length, &suggestions);

     // Check for spelling.
     EXPECT_EQ(kTestCases[i].expected_result, result);

     // Walk the suggestions until the expected word is found.
     bool suggested_word_is_present = false;
     for (std::vector<std::wstring>::const_iterator it = suggestions.begin();
          it != suggestions.end() && !suggested_word_is_present; ++it) {
       suggested_word_is_present = (*it == kTestCases[i].suggested_word);
     }

     EXPECT_TRUE(suggested_word_is_present);
   }
 }

 // Adds words to a SpellChecker and verifies that it accepts them, then
 // verifies that a second SpellChecker created with the same custom dictionary
 // file accepts them as well.
 TEST_F(SpellCheckTest, DISABLED_SpellCheckAddToDictionary_EN_US) {
   static const struct {
     // A string to be added to SpellChecker.
     const wchar_t* word_to_add;
   } kTestCases[] = {
     {L"Googley"},
     {L"Googleplex"},
     {L"Googler"},
   };

   FilePath custom_dictionary_file(kTempCustomDictionaryFile);
   FilePath hunspell_directory = GetHunspellDirectory();
   ASSERT_FALSE(hunspell_directory.empty());

   scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
       hunspell_directory, "en-US", NULL, custom_dictionary_file));

   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
     const wchar_t* const word = kTestCases[i].word_to_add;

     // Register the word with the spellchecker.
     spell_checker->AddWord(std::wstring(word));

     // The word that was just added must now be reported as correctly spelled.
     const size_t word_length = (word != NULL) ? wcslen(word) : 0;
     std::vector<std::wstring> suggestions;
     int misspelling_start;
     int misspelling_length;
     bool result = spell_checker->SpellCheckWord(
         word, static_cast<int>(word_length),
         &misspelling_start, &misspelling_length, &suggestions);

     // Check for spelling.
     EXPECT_TRUE(result);
   }

   // Create a second spellchecker on the same custom dictionary file to see
   // that the added words are picked up by it too.
   scoped_refptr<SpellChecker> spell_checker_new(new SpellChecker(
       hunspell_directory, "en-US", NULL, custom_dictionary_file));

   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
     const wchar_t* const word = kTestCases[i].word_to_add;
     const size_t word_length = (word != NULL) ? wcslen(word) : 0;

     std::vector<std::wstring> suggestions;
     int misspelling_start;
     int misspelling_length;
     bool result = spell_checker_new->SpellCheckWord(
         word, static_cast<int>(word_length),
         &misspelling_start, &misspelling_length, &suggestions);

     // Check for spelling.
     EXPECT_TRUE(result);
   }

   // Remove the temp custom dictionary file.
   file_util::Delete(custom_dictionary_file, false);
 }

 // SpellChecker should suggest custom words for misspelled words.
 // Adds a few custom words to a SpellChecker backed by a temporary custom
 // dictionary file, then checks that misspellings of those words get the
 // expected verdict and that the added words are offered as suggestions.
 TEST_F(SpellCheckTest, DISABLED_SpellCheckSuggestionsAddToDictionary_EN_US) {
   static const struct {
     // A string to be added to SpellChecker.
     const wchar_t* word_to_add;
   } kTestCases[] = {  // word to be added to SpellChecker
     {L"Googley"},
     {L"Googleplex"},
     {L"Googler"},
   };

   FilePath custom_dictionary_file(kTempCustomDictionaryFile);
   FilePath hunspell_directory = GetHunspellDirectory();
   ASSERT_FALSE(hunspell_directory.empty());

   scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
       hunspell_directory, "en-US", NULL, custom_dictionary_file));

   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
     // Add the word to spellchecker.
     spell_checker->AddWord(std::wstring(kTestCases[i].word_to_add));
   }

   // Now check to see whether the custom words are suggested for
   // misspelled but similar words.
   static const struct {
     // A string to be tested.
     const wchar_t* input;
     // An expected result for this test case.
     //   * true: the input string does not have any invalid words.
     //   * false: the input string has one or more invalid words.
     bool expected_result;
     // The position and the length of the first invalid word. (These two
     // fields are not compared by this test.)
     int misspelling_start;
     int misspelling_length;

     // A suggested word that should occur.
     const wchar_t* suggested_word;
   } kTestCasesToBeTested[] = {
     {L"oogley", false, 0, 0, L"Googley"},
     {L"oogler", false, 0, 0, L"Googler"},
     {L"oogleplex", false, 0, 0, L"Googleplex"},
   };

   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCasesToBeTested); ++i) {
     std::vector<std::wstring> suggestions;
     size_t input_length = 0;
     if (kTestCasesToBeTested[i].input != NULL) {
       input_length = wcslen(kTestCasesToBeTested[i].input);
     }
     int misspelling_start;
     int misspelling_length;
     bool result = spell_checker->SpellCheckWord(kTestCasesToBeTested[i].input,
                                                 static_cast<int>(input_length),
                                                 &misspelling_start,
                                                 &misspelling_length,
                                                 &suggestions);

     // Check for spelling. The expected value goes first, matching the other
     // tests in this file, so failure output labels the two values correctly.
     EXPECT_EQ(kTestCasesToBeTested[i].expected_result, result);

     // Check if the suggested words occur.
     bool suggested_word_is_present = false;
     for (size_t j = 0; j < suggestions.size(); ++j) {
       if (suggestions[j].compare(kTestCasesToBeTested[i].suggested_word) ==
           0) {
         suggested_word_is_present = true;
         break;
       }
     }

     EXPECT_TRUE(suggested_word_is_present);
   }

   // Remove the temp custom dictionary file.
   file_util::Delete(custom_dictionary_file, false);
 }
	// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "webkit/glue/webkit_glue.h"

	#include "base/file_util.h"
	#include "base/message_loop.h"
	#include "base/path_service.h"
	#include "chrome/browser/spellchecker.h"
	#include "chrome/common/chrome_paths.h"
	#include "testing/gtest/include/gtest/gtest.h"

	namespace {
	// File name of a temporary custom dictionary.
	// NOTE(review): this repeats the identical definition that appears earlier
	// in this file -- confirm whether the duplicate is intended.
	const FilePath::CharType kTempCustomDictionaryFile[] =
	FILE_PATH_LITERAL("temp_custom_dictionary.txt");
	} // namespace

	// Test fixture whose only state is a MessageLoop that lives as long as the
	// fixture object.
	// NOTE(review): a class with this name is already defined earlier in this
	// file -- confirm whether the duplicate is intended.
	class SpellCheckTest : public testing::Test {
	private:
	MessageLoop message_loop_;
	};

	// Declares a special initialization function intended only for the unit
	// tests in this file. It takes two FILE* handles, named for the Hunspell
	// affix (.aff) and dictionary (.dic) files.
	// NOTE(review): the same declaration already appears earlier in this file.
	extern void InitHunspellWithFiles(FILE* file_aff_hunspell,
	FILE* file_dic_hunspell);

	// Returns <source root>/chrome/third_party/hunspell/dictionaries, or an
	// empty FilePath when PathService cannot resolve base::DIR_SOURCE_ROOT.
	// NOTE(review): a function with this name and body is already defined
	// earlier in this file -- confirm whether the duplicate is intended.
	FilePath GetHunspellDirectory() {
	  FilePath source_root;
	  if (!PathService::Get(base::DIR_SOURCE_ROOT, &source_root))
	    return FilePath();

	  // Each AppendASCII() call yields a new path with one more component.
	  return source_root.AppendASCII("chrome")
	                    .AppendASCII("third_party")
	                    .AppendASCII("hunspell")
	                    .AppendASCII("dictionaries");
	}

	// Operates unit tests for the webkit_glue::SpellCheckWord() function
	// with the US English dictionary.
	// The unit tests in this function consist of:
	// * Tests for the function with empty strings;
	// * Tests for the function with a valid English word;
	// * Tests for the function with a valid non-English word;
	// * Tests for the function with a valid English word with a preceding
	// space character;
	// * Tests for the function with a valid English word with a preceding
	// non-English word;
	// * Tests for the function with a valid English word with a following
	// space character;
	// * Tests for the function with a valid English word with a following
	// non-English word;
	// * Tests for the function with two valid English words concatenated
	// with space characters or non-English words;
	// * Tests for the function with an invalid English word;
	// * Tests for the function with an invalid English word with a preceding
	// space character;
	// * Tests for the function with an invalid English word with a preceding
	// non-English word;
	// * Tests for the function with an invalid English word with a following
	// space character;
	// * Tests for the function with an invalid English word with a following
	// non-English word, and;
	// * Tests for the function with two invalid English words concatenated
	// with space characters or non-English words.
	// A test marked "[ROBUSTNESS]" is a robustness test that uses a
	// grammatically incorrect string.
	// TODO(hbono): Please feel free to add more tests.
	TEST_F(SpellCheckTest, SpellCheckStrings_EN_US) {
	static const struct {
	// A string to be tested.
	const wchar_t* input;
	// An expected result for this test case.
	// * true: the input string does not have any invalid words.
	// * false: the input string has one or more invalid words.
	bool expected_result;
	// The position and the length of the first invalid word.
	int misspelling_start;
	int misspelling_length;
	} kTestCases[] = {
	// Empty strings.
	{NULL, true, 0, 0},
	{L"", true, 0, 0},
	{L" ", true, 0, 0},
	{L"\xA0", true, 0, 0},
	{L"\x3000", true, 0, 0},

	// A valid English word "hello".
	{L"hello", true, 0, 0},
	// A valid Chinese word (meaning "hello") consisiting of two CJKV
	// ideographs
	{L"\x4F60\x597D", true, 0, 0},
	// A valid Korean word (meaning "hello") consisting of five hangul
	// syllables
	{L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0},
	// A valid Japanese word (meaning "hello") consisting of five Hiragana
	// letters
	{L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0},
	// A valid Hindi word (meaning ?) consisting of six Devanagari letters
	// (This word is copied from "http://b/issue?id=857583".)
	{L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0},
	// A valid English word "affix" using a Latin ligature 'ffi'
	{L"a\xFB03x", true, 0, 0},
	// A valid English word "hello" (fullwidth version)
	{L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F", true, 0, 0},
	// Two valid Greek words (meaning "hello") consisting of seven Greek
	// letters
	{L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0},
	// A valid Russian word (meainng "hello") consisting of twelve Cyrillic
	// letters
	{L"\x0437\x0434\x0440\x0430\x0432\x0441"
	L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0},
	// A valid English contraction
	{L"isn't", true, 0, 0},
	// A valid English word enclosed with underscores.
	{L"_hello_", true, 0, 0},

	// A valid English word with a preceding whitespace
	{L" " L"hello", true, 0, 0},
	// A valid English word with a preceding no-break space
	{L"\xA0" L"hello", true, 0, 0},
	// A valid English word with a preceding ideographic space
	{L"\x3000" L"hello", true, 0, 0},
	// A valid English word with a preceding Chinese word
	{L"\x4F60\x597D" L"hello", true, 0, 0},
	// [ROBUSTNESS] A valid English word with a preceding Korean word
	{L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0},
	// A valid English word with a preceding Japanese word
	{L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0},
	// [ROBUSTNESS] A valid English word with a preceding Hindi word
	{L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello", true, 0, 0},
	// [ROBUSTNESS] A valid English word with two preceding Greek words
	{L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
	L"hello", true, 0, 0},
	// [ROBUSTNESS] A valid English word with a preceding Russian word
	{L"\x0437\x0434\x0440\x0430\x0432\x0441"
	L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0},

	// A valid English word with a following whitespace
	{L"hello" L" ", true, 0, 0},
	// A valid English word with a following no-break space
	{L"hello" L"\xA0", true, 0, 0},
	// A valid English word with a following ideographic space
	{L"hello" L"\x3000", true, 0, 0},
	// A valid English word with a following Chinese word
	{L"hello" L"\x4F60\x597D", true, 0, 0},
	// [ROBUSTNESS] A valid English word with a following Korean word
	{L"hello" L"\xC548\xB155\xD558\xC138\xC694", true, 0, 0},
	// A valid English word with a following Japanese word
	{L"hello" L"\x3053\x3093\x306B\x3061\x306F", true, 0, 0},
	// [ROBUSTNESS] A valid English word with a following Hindi word
	{L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928", true, 0, 0},
	// [ROBUSTNESS] A valid English word with two following Greek words
	{L"hello"
	L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", true, 0, 0},
	// [ROBUSTNESS] A valid English word with a following Russian word
	{L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441"
	L"\x0442\x0432\x0443\x0439\x0442\x0435", true, 0, 0},

	// Two valid English words concatenated with a whitespace
	{L"hello" L" " L"hello", true, 0, 0},
	// Two valid English words concatenated with a no-break space
	{L"hello" L"\xA0" L"hello", true, 0, 0},
	// Two valid English words concatenated with an ideographic space
	{L"hello" L"\x3000" L"hello", true, 0, 0},
	// Two valid English words concatenated with a Chinese word
	{L"hello" L"\x4F60\x597D" L"hello", true, 0, 0},
	// [ROBUSTNESS] Two valid English words concatenated with a Korean word
	{L"hello" L"\xC548\xB155\xD558\xC138\xC694" L"hello", true, 0, 0},
	// Two valid English words concatenated with a Japanese word
	{L"hello" L"\x3053\x3093\x306B\x3061\x306F" L"hello", true, 0, 0},
	// [ROBUSTNESS] Two valid English words concatenated with a Hindi word
	{L"hello" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"hello" , true, 0, 0},
	// [ROBUSTNESS] Two valid English words concatenated with two Greek words
	{L"hello" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
	L"hello", true, 0, 0},
	// [ROBUSTNESS] Two valid English words concatenated with a Russian word
	{L"hello" L"\x0437\x0434\x0440\x0430\x0432\x0441"
	L"\x0442\x0432\x0443\x0439\x0442\x0435" L"hello", true, 0, 0},
	// [ROBUSTNESS] Two valid English words concatenated with a contraction
	// character.
	{L"hello:hello", true, 0, 0},

	// An invalid English word
	{L"ifmmp", false, 0, 5},
	// An invalid English word "bffly" containing a Latin ligature 'ffl'
	{L"b\xFB04y", false, 0, 3},
	// An invalid English word "ifmmp" (fullwidth version)
	{L"\xFF29\xFF46\xFF4D\xFF4D\xFF50", false, 0, 5},
	// An invalid English contraction
	{L"jtm'u", false, 0, 5},
	// An invalid English word enclosed with underscores.
	{L"_ifmmp_", false, 1, 5},

	// An invalid English word with a preceding whitespace
	{L" " L"ifmmp", false, 1, 5},
	// An invalid English word with a preceding no-break space
	{L"\xA0" L"ifmmp", false, 1, 5},
	// An invalid English word with a preceding ideographic space
	{L"\x3000" L"ifmmp", false, 1, 5},
	// An invalid English word with a preceding Chinese word
	{L"\x4F60\x597D" L"ifmmp", false, 2, 5},
	// [ROBUSTNESS] An invalid English word with a preceding Korean word
	{L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 5, 5},
	// An invalid English word with a preceding Japanese word
	{L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 5, 5},
	// [ROBUSTNESS] An invalid English word with a preceding Hindi word
	{L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp", false, 6, 5},
	// [ROBUSTNESS] An invalid English word with two preceding Greek words
	{L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
	L"ifmmp", false, 8, 5},
	// [ROBUSTNESS] An invalid English word with a preceding Russian word
	{L"\x0437\x0434\x0440\x0430\x0432\x0441"
	L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 12, 5},

	// An invalid English word with a following whitespace
	{L"ifmmp" L" ", false, 0, 5},
	// An invalid English word with a following no-break space
	{L"ifmmp" L"\xA0", false, 0, 5},
	// An invalid English word with a following ideographic space
	{L"ifmmp" L"\x3000", false, 0, 5},
	// An invalid English word with a following Chinese word
	{L"ifmmp" L"\x4F60\x597D", false, 0, 5},
	// [ROBUSTNESS] An invalid English word with a following Korean word
	{L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694", false, 0, 5},
	// An invalid English word with a following Japanese word
	{L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F", false, 0, 5},
	// [ROBUSTNESS] An invalid English word with a following Hindi word
	{L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928", false, 0, 5},
	// [ROBUSTNESS] An invalid English word with two following Greek words
	{L"ifmmp"
	L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5", false, 0, 5},
	// [ROBUSTNESS] An invalid English word with a following Russian word
	{L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441"
	L"\x0442\x0432\x0443\x0439\x0442\x0435", false, 0, 5},

	// Two invalid English words concatenated with a whitespace
	{L"ifmmp" L" " L"ifmmp", false, 0, 5},
	// Two invalid English words concatenated with a no-break space
	{L"ifmmp" L"\xA0" L"ifmmp", false, 0, 5},
	// Two invalid English words concatenated with an ideographic space
	{L"ifmmp" L"\x3000" L"ifmmp", false, 0, 5},
	// Two invalid English words concatenated with a Chinese word
	{L"ifmmp" L"\x4F60\x597D" L"ifmmp", false, 0, 5},
	// [ROBUSTNESS] Two invalid English words concatenated with a Korean word
	{L"ifmmp" L"\xC548\xB155\xD558\xC138\xC694" L"ifmmp", false, 0, 5},
	// Two invalid English words concatenated with a Japanese word
	{L"ifmmp" L"\x3053\x3093\x306B\x3061\x306F" L"ifmmp", false, 0, 5},
	// [ROBUSTNESS] Two invalid English words concatenated with a Hindi word
	{L"ifmmp" L"\x0930\x093E\x091C\x0927\x093E\x0928" L"ifmmp" , false, 0, 5},
	// [ROBUSTNESS] Two invalid English words concatenated with two Greek words
	{L"ifmmp" L"\x03B3\x03B5\x03B9\x03AC" L" " L"\x03C3\x03BF\x03C5"
	L"ifmmp", false, 0, 5},
	// [ROBUSTNESS] Two invalid English words concatenated with a Russian word
	{L"ifmmp" L"\x0437\x0434\x0440\x0430\x0432\x0441"
	L"\x0442\x0432\x0443\x0439\x0442\x0435" L"ifmmp", false, 0, 5},
	// [ROBUSTNESS] Two invalid English words concatenated with a contraction
	// character.
	{L"ifmmp:ifmmp", false, 0, 11},
	};

	FilePath hunspell_directory = GetHunspellDirectory();
	ASSERT_FALSE(hunspell_directory.empty());

	scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
	hunspell_directory, "en-US", NULL, FilePath()));

	for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
	size_t input_length = 0;
	if (kTestCases[i].input != NULL) {
	input_length = wcslen(kTestCases[i].input);
	}
	int misspelling_start;
	int misspelling_length;
	bool result = spell_checker->SpellCheckWord(kTestCases[i].input,
	static_cast<int>(input_length),
	&misspelling_start,
	&misspelling_length, NULL);

	EXPECT_EQ(kTestCases[i].expected_result, result);
	EXPECT_EQ(kTestCases[i].misspelling_start, misspelling_start);
	EXPECT_EQ(kTestCases[i].misspelling_length, misspelling_length);
	}
	}


	// Verifies that SpellCheckWord() reports each misspelled input as invalid and
	// that the suggestion list it fills in contains the expected correction.
	TEST_F(SpellCheckTest, SpellCheckSuggestions_EN_US) {
	  static const struct {
	    // A string to be tested.
	    const wchar_t* input;
	    // An expected result for this test case.
	    //   * true: the input string does not have any invalid words.
	    //   * false: the input string has one or more invalid words.
	    bool expected_result;
	    // The position and the length of the first invalid word.
	    // NOTE: these two fields are not compared against anything in this test;
	    // only |expected_result| and |suggested_word| are verified below.
	    int misspelling_start;
	    int misspelling_length;

	    // A suggested word that should occur.
	    const wchar_t* suggested_word;
	  } kTestCases[] = {
	    // Misspelled English words, each paired with one expected suggestion.
	    // The same input appears several times to check several suggestions.
	    {L"ello", false, 0, 0, L"hello"},
	    {L"ello", false, 0, 0, L"cello"},
	    {L"wate", false, 0, 0, L"water"},
	    {L"wate", false, 0, 0, L"waste"},
	    {L"wate", false, 0, 0, L"sate"},
	    {L"wate", false, 0, 0, L"rate"},
	    {L"jum", false, 0, 0, L"jump"},
	    {L"jum", false, 0, 0, L"rum"},
	    {L"jum", false, 0, 0, L"sum"},
	    {L"jum", false, 0, 0, L"tum"},
	    // TODO (Sidchat): add many more examples.
	  };

	  FilePath hunspell_directory = GetHunspellDirectory();
	  ASSERT_FALSE(hunspell_directory.empty());

	  scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
	      hunspell_directory, "en-US", NULL, FilePath()));

	  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
	    std::vector<std::wstring> suggestions;
	    size_t input_length = 0;
	    if (kTestCases[i].input != NULL) {
	      input_length = wcslen(kTestCases[i].input);
	    }

	    // Zero-initialized in case SpellCheckWord() returns without writing them.
	    int misspelling_start = 0;
	    int misspelling_length = 0;
	    const bool result = spell_checker->SpellCheckWord(
	        kTestCases[i].input,
	        static_cast<int>(input_length),
	        &misspelling_start,
	        &misspelling_length,
	        &suggestions);

	    // Check for spelling. The index is streamed so that a failure inside the
	    // loop identifies the table entry that caused it.
	    EXPECT_EQ(kTestCases[i].expected_result, result)
	        << "test case #" << i;

	    // Check if the suggested word occurs anywhere in the suggestion list.
	    bool suggested_word_is_present = false;
	    for (size_t j = 0; j < suggestions.size(); ++j) {
	      if (suggestions[j] == kTestCases[i].suggested_word) {
	        suggested_word_is_present = true;
	        break;
	      }
	    }

	    EXPECT_TRUE(suggested_word_is_present)
	        << "expected suggestion missing for test case #" << i;
	  }
	}

	// Adds words to the SpellChecker and verifies that it remembers them: first
	// in the instance they were added to, then in a second instance constructed
	// afterwards with the same custom dictionary file.
	TEST_F(SpellCheckTest, DISABLED_SpellCheckAddToDictionary_EN_US) {
	  static const struct {
	    // A string to be added to SpellChecker.
	    const wchar_t* word_to_add;
	  } kTestCases[] = {
	    {L"Googley"},
	    {L"Googleplex"},
	    {L"Googler"},
	  };

	  FilePath custom_dictionary_file(kTempCustomDictionaryFile);
	  // Remove any dictionary left behind by an earlier run that ended before its
	  // cleanup step; otherwise the second-instance check below could be
	  // satisfied by stale contents rather than by the AddWord() calls made here.
	  file_util::Delete(custom_dictionary_file, false);

	  FilePath hunspell_directory = GetHunspellDirectory();
	  ASSERT_FALSE(hunspell_directory.empty());

	  scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
	      hunspell_directory, "en-US", NULL, custom_dictionary_file));

	  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
	    // Add the word to spellchecker.
	    spell_checker->AddWord(std::wstring(kTestCases[i].word_to_add));

	    // Now check whether it is added to Spellchecker.
	    std::vector<std::wstring> suggestions;
	    size_t input_length = 0;
	    if (kTestCases[i].word_to_add != NULL) {
	      input_length = wcslen(kTestCases[i].word_to_add);
	    }

	    // Zero-initialized in case SpellCheckWord() returns without writing them.
	    int misspelling_start = 0;
	    int misspelling_length = 0;
	    const bool result = spell_checker->SpellCheckWord(
	        kTestCases[i].word_to_add,
	        static_cast<int>(input_length),
	        &misspelling_start,
	        &misspelling_length,
	        &suggestions);

	    // Check for spelling.
	    EXPECT_TRUE(result)
	        << "added word rejected by the same instance, test case #" << i;
	  }

	  // Now initialize another spellchecker to see that AddWord is permanent.
	  scoped_refptr<SpellChecker> spell_checker_new(new SpellChecker(
	      hunspell_directory, "en-US", NULL, custom_dictionary_file));

	  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
	    // Now check whether the word is known to the new Spellchecker.
	    std::vector<std::wstring> suggestions;
	    size_t input_length = 0;
	    if (kTestCases[i].word_to_add != NULL) {
	      input_length = wcslen(kTestCases[i].word_to_add);
	    }

	    int misspelling_start = 0;
	    int misspelling_length = 0;
	    const bool result = spell_checker_new->SpellCheckWord(
	        kTestCases[i].word_to_add,
	        static_cast<int>(input_length),
	        &misspelling_start,
	        &misspelling_length,
	        &suggestions);

	    // Check for spelling.
	    EXPECT_TRUE(result)
	        << "added word rejected by a new instance, test case #" << i;
	  }

	  // Remove the temp custom dictionary file.
	  file_util::Delete(custom_dictionary_file, false);
	}

	// SpellChecker should suggest custom words for misspelled words: after the
	// words are added via AddWord(), near-miss inputs must be reported as
	// misspelled and the matching custom word must appear in the suggestions.
	TEST_F(SpellCheckTest, DISABLED_SpellCheckSuggestionsAddToDictionary_EN_US) {
	  static const struct {
	    // A string to be added to SpellChecker.
	    const wchar_t* word_to_add;
	  } kTestCases[] = {
	    {L"Googley"},
	    {L"Googleplex"},
	    {L"Googler"},
	  };

	  FilePath custom_dictionary_file(kTempCustomDictionaryFile);
	  FilePath hunspell_directory = GetHunspellDirectory();
	  ASSERT_FALSE(hunspell_directory.empty());

	  scoped_refptr<SpellChecker> spell_checker(new SpellChecker(
	      hunspell_directory, "en-US", NULL, custom_dictionary_file));

	  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
	    // Add the word to spellchecker.
	    spell_checker->AddWord(std::wstring(kTestCases[i].word_to_add));
	  }

	  // Now check to see whether the custom words are suggested for
	  // misspelled but similar words.
	  static const struct {
	    // A string to be tested.
	    const wchar_t* input;
	    // An expected result for this test case.
	    //   * true: the input string does not have any invalid words.
	    //   * false: the input string has one or more invalid words.
	    bool expected_result;
	    // The position and the length of the first invalid word.
	    // NOTE: these two fields are not compared against anything in this test;
	    // only |expected_result| and |suggested_word| are verified below.
	    int misspelling_start;
	    int misspelling_length;

	    // A suggested word that should occur.
	    const wchar_t* suggested_word;
	  } kTestCasesToBeTested[] = {
	    {L"oogley", false, 0, 0, L"Googley"},
	    {L"oogler", false, 0, 0, L"Googler"},
	    {L"oogleplex", false, 0, 0, L"Googleplex"},
	  };

	  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCasesToBeTested); ++i) {
	    std::vector<std::wstring> suggestions;
	    size_t input_length = 0;
	    if (kTestCasesToBeTested[i].input != NULL) {
	      input_length = wcslen(kTestCasesToBeTested[i].input);
	    }

	    // Zero-initialized in case SpellCheckWord() returns without writing them.
	    int misspelling_start = 0;
	    int misspelling_length = 0;
	    const bool result = spell_checker->SpellCheckWord(
	        kTestCasesToBeTested[i].input,
	        static_cast<int>(input_length),
	        &misspelling_start,
	        &misspelling_length,
	        &suggestions);

	    // Check for spelling. The expected value goes first, matching the other
	    // tests in this file, so the failure output labels the values correctly.
	    EXPECT_EQ(kTestCasesToBeTested[i].expected_result, result)
	        << "test case #" << i;

	    // Check if the suggested word occurs anywhere in the suggestion list.
	    bool suggested_word_is_present = false;
	    for (size_t j = 0; j < suggestions.size(); ++j) {
	      if (suggestions[j] == kTestCasesToBeTested[i].suggested_word) {
	        suggested_word_is_present = true;
	        break;
	      }
	    }

	    EXPECT_TRUE(suggested_word_is_present)
	        << "custom word not suggested for test case #" << i;
	  }

	  // Remove the temp custom dictionary file.
	  file_util::Delete(custom_dictionary_file, false);
	}