[email protected] | a502bbe7 | 2011-01-07 18:06:45 | [diff] [blame^] | 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 4 | // |
| 5 | // A JSON parser. Converts strings of JSON into a Value object (see |
| 6 | // base/values.h). |
| 7 | // http://www.ietf.org/rfc/rfc4627.txt?number=4627 |
| 8 | // |
| 9 | // Known limitations/deviations from the RFC: |
| 10 | // - Only knows how to parse ints within the range of a signed 32 bit int and |
| 11 | // decimal numbers within a double. |
| 12 | // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 |
| 13 | // (BE or LE) and UTF-32 (BE or LE) as well. |
| 14 | // - We limit nesting to 100 levels to prevent stack overflow (this is allowed |
| 15 | // by the RFC). |
| 16 | // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") notes that a data |
| 17 | // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input |
| 18 | // UTF-8 string for the JSONReader::JsonToValue() function may start with a |
| 19 | // UTF-8 BOM (0xEF, 0xBB, 0xBF). |
| 20 | // To keep the function from mistaking a UTF-8 BOM for an invalid |
| 21 | // character, the function skips a Unicode BOM at the beginning of the |
| 22 | // Unicode string (converted from the input UTF-8 string) before parsing it. |
| 23 | // |
[email protected] | e724599 | 2008-07-29 00:01:31 | [diff] [blame] | 24 | // TODO(tc): Add a parsing option to relax object keys being wrapped in |
| 25 | // double quotes |
| 26 | // TODO(tc): Add an option to disable comment stripping |
[email protected] | 88e72845 | 2008-12-05 22:14:46 | [diff] [blame] | 27 | // TODO(aa): Consider making the constructor public and the static Read() method |
| 28 | // only a convenience for the common uses with more complex configuration going |
| 29 | // on the instance. |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 30 | |
[email protected] | 93d49d7 | 2009-10-23 20:00:20 | [diff] [blame] | 31 | #ifndef BASE_JSON_JSON_READER_H_ |
| 32 | #define BASE_JSON_JSON_READER_H_ |
[email protected] | 32b76ef | 2010-07-26 23:08:24 | [diff] [blame] | 33 | #pragma once |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 34 | |
| 35 | #include <string> |
| 36 | |
| 37 | #include "base/basictypes.h" |
[email protected] | c646aed | 2010-01-21 19:46:27 | [diff] [blame] | 38 | |
// gtest_prod.h lives at different paths in the Chromium and Chromium OS
// checkouts, so including it here would break one of the two builds. Its sole
// content, the FRIEND_TEST macro, is duplicated below instead. Keep this copy
// in sync with gtest if the upstream definition ever changes.
#define FRIEND_TEST(test_case_name, test_name) \
  friend class test_case_name##_##test_name##_Test
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 44 | |
| 45 | class Value; |
| 46 | |
[email protected] | 93d49d7 | 2009-10-23 20:00:20 | [diff] [blame] | 47 | namespace base { |
| 48 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 49 | class JSONReader { |
| 50 | public: |
| 51 | // A struct to hold a JS token. |
| 52 | class Token { |
| 53 | public: |
| 54 | enum Type { |
| 55 | OBJECT_BEGIN, // { |
| 56 | OBJECT_END, // } |
| 57 | ARRAY_BEGIN, // [ |
| 58 | ARRAY_END, // ] |
| 59 | STRING, |
| 60 | NUMBER, |
| 61 | BOOL_TRUE, // true |
| 62 | BOOL_FALSE, // false |
| 63 | NULL_TOKEN, // null |
| 64 | LIST_SEPARATOR, // , |
| 65 | OBJECT_PAIR_SEPARATOR, // : |
| 66 | END_OF_INPUT, |
| 67 | INVALID_TOKEN, |
| 68 | }; |
| 69 | Token(Type t, const wchar_t* b, int len) |
| 70 | : type(t), begin(b), length(len) {} |
| 71 | |
[email protected] | a502bbe7 | 2011-01-07 18:06:45 | [diff] [blame^] | 72 | // Get the character that's one past the end of this token. |
| 73 | wchar_t NextChar() { |
| 74 | return *(begin + length); |
| 75 | } |
| 76 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 77 | Type type; |
| 78 | |
| 79 | // A pointer into JSONReader::json_pos_ that's the beginning of this token. |
| 80 | const wchar_t* begin; |
| 81 | |
| 82 | // End should be one char past the end of the token. |
| 83 | int length; |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 84 | }; |
| 85 | |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 86 | // Error codes during parsing. |
| 87 | enum JsonParseError { |
| 88 | JSON_NO_ERROR = 0, |
| 89 | JSON_BAD_ROOT_ELEMENT_TYPE, |
| 90 | JSON_INVALID_ESCAPE, |
| 91 | JSON_SYNTAX_ERROR, |
| 92 | JSON_TRAILING_COMMA, |
| 93 | JSON_TOO_MUCH_NESTING, |
| 94 | JSON_UNEXPECTED_DATA_AFTER_ROOT, |
| 95 | JSON_UNSUPPORTED_ENCODING, |
| 96 | JSON_UNQUOTED_DICTIONARY_KEY, |
| 97 | }; |
| 98 | |
| 99 | // String versions of parse error codes. |
[email protected] | 88e72845 | 2008-12-05 22:14:46 | [diff] [blame] | 100 | static const char* kBadRootElementType; |
| 101 | static const char* kInvalidEscape; |
| 102 | static const char* kSyntaxError; |
| 103 | static const char* kTrailingComma; |
| 104 | static const char* kTooMuchNesting; |
| 105 | static const char* kUnexpectedDataAfterRoot; |
| 106 | static const char* kUnsupportedEncoding; |
| 107 | static const char* kUnquotedDictionaryKey; |
| 108 | |
[email protected] | 703e807a | 2009-03-28 19:56:51 | [diff] [blame] | 109 | JSONReader(); |
| 110 | |
[email protected] | b4cebf8 | 2008-12-29 19:59:08 | [diff] [blame] | 111 | // Reads and parses |json|, returning a Value. The caller owns the returned |
| 112 | // instance. If |json| is not a properly formed JSON string, returns NULL. |
[email protected] | b930d13 | 2009-01-05 18:37:51 | [diff] [blame] | 113 | // If |allow_trailing_comma| is true, we will ignore trailing commas in |
| 114 | // objects and arrays even though this goes against the RFC. |
| 115 | static Value* Read(const std::string& json, bool allow_trailing_comma); |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 116 | |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 117 | // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| |
| 118 | // are optional. If specified and NULL is returned, they will be populated |
| 119 | // an error code and a formatted error message (including error location if |
| 120 | // appropriate). Otherwise, they will be unmodified. |
[email protected] | b4cebf8 | 2008-12-29 19:59:08 | [diff] [blame] | 121 | static Value* ReadAndReturnError(const std::string& json, |
| 122 | bool allow_trailing_comma, |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 123 | int* error_code_out, |
| 124 | std::string* error_msg_out); |
[email protected] | 88e72845 | 2008-12-05 22:14:46 | [diff] [blame] | 125 | |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 126 | // Converts a JSON parse error code into a human readable message. |
| 127 | // Returns an empty string if error_code is JSON_NO_ERROR. |
| 128 | static std::string ErrorCodeToString(JsonParseError error_code); |
| 129 | |
| 130 | // Returns the error code if the last call to JsonToValue() failed. |
| 131 | // Returns JSON_NO_ERROR otherwise. |
| 132 | JsonParseError error_code() const { return error_code_; } |
| 133 | |
| 134 | // Converts error_code_ to a human-readable string, including line and column |
| 135 | // numbers if appropriate. |
| 136 | std::string GetErrorMessage() const; |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 137 | |
[email protected] | 703e807a | 2009-03-28 19:56:51 | [diff] [blame] | 138 | // Reads and parses |json|, returning a Value. The caller owns the returned |
| 139 | // instance. If |json| is not a properly formed JSON string, returns NULL and |
| 140 | // a detailed error can be retrieved from |error_message()|. |
| 141 | // If |check_root| is true, we require that the root object be an object or |
| 142 | // array. Otherwise, it can be any valid JSON type. |
| 143 | // If |allow_trailing_comma| is true, we will ignore trailing commas in |
| 144 | // objects and arrays even though this goes against the RFC. |
[email protected] | b4cebf8 | 2008-12-29 19:59:08 | [diff] [blame] | 145 | Value* JsonToValue(const std::string& json, bool check_root, |
| 146 | bool allow_trailing_comma); |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 147 | |
[email protected] | 703e807a | 2009-03-28 19:56:51 | [diff] [blame] | 148 | private: |
| 149 | static std::string FormatErrorMessage(int line, int column, |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 150 | const std::string& description); |
[email protected] | 703e807a | 2009-03-28 19:56:51 | [diff] [blame] | 151 | |
[email protected] | 93d49d7 | 2009-10-23 20:00:20 | [diff] [blame] | 152 | DISALLOW_COPY_AND_ASSIGN(JSONReader); |
[email protected] | 703e807a | 2009-03-28 19:56:51 | [diff] [blame] | 153 | |
| 154 | FRIEND_TEST(JSONReaderTest, Reading); |
| 155 | FRIEND_TEST(JSONReaderTest, ErrorMessages); |
| 156 | |
[email protected] | b4cebf8 | 2008-12-29 19:59:08 | [diff] [blame] | 157 | // Recursively build Value. Returns NULL if we don't have a valid JSON |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 158 | // string. If |is_root| is true, we verify that the root element is either |
| 159 | // an object or an array. |
[email protected] | b4cebf8 | 2008-12-29 19:59:08 | [diff] [blame] | 160 | Value* BuildValue(bool is_root); |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 161 | |
| 162 | // Parses a sequence of characters into a Token::NUMBER. If the sequence of |
| 163 | // characters is not a valid number, returns a Token::INVALID_TOKEN. Note |
| 164 | // that DecodeNumber is used to actually convert from a string to an |
| 165 | // int/double. |
| 166 | Token ParseNumberToken(); |
| 167 | |
| 168 | // Try and convert the substring that token holds into an int or a double. If |
[email protected] | b4cebf8 | 2008-12-29 19:59:08 | [diff] [blame] | 169 | // we can (ie., no overflow), return the value, else return NULL. |
| 170 | Value* DecodeNumber(const Token& token); |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 171 | |
| 172 | // Parses a sequence of characters into a Token::STRING. If the sequence of |
| 173 | // characters is not a valid string, returns a Token::INVALID_TOKEN. Note |
| 174 | // that DecodeString is used to actually decode the escaped string into an |
| 175 | // actual wstring. |
| 176 | Token ParseStringToken(); |
| 177 | |
| 178 | // Convert the substring into a value string. This should always succeed |
[email protected] | b930d13 | 2009-01-05 18:37:51 | [diff] [blame] | 179 | // (otherwise ParseStringToken would have failed). |
[email protected] | b4cebf8 | 2008-12-29 19:59:08 | [diff] [blame] | 180 | Value* DecodeString(const Token& token); |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 181 | |
| 182 | // Grabs the next token in the JSON stream. This does not increment the |
| 183 | // stream so it can be used to look ahead at the next token. |
| 184 | Token ParseToken(); |
| 185 | |
[email protected] | b930d13 | 2009-01-05 18:37:51 | [diff] [blame] | 186 | // Increments |json_pos_| past leading whitespace and comments. |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 187 | void EatWhitespaceAndComments(); |
| 188 | |
[email protected] | b930d13 | 2009-01-05 18:37:51 | [diff] [blame] | 189 | // If |json_pos_| is at the start of a comment, eat it, otherwise, returns |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 190 | // false. |
| 191 | bool EatComment(); |
| 192 | |
[email protected] | b930d13 | 2009-01-05 18:37:51 | [diff] [blame] | 193 | // Checks if |json_pos_| matches str. |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 194 | bool NextStringMatch(const std::wstring& str); |
| 195 | |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 196 | // Sets the error code that will be returned to the caller. The current |
[email protected] | 88e72845 | 2008-12-05 22:14:46 | [diff] [blame] | 197 | // line and column are determined and added into the final message. |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 198 | void SetErrorCode(const JsonParseError error, const wchar_t* error_pos); |
[email protected] | 88e72845 | 2008-12-05 22:14:46 | [diff] [blame] | 199 | |
| 200 | // Pointer to the starting position in the input string. |
| 201 | const wchar_t* start_pos_; |
| 202 | |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 203 | // Pointer to the current position in the input string. |
| 204 | const wchar_t* json_pos_; |
| 205 | |
| 206 | // Used to keep track of how many nested lists/dicts there are. |
| 207 | int stack_depth_; |
[email protected] | e724599 | 2008-07-29 00:01:31 | [diff] [blame] | 208 | |
| 209 | // A parser flag that allows trailing commas in objects and arrays. |
| 210 | bool allow_trailing_comma_; |
[email protected] | 88e72845 | 2008-12-05 22:14:46 | [diff] [blame] | 211 | |
[email protected] | ba39967 | 2010-04-06 15:42:39 | [diff] [blame] | 212 | // Contains the error code for the last call to JsonToValue(), if any. |
| 213 | JsonParseError error_code_; |
| 214 | int error_line_; |
| 215 | int error_col_; |
initial.commit | d7cae12 | 2008-07-26 21:49:38 | [diff] [blame] | 216 | }; |
| 217 | |
[email protected] | 93d49d7 | 2009-10-23 20:00:20 | [diff] [blame] | 218 | } // namespace base |
| 219 | |
| 220 | #endif // BASE_JSON_JSON_READER_H_ |