// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A JSON parser.  Converts strings of JSON into a Value object (see
// base/values.h).
// http://www.ietf.org/rfc/rfc4627.txt?number=4627
//
// Known limitations/deviations from the RFC:
// - Only knows how to parse ints within the range of a signed 32 bit int and
//   decimal numbers within a double.
// - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
//   (BE or LE) and UTF-32 (BE or LE) as well.
// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
//   by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To keep the function from treating a UTF-8 BOM as an invalid character,
//   the function skips a Unicode BOM at the beginning of the Unicode string
//   (converted from the input UTF-8 string) before parsing it.
//
// TODO(tc): Add a parsing option to relax object keys being wrapped in
//   double quotes
// TODO(tc): Add an option to disable comment stripping
// TODO(aa): Consider making the constructor public and the static Read() method
// only a convenience for the common uses with more complex configuration going
// on the instance.

[email protected]93d49d72009-10-23 20:00:2031#ifndef BASE_JSON_JSON_READER_H_
32#define BASE_JSON_JSON_READER_H_
[email protected]32b76ef2010-07-26 23:08:2433#pragma once
initial.commitd7cae122008-07-26 21:49:3834
35#include <string>
36
37#include "base/basictypes.h"
[email protected]c646aed2010-01-21 19:46:2738
// Chromium and Chromium OS check out gtest to different places, so we're
// unable to compile on both if we include gtest_prod.h here.  Instead, include
// its only contents -- this will need to be updated if the macro ever changes.
//
// FRIEND_TEST(test_case_name, test_name) expands to a friend declaration for
// the class named test_case_name_test_name_Test.  The expansion carries no
// trailing semicolon, so the use site must supply one.  The #ifndef keeps this
// copy from clashing with a FRIEND_TEST that has already been defined (for
// example by gtest_prod.h itself).
#ifndef FRIEND_TEST
#define FRIEND_TEST(test_case_name, test_name)\
friend class test_case_name##_##test_name##_Test
#endif

45class Value;
46
[email protected]93d49d72009-10-23 20:00:2047namespace base {
48
initial.commitd7cae122008-07-26 21:49:3849class JSONReader {
50 public:
51 // A struct to hold a JS token.
52 class Token {
53 public:
54 enum Type {
55 OBJECT_BEGIN, // {
56 OBJECT_END, // }
57 ARRAY_BEGIN, // [
58 ARRAY_END, // ]
59 STRING,
60 NUMBER,
61 BOOL_TRUE, // true
62 BOOL_FALSE, // false
63 NULL_TOKEN, // null
64 LIST_SEPARATOR, // ,
65 OBJECT_PAIR_SEPARATOR, // :
66 END_OF_INPUT,
67 INVALID_TOKEN,
68 };
69 Token(Type t, const wchar_t* b, int len)
70 : type(t), begin(b), length(len) {}
71
[email protected]a502bbe72011-01-07 18:06:4572 // Get the character that's one past the end of this token.
73 wchar_t NextChar() {
74 return *(begin + length);
75 }
76
initial.commitd7cae122008-07-26 21:49:3877 Type type;
78
79 // A pointer into JSONReader::json_pos_ that's the beginning of this token.
80 const wchar_t* begin;
81
82 // End should be one char past the end of the token.
83 int length;
initial.commitd7cae122008-07-26 21:49:3884 };
85
[email protected]ba399672010-04-06 15:42:3986 // Error codes during parsing.
87 enum JsonParseError {
88 JSON_NO_ERROR = 0,
89 JSON_BAD_ROOT_ELEMENT_TYPE,
90 JSON_INVALID_ESCAPE,
91 JSON_SYNTAX_ERROR,
92 JSON_TRAILING_COMMA,
93 JSON_TOO_MUCH_NESTING,
94 JSON_UNEXPECTED_DATA_AFTER_ROOT,
95 JSON_UNSUPPORTED_ENCODING,
96 JSON_UNQUOTED_DICTIONARY_KEY,
97 };
98
99 // String versions of parse error codes.
[email protected]88e728452008-12-05 22:14:46100 static const char* kBadRootElementType;
101 static const char* kInvalidEscape;
102 static const char* kSyntaxError;
103 static const char* kTrailingComma;
104 static const char* kTooMuchNesting;
105 static const char* kUnexpectedDataAfterRoot;
106 static const char* kUnsupportedEncoding;
107 static const char* kUnquotedDictionaryKey;
108
[email protected]703e807a2009-03-28 19:56:51109 JSONReader();
110
[email protected]b4cebf82008-12-29 19:59:08111 // Reads and parses |json|, returning a Value. The caller owns the returned
112 // instance. If |json| is not a properly formed JSON string, returns NULL.
[email protected]b930d132009-01-05 18:37:51113 // If |allow_trailing_comma| is true, we will ignore trailing commas in
114 // objects and arrays even though this goes against the RFC.
115 static Value* Read(const std::string& json, bool allow_trailing_comma);
initial.commitd7cae122008-07-26 21:49:38116
[email protected]ba399672010-04-06 15:42:39117 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out|
118 // are optional. If specified and NULL is returned, they will be populated
119 // an error code and a formatted error message (including error location if
120 // appropriate). Otherwise, they will be unmodified.
[email protected]b4cebf82008-12-29 19:59:08121 static Value* ReadAndReturnError(const std::string& json,
122 bool allow_trailing_comma,
[email protected]ba399672010-04-06 15:42:39123 int* error_code_out,
124 std::string* error_msg_out);
[email protected]88e728452008-12-05 22:14:46125
[email protected]ba399672010-04-06 15:42:39126 // Converts a JSON parse error code into a human readable message.
127 // Returns an empty string if error_code is JSON_NO_ERROR.
128 static std::string ErrorCodeToString(JsonParseError error_code);
129
130 // Returns the error code if the last call to JsonToValue() failed.
131 // Returns JSON_NO_ERROR otherwise.
132 JsonParseError error_code() const { return error_code_; }
133
134 // Converts error_code_ to a human-readable string, including line and column
135 // numbers if appropriate.
136 std::string GetErrorMessage() const;
initial.commitd7cae122008-07-26 21:49:38137
[email protected]703e807a2009-03-28 19:56:51138 // Reads and parses |json|, returning a Value. The caller owns the returned
139 // instance. If |json| is not a properly formed JSON string, returns NULL and
140 // a detailed error can be retrieved from |error_message()|.
141 // If |check_root| is true, we require that the root object be an object or
142 // array. Otherwise, it can be any valid JSON type.
143 // If |allow_trailing_comma| is true, we will ignore trailing commas in
144 // objects and arrays even though this goes against the RFC.
[email protected]b4cebf82008-12-29 19:59:08145 Value* JsonToValue(const std::string& json, bool check_root,
146 bool allow_trailing_comma);
initial.commitd7cae122008-07-26 21:49:38147
[email protected]703e807a2009-03-28 19:56:51148 private:
149 static std::string FormatErrorMessage(int line, int column,
[email protected]ba399672010-04-06 15:42:39150 const std::string& description);
[email protected]703e807a2009-03-28 19:56:51151
[email protected]93d49d72009-10-23 20:00:20152 DISALLOW_COPY_AND_ASSIGN(JSONReader);
[email protected]703e807a2009-03-28 19:56:51153
154 FRIEND_TEST(JSONReaderTest, Reading);
155 FRIEND_TEST(JSONReaderTest, ErrorMessages);
156
[email protected]b4cebf82008-12-29 19:59:08157 // Recursively build Value. Returns NULL if we don't have a valid JSON
initial.commitd7cae122008-07-26 21:49:38158 // string. If |is_root| is true, we verify that the root element is either
159 // an object or an array.
[email protected]b4cebf82008-12-29 19:59:08160 Value* BuildValue(bool is_root);
initial.commitd7cae122008-07-26 21:49:38161
162 // Parses a sequence of characters into a Token::NUMBER. If the sequence of
163 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
164 // that DecodeNumber is used to actually convert from a string to an
165 // int/double.
166 Token ParseNumberToken();
167
168 // Try and convert the substring that token holds into an int or a double. If
[email protected]b4cebf82008-12-29 19:59:08169 // we can (ie., no overflow), return the value, else return NULL.
170 Value* DecodeNumber(const Token& token);
initial.commitd7cae122008-07-26 21:49:38171
172 // Parses a sequence of characters into a Token::STRING. If the sequence of
173 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
174 // that DecodeString is used to actually decode the escaped string into an
175 // actual wstring.
176 Token ParseStringToken();
177
178 // Convert the substring into a value string. This should always succeed
[email protected]b930d132009-01-05 18:37:51179 // (otherwise ParseStringToken would have failed).
[email protected]b4cebf82008-12-29 19:59:08180 Value* DecodeString(const Token& token);
initial.commitd7cae122008-07-26 21:49:38181
182 // Grabs the next token in the JSON stream. This does not increment the
183 // stream so it can be used to look ahead at the next token.
184 Token ParseToken();
185
[email protected]b930d132009-01-05 18:37:51186 // Increments |json_pos_| past leading whitespace and comments.
initial.commitd7cae122008-07-26 21:49:38187 void EatWhitespaceAndComments();
188
[email protected]b930d132009-01-05 18:37:51189 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
initial.commitd7cae122008-07-26 21:49:38190 // false.
191 bool EatComment();
192
[email protected]b930d132009-01-05 18:37:51193 // Checks if |json_pos_| matches str.
initial.commitd7cae122008-07-26 21:49:38194 bool NextStringMatch(const std::wstring& str);
195
[email protected]ba399672010-04-06 15:42:39196 // Sets the error code that will be returned to the caller. The current
[email protected]88e728452008-12-05 22:14:46197 // line and column are determined and added into the final message.
[email protected]ba399672010-04-06 15:42:39198 void SetErrorCode(const JsonParseError error, const wchar_t* error_pos);
[email protected]88e728452008-12-05 22:14:46199
200 // Pointer to the starting position in the input string.
201 const wchar_t* start_pos_;
202
initial.commitd7cae122008-07-26 21:49:38203 // Pointer to the current position in the input string.
204 const wchar_t* json_pos_;
205
206 // Used to keep track of how many nested lists/dicts there are.
207 int stack_depth_;
[email protected]e7245992008-07-29 00:01:31208
209 // A parser flag that allows trailing commas in objects and arrays.
210 bool allow_trailing_comma_;
[email protected]88e728452008-12-05 22:14:46211
[email protected]ba399672010-04-06 15:42:39212 // Contains the error code for the last call to JsonToValue(), if any.
213 JsonParseError error_code_;
214 int error_line_;
215 int error_col_;
initial.commitd7cae122008-07-26 21:49:38216};
217
[email protected]93d49d72009-10-23 20:00:20218} // namespace base
219
220#endif // BASE_JSON_JSON_READER_H_