Blame - url/url_util.h - chromium/src

[email protected]

e7bba5f8

2013-04-10 20:10:52

[diff] [blame^]

//

// Redistribution and use in source and binary forms, with or without

5

// modification, are permitted provided that the following conditions are

6

// met:

7

//

8

// * Redistributions of source code must retain the above copyright

9

// notice, this list of conditions and the following disclaimer.

10

// * Redistributions in binary form must reproduce the above

11

// copyright notice, this list of conditions and the following disclaimer

12

// in the documentation and/or other materials provided with the

13

// distribution.

14

// * Neither the name of Google Inc. nor the names of its

15

// contributors may be used to endorse or promote products derived from

16

// this software without specific prior written permission.

17

//

18

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

19

// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

20

// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

21

// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

22

// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

23

// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

24

// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

25

// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

26

// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

27

// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

28

// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

29

30

#ifndef GOOGLEURL_SRC_URL_UTIL_H__

31

#define GOOGLEURL_SRC_URL_UTIL_H__

#include <string>

#include "base/string16.h"

36

#include "googleurl/src/url_common.h"

37

#include "googleurl/src/url_parse.h"

38

#include "googleurl/src/url_canon.h"

namespace url_util {

// Init ------------------------------------------------------------------------

43

44

// Initialization is NOT required; it will be implicitly initialized when first

45

// used. However, this implicit initialization is NOT threadsafe. If you are

46

// using this library in a threaded environment and don't have a consistent

47

// "first call" (an example might be calling "AddStandardScheme" with your

48

// special application-specific schemes) then you will want to call initialize

49

// before spawning any threads.

50

//

51

// It is OK to call this function more than once; subsequent calls will simply

52

// "noop", unless Shutdown() was called in the meantime. This will also be a

53

// "noop" if other calls to the library have forced an initialization

54

// beforehand.

55

GURL_API void Initialize();

56

57

// Cleanup is not required, except some strings may leak. For most user

58

// applications, this is fine. If you're using it in a library that may get

59

// loaded and unloaded, you'll want to unload to properly clean up your

60

// library.

61

GURL_API void Shutdown();

62

63

// Schemes --------------------------------------------------------------------

64

65

// Adds an application-defined scheme to the internal list of "standard" URL

66

// schemes. This function is not threadsafe and cannot be called concurrently

67

// with any other url_util function. It will assert if the list of standard

68

// schemes has been locked (see LockStandardSchemes).

69

GURL_API void AddStandardScheme(const char* new_scheme);

70

71

// Sets a flag to prevent future calls to AddStandardScheme from succeeding.

72

//

73

// This is designed to help prevent errors for multithreaded applications.

74

// Normal usage would be to call AddStandardScheme for your custom schemes at

75

// the beginning of program initialization, and then LockStandardSchemes. This

76

// prevents future callers from mistakenly calling AddStandardScheme when the

77

// program is running with multiple threads, where such usage would be

78

// dangerous.

79

//

80

// We could have had AddStandardScheme use a lock instead, but that would add

81

// some platform-specific dependencies we don't otherwise have now, and is

82

// overkill considering the normal usage is so simple.

83

GURL_API void LockStandardSchemes();

84

85

// Locates the scheme in the given string and places it into |found_scheme|,

86

// which may be NULL to indicate the caller does not care about the range.

87

//

88

// Returns whether the given |compare| scheme matches the scheme found in the

89

// input (if any). The |compare| scheme must be a valid canonical scheme or

90

// the result of the comparison is undefined.

91

GURL_API bool FindAndCompareScheme(const char* str,

92

int str_len,

93

const char* compare,

94

url_parse::Component* found_scheme);

95

GURL_API bool FindAndCompareScheme(const char16* str,

96

int str_len,

97

const char* compare,

98

url_parse::Component* found_scheme);

99

inline bool FindAndCompareScheme(const std::string& str,

100

const char* compare,

101

url_parse::Component* found_scheme) {

102

return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),

103

compare, found_scheme);

104

}

105

inline bool FindAndCompareScheme(const string16& str,

106

const char* compare,

107

url_parse::Component* found_scheme) {

108

return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),

109

compare, found_scheme);

110

}

111

112

// Returns true if the given string represents a standard URL. This means that

113

// the scheme is in the list of known standard schemes.

114

GURL_API bool IsStandard(const char* spec,

115

const url_parse::Component& scheme);

116

GURL_API bool IsStandard(const char16* spec,

117

const url_parse::Component& scheme);

118

119

// TODO(brettw) remove this. This is a temporary compatibility hack to avoid

120

// breaking the WebKit build when this version is synced via Chrome.

121

inline bool IsStandard(const char* spec, int spec_len,

122

const url_parse::Component& scheme) {

123

return IsStandard(spec, scheme);

124

}

125

126

// URL library wrappers -------------------------------------------------------

127

128

// Parses the given spec according to the extracted scheme type. Normal users

129

// should use the URL object, although this may be useful if performance is

130

// critical and you don't want to do the heap allocation for the std::string.

131

//

132

// As with the url_canon::Canonicalize* functions, the charset converter can

133

// be NULL to use UTF-8 (it will be faster in this case).

134

//

135

// Returns true if a valid URL was produced, false if not. On failure, the

136

// output and parsed structures will still be filled and will be consistent,

137

// but they will not represent a loadable URL.

138

GURL_API bool Canonicalize(const char* spec,

139

int spec_len,

140

url_canon::CharsetConverter* charset_converter,

141

url_canon::CanonOutput* output,

142

url_parse::Parsed* output_parsed);

143

GURL_API bool Canonicalize(const char16* spec,

144

int spec_len,

145

url_canon::CharsetConverter* charset_converter,

146

url_canon::CanonOutput* output,

147

url_parse::Parsed* output_parsed);

148

149

// Resolves a potentially relative URL relative to the given parsed base URL.

150

// The base MUST be valid. The resulting canonical URL and parsed information

151

// will be placed into the given output variables.

152

//

153

// The relative need not be relative. If we discover that it's absolute, this

154

// will produce a canonical version of that URL. See Canonicalize() for more

155

// about the charset_converter.

156

//

157

// Returns true if the output is valid, false if the input could not produce

158

// a valid URL.

159

GURL_API bool ResolveRelative(const char* base_spec,

160

int base_spec_len,

161

const url_parse::Parsed& base_parsed,

162

const char* relative,

163

int relative_length,

164

url_canon::CharsetConverter* charset_converter,

165

url_canon::CanonOutput* output,

166

url_parse::Parsed* output_parsed);

167

GURL_API bool ResolveRelative(const char* base_spec,

168

int base_spec_len,

169

const url_parse::Parsed& base_parsed,

170

const char16* relative,

171

int relative_length,

172

url_canon::CharsetConverter* charset_converter,

173

url_canon::CanonOutput* output,

174

url_parse::Parsed* output_parsed);

175

176

// Replaces components in the given VALID input url. The new canonical URL info

177

// is written to output and out_parsed.

178

//

179

// Returns true if the resulting URL is valid.

180

GURL_API bool ReplaceComponents(

181

const char* spec,

182

int spec_len,

183

const url_parse::Parsed& parsed,

184

const url_canon::Replacements<char>& replacements,

185

url_canon::CharsetConverter* charset_converter,

186

url_canon::CanonOutput* output,

187

url_parse::Parsed* out_parsed);

188

GURL_API bool ReplaceComponents(

189

const char* spec,

190

int spec_len,

191

const url_parse::Parsed& parsed,

192

const url_canon::Replacements<char16>& replacements,

193

url_canon::CharsetConverter* charset_converter,

194

url_canon::CanonOutput* output,

195

url_parse::Parsed* out_parsed);

196

197

// String helper functions ----------------------------------------------------

198

199

// Compare the lower-case form of the given string against the given ASCII

200

// string. This is useful for checking whether an input string matches some

201

// token, and it is optimized to avoid intermediate string copies.

202

//

203

// The versions of this function that don't take a b_end assume that the b

204

// string is NULL terminated.

205

GURL_API bool LowerCaseEqualsASCII(const char* a_begin,

206

const char* a_end,

207

const char* b);

208

GURL_API bool LowerCaseEqualsASCII(const char* a_begin,

const char* a_end,

const char* b_begin,

const char* b_end);

GURL_API bool LowerCaseEqualsASCII(const char16* a_begin,

const char16* a_end,

const char* b);

// Unescapes the given string using URL escaping rules.

217

GURL_API void DecodeURLEscapeSequences(const char* input, int length,

218

url_canon::CanonOutputW* output);

219

220

// Escapes the given string as defined by the JS method encodeURIComponent. See

221

// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent

222

GURL_API void EncodeURIComponent(const char* input, int length,

223

url_canon::CanonOutput* output);

224

225

226

} // namespace url_util

227

228

#endif // GOOGLEURL_SRC_URL_UTIL_H__