//===-- String utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
| 8 | |
| 9 | #ifndef LIBC_SRC_STRING_STRING_UTILS_H |
| 10 | #define LIBC_SRC_STRING_STRING_UTILS_H |
| 11 | |
cgyurgyik | dc13a9a | 2020-08-07 20:13:48 | [diff] [blame] | 12 | #include "utils/CPP/Bitset.h" |
| 13 | #include <stddef.h> // size_t |
| 14 | |
| 15 | namespace __llvm_libc { |
| 16 | namespace internal { |
| 17 | |
// Returns the length of the null-terminated string 'src', i.e. the number of
// characters that precede the first null terminator.
// 'src' must not be nullptr: it is dereferenced unconditionally.
static inline size_t string_length(const char *src) {
  size_t length = 0;
  while (src[length])
    ++length;
  return length;
}
| 26 | |
// Scans at most the first 'n' bytes of 'src' for the byte 'ch'.
// Returns a pointer to the first matching byte, or nullptr if none of those
// 'n' bytes equals 'ch' (which includes the case 'n' == 0). The scan is
// bounded only by 'n'; a null byte in 'src' does not stop it.
static inline void *find_first_character(const unsigned char *src,
                                         unsigned char ch, size_t n) {
  for (; n && *src != ch; --n, ++src)
    ;
  // 'n' is still non-zero only when the loop stopped on a match. The
  // const_cast removes the qualifier introduced by the parameter type so that
  // a mutable pointer can be handed back to the caller.
  return n ? const_cast<unsigned char *>(src) : nullptr;
}
| 35 | |
cgyurgyik | dc13a9a | 2020-08-07 20:13:48 | [diff] [blame] | 36 | // Returns the maximum length span that contains only characters not found in |
| 37 | // 'segment'. If no characters are found, returns the length of 'src'. |
| 38 | static inline size_t complementary_span(const char *src, const char *segment) { |
| 39 | const char *initial = src; |
| 40 | cpp::Bitset<256> bitset; |
| 41 | |
| 42 | for (; *segment; ++segment) |
| 43 | bitset.set(*segment); |
| 44 | for (; *src && !bitset.test(*src); ++src) |
| 45 | ; |
| 46 | return src - initial; |
| 47 | } |
| 48 | |
parallels | bc45bab | 2020-08-13 19:51:16 | [diff] [blame] | 49 | // Given the similarities between strtok and strtok_r, we can implement both |
| 50 | // using a utility function. On the first call, 'src' is scanned for the |
| 51 | // first character not found in 'delimiter_string'. Once found, it scans until |
| 52 | // the first character in the 'delimiter_string' or the null terminator is |
| 53 | // found. We define this span as a token. The end of the token is appended with |
| 54 | // a null terminator, and the token is returned. The point where the last token |
| 55 | // is found is then stored within 'context' for subsequent calls. Subsequent |
| 56 | // calls will use 'context' when a nullptr is passed in for 'src'. Once the null |
| 57 | // terminating character is reached, returns a nullptr. |
cgyurgyik | 79ce64e | 2020-08-14 19:38:52 | [diff] [blame] | 58 | static inline char *string_token(char *__restrict src, |
| 59 | const char *__restrict delimiter_string, |
| 60 | char **__restrict saveptr) { |
parallels | bc45bab | 2020-08-13 19:51:16 | [diff] [blame] | 61 | cpp::Bitset<256> delimiter_set; |
| 62 | for (; *delimiter_string; ++delimiter_string) |
| 63 | delimiter_set.set(*delimiter_string); |
| 64 | |
| 65 | src = src ? src : *saveptr; |
| 66 | for (; *src && delimiter_set.test(*src); ++src) |
| 67 | ; |
| 68 | if (!*src) { |
| 69 | *saveptr = src; |
| 70 | return nullptr; |
| 71 | } |
| 72 | char *token = src; |
| 73 | for (; *src && !delimiter_set.test(*src); ++src) |
| 74 | ; |
| 75 | if (*src) { |
| 76 | *src = '\0'; |
| 77 | ++src; |
| 78 | } |
| 79 | *saveptr = src; |
| 80 | return token; |
| 81 | } |
| 82 | |
cgyurgyik | dc13a9a | 2020-08-07 20:13:48 | [diff] [blame] | 83 | } // namespace internal |
| 84 | } // namespace __llvm_libc |
| 85 | |
| 86 | #endif // LIBC_SRC_STRING_STRING_UTILS_H |