1 /* 2 * Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "jvm_io.h" 26 #include "memory/allocation.hpp" 27 #include "utilities/debug.hpp" 28 #include "utilities/ostream.hpp" 29 #include "utilities/stringUtils.hpp" 30 31 #include <ctype.h> 32 #include <string.h> 33 34 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) { 35 int replace_count = 0; 36 size_t from_len = strlen(from); 37 size_t to_len = strlen(to); 38 assert(from_len >= to_len, "must not expand input"); 39 40 for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) { 41 char* left_over = dst + from_len; 42 memmove(dst, to, to_len); // does not copy trailing 0 of <to> 43 dst += to_len; // skip over the replacement. 44 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over> 45 ++ replace_count; 46 } 47 48 return replace_count; 49 } 50 51 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) { 52 assert(str1 != nullptr && str2 != nullptr, "sanity"); 53 54 // filter out zero-length strings else we will underflow on len-1 below 55 if (len1 == 0 || len2 == 0) { 56 return 0.0; 57 } 58 59 size_t total = len1 + len2; 60 size_t hit = 0; 61 62 for (size_t i = 0; i < len1 - 1; i++) { 63 for (size_t j = 0; j < len2 - 1; j++) { 64 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) { 65 ++hit; 66 break; 67 } 68 } 69 } 70 71 return 2.0 * (double) hit / (double) total; 72 } 73 74 class StringMatcher { 75 public: 76 typedef int getc_function_t(const char* &source, const char* limit); 77 78 private: 79 // These do not get properly inlined. 80 // For full performance, this should be a template class 81 // parameterized by two function arguments. 82 getc_function_t* _pattern_getc; 83 getc_function_t* _string_getc; 84 85 public: 86 StringMatcher(getc_function_t pattern_getc, 87 getc_function_t string_getc) 88 : _pattern_getc(pattern_getc), 89 _string_getc(string_getc) 90 { } 91 92 enum { // special results from _pattern_getc 93 string_match_comma = -0x100 + ',', 94 string_match_star = -0x100 + '*', 95 string_match_eos = -0x100 + '\0' 96 }; 97 98 private: 99 const char* 100 skip_anchor_word(const char* match, 101 const char* match_end, 102 int anchor_length, 103 const char* pattern, 104 const char* pattern_end) { 105 assert(pattern < pattern_end && anchor_length > 0, ""); 106 const char* begp = pattern; 107 int ch1 = _pattern_getc(begp, pattern_end); 108 // note that begp is now advanced over ch1 109 assert(ch1 > 0, "regular char only"); 110 const char* matchp = match; 111 const char* limitp = match_end - anchor_length; 112 while (matchp <= limitp) { 113 int mch = _string_getc(matchp, match_end); 114 if (mch == ch1) { 115 const char* patp = begp; 116 const char* anchorp = matchp; 117 while (patp < pattern_end) { 118 char ch = _pattern_getc(patp, pattern_end); 119 char mch = _string_getc(anchorp, match_end); 120 if (mch != ch) { 121 anchorp = nullptr; 122 break; 123 } 124 } 125 if (anchorp != nullptr) { 126 return anchorp; // Found a full copy of the anchor. 127 } 128 // That did not work, so restart the search for ch1. 129 } 130 } 131 return nullptr; 132 } 133 134 public: 135 bool string_match(const char* pattern, 136 const char* string) { 137 return string_match(pattern, pattern + strlen(pattern), 138 string, string + strlen(string)); 139 } 140 bool string_match(const char* pattern, const char* pattern_end, 141 const char* string, const char* string_end) { 142 const char* patp = pattern; 143 switch (_pattern_getc(patp, pattern_end)) { 144 case string_match_eos: 145 return false; // Empty pattern is always false. 146 case string_match_star: 147 if (patp == pattern_end) { 148 return true; // Lone star pattern is always true. 149 } 150 break; 151 } 152 patp = pattern; // Reset after lookahead. 153 const char* matchp = string; // nullptr if failing 154 for (;;) { 155 int ch = _pattern_getc(patp, pattern_end); 156 switch (ch) { 157 case string_match_eos: 158 case string_match_comma: 159 // End of a list item; see if it's a match. 160 if (matchp == string_end) { 161 return true; 162 } 163 if (ch == string_match_comma) { 164 // Get ready to match the next item. 165 matchp = string; 166 continue; 167 } 168 return false; // End of all items. 169 170 case string_match_star: 171 if (matchp != nullptr) { 172 // Wildcard: Parse out following anchor word and look for it. 173 const char* begp = patp; 174 const char* endp = patp; 175 int anchor_len = 0; 176 for (;;) { 177 // get as many following regular characters as possible 178 endp = patp; 179 ch = _pattern_getc(patp, pattern_end); 180 if (ch <= 0) { 181 break; 182 } 183 anchor_len += 1; 184 } 185 // Anchor word [begp..endp) does not contain ch, so back up. 186 // Now do an eager match to the anchor word, and commit to it. 187 patp = endp; 188 if (ch == string_match_eos || 189 ch == string_match_comma) { 190 // Anchor word is at end of pattern, so treat it as a fixed pattern. 191 const char* limitp = string_end - anchor_len; 192 matchp = limitp; 193 patp = begp; 194 // Resume normal scanning at the only possible match position. 195 continue; 196 } 197 // Find a floating occurrence of the anchor and continue matching. 198 // Note: This is greedy; there is no backtrack here. Good enough. 199 matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp); 200 } 201 continue; 202 } 203 // Normal character. 204 if (matchp != nullptr) { 205 int mch = _string_getc(matchp, string_end); 206 if (mch != ch) { 207 matchp = nullptr; 208 } 209 } 210 } 211 } 212 }; 213 214 // Match a wildcarded class list to a proposed class name (in internal form). 215 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards. 216 class ClassListMatcher : public StringMatcher { 217 public: 218 ClassListMatcher() 219 : StringMatcher(pattern_list_getc, class_name_getc) 220 { } 221 222 private: 223 static int pattern_list_getc(const char* &pattern_ptr, 224 const char* pattern_end) { 225 if (pattern_ptr == pattern_end) { 226 return string_match_eos; 227 } 228 int ch = (unsigned char) *pattern_ptr++; 229 switch (ch) { 230 case ' ': case '\t': case '\n': case '\r': 231 case ',': 232 // End of list item. 233 for (;;) { 234 switch (*pattern_ptr) { 235 case ' ': case '\t': case '\n': case '\r': 236 case ',': 237 pattern_ptr += 1; // Collapse multiple commas or spaces. 238 continue; 239 } 240 break; 241 } 242 return string_match_comma; 243 244 case '*': 245 // Wildcard, matching any number of chars. 246 while (*pattern_ptr == '*') { 247 pattern_ptr += 1; // Collapse multiple stars. 248 } 249 return string_match_star; 250 251 case '.': 252 ch = '/'; // Look for internal form of package separator 253 break; 254 255 case '\\': 256 // Superquote in pattern escapes * , whitespace, and itself. 257 if (pattern_ptr < pattern_end) { 258 ch = (unsigned char) *pattern_ptr++; 259 } 260 break; 261 } 262 263 assert(ch > 0, "regular char only"); 264 return ch; 265 } 266 267 static int class_name_getc(const char* &name_ptr, 268 const char* name_end) { 269 if (name_ptr == name_end) { 270 return string_match_eos; 271 } 272 int ch = (unsigned char) *name_ptr++; 273 if (ch == '.') { 274 ch = '/'; // Normalize to internal form of package separator 275 } 276 return ch; // plain character 277 } 278 }; 279 280 bool StringUtils::class_list_match(const char* class_pattern_list, 281 const char* class_name) { 282 if (class_pattern_list == nullptr || class_name == nullptr || class_name[0] == '\0') 283 return false; 284 ClassListMatcher clm; 285 return clm.string_match(class_pattern_list, class_name); 286 } 287 288 289 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) { 290 if (needle[0] == '\0') { 291 return haystack; // empty needle matches with anything 292 } 293 for (size_t i = 0; haystack[i] != '\0'; i++) { 294 bool matches = true; 295 for (size_t j = 0; needle[j] != '\0'; j++) { 296 if (haystack[i + j] == '\0') { 297 return nullptr; // hit end of haystack, abort 298 } 299 if (tolower(haystack[i + j]) != tolower(needle[j])) { 300 matches = false; 301 break; // abort, try next i 302 } 303 } 304 if (matches) { 305 return &haystack[i]; // all j were ok for this i 306 } 307 } 308 return nullptr; // no i was a match 309 } 310 311 bool StringUtils::is_star_match(const char* star_pattern, const char* str) { 312 const int N = 1000; 313 char pattern[N]; // copy pattern into this to ensure null termination 314 jio_snprintf(pattern, N, "%s", star_pattern);// ensures null termination 315 char buf[N]; // copy parts of pattern into this 316 const char* str_idx = str; 317 const char* pattern_idx = pattern; 318 while (strlen(pattern_idx) > 0) { 319 // find next section in pattern 320 const char* pattern_part_end = strstr(pattern_idx, "*"); 321 const char* pattern_part = pattern_idx; 322 if (pattern_part_end != nullptr) { // copy part into buffer 323 size_t pattern_part_len = pattern_part_end-pattern_part; 324 strncpy(buf, pattern_part, pattern_part_len); 325 buf[pattern_part_len] = '\0'; // end of string 326 pattern_part = buf; 327 } 328 // find this section in s, case insensitive 329 const char* str_match = strstr_nocase(str_idx, pattern_part); 330 if (str_match == nullptr) { 331 return false; // r_part did not match - abort 332 } 333 size_t match_len = strlen(pattern_part); 334 // advance to match position plus part length 335 str_idx = str_match + match_len; 336 // advance by part length and "*" 337 pattern_idx += match_len + (pattern_part_end == nullptr ? 0 : 1); 338 } 339 return true; // all parts of pattern matched 340 } 341 342 StringUtils::CommaSeparatedStringIterator::~CommaSeparatedStringIterator() { 343 FREE_C_HEAP_ARRAY(char, _list); 344 } 345 346 ccstrlist StringUtils::CommaSeparatedStringIterator::canonicalize(ccstrlist option_value) { 347 char* canonicalized_list = NEW_C_HEAP_ARRAY(char, strlen(option_value) + 1, mtCompiler); 348 int i = 0; 349 char current; 350 while ((current = option_value[i]) != '\0') { 351 if (current == '\n' || current == ' ') { 352 canonicalized_list[i] = ','; 353 } else { 354 canonicalized_list[i] = current; 355 } 356 i++; 357 } 358 canonicalized_list[i] = '\0'; 359 return canonicalized_list; 360 }