1 /*
  2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "cds/archiveBuilder.hpp"
 26 #include "cds/metaspaceShared.hpp"
 27 #include "classfile/altHashing.hpp"
 28 #include "classfile/classLoaderData.hpp"
 29 #include "classfile/vmSymbols.hpp"
 30 #include "gc/shared/collectedHeap.hpp"
 31 #include "logging/log.hpp"
 32 #include "logging/logStream.hpp"
 33 #include "memory/allocation.inline.hpp"
 34 #include "memory/resourceArea.hpp"
 35 #include "memory/universe.hpp"
 36 #include "oops/symbol.hpp"
 37 #include "runtime/atomic.hpp"
 38 #include "runtime/mutexLocker.hpp"
 39 #include "runtime/os.hpp"
 40 #include "runtime/signature.hpp"
 41 #include "utilities/stringUtils.hpp"
 42 #include "utilities/utf8.hpp"
 43 
// Out-of-class definition of the static table of Symbol pointers; it has one
// slot for each of the vmSymbols::number_of_symbols() entries.
Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
 45 
 46 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 47   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 48   assert(refcount >= 0, "negative refcount");
 49   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 50   uint32_t hi = hash;
 51   uint32_t lo = refcount;
 52   return (hi << 16) | lo;
 53 }
 54 
 55 Symbol::Symbol(const u1* name, int length, int refcount) {
 56   assert(length <= max_length(), "SymbolTable should have caught this!");
 57   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 58   _length = (u2)length;
 59   // _body[0..1] are allocated in the header just by coincidence in the current
 60   // implementation of Symbol. They are read by identity_hash(), so make sure they
 61   // are initialized.
 62   // No other code should assume that _body[0..1] are always allocated. E.g., do
 63   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
 64   _body[0] = _body[1] = 0;
 65   memcpy(_body, name, length);
 66 }
 67 
 68 // This copies the symbol when it is added to the ConcurrentHashTable.
 69 Symbol::Symbol(const Symbol& s1) {
 70   _hash_and_refcount = s1._hash_and_refcount;
 71   _length = s1._length;
 72   memcpy(_body, s1._body, _length);
 73 }
 74 
 75 #if INCLUDE_CDS
 76 void Symbol::update_identity_hash() {
 77   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 78   _hash_and_refcount =  pack_hash_and_refcount((short)ArchiveBuilder::current()->entropy(), PERM_REFCOUNT);
 79 }
 80 
 81 void Symbol::set_permanent() {
 82   // This is called at a safepoint during dumping of a dynamic CDS archive.
 83   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 84   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
 85 }
 86 #endif
 87 
 88 // ------------------------------------------------------------------
 89 // Symbol::index_of
 90 //
 91 // Test if we have the give substring at or after the i-th char of this
 92 // symbol's utf8 bytes.
 93 // Return -1 on failure.  Otherwise return the first index where substr occurs.
 94 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
 95   assert(i >= 0 && i <= utf8_length(), "oob");
 96   if (substr_len <= 0)  return 0;
 97   char first_char = substr[0];
 98   address bytes = (address) ((Symbol*)this)->base();
 99   address limit = bytes + utf8_length() - substr_len;  // inclusive limit
100   address scan = bytes + i;
101   if (scan > limit)
102     return -1;
103   for (; scan <= limit; scan++) {
104     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
105     if (scan == nullptr)
106       return -1;  // not found
107     assert(scan >= bytes+i && scan <= limit, "scan oob");
108     if (substr_len <= 2
109         ? (char) scan[substr_len-1] == substr[substr_len-1]
110         : memcmp(scan+1, substr+1, substr_len-1) == 0) {
111       return (int)(scan - bytes);
112     }
113   }
114   return -1;
115 }
116 
117 bool Symbol::is_star_match(const char* pattern) const {
118   if (strchr(pattern, '*') == nullptr) {
119     return equals(pattern);
120   } else {
121     ResourceMark rm;
122     char* buf = as_C_string();
123     return StringUtils::is_star_match(pattern, buf);
124   }
125 }
126 
127 char* Symbol::as_C_string(char* buf, int size) const {
128   if (size > 0) {
129     int len = MIN2(size - 1, utf8_length());
130     for (int i = 0; i < len; i++) {
131       buf[i] = char_at(i);
132     }
133     buf[len] = '\0';
134   }
135   return buf;
136 }
137 
138 char* Symbol::as_C_string() const {
139   int len = utf8_length();
140   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
141   return as_C_string(str, len + 1);
142 }
143 
144 void Symbol::print_utf8_on(outputStream* st) const {
145   st->print("%s", as_C_string());
146 }
147 
148 void Symbol::print_symbol_on(outputStream* st) const {
149   char *s;
150   st = st ? st : tty;
151   {
152     // ResourceMark may not affect st->print(). If st is a string
153     // stream it could resize, using the same resource arena.
154     ResourceMark rm;
155     s = as_quoted_ascii();
156     s = os::strdup(s);
157   }
158   if (s == nullptr) {
159     st->print("(null)");
160   } else {
161     st->print("%s", s);
162     os::free(s);
163   }
164 }
165 
166 char* Symbol::as_quoted_ascii() const {
167   const char *ptr = (const char *)&_body[0];
168   size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
169   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
170   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
171   return result;
172 }
173 
174 jchar* Symbol::as_unicode(int& length) const {
175   Symbol* this_ptr = (Symbol*)this;
176   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
177   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
178   if (length > 0) {
179     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
180   }
181   return result;
182 }
183 
184 const char* Symbol::as_klass_external_name(char* buf, int size) const {
185   if (size > 0) {
186     char* str    = as_C_string(buf, size);
187     int   length = (int)strlen(str);
188     // Turn all '/'s into '.'s (also for array klasses)
189     for (int index = 0; index < length; index++) {
190       if (str[index] == JVM_SIGNATURE_SLASH) {
191         str[index] = JVM_SIGNATURE_DOT;
192       }
193     }
194     return str;
195   } else {
196     return buf;
197   }
198 }
199 
200 const char* Symbol::as_klass_external_name() const {
201   char* str    = as_C_string();
202   int   length = (int)strlen(str);
203   // Turn all '/'s into '.'s (also for array klasses)
204   for (int index = 0; index < length; index++) {
205     if (str[index] == JVM_SIGNATURE_SLASH) {
206       str[index] = JVM_SIGNATURE_DOT;
207     }
208   }
209   return str;
210 }
211 
212 static void print_class(outputStream *os, const SignatureStream& ss) {
213   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
214   for (int i = sb; i < se; ++i) {
215     char ch = ss.raw_char_at(i);
216     if (ch == JVM_SIGNATURE_SLASH) {
217       os->put(JVM_SIGNATURE_DOT);
218     } else {
219       os->put(ch);
220     }
221   }
222 }
223 
224 static void print_array(outputStream *os, SignatureStream& ss) {
225   int dimensions = ss.skip_array_prefix();
226   assert(dimensions > 0, "");
227   if (ss.is_reference()) {
228     print_class(os, ss);
229   } else {
230     os->print("%s", type2name(ss.type()));
231   }
232   for (int i = 0; i < dimensions; ++i) {
233     os->print("[]");
234   }
235 }
236 
237 void Symbol::print_as_signature_external_return_type(outputStream *os) {
238   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
239     if (ss.at_return_type()) {
240       if (ss.is_array()) {
241         print_array(os, ss);
242       } else if (ss.is_reference()) {
243         print_class(os, ss);
244       } else {
245         os->print("%s", type2name(ss.type()));
246       }
247     }
248   }
249 }
250 
251 void Symbol::print_as_signature_external_parameters(outputStream *os) {
252   bool first = true;
253   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
254     if (ss.at_return_type()) break;
255     if (!first) { os->print(", "); }
256     if (ss.is_array()) {
257       print_array(os, ss);
258     } else if (ss.is_reference()) {
259       print_class(os, ss);
260     } else {
261       os->print("%s", type2name(ss.type()));
262     }
263     first = false;
264   }
265 }
266 
267 void Symbol::print_as_field_external_type(outputStream *os) {
268   SignatureStream ss(this, false);
269   assert(!ss.is_done(), "must have at least one element in field ref");
270   assert(!ss.at_return_type(), "field ref cannot be a return type");
271   assert(!Signature::is_method(this), "field ref cannot be a method");
272 
273   if (ss.is_array()) {
274     print_array(os, ss);
275   } else if (ss.is_reference()) {
276     print_class(os, ss);
277   } else {
278     os->print("%s", type2name(ss.type()));
279   }
280 #ifdef ASSERT
281   ss.next();
282   assert(ss.is_done(), "must have at most one element in field ref");
283 #endif
284 }
285 
286 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
287 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
288 // lookup to avoid reviving a dead Symbol.
289 bool Symbol::try_increment_refcount() {
290   uint32_t found = _hash_and_refcount;
291   while (true) {
292     uint32_t old_value = found;
293     int refc = extract_refcount(old_value);
294     if (refc == PERM_REFCOUNT) {
295       return true;  // sticky max or created permanent
296     } else if (refc == 0) {
297       return false; // dead, can't revive.
298     } else {
299       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
300       if (found == old_value) {
301         return true; // successfully updated.
302       }
303       // refcount changed, try again.
304     }
305   }
306 }
307 
308 // The increment_refcount() is called when not doing lookup. It is assumed that you
309 // have a symbol with a non-zero refcount and it can't become zero while referenced by
310 // this caller.
311 void Symbol::increment_refcount() {
312   if (!try_increment_refcount()) {
313     print();
314     fatal("refcount has gone to zero");
315   }
316 #ifndef PRODUCT
317   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
318     NOT_PRODUCT(Atomic::inc(&_total_count);)
319   }
320 #endif
321 }
322 
323 // Decrement refcount potentially while racing increment, so we need
324 // to check the value after attempting to decrement so that if another
325 // thread increments to PERM_REFCOUNT the value is not decremented.
326 void Symbol::decrement_refcount() {
327   uint32_t found = _hash_and_refcount;
328   while (true) {
329     uint32_t old_value = found;
330     int refc = extract_refcount(old_value);
331     if (refc == PERM_REFCOUNT) {
332       return;  // refcount is permanent, permanent is sticky
333     } else if (refc == 0) {
334       print();
335       fatal("refcount underflow");
336       return;
337     } else {
338       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
339       if (found == old_value) {
340         return;  // successfully updated.
341       }
342       // refcount changed, try again.
343     }
344   }
345 }
346 
347 void Symbol::make_permanent() {
348   uint32_t found = _hash_and_refcount;
349   while (true) {
350     uint32_t old_value = found;
351     int refc = extract_refcount(old_value);
352     if (refc == PERM_REFCOUNT) {
353       return;  // refcount is permanent, permanent is sticky
354     } else if (refc == 0) {
355       print();
356       fatal("refcount underflow");
357       return;
358     } else {
359       short hash = extract_hash(old_value);
360       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
361       if (found == old_value) {
362         return;  // successfully updated.
363       }
364       // refcount changed, try again.
365     }
366   }
367 }
368 
369 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
370   if (log_is_enabled(Trace, cds)) {
371     LogStream trace_stream(Log(cds)::trace());
372     trace_stream.print("Iter(Symbol): %p ", this);
373     print_value_on(&trace_stream);
374     trace_stream.cr();
375   }
376 }
377 
378 void Symbol::print_on(outputStream* st) const {
379   st->print("Symbol: '");
380   print_symbol_on(st);
381   st->print("'");
382   st->print(" count %d", refcount());
383 }
384 
385 void Symbol::print() const { print_on(tty); }
386 
387 // The print_value functions are present in all builds, to support the
388 // disassembler and error reporting.
389 void Symbol::print_value_on(outputStream* st) const {
390   st->print_raw("'", 1);
391   st->print_raw((const char*)base(), utf8_length());
392   st->print_raw("'", 1);
393 }
394 
395 void Symbol::print_value() const { print_value_on(tty); }
396 
397 bool Symbol::is_valid(Symbol* s) {
398   if (!is_aligned(s, sizeof(MetaWord))) return false;
399   if ((size_t)s < os::min_page_size()) return false;
400 
401   if (!os::is_readable_range(s, s + 1)) return false;
402 
403   // Symbols are not allocated in Java heap.
404   if (Universe::heap()->is_in(s)) return false;
405 
406   int len = s->utf8_length();
407   if (len < 0) return false;
408 
409   jbyte* bytes = (jbyte*) s->bytes();
410   return os::is_readable_range(bytes, bytes + len);
411 }
412 
// Definition of the counter _total_count, initialized to zero. It exists only
// where NOT_PRODUCT code is compiled in.
// SymbolTable prints this in its statistics
NOT_PRODUCT(size_t Symbol::_total_count = 0;)
415 
416 #ifndef PRODUCT
417 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
418   return vmSymbols::is_valid_id(vm_symbol_id);
419 }
420 #endif