1 /* 2 * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import jdk.internal.misc.CDS; 29 import jdk.internal.value.DeserializeConstructor; 30 import jdk.internal.vm.annotation.IntrinsicCandidate; 31 import jdk.internal.vm.annotation.Stable; 32 33 import java.lang.constant.Constable; 34 import java.lang.constant.DynamicConstantDesc; 35 import java.util.Arrays; 36 import java.util.HashMap; 37 import java.util.Locale; 38 import java.util.Map; 39 import java.util.Objects; 40 import java.util.Optional; 41 42 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 43 import static java.lang.constant.ConstantDescs.CD_char; 44 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 45 46 /** 47 * The {@code Character} class is the {@linkplain 48 * java.lang##wrapperClass wrapper class} for values of the primitive 49 * type {@code char}. An object of type {@code Character} contains a 50 * single field whose type is {@code char}. 51 * 52 * <p>In addition, this class provides a large number of static methods for 53 * determining a character's category (lowercase letter, digit, etc.) 54 * and for converting characters from uppercase to lowercase and vice 55 * versa. 56 * 57 * <h2><a id="conformance">Unicode Conformance</a></h2> 58 * <p> 59 * The fields and methods of class {@code Character} are defined in terms 60 * of character information from the Unicode Standard, specifically the 61 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 62 * This file specifies properties including name and category for every 63 * assigned Unicode code point or character range. The file is available 64 * from the Unicode Consortium at 65 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 66 * <p> 67 * Character information is based on the Unicode Standard, version 16.0. 68 * <p> 69 * The Java platform has supported different versions of the Unicode 70 * Standard over time. 
Upgrades to newer versions of the Unicode Standard 71 * occurred in the following Java releases, each indicating the new version: 72 * <table class="striped"> 73 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 74 * <thead> 75 * <tr><th scope="col">Java release</th> 76 * <th scope="col">Unicode version</th></tr> 77 * </thead> 78 * <tbody> 79 * <tr><th scope="row" style="text-align:left">Java SE 24</th> 80 * <td>Unicode 16.0</td></tr> 81 * <tr><th scope="row" style="text-align:left">Java SE 22</th> 82 * <td>Unicode 15.1</td></tr> 83 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 84 * <td>Unicode 15.0</td></tr> 85 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 86 * <td>Unicode 14.0</td></tr> 87 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 88 * <td>Unicode 13.0</td></tr> 89 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 90 * <td>Unicode 12.1</td></tr> 91 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 92 * <td>Unicode 11.0</td></tr> 93 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 94 * <td>Unicode 10.0</td></tr> 95 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 96 * <td>Unicode 8.0</td></tr> 97 * <tr><th scope="row" style="text-align:left">Java SE 8</th> 98 * <td>Unicode 6.2</td></tr> 99 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 100 * <td>Unicode 6.0</td></tr> 101 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 102 * <td>Unicode 4.0</td></tr> 103 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 104 * <td>Unicode 3.0</td></tr> 105 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 106 * <td>Unicode 2.0</td></tr> 107 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 108 * <td>Unicode 1.1.5</td></tr> 109 * </tbody> 110 * </table> 111 * Variations from these base Unicode versions, such as recognized appendixes, 112 * are documented elsewhere. 
 * <h2><a id="unicode">Unicode Character Representations</a></h2>
 *
 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
 * original Unicode specification, which defined characters as
 * fixed-width 16-bit entities. The Unicode Standard has since been
 * changed to allow for characters whose representation requires more
 * than 16 bits. The range of legal <em>code point</em>s is now
 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
 * (Refer to the <a
 * href="http://www.unicode.org/reports/tr27/#notation"><i>
 * definition</i></a> of the U+<i>n</i> notation in the Unicode
 * Standard.)
 *
 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
 * <a id="supplementary">Characters</a> whose code points are greater
 * than U+FFFF are called <em>supplementary character</em>s. The Java
 * platform uses the UTF-16 representation in {@code char} arrays and
 * in the {@code String} and {@code StringBuffer} classes. In
 * this representation, supplementary characters are represented as a pair
 * of {@code char} values, the first from the <em>high-surrogates</em>
 * range, (&#92;uD800-&#92;uDBFF), the second from the
 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
 *
 * <p>A {@code char} value, therefore, represents Basic
 * Multilingual Plane (BMP) code points, including the surrogate
 * code points, or code units of the UTF-16 encoding. An
 * {@code int} value represents all Unicode code points,
 * including supplementary code points. The lower (least significant)
 * 21 bits of {@code int} are used to represent Unicode code
 * points and the upper (most significant) 11 bits must be zero.
145 * Unless otherwise specified, the behavior with respect to 146 * supplementary characters and surrogate {@code char} values is 147 * as follows: 148 * 149 * <ul> 150 * <li>The methods that only accept a {@code char} value cannot support 151 * supplementary characters. They treat {@code char} values from the 152 * surrogate ranges as undefined characters. For example, 153 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 154 * this specific value if followed by any low-surrogate value in a string 155 * would represent a letter. 156 * 157 * <li>The methods that accept an {@code int} value support all 158 * Unicode characters, including supplementary characters. For 159 * example, {@code Character.isLetter(0x2F81A)} returns 160 * {@code true} because the code point value represents a letter 161 * (a CJK ideograph). 162 * </ul> 163 * 164 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 165 * used for character values in the range between U+0000 and U+10FFFF, 166 * and <em>Unicode code unit</em> is used for 16-bit 167 * {@code char} values that are code units of the <em>UTF-16</em> 168 * encoding. For more information on Unicode terminology, refer to the 169 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 170 * 171 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 172 * class; programmers should treat instances that are {@linkplain #equals(Object) equal} 173 * as interchangeable and should not use instances for synchronization, mutexes, or 174 * with {@linkplain java.lang.ref.Reference object references}. 175 * 176 * <div class="preview-block"> 177 * <div class="preview-comment"> 178 * When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}. 
179 * Use of value class instances for synchronization, mutexes, or with 180 * {@linkplain java.lang.ref.Reference object references} result in 181 * {@link IdentityException}. 182 * </div> 183 * </div> 184 * 185 * @spec https://www.unicode.org/reports/tr27 Unicode 3.1.0 186 * @author Lee Boynton 187 * @author Guy Steele 188 * @author Akira Tanaka 189 * @author Martin Buchholz 190 * @author Ulf Zibis 191 * @since 1.0 192 */ 193 @jdk.internal.MigratedValueClass 194 @jdk.internal.ValueBased 195 public final class Character implements java.io.Serializable, Comparable<Character>, Constable { 196 /** 197 * The minimum radix available for conversion to and from strings. 198 * The constant value of this field is the smallest value permitted 199 * for the radix argument in radix-conversion methods such as the 200 * {@code digit} method, the {@code forDigit} method, and the 201 * {@code toString} method of class {@code Integer}. 202 * 203 * @see Character#digit(char, int) 204 * @see Character#forDigit(int, int) 205 * @see Integer#toString(int, int) 206 * @see Integer#valueOf(String) 207 */ 208 public static final int MIN_RADIX = 2; 209 210 /** 211 * The maximum radix available for conversion to and from strings. 212 * The constant value of this field is the largest value permitted 213 * for the radix argument in radix-conversion methods such as the 214 * {@code digit} method, the {@code forDigit} method, and the 215 * {@code toString} method of class {@code Integer}. 216 * 217 * @see Character#digit(char, int) 218 * @see Character#forDigit(int, int) 219 * @see Integer#toString(int, int) 220 * @see Integer#valueOf(String) 221 */ 222 public static final int MAX_RADIX = 36; 223 224 /** 225 * The constant value of this field is the smallest value of type 226 * {@code char}, {@code '\u005Cu0000'}. 
227 * 228 * @since 1.0.2 229 */ 230 public static final char MIN_VALUE = '\u0000'; 231 232 /** 233 * The constant value of this field is the largest value of type 234 * {@code char}, {@code '\u005CuFFFF'}. 235 * 236 * @since 1.0.2 237 */ 238 public static final char MAX_VALUE = '\uFFFF'; 239 240 /** 241 * The {@code Class} instance representing the primitive type 242 * {@code char}. 243 * 244 * @since 1.1 245 */ 246 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 247 248 /* 249 * Normative general types 250 */ 251 252 /* 253 * General character types 254 */ 255 256 /** 257 * General category "Cn" in the Unicode specification. 258 * @since 1.1 259 */ 260 public static final byte UNASSIGNED = 0; 261 262 /** 263 * General category "Lu" in the Unicode specification. 264 * @since 1.1 265 */ 266 public static final byte UPPERCASE_LETTER = 1; 267 268 /** 269 * General category "Ll" in the Unicode specification. 270 * @since 1.1 271 */ 272 public static final byte LOWERCASE_LETTER = 2; 273 274 /** 275 * General category "Lt" in the Unicode specification. 276 * @since 1.1 277 */ 278 public static final byte TITLECASE_LETTER = 3; 279 280 /** 281 * General category "Lm" in the Unicode specification. 282 * @since 1.1 283 */ 284 public static final byte MODIFIER_LETTER = 4; 285 286 /** 287 * General category "Lo" in the Unicode specification. 288 * @since 1.1 289 */ 290 public static final byte OTHER_LETTER = 5; 291 292 /** 293 * General category "Mn" in the Unicode specification. 294 * @since 1.1 295 */ 296 public static final byte NON_SPACING_MARK = 6; 297 298 /** 299 * General category "Me" in the Unicode specification. 300 * @since 1.1 301 */ 302 public static final byte ENCLOSING_MARK = 7; 303 304 /** 305 * General category "Mc" in the Unicode specification. 306 * @since 1.1 307 */ 308 public static final byte COMBINING_SPACING_MARK = 8; 309 310 /** 311 * General category "Nd" in the Unicode specification. 
312 * @since 1.1 313 */ 314 public static final byte DECIMAL_DIGIT_NUMBER = 9; 315 316 /** 317 * General category "Nl" in the Unicode specification. 318 * @since 1.1 319 */ 320 public static final byte LETTER_NUMBER = 10; 321 322 /** 323 * General category "No" in the Unicode specification. 324 * @since 1.1 325 */ 326 public static final byte OTHER_NUMBER = 11; 327 328 /** 329 * General category "Zs" in the Unicode specification. 330 * @since 1.1 331 */ 332 public static final byte SPACE_SEPARATOR = 12; 333 334 /** 335 * General category "Zl" in the Unicode specification. 336 * @since 1.1 337 */ 338 public static final byte LINE_SEPARATOR = 13; 339 340 /** 341 * General category "Zp" in the Unicode specification. 342 * @since 1.1 343 */ 344 public static final byte PARAGRAPH_SEPARATOR = 14; 345 346 /** 347 * General category "Cc" in the Unicode specification. 348 * @since 1.1 349 */ 350 public static final byte CONTROL = 15; 351 352 /** 353 * General category "Cf" in the Unicode specification. 354 * @since 1.1 355 */ 356 public static final byte FORMAT = 16; 357 358 /** 359 * General category "Co" in the Unicode specification. 360 * @since 1.1 361 */ 362 public static final byte PRIVATE_USE = 18; 363 364 /** 365 * General category "Cs" in the Unicode specification. 366 * @since 1.1 367 */ 368 public static final byte SURROGATE = 19; 369 370 /** 371 * General category "Pd" in the Unicode specification. 372 * @since 1.1 373 */ 374 public static final byte DASH_PUNCTUATION = 20; 375 376 /** 377 * General category "Ps" in the Unicode specification. 378 * @since 1.1 379 */ 380 public static final byte START_PUNCTUATION = 21; 381 382 /** 383 * General category "Pe" in the Unicode specification. 384 * @since 1.1 385 */ 386 public static final byte END_PUNCTUATION = 22; 387 388 /** 389 * General category "Pc" in the Unicode specification. 
390 * @since 1.1 391 */ 392 public static final byte CONNECTOR_PUNCTUATION = 23; 393 394 /** 395 * General category "Po" in the Unicode specification. 396 * @since 1.1 397 */ 398 public static final byte OTHER_PUNCTUATION = 24; 399 400 /** 401 * General category "Sm" in the Unicode specification. 402 * @since 1.1 403 */ 404 public static final byte MATH_SYMBOL = 25; 405 406 /** 407 * General category "Sc" in the Unicode specification. 408 * @since 1.1 409 */ 410 public static final byte CURRENCY_SYMBOL = 26; 411 412 /** 413 * General category "Sk" in the Unicode specification. 414 * @since 1.1 415 */ 416 public static final byte MODIFIER_SYMBOL = 27; 417 418 /** 419 * General category "So" in the Unicode specification. 420 * @since 1.1 421 */ 422 public static final byte OTHER_SYMBOL = 28; 423 424 /** 425 * General category "Pi" in the Unicode specification. 426 * @since 1.4 427 */ 428 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 429 430 /** 431 * General category "Pf" in the Unicode specification. 432 * @since 1.4 433 */ 434 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 435 436 /** 437 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 438 */ 439 static final int ERROR = 0xFFFFFFFF; 440 441 442 /** 443 * Undefined bidirectional character type. Undefined {@code char} 444 * values have undefined directionality in the Unicode specification. 445 * @since 1.4 446 */ 447 public static final byte DIRECTIONALITY_UNDEFINED = -1; 448 449 /** 450 * Strong bidirectional character type "L" in the Unicode specification. 451 * @since 1.4 452 */ 453 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 454 455 /** 456 * Strong bidirectional character type "R" in the Unicode specification. 457 * @since 1.4 458 */ 459 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 460 461 /** 462 * Strong bidirectional character type "AL" in the Unicode specification. 
463 * @since 1.4 464 */ 465 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 466 467 /** 468 * Weak bidirectional character type "EN" in the Unicode specification. 469 * @since 1.4 470 */ 471 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 472 473 /** 474 * Weak bidirectional character type "ES" in the Unicode specification. 475 * @since 1.4 476 */ 477 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 478 479 /** 480 * Weak bidirectional character type "ET" in the Unicode specification. 481 * @since 1.4 482 */ 483 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 484 485 /** 486 * Weak bidirectional character type "AN" in the Unicode specification. 487 * @since 1.4 488 */ 489 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 490 491 /** 492 * Weak bidirectional character type "CS" in the Unicode specification. 493 * @since 1.4 494 */ 495 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 496 497 /** 498 * Weak bidirectional character type "NSM" in the Unicode specification. 499 * @since 1.4 500 */ 501 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 502 503 /** 504 * Weak bidirectional character type "BN" in the Unicode specification. 505 * @since 1.4 506 */ 507 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 508 509 /** 510 * Neutral bidirectional character type "B" in the Unicode specification. 511 * @since 1.4 512 */ 513 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 514 515 /** 516 * Neutral bidirectional character type "S" in the Unicode specification. 517 * @since 1.4 518 */ 519 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 520 521 /** 522 * Neutral bidirectional character type "WS" in the Unicode specification. 523 * @since 1.4 524 */ 525 public static final byte DIRECTIONALITY_WHITESPACE = 12; 526 527 /** 528 * Neutral bidirectional character type "ON" in the Unicode specification. 
529 * @since 1.4 530 */ 531 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 532 533 /** 534 * Strong bidirectional character type "LRE" in the Unicode specification. 535 * @since 1.4 536 */ 537 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 538 539 /** 540 * Strong bidirectional character type "LRO" in the Unicode specification. 541 * @since 1.4 542 */ 543 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 544 545 /** 546 * Strong bidirectional character type "RLE" in the Unicode specification. 547 * @since 1.4 548 */ 549 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 550 551 /** 552 * Strong bidirectional character type "RLO" in the Unicode specification. 553 * @since 1.4 554 */ 555 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 556 557 /** 558 * Weak bidirectional character type "PDF" in the Unicode specification. 559 * @since 1.4 560 */ 561 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 562 563 /** 564 * Weak bidirectional character type "LRI" in the Unicode specification. 565 * @since 9 566 */ 567 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 568 569 /** 570 * Weak bidirectional character type "RLI" in the Unicode specification. 571 * @since 9 572 */ 573 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 574 575 /** 576 * Weak bidirectional character type "FSI" in the Unicode specification. 577 * @since 9 578 */ 579 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 580 581 /** 582 * Weak bidirectional character type "PDI" in the Unicode specification. 583 * @since 9 584 */ 585 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 586 587 /** 588 * The minimum value of a 589 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 590 * Unicode high-surrogate code unit</a> 591 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 
592 * A high-surrogate is also known as a <i>leading-surrogate</i>. 593 * 594 * @since 1.5 595 */ 596 public static final char MIN_HIGH_SURROGATE = '\uD800'; 597 598 /** 599 * The maximum value of a 600 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 601 * Unicode high-surrogate code unit</a> 602 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 603 * A high-surrogate is also known as a <i>leading-surrogate</i>. 604 * 605 * @since 1.5 606 */ 607 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 608 609 /** 610 * The minimum value of a 611 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 612 * Unicode low-surrogate code unit</a> 613 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 614 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 615 * 616 * @since 1.5 617 */ 618 public static final char MIN_LOW_SURROGATE = '\uDC00'; 619 620 /** 621 * The maximum value of a 622 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 623 * Unicode low-surrogate code unit</a> 624 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 625 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 626 * 627 * @since 1.5 628 */ 629 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 630 631 /** 632 * The minimum value of a Unicode surrogate code unit in the 633 * UTF-16 encoding, constant {@code '\u005CuD800'}. 634 * 635 * @since 1.5 636 */ 637 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 638 639 /** 640 * The maximum value of a Unicode surrogate code unit in the 641 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 642 * 643 * @since 1.5 644 */ 645 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 646 647 /** 648 * The minimum value of a 649 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 650 * Unicode supplementary code point</a>, constant {@code U+10000}. 
651 * 652 * @since 1.5 653 */ 654 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 655 656 /** 657 * The minimum value of a 658 * <a href="http://www.unicode.org/glossary/#code_point"> 659 * Unicode code point</a>, constant {@code U+0000}. 660 * 661 * @since 1.5 662 */ 663 public static final int MIN_CODE_POINT = 0x000000; 664 665 /** 666 * The maximum value of a 667 * <a href="http://www.unicode.org/glossary/#code_point"> 668 * Unicode code point</a>, constant {@code U+10FFFF}. 669 * 670 * @since 1.5 671 */ 672 public static final int MAX_CODE_POINT = 0X10FFFF; 673 674 /** 675 * Returns an {@link Optional} containing the nominal descriptor for this 676 * instance. 677 * 678 * @return an {@link Optional} describing the {@linkplain Character} instance 679 * @since 15 680 */ 681 @Override 682 public Optional<DynamicConstantDesc<Character>> describeConstable() { 683 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 684 } 685 686 /** 687 * Instances of this class represent particular subsets of the Unicode 688 * character set. The only family of subsets defined in the 689 * {@code Character} class is {@link Character.UnicodeBlock}. 690 * Other portions of the Java API may define other subsets for their 691 * own purposes. 692 * 693 * @since 1.2 694 */ 695 public static class Subset { 696 697 private String name; 698 699 /** 700 * Constructs a new {@code Subset} instance. 701 * 702 * @param name The name of this subset 703 * @throws NullPointerException if name is {@code null} 704 */ 705 protected Subset(String name) { 706 if (name == null) { 707 throw new NullPointerException("name"); 708 } 709 this.name = name; 710 } 711 712 /** 713 * Compares two {@code Subset} objects for equality. 714 * This method returns {@code true} if and only if 715 * {@code this} and the argument refer to the same 716 * object; since this method is {@code final}, this 717 * guarantee holds for all subclasses. 
718 */ 719 public final boolean equals(Object obj) { 720 return (this == obj); 721 } 722 723 /** 724 * Returns the standard hash code as defined by the 725 * {@link Object#hashCode} method. This method 726 * is {@code final} in order to ensure that the 727 * {@code equals} and {@code hashCode} methods will 728 * be consistent in all subclasses. 729 */ 730 public final int hashCode() { 731 return super.hashCode(); 732 } 733 734 /** 735 * Returns the name of this subset. 736 */ 737 public final String toString() { 738 return name; 739 } 740 } 741 742 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 743 // for the latest specification of Unicode Blocks. 744 745 /** 746 * A family of character subsets representing the character blocks in the 747 * Unicode specification. Character blocks generally define characters 748 * used for a specific script or purpose. A character is contained by 749 * at most one Unicode block. 750 * 751 * @since 1.2 752 */ 753 public static final class UnicodeBlock extends Subset { 754 /** 755 * NUM_ENTITIES should match the total number of UnicodeBlocks. 756 * It should be adjusted whenever the Unicode Character Database 757 * is upgraded. 758 */ 759 private static final int NUM_ENTITIES = 782; 760 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 761 762 /** 763 * Creates a UnicodeBlock with the given identifier name. 764 * This name must be the same as the block identifier. 765 */ 766 private UnicodeBlock(String idName) { 767 super(idName); 768 map.put(idName, this); 769 } 770 771 /** 772 * Creates a UnicodeBlock with the given identifier name and 773 * alias name. 774 */ 775 private UnicodeBlock(String idName, String alias) { 776 this(idName); 777 map.put(alias, this); 778 } 779 780 /** 781 * Creates a UnicodeBlock with the given identifier name and 782 * alias names. 783 */ 784 private UnicodeBlock(String idName, String... 
aliases) { 785 this(idName); 786 for (String alias : aliases) 787 map.put(alias, this); 788 } 789 790 /** 791 * Constant for the "Basic Latin" Unicode character block. 792 * @since 1.2 793 */ 794 public static final UnicodeBlock BASIC_LATIN = 795 new UnicodeBlock("BASIC_LATIN", 796 "BASIC LATIN", 797 "BASICLATIN"); 798 799 /** 800 * Constant for the "Latin-1 Supplement" Unicode character block. 801 * @since 1.2 802 */ 803 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 804 new UnicodeBlock("LATIN_1_SUPPLEMENT", 805 "LATIN-1 SUPPLEMENT", 806 "LATIN-1SUPPLEMENT"); 807 808 /** 809 * Constant for the "Latin Extended-A" Unicode character block. 810 * @since 1.2 811 */ 812 public static final UnicodeBlock LATIN_EXTENDED_A = 813 new UnicodeBlock("LATIN_EXTENDED_A", 814 "LATIN EXTENDED-A", 815 "LATINEXTENDED-A"); 816 817 /** 818 * Constant for the "Latin Extended-B" Unicode character block. 819 * @since 1.2 820 */ 821 public static final UnicodeBlock LATIN_EXTENDED_B = 822 new UnicodeBlock("LATIN_EXTENDED_B", 823 "LATIN EXTENDED-B", 824 "LATINEXTENDED-B"); 825 826 /** 827 * Constant for the "IPA Extensions" Unicode character block. 828 * @since 1.2 829 */ 830 public static final UnicodeBlock IPA_EXTENSIONS = 831 new UnicodeBlock("IPA_EXTENSIONS", 832 "IPA EXTENSIONS", 833 "IPAEXTENSIONS"); 834 835 /** 836 * Constant for the "Spacing Modifier Letters" Unicode character block. 837 * @since 1.2 838 */ 839 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 840 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 841 "SPACING MODIFIER LETTERS", 842 "SPACINGMODIFIERLETTERS"); 843 844 /** 845 * Constant for the "Combining Diacritical Marks" Unicode character block. 846 * @since 1.2 847 */ 848 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 849 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 850 "COMBINING DIACRITICAL MARKS", 851 "COMBININGDIACRITICALMARKS"); 852 853 /** 854 * Constant for the "Greek and Coptic" Unicode character block. 
855 * <p> 856 * This block was previously known as the "Greek" block. 857 * 858 * @since 1.2 859 */ 860 public static final UnicodeBlock GREEK = 861 new UnicodeBlock("GREEK", 862 "GREEK AND COPTIC", 863 "GREEKANDCOPTIC"); 864 865 /** 866 * Constant for the "Cyrillic" Unicode character block. 867 * @since 1.2 868 */ 869 public static final UnicodeBlock CYRILLIC = 870 new UnicodeBlock("CYRILLIC"); 871 872 /** 873 * Constant for the "Armenian" Unicode character block. 874 * @since 1.2 875 */ 876 public static final UnicodeBlock ARMENIAN = 877 new UnicodeBlock("ARMENIAN"); 878 879 /** 880 * Constant for the "Hebrew" Unicode character block. 881 * @since 1.2 882 */ 883 public static final UnicodeBlock HEBREW = 884 new UnicodeBlock("HEBREW"); 885 886 /** 887 * Constant for the "Arabic" Unicode character block. 888 * @since 1.2 889 */ 890 public static final UnicodeBlock ARABIC = 891 new UnicodeBlock("ARABIC"); 892 893 /** 894 * Constant for the "Devanagari" Unicode character block. 895 * @since 1.2 896 */ 897 public static final UnicodeBlock DEVANAGARI = 898 new UnicodeBlock("DEVANAGARI"); 899 900 /** 901 * Constant for the "Bengali" Unicode character block. 902 * @since 1.2 903 */ 904 public static final UnicodeBlock BENGALI = 905 new UnicodeBlock("BENGALI"); 906 907 /** 908 * Constant for the "Gurmukhi" Unicode character block. 909 * @since 1.2 910 */ 911 public static final UnicodeBlock GURMUKHI = 912 new UnicodeBlock("GURMUKHI"); 913 914 /** 915 * Constant for the "Gujarati" Unicode character block. 916 * @since 1.2 917 */ 918 public static final UnicodeBlock GUJARATI = 919 new UnicodeBlock("GUJARATI"); 920 921 /** 922 * Constant for the "Oriya" Unicode character block. 923 * @since 1.2 924 */ 925 public static final UnicodeBlock ORIYA = 926 new UnicodeBlock("ORIYA"); 927 928 /** 929 * Constant for the "Tamil" Unicode character block. 
930 * @since 1.2 931 */ 932 public static final UnicodeBlock TAMIL = 933 new UnicodeBlock("TAMIL"); 934 935 /** 936 * Constant for the "Telugu" Unicode character block. 937 * @since 1.2 938 */ 939 public static final UnicodeBlock TELUGU = 940 new UnicodeBlock("TELUGU"); 941 942 /** 943 * Constant for the "Kannada" Unicode character block. 944 * @since 1.2 945 */ 946 public static final UnicodeBlock KANNADA = 947 new UnicodeBlock("KANNADA"); 948 949 /** 950 * Constant for the "Malayalam" Unicode character block. 951 * @since 1.2 952 */ 953 public static final UnicodeBlock MALAYALAM = 954 new UnicodeBlock("MALAYALAM"); 955 956 /** 957 * Constant for the "Thai" Unicode character block. 958 * @since 1.2 959 */ 960 public static final UnicodeBlock THAI = 961 new UnicodeBlock("THAI"); 962 963 /** 964 * Constant for the "Lao" Unicode character block. 965 * @since 1.2 966 */ 967 public static final UnicodeBlock LAO = 968 new UnicodeBlock("LAO"); 969 970 /** 971 * Constant for the "Tibetan" Unicode character block. 972 * @since 1.2 973 */ 974 public static final UnicodeBlock TIBETAN = 975 new UnicodeBlock("TIBETAN"); 976 977 /** 978 * Constant for the "Georgian" Unicode character block. 979 * @since 1.2 980 */ 981 public static final UnicodeBlock GEORGIAN = 982 new UnicodeBlock("GEORGIAN"); 983 984 /** 985 * Constant for the "Hangul Jamo" Unicode character block. 986 * @since 1.2 987 */ 988 public static final UnicodeBlock HANGUL_JAMO = 989 new UnicodeBlock("HANGUL_JAMO", 990 "HANGUL JAMO", 991 "HANGULJAMO"); 992 993 /** 994 * Constant for the "Latin Extended Additional" Unicode character block. 995 * @since 1.2 996 */ 997 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 998 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 999 "LATIN EXTENDED ADDITIONAL", 1000 "LATINEXTENDEDADDITIONAL"); 1001 1002 /** 1003 * Constant for the "Greek Extended" Unicode character block. 
1004 * @since 1.2 1005 */ 1006 public static final UnicodeBlock GREEK_EXTENDED = 1007 new UnicodeBlock("GREEK_EXTENDED", 1008 "GREEK EXTENDED", 1009 "GREEKEXTENDED"); 1010 1011 /** 1012 * Constant for the "General Punctuation" Unicode character block. 1013 * @since 1.2 1014 */ 1015 public static final UnicodeBlock GENERAL_PUNCTUATION = 1016 new UnicodeBlock("GENERAL_PUNCTUATION", 1017 "GENERAL PUNCTUATION", 1018 "GENERALPUNCTUATION"); 1019 1020 /** 1021 * Constant for the "Superscripts and Subscripts" Unicode character 1022 * block. 1023 * @since 1.2 1024 */ 1025 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1026 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1027 "SUPERSCRIPTS AND SUBSCRIPTS", 1028 "SUPERSCRIPTSANDSUBSCRIPTS"); 1029 1030 /** 1031 * Constant for the "Currency Symbols" Unicode character block. 1032 * @since 1.2 1033 */ 1034 public static final UnicodeBlock CURRENCY_SYMBOLS = 1035 new UnicodeBlock("CURRENCY_SYMBOLS", 1036 "CURRENCY SYMBOLS", 1037 "CURRENCYSYMBOLS"); 1038 1039 /** 1040 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1041 * character block. 1042 * <p> 1043 * This block was previously known as "Combining Marks for Symbols". 1044 * @since 1.2 1045 */ 1046 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1047 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1048 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1049 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1050 "COMBINING MARKS FOR SYMBOLS", 1051 "COMBININGMARKSFORSYMBOLS"); 1052 1053 /** 1054 * Constant for the "Letterlike Symbols" Unicode character block. 1055 * @since 1.2 1056 */ 1057 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1058 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1059 "LETTERLIKE SYMBOLS", 1060 "LETTERLIKESYMBOLS"); 1061 1062 /** 1063 * Constant for the "Number Forms" Unicode character block. 
1064 * @since 1.2 1065 */ 1066 public static final UnicodeBlock NUMBER_FORMS = 1067 new UnicodeBlock("NUMBER_FORMS", 1068 "NUMBER FORMS", 1069 "NUMBERFORMS"); 1070 1071 /** 1072 * Constant for the "Arrows" Unicode character block. 1073 * @since 1.2 1074 */ 1075 public static final UnicodeBlock ARROWS = 1076 new UnicodeBlock("ARROWS"); 1077 1078 /** 1079 * Constant for the "Mathematical Operators" Unicode character block. 1080 * @since 1.2 1081 */ 1082 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1083 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1084 "MATHEMATICAL OPERATORS", 1085 "MATHEMATICALOPERATORS"); 1086 1087 /** 1088 * Constant for the "Miscellaneous Technical" Unicode character block. 1089 * @since 1.2 1090 */ 1091 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1092 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1093 "MISCELLANEOUS TECHNICAL", 1094 "MISCELLANEOUSTECHNICAL"); 1095 1096 /** 1097 * Constant for the "Control Pictures" Unicode character block. 1098 * @since 1.2 1099 */ 1100 public static final UnicodeBlock CONTROL_PICTURES = 1101 new UnicodeBlock("CONTROL_PICTURES", 1102 "CONTROL PICTURES", 1103 "CONTROLPICTURES"); 1104 1105 /** 1106 * Constant for the "Optical Character Recognition" Unicode character block. 1107 * @since 1.2 1108 */ 1109 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1110 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1111 "OPTICAL CHARACTER RECOGNITION", 1112 "OPTICALCHARACTERRECOGNITION"); 1113 1114 /** 1115 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1116 * @since 1.2 1117 */ 1118 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1119 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1120 "ENCLOSED ALPHANUMERICS", 1121 "ENCLOSEDALPHANUMERICS"); 1122 1123 /** 1124 * Constant for the "Box Drawing" Unicode character block. 
1125 * @since 1.2 1126 */ 1127 public static final UnicodeBlock BOX_DRAWING = 1128 new UnicodeBlock("BOX_DRAWING", 1129 "BOX DRAWING", 1130 "BOXDRAWING"); 1131 1132 /** 1133 * Constant for the "Block Elements" Unicode character block. 1134 * @since 1.2 1135 */ 1136 public static final UnicodeBlock BLOCK_ELEMENTS = 1137 new UnicodeBlock("BLOCK_ELEMENTS", 1138 "BLOCK ELEMENTS", 1139 "BLOCKELEMENTS"); 1140 1141 /** 1142 * Constant for the "Geometric Shapes" Unicode character block. 1143 * @since 1.2 1144 */ 1145 public static final UnicodeBlock GEOMETRIC_SHAPES = 1146 new UnicodeBlock("GEOMETRIC_SHAPES", 1147 "GEOMETRIC SHAPES", 1148 "GEOMETRICSHAPES"); 1149 1150 /** 1151 * Constant for the "Miscellaneous Symbols" Unicode character block. 1152 * @since 1.2 1153 */ 1154 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1155 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1156 "MISCELLANEOUS SYMBOLS", 1157 "MISCELLANEOUSSYMBOLS"); 1158 1159 /** 1160 * Constant for the "Dingbats" Unicode character block. 1161 * @since 1.2 1162 */ 1163 public static final UnicodeBlock DINGBATS = 1164 new UnicodeBlock("DINGBATS"); 1165 1166 /** 1167 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1168 * @since 1.2 1169 */ 1170 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1171 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1172 "CJK SYMBOLS AND PUNCTUATION", 1173 "CJKSYMBOLSANDPUNCTUATION"); 1174 1175 /** 1176 * Constant for the "Hiragana" Unicode character block. 1177 * @since 1.2 1178 */ 1179 public static final UnicodeBlock HIRAGANA = 1180 new UnicodeBlock("HIRAGANA"); 1181 1182 /** 1183 * Constant for the "Katakana" Unicode character block. 1184 * @since 1.2 1185 */ 1186 public static final UnicodeBlock KATAKANA = 1187 new UnicodeBlock("KATAKANA"); 1188 1189 /** 1190 * Constant for the "Bopomofo" Unicode character block. 
1191 * @since 1.2 1192 */ 1193 public static final UnicodeBlock BOPOMOFO = 1194 new UnicodeBlock("BOPOMOFO"); 1195 1196 /** 1197 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1198 * @since 1.2 1199 */ 1200 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1201 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1202 "HANGUL COMPATIBILITY JAMO", 1203 "HANGULCOMPATIBILITYJAMO"); 1204 1205 /** 1206 * Constant for the "Kanbun" Unicode character block. 1207 * @since 1.2 1208 */ 1209 public static final UnicodeBlock KANBUN = 1210 new UnicodeBlock("KANBUN"); 1211 1212 /** 1213 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1214 * @since 1.2 1215 */ 1216 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1217 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1218 "ENCLOSED CJK LETTERS AND MONTHS", 1219 "ENCLOSEDCJKLETTERSANDMONTHS"); 1220 1221 /** 1222 * Constant for the "CJK Compatibility" Unicode character block. 1223 * @since 1.2 1224 */ 1225 public static final UnicodeBlock CJK_COMPATIBILITY = 1226 new UnicodeBlock("CJK_COMPATIBILITY", 1227 "CJK COMPATIBILITY", 1228 "CJKCOMPATIBILITY"); 1229 1230 /** 1231 * Constant for the "CJK Unified Ideographs" Unicode character block. 1232 * @since 1.2 1233 */ 1234 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1235 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1236 "CJK UNIFIED IDEOGRAPHS", 1237 "CJKUNIFIEDIDEOGRAPHS"); 1238 1239 /** 1240 * Constant for the "Hangul Syllables" Unicode character block. 1241 * @since 1.2 1242 */ 1243 public static final UnicodeBlock HANGUL_SYLLABLES = 1244 new UnicodeBlock("HANGUL_SYLLABLES", 1245 "HANGUL SYLLABLES", 1246 "HANGULSYLLABLES"); 1247 1248 /** 1249 * Constant for the "Private Use Area" Unicode character block. 
1250 * @since 1.2 1251 */ 1252 public static final UnicodeBlock PRIVATE_USE_AREA = 1253 new UnicodeBlock("PRIVATE_USE_AREA", 1254 "PRIVATE USE AREA", 1255 "PRIVATEUSEAREA"); 1256 1257 /** 1258 * Constant for the "CJK Compatibility Ideographs" Unicode character 1259 * block. 1260 * @since 1.2 1261 */ 1262 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1263 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1264 "CJK COMPATIBILITY IDEOGRAPHS", 1265 "CJKCOMPATIBILITYIDEOGRAPHS"); 1266 1267 /** 1268 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1269 * @since 1.2 1270 */ 1271 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1272 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1273 "ALPHABETIC PRESENTATION FORMS", 1274 "ALPHABETICPRESENTATIONFORMS"); 1275 1276 /** 1277 * Constant for the "Arabic Presentation Forms-A" Unicode character 1278 * block. 1279 * @since 1.2 1280 */ 1281 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1282 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1283 "ARABIC PRESENTATION FORMS-A", 1284 "ARABICPRESENTATIONFORMS-A"); 1285 1286 /** 1287 * Constant for the "Combining Half Marks" Unicode character block. 1288 * @since 1.2 1289 */ 1290 public static final UnicodeBlock COMBINING_HALF_MARKS = 1291 new UnicodeBlock("COMBINING_HALF_MARKS", 1292 "COMBINING HALF MARKS", 1293 "COMBININGHALFMARKS"); 1294 1295 /** 1296 * Constant for the "CJK Compatibility Forms" Unicode character block. 1297 * @since 1.2 1298 */ 1299 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1300 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1301 "CJK COMPATIBILITY FORMS", 1302 "CJKCOMPATIBILITYFORMS"); 1303 1304 /** 1305 * Constant for the "Small Form Variants" Unicode character block. 
1306 * @since 1.2 1307 */ 1308 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1309 new UnicodeBlock("SMALL_FORM_VARIANTS", 1310 "SMALL FORM VARIANTS", 1311 "SMALLFORMVARIANTS"); 1312 1313 /** 1314 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1315 * @since 1.2 1316 */ 1317 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1318 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1319 "ARABIC PRESENTATION FORMS-B", 1320 "ARABICPRESENTATIONFORMS-B"); 1321 1322 /** 1323 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1324 * block. 1325 * @since 1.2 1326 */ 1327 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1328 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1329 "HALFWIDTH AND FULLWIDTH FORMS", 1330 "HALFWIDTHANDFULLWIDTHFORMS"); 1331 1332 /** 1333 * Constant for the "Specials" Unicode character block. 1334 * @since 1.2 1335 */ 1336 public static final UnicodeBlock SPECIALS = 1337 new UnicodeBlock("SPECIALS"); 1338 1339 /** 1340 * @deprecated 1341 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1342 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1343 * These constants match the block definitions of the Unicode Standard. 1344 * The {@link #of(char)} and {@link #of(int)} methods return the 1345 * standard constants. 1346 */ 1347 @Deprecated(since="1.5") 1348 public static final UnicodeBlock SURROGATES_AREA = 1349 new UnicodeBlock("SURROGATES_AREA"); 1350 1351 /** 1352 * Constant for the "Syriac" Unicode character block. 1353 * @since 1.4 1354 */ 1355 public static final UnicodeBlock SYRIAC = 1356 new UnicodeBlock("SYRIAC"); 1357 1358 /** 1359 * Constant for the "Thaana" Unicode character block. 1360 * @since 1.4 1361 */ 1362 public static final UnicodeBlock THAANA = 1363 new UnicodeBlock("THAANA"); 1364 1365 /** 1366 * Constant for the "Sinhala" Unicode character block. 
1367 * @since 1.4 1368 */ 1369 public static final UnicodeBlock SINHALA = 1370 new UnicodeBlock("SINHALA"); 1371 1372 /** 1373 * Constant for the "Myanmar" Unicode character block. 1374 * @since 1.4 1375 */ 1376 public static final UnicodeBlock MYANMAR = 1377 new UnicodeBlock("MYANMAR"); 1378 1379 /** 1380 * Constant for the "Ethiopic" Unicode character block. 1381 * @since 1.4 1382 */ 1383 public static final UnicodeBlock ETHIOPIC = 1384 new UnicodeBlock("ETHIOPIC"); 1385 1386 /** 1387 * Constant for the "Cherokee" Unicode character block. 1388 * @since 1.4 1389 */ 1390 public static final UnicodeBlock CHEROKEE = 1391 new UnicodeBlock("CHEROKEE"); 1392 1393 /** 1394 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1395 * @since 1.4 1396 */ 1397 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1398 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1399 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1400 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1401 1402 /** 1403 * Constant for the "Ogham" Unicode character block. 1404 * @since 1.4 1405 */ 1406 public static final UnicodeBlock OGHAM = 1407 new UnicodeBlock("OGHAM"); 1408 1409 /** 1410 * Constant for the "Runic" Unicode character block. 1411 * @since 1.4 1412 */ 1413 public static final UnicodeBlock RUNIC = 1414 new UnicodeBlock("RUNIC"); 1415 1416 /** 1417 * Constant for the "Khmer" Unicode character block. 1418 * @since 1.4 1419 */ 1420 public static final UnicodeBlock KHMER = 1421 new UnicodeBlock("KHMER"); 1422 1423 /** 1424 * Constant for the "Mongolian" Unicode character block. 1425 * @since 1.4 1426 */ 1427 public static final UnicodeBlock MONGOLIAN = 1428 new UnicodeBlock("MONGOLIAN"); 1429 1430 /** 1431 * Constant for the "Braille Patterns" Unicode character block. 
1432 * @since 1.4 1433 */ 1434 public static final UnicodeBlock BRAILLE_PATTERNS = 1435 new UnicodeBlock("BRAILLE_PATTERNS", 1436 "BRAILLE PATTERNS", 1437 "BRAILLEPATTERNS"); 1438 1439 /** 1440 * Constant for the "CJK Radicals Supplement" Unicode character block. 1441 * @since 1.4 1442 */ 1443 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1444 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1445 "CJK RADICALS SUPPLEMENT", 1446 "CJKRADICALSSUPPLEMENT"); 1447 1448 /** 1449 * Constant for the "Kangxi Radicals" Unicode character block. 1450 * @since 1.4 1451 */ 1452 public static final UnicodeBlock KANGXI_RADICALS = 1453 new UnicodeBlock("KANGXI_RADICALS", 1454 "KANGXI RADICALS", 1455 "KANGXIRADICALS"); 1456 1457 /** 1458 * Constant for the "Ideographic Description Characters" Unicode character block. 1459 * @since 1.4 1460 */ 1461 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1462 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1463 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1464 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1465 1466 /** 1467 * Constant for the "Bopomofo Extended" Unicode character block. 1468 * @since 1.4 1469 */ 1470 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1471 new UnicodeBlock("BOPOMOFO_EXTENDED", 1472 "BOPOMOFO EXTENDED", 1473 "BOPOMOFOEXTENDED"); 1474 1475 /** 1476 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1477 * @since 1.4 1478 */ 1479 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1480 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1481 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1482 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1483 1484 /** 1485 * Constant for the "Yi Syllables" Unicode character block. 1486 * @since 1.4 1487 */ 1488 public static final UnicodeBlock YI_SYLLABLES = 1489 new UnicodeBlock("YI_SYLLABLES", 1490 "YI SYLLABLES", 1491 "YISYLLABLES"); 1492 1493 /** 1494 * Constant for the "Yi Radicals" Unicode character block. 
1495 * @since 1.4 1496 */ 1497 public static final UnicodeBlock YI_RADICALS = 1498 new UnicodeBlock("YI_RADICALS", 1499 "YI RADICALS", 1500 "YIRADICALS"); 1501 1502 /** 1503 * Constant for the "Cyrillic Supplement" Unicode character block. 1504 * This block was previously known as the "Cyrillic Supplementary" block. 1505 * @since 1.5 1506 */ 1507 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1508 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1509 "CYRILLIC SUPPLEMENTARY", 1510 "CYRILLICSUPPLEMENTARY", 1511 "CYRILLIC SUPPLEMENT", 1512 "CYRILLICSUPPLEMENT"); 1513 1514 /** 1515 * Constant for the "Tagalog" Unicode character block. 1516 * @since 1.5 1517 */ 1518 public static final UnicodeBlock TAGALOG = 1519 new UnicodeBlock("TAGALOG"); 1520 1521 /** 1522 * Constant for the "Hanunoo" Unicode character block. 1523 * @since 1.5 1524 */ 1525 public static final UnicodeBlock HANUNOO = 1526 new UnicodeBlock("HANUNOO"); 1527 1528 /** 1529 * Constant for the "Buhid" Unicode character block. 1530 * @since 1.5 1531 */ 1532 public static final UnicodeBlock BUHID = 1533 new UnicodeBlock("BUHID"); 1534 1535 /** 1536 * Constant for the "Tagbanwa" Unicode character block. 1537 * @since 1.5 1538 */ 1539 public static final UnicodeBlock TAGBANWA = 1540 new UnicodeBlock("TAGBANWA"); 1541 1542 /** 1543 * Constant for the "Limbu" Unicode character block. 1544 * @since 1.5 1545 */ 1546 public static final UnicodeBlock LIMBU = 1547 new UnicodeBlock("LIMBU"); 1548 1549 /** 1550 * Constant for the "Tai Le" Unicode character block. 1551 * @since 1.5 1552 */ 1553 public static final UnicodeBlock TAI_LE = 1554 new UnicodeBlock("TAI_LE", 1555 "TAI LE", 1556 "TAILE"); 1557 1558 /** 1559 * Constant for the "Khmer Symbols" Unicode character block. 
1560 * @since 1.5 1561 */ 1562 public static final UnicodeBlock KHMER_SYMBOLS = 1563 new UnicodeBlock("KHMER_SYMBOLS", 1564 "KHMER SYMBOLS", 1565 "KHMERSYMBOLS"); 1566 1567 /** 1568 * Constant for the "Phonetic Extensions" Unicode character block. 1569 * @since 1.5 1570 */ 1571 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1572 new UnicodeBlock("PHONETIC_EXTENSIONS", 1573 "PHONETIC EXTENSIONS", 1574 "PHONETICEXTENSIONS"); 1575 1576 /** 1577 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1578 * @since 1.5 1579 */ 1580 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1581 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1582 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1583 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1584 1585 /** 1586 * Constant for the "Supplemental Arrows-A" Unicode character block. 1587 * @since 1.5 1588 */ 1589 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1590 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1591 "SUPPLEMENTAL ARROWS-A", 1592 "SUPPLEMENTALARROWS-A"); 1593 1594 /** 1595 * Constant for the "Supplemental Arrows-B" Unicode character block. 1596 * @since 1.5 1597 */ 1598 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1599 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1600 "SUPPLEMENTAL ARROWS-B", 1601 "SUPPLEMENTALARROWS-B"); 1602 1603 /** 1604 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1605 * character block. 1606 * @since 1.5 1607 */ 1608 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1609 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1610 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1611 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1612 1613 /** 1614 * Constant for the "Supplemental Mathematical Operators" Unicode 1615 * character block. 
1616 * @since 1.5 1617 */ 1618 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1619 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1620 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1621 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1622 1623 /** 1624 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1625 * block. 1626 * @since 1.5 1627 */ 1628 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1629 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1630 "MISCELLANEOUS SYMBOLS AND ARROWS", 1631 "MISCELLANEOUSSYMBOLSANDARROWS"); 1632 1633 /** 1634 * Constant for the "Katakana Phonetic Extensions" Unicode character 1635 * block. 1636 * @since 1.5 1637 */ 1638 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1639 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1640 "KATAKANA PHONETIC EXTENSIONS", 1641 "KATAKANAPHONETICEXTENSIONS"); 1642 1643 /** 1644 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1645 * @since 1.5 1646 */ 1647 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1648 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1649 "YIJING HEXAGRAM SYMBOLS", 1650 "YIJINGHEXAGRAMSYMBOLS"); 1651 1652 /** 1653 * Constant for the "Variation Selectors" Unicode character block. 1654 * @since 1.5 1655 */ 1656 public static final UnicodeBlock VARIATION_SELECTORS = 1657 new UnicodeBlock("VARIATION_SELECTORS", 1658 "VARIATION SELECTORS", 1659 "VARIATIONSELECTORS"); 1660 1661 /** 1662 * Constant for the "Linear B Syllabary" Unicode character block. 1663 * @since 1.5 1664 */ 1665 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1666 new UnicodeBlock("LINEAR_B_SYLLABARY", 1667 "LINEAR B SYLLABARY", 1668 "LINEARBSYLLABARY"); 1669 1670 /** 1671 * Constant for the "Linear B Ideograms" Unicode character block. 
1672 * @since 1.5 1673 */ 1674 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1675 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1676 "LINEAR B IDEOGRAMS", 1677 "LINEARBIDEOGRAMS"); 1678 1679 /** 1680 * Constant for the "Aegean Numbers" Unicode character block. 1681 * @since 1.5 1682 */ 1683 public static final UnicodeBlock AEGEAN_NUMBERS = 1684 new UnicodeBlock("AEGEAN_NUMBERS", 1685 "AEGEAN NUMBERS", 1686 "AEGEANNUMBERS"); 1687 1688 /** 1689 * Constant for the "Old Italic" Unicode character block. 1690 * @since 1.5 1691 */ 1692 public static final UnicodeBlock OLD_ITALIC = 1693 new UnicodeBlock("OLD_ITALIC", 1694 "OLD ITALIC", 1695 "OLDITALIC"); 1696 1697 /** 1698 * Constant for the "Gothic" Unicode character block. 1699 * @since 1.5 1700 */ 1701 public static final UnicodeBlock GOTHIC = 1702 new UnicodeBlock("GOTHIC"); 1703 1704 /** 1705 * Constant for the "Ugaritic" Unicode character block. 1706 * @since 1.5 1707 */ 1708 public static final UnicodeBlock UGARITIC = 1709 new UnicodeBlock("UGARITIC"); 1710 1711 /** 1712 * Constant for the "Deseret" Unicode character block. 1713 * @since 1.5 1714 */ 1715 public static final UnicodeBlock DESERET = 1716 new UnicodeBlock("DESERET"); 1717 1718 /** 1719 * Constant for the "Shavian" Unicode character block. 1720 * @since 1.5 1721 */ 1722 public static final UnicodeBlock SHAVIAN = 1723 new UnicodeBlock("SHAVIAN"); 1724 1725 /** 1726 * Constant for the "Osmanya" Unicode character block. 1727 * @since 1.5 1728 */ 1729 public static final UnicodeBlock OSMANYA = 1730 new UnicodeBlock("OSMANYA"); 1731 1732 /** 1733 * Constant for the "Cypriot Syllabary" Unicode character block. 1734 * @since 1.5 1735 */ 1736 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1737 new UnicodeBlock("CYPRIOT_SYLLABARY", 1738 "CYPRIOT SYLLABARY", 1739 "CYPRIOTSYLLABARY"); 1740 1741 /** 1742 * Constant for the "Byzantine Musical Symbols" Unicode character block. 
1743 * @since 1.5 1744 */ 1745 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1746 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1747 "BYZANTINE MUSICAL SYMBOLS", 1748 "BYZANTINEMUSICALSYMBOLS"); 1749 1750 /** 1751 * Constant for the "Musical Symbols" Unicode character block. 1752 * @since 1.5 1753 */ 1754 public static final UnicodeBlock MUSICAL_SYMBOLS = 1755 new UnicodeBlock("MUSICAL_SYMBOLS", 1756 "MUSICAL SYMBOLS", 1757 "MUSICALSYMBOLS"); 1758 1759 /** 1760 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1761 * @since 1.5 1762 */ 1763 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1764 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1765 "TAI XUAN JING SYMBOLS", 1766 "TAIXUANJINGSYMBOLS"); 1767 1768 /** 1769 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1770 * character block. 1771 * @since 1.5 1772 */ 1773 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1774 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1775 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1776 "MATHEMATICALALPHANUMERICSYMBOLS"); 1777 1778 /** 1779 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1780 * character block. 1781 * @since 1.5 1782 */ 1783 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1784 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1785 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1786 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1787 1788 /** 1789 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1790 * @since 1.5 1791 */ 1792 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1793 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1794 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1795 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1796 1797 /** 1798 * Constant for the "Tags" Unicode character block. 
1799 * @since 1.5 1800 */ 1801 public static final UnicodeBlock TAGS = 1802 new UnicodeBlock("TAGS"); 1803 1804 /** 1805 * Constant for the "Variation Selectors Supplement" Unicode character 1806 * block. 1807 * @since 1.5 1808 */ 1809 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1810 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1811 "VARIATION SELECTORS SUPPLEMENT", 1812 "VARIATIONSELECTORSSUPPLEMENT"); 1813 1814 /** 1815 * Constant for the "Supplementary Private Use Area-A" Unicode character 1816 * block. 1817 * @since 1.5 1818 */ 1819 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1820 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1821 "SUPPLEMENTARY PRIVATE USE AREA-A", 1822 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1823 1824 /** 1825 * Constant for the "Supplementary Private Use Area-B" Unicode character 1826 * block. 1827 * @since 1.5 1828 */ 1829 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1830 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1831 "SUPPLEMENTARY PRIVATE USE AREA-B", 1832 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1833 1834 /** 1835 * Constant for the "High Surrogates" Unicode character block. 1836 * This block represents codepoint values in the high surrogate 1837 * range: U+D800 through U+DB7F 1838 * 1839 * @since 1.5 1840 */ 1841 public static final UnicodeBlock HIGH_SURROGATES = 1842 new UnicodeBlock("HIGH_SURROGATES", 1843 "HIGH SURROGATES", 1844 "HIGHSURROGATES"); 1845 1846 /** 1847 * Constant for the "High Private Use Surrogates" Unicode character 1848 * block. 
1849 * This block represents codepoint values in the private use high 1850 * surrogate range: U+DB80 through U+DBFF 1851 * 1852 * @since 1.5 1853 */ 1854 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1855 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1856 "HIGH PRIVATE USE SURROGATES", 1857 "HIGHPRIVATEUSESURROGATES"); 1858 1859 /** 1860 * Constant for the "Low Surrogates" Unicode character block. 1861 * This block represents codepoint values in the low surrogate 1862 * range: U+DC00 through U+DFFF 1863 * 1864 * @since 1.5 1865 */ 1866 public static final UnicodeBlock LOW_SURROGATES = 1867 new UnicodeBlock("LOW_SURROGATES", 1868 "LOW SURROGATES", 1869 "LOWSURROGATES"); 1870 1871 /** 1872 * Constant for the "Arabic Supplement" Unicode character block. 1873 * @since 1.7 1874 */ 1875 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1876 new UnicodeBlock("ARABIC_SUPPLEMENT", 1877 "ARABIC SUPPLEMENT", 1878 "ARABICSUPPLEMENT"); 1879 1880 /** 1881 * Constant for the "NKo" Unicode character block. 1882 * @since 1.7 1883 */ 1884 public static final UnicodeBlock NKO = 1885 new UnicodeBlock("NKO"); 1886 1887 /** 1888 * Constant for the "Samaritan" Unicode character block. 1889 * @since 1.7 1890 */ 1891 public static final UnicodeBlock SAMARITAN = 1892 new UnicodeBlock("SAMARITAN"); 1893 1894 /** 1895 * Constant for the "Mandaic" Unicode character block. 1896 * @since 1.7 1897 */ 1898 public static final UnicodeBlock MANDAIC = 1899 new UnicodeBlock("MANDAIC"); 1900 1901 /** 1902 * Constant for the "Ethiopic Supplement" Unicode character block. 1903 * @since 1.7 1904 */ 1905 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1906 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1907 "ETHIOPIC SUPPLEMENT", 1908 "ETHIOPICSUPPLEMENT"); 1909 1910 /** 1911 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1912 * Unicode character block. 
1913 * @since 1.7 1914 */ 1915 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1916 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1917 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1918 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1919 1920 /** 1921 * Constant for the "New Tai Lue" Unicode character block. 1922 * @since 1.7 1923 */ 1924 public static final UnicodeBlock NEW_TAI_LUE = 1925 new UnicodeBlock("NEW_TAI_LUE", 1926 "NEW TAI LUE", 1927 "NEWTAILUE"); 1928 1929 /** 1930 * Constant for the "Buginese" Unicode character block. 1931 * @since 1.7 1932 */ 1933 public static final UnicodeBlock BUGINESE = 1934 new UnicodeBlock("BUGINESE"); 1935 1936 /** 1937 * Constant for the "Tai Tham" Unicode character block. 1938 * @since 1.7 1939 */ 1940 public static final UnicodeBlock TAI_THAM = 1941 new UnicodeBlock("TAI_THAM", 1942 "TAI THAM", 1943 "TAITHAM"); 1944 1945 /** 1946 * Constant for the "Balinese" Unicode character block. 1947 * @since 1.7 1948 */ 1949 public static final UnicodeBlock BALINESE = 1950 new UnicodeBlock("BALINESE"); 1951 1952 /** 1953 * Constant for the "Sundanese" Unicode character block. 1954 * @since 1.7 1955 */ 1956 public static final UnicodeBlock SUNDANESE = 1957 new UnicodeBlock("SUNDANESE"); 1958 1959 /** 1960 * Constant for the "Batak" Unicode character block. 1961 * @since 1.7 1962 */ 1963 public static final UnicodeBlock BATAK = 1964 new UnicodeBlock("BATAK"); 1965 1966 /** 1967 * Constant for the "Lepcha" Unicode character block. 1968 * @since 1.7 1969 */ 1970 public static final UnicodeBlock LEPCHA = 1971 new UnicodeBlock("LEPCHA"); 1972 1973 /** 1974 * Constant for the "Ol Chiki" Unicode character block. 1975 * @since 1.7 1976 */ 1977 public static final UnicodeBlock OL_CHIKI = 1978 new UnicodeBlock("OL_CHIKI", 1979 "OL CHIKI", 1980 "OLCHIKI"); 1981 1982 /** 1983 * Constant for the "Vedic Extensions" Unicode character block. 
1984 * @since 1.7 1985 */ 1986 public static final UnicodeBlock VEDIC_EXTENSIONS = 1987 new UnicodeBlock("VEDIC_EXTENSIONS", 1988 "VEDIC EXTENSIONS", 1989 "VEDICEXTENSIONS"); 1990 1991 /** 1992 * Constant for the "Phonetic Extensions Supplement" Unicode character 1993 * block. 1994 * @since 1.7 1995 */ 1996 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1997 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1998 "PHONETIC EXTENSIONS SUPPLEMENT", 1999 "PHONETICEXTENSIONSSUPPLEMENT"); 2000 2001 /** 2002 * Constant for the "Combining Diacritical Marks Supplement" Unicode 2003 * character block. 2004 * @since 1.7 2005 */ 2006 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2007 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2008 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 2009 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2010 2011 /** 2012 * Constant for the "Glagolitic" Unicode character block. 2013 * @since 1.7 2014 */ 2015 public static final UnicodeBlock GLAGOLITIC = 2016 new UnicodeBlock("GLAGOLITIC"); 2017 2018 /** 2019 * Constant for the "Latin Extended-C" Unicode character block. 2020 * @since 1.7 2021 */ 2022 public static final UnicodeBlock LATIN_EXTENDED_C = 2023 new UnicodeBlock("LATIN_EXTENDED_C", 2024 "LATIN EXTENDED-C", 2025 "LATINEXTENDED-C"); 2026 2027 /** 2028 * Constant for the "Coptic" Unicode character block. 2029 * @since 1.7 2030 */ 2031 public static final UnicodeBlock COPTIC = 2032 new UnicodeBlock("COPTIC"); 2033 2034 /** 2035 * Constant for the "Georgian Supplement" Unicode character block. 2036 * @since 1.7 2037 */ 2038 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2039 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2040 "GEORGIAN SUPPLEMENT", 2041 "GEORGIANSUPPLEMENT"); 2042 2043 /** 2044 * Constant for the "Tifinagh" Unicode character block. 
2045 * @since 1.7 2046 */ 2047 public static final UnicodeBlock TIFINAGH = 2048 new UnicodeBlock("TIFINAGH"); 2049 2050 /** 2051 * Constant for the "Ethiopic Extended" Unicode character block. 2052 * @since 1.7 2053 */ 2054 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2055 new UnicodeBlock("ETHIOPIC_EXTENDED", 2056 "ETHIOPIC EXTENDED", 2057 "ETHIOPICEXTENDED"); 2058 2059 /** 2060 * Constant for the "Cyrillic Extended-A" Unicode character block. 2061 * @since 1.7 2062 */ 2063 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2064 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2065 "CYRILLIC EXTENDED-A", 2066 "CYRILLICEXTENDED-A"); 2067 2068 /** 2069 * Constant for the "Supplemental Punctuation" Unicode character block. 2070 * @since 1.7 2071 */ 2072 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2073 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2074 "SUPPLEMENTAL PUNCTUATION", 2075 "SUPPLEMENTALPUNCTUATION"); 2076 2077 /** 2078 * Constant for the "CJK Strokes" Unicode character block. 2079 * @since 1.7 2080 */ 2081 public static final UnicodeBlock CJK_STROKES = 2082 new UnicodeBlock("CJK_STROKES", 2083 "CJK STROKES", 2084 "CJKSTROKES"); 2085 2086 /** 2087 * Constant for the "Lisu" Unicode character block. 2088 * @since 1.7 2089 */ 2090 public static final UnicodeBlock LISU = 2091 new UnicodeBlock("LISU"); 2092 2093 /** 2094 * Constant for the "Vai" Unicode character block. 2095 * @since 1.7 2096 */ 2097 public static final UnicodeBlock VAI = 2098 new UnicodeBlock("VAI"); 2099 2100 /** 2101 * Constant for the "Cyrillic Extended-B" Unicode character block. 2102 * @since 1.7 2103 */ 2104 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2105 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2106 "CYRILLIC EXTENDED-B", 2107 "CYRILLICEXTENDED-B"); 2108 2109 /** 2110 * Constant for the "Bamum" Unicode character block. 
2111 * @since 1.7 2112 */ 2113 public static final UnicodeBlock BAMUM = 2114 new UnicodeBlock("BAMUM"); 2115 2116 /** 2117 * Constant for the "Modifier Tone Letters" Unicode character block. 2118 * @since 1.7 2119 */ 2120 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2121 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2122 "MODIFIER TONE LETTERS", 2123 "MODIFIERTONELETTERS"); 2124 2125 /** 2126 * Constant for the "Latin Extended-D" Unicode character block. 2127 * @since 1.7 2128 */ 2129 public static final UnicodeBlock LATIN_EXTENDED_D = 2130 new UnicodeBlock("LATIN_EXTENDED_D", 2131 "LATIN EXTENDED-D", 2132 "LATINEXTENDED-D"); 2133 2134 /** 2135 * Constant for the "Syloti Nagri" Unicode character block. 2136 * @since 1.7 2137 */ 2138 public static final UnicodeBlock SYLOTI_NAGRI = 2139 new UnicodeBlock("SYLOTI_NAGRI", 2140 "SYLOTI NAGRI", 2141 "SYLOTINAGRI"); 2142 2143 /** 2144 * Constant for the "Common Indic Number Forms" Unicode character block. 2145 * @since 1.7 2146 */ 2147 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2148 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2149 "COMMON INDIC NUMBER FORMS", 2150 "COMMONINDICNUMBERFORMS"); 2151 2152 /** 2153 * Constant for the "Phags-pa" Unicode character block. 2154 * @since 1.7 2155 */ 2156 public static final UnicodeBlock PHAGS_PA = 2157 new UnicodeBlock("PHAGS_PA", 2158 "PHAGS-PA"); 2159 2160 /** 2161 * Constant for the "Saurashtra" Unicode character block. 2162 * @since 1.7 2163 */ 2164 public static final UnicodeBlock SAURASHTRA = 2165 new UnicodeBlock("SAURASHTRA"); 2166 2167 /** 2168 * Constant for the "Devanagari Extended" Unicode character block. 2169 * @since 1.7 2170 */ 2171 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2172 new UnicodeBlock("DEVANAGARI_EXTENDED", 2173 "DEVANAGARI EXTENDED", 2174 "DEVANAGARIEXTENDED"); 2175 2176 /** 2177 * Constant for the "Kayah Li" Unicode character block. 
2178 * @since 1.7 2179 */ 2180 public static final UnicodeBlock KAYAH_LI = 2181 new UnicodeBlock("KAYAH_LI", 2182 "KAYAH LI", 2183 "KAYAHLI"); 2184 2185 /** 2186 * Constant for the "Rejang" Unicode character block. 2187 * @since 1.7 2188 */ 2189 public static final UnicodeBlock REJANG = 2190 new UnicodeBlock("REJANG"); 2191 2192 /** 2193 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2194 * @since 1.7 2195 */ 2196 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2197 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2198 "HANGUL JAMO EXTENDED-A", 2199 "HANGULJAMOEXTENDED-A"); 2200 2201 /** 2202 * Constant for the "Javanese" Unicode character block. 2203 * @since 1.7 2204 */ 2205 public static final UnicodeBlock JAVANESE = 2206 new UnicodeBlock("JAVANESE"); 2207 2208 /** 2209 * Constant for the "Cham" Unicode character block. 2210 * @since 1.7 2211 */ 2212 public static final UnicodeBlock CHAM = 2213 new UnicodeBlock("CHAM"); 2214 2215 /** 2216 * Constant for the "Myanmar Extended-A" Unicode character block. 2217 * @since 1.7 2218 */ 2219 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2220 new UnicodeBlock("MYANMAR_EXTENDED_A", 2221 "MYANMAR EXTENDED-A", 2222 "MYANMAREXTENDED-A"); 2223 2224 /** 2225 * Constant for the "Tai Viet" Unicode character block. 2226 * @since 1.7 2227 */ 2228 public static final UnicodeBlock TAI_VIET = 2229 new UnicodeBlock("TAI_VIET", 2230 "TAI VIET", 2231 "TAIVIET"); 2232 2233 /** 2234 * Constant for the "Ethiopic Extended-A" Unicode character block. 2235 * @since 1.7 2236 */ 2237 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2238 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2239 "ETHIOPIC EXTENDED-A", 2240 "ETHIOPICEXTENDED-A"); 2241 2242 /** 2243 * Constant for the "Meetei Mayek" Unicode character block. 
2244 * @since 1.7 2245 */ 2246 public static final UnicodeBlock MEETEI_MAYEK = 2247 new UnicodeBlock("MEETEI_MAYEK", 2248 "MEETEI MAYEK", 2249 "MEETEIMAYEK"); 2250 2251 /** 2252 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2253 * @since 1.7 2254 */ 2255 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2256 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2257 "HANGUL JAMO EXTENDED-B", 2258 "HANGULJAMOEXTENDED-B"); 2259 2260 /** 2261 * Constant for the "Vertical Forms" Unicode character block. 2262 * @since 1.7 2263 */ 2264 public static final UnicodeBlock VERTICAL_FORMS = 2265 new UnicodeBlock("VERTICAL_FORMS", 2266 "VERTICAL FORMS", 2267 "VERTICALFORMS"); 2268 2269 /** 2270 * Constant for the "Ancient Greek Numbers" Unicode character block. 2271 * @since 1.7 2272 */ 2273 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2274 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2275 "ANCIENT GREEK NUMBERS", 2276 "ANCIENTGREEKNUMBERS"); 2277 2278 /** 2279 * Constant for the "Ancient Symbols" Unicode character block. 2280 * @since 1.7 2281 */ 2282 public static final UnicodeBlock ANCIENT_SYMBOLS = 2283 new UnicodeBlock("ANCIENT_SYMBOLS", 2284 "ANCIENT SYMBOLS", 2285 "ANCIENTSYMBOLS"); 2286 2287 /** 2288 * Constant for the "Phaistos Disc" Unicode character block. 2289 * @since 1.7 2290 */ 2291 public static final UnicodeBlock PHAISTOS_DISC = 2292 new UnicodeBlock("PHAISTOS_DISC", 2293 "PHAISTOS DISC", 2294 "PHAISTOSDISC"); 2295 2296 /** 2297 * Constant for the "Lycian" Unicode character block. 2298 * @since 1.7 2299 */ 2300 public static final UnicodeBlock LYCIAN = 2301 new UnicodeBlock("LYCIAN"); 2302 2303 /** 2304 * Constant for the "Carian" Unicode character block. 2305 * @since 1.7 2306 */ 2307 public static final UnicodeBlock CARIAN = 2308 new UnicodeBlock("CARIAN"); 2309 2310 /** 2311 * Constant for the "Old Persian" Unicode character block. 
2312 * @since 1.7 2313 */ 2314 public static final UnicodeBlock OLD_PERSIAN = 2315 new UnicodeBlock("OLD_PERSIAN", 2316 "OLD PERSIAN", 2317 "OLDPERSIAN"); 2318 2319 /** 2320 * Constant for the "Imperial Aramaic" Unicode character block. 2321 * @since 1.7 2322 */ 2323 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2324 new UnicodeBlock("IMPERIAL_ARAMAIC", 2325 "IMPERIAL ARAMAIC", 2326 "IMPERIALARAMAIC"); 2327 2328 /** 2329 * Constant for the "Phoenician" Unicode character block. 2330 * @since 1.7 2331 */ 2332 public static final UnicodeBlock PHOENICIAN = 2333 new UnicodeBlock("PHOENICIAN"); 2334 2335 /** 2336 * Constant for the "Lydian" Unicode character block. 2337 * @since 1.7 2338 */ 2339 public static final UnicodeBlock LYDIAN = 2340 new UnicodeBlock("LYDIAN"); 2341 2342 /** 2343 * Constant for the "Kharoshthi" Unicode character block. 2344 * @since 1.7 2345 */ 2346 public static final UnicodeBlock KHAROSHTHI = 2347 new UnicodeBlock("KHAROSHTHI"); 2348 2349 /** 2350 * Constant for the "Old South Arabian" Unicode character block. 2351 * @since 1.7 2352 */ 2353 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2354 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2355 "OLD SOUTH ARABIAN", 2356 "OLDSOUTHARABIAN"); 2357 2358 /** 2359 * Constant for the "Avestan" Unicode character block. 2360 * @since 1.7 2361 */ 2362 public static final UnicodeBlock AVESTAN = 2363 new UnicodeBlock("AVESTAN"); 2364 2365 /** 2366 * Constant for the "Inscriptional Parthian" Unicode character block. 2367 * @since 1.7 2368 */ 2369 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2370 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2371 "INSCRIPTIONAL PARTHIAN", 2372 "INSCRIPTIONALPARTHIAN"); 2373 2374 /** 2375 * Constant for the "Inscriptional Pahlavi" Unicode character block. 
2376 * @since 1.7 2377 */ 2378 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2379 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2380 "INSCRIPTIONAL PAHLAVI", 2381 "INSCRIPTIONALPAHLAVI"); 2382 2383 /** 2384 * Constant for the "Old Turkic" Unicode character block. 2385 * @since 1.7 2386 */ 2387 public static final UnicodeBlock OLD_TURKIC = 2388 new UnicodeBlock("OLD_TURKIC", 2389 "OLD TURKIC", 2390 "OLDTURKIC"); 2391 2392 /** 2393 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2394 * @since 1.7 2395 */ 2396 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2397 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2398 "RUMI NUMERAL SYMBOLS", 2399 "RUMINUMERALSYMBOLS"); 2400 2401 /** 2402 * Constant for the "Brahmi" Unicode character block. 2403 * @since 1.7 2404 */ 2405 public static final UnicodeBlock BRAHMI = 2406 new UnicodeBlock("BRAHMI"); 2407 2408 /** 2409 * Constant for the "Kaithi" Unicode character block. 2410 * @since 1.7 2411 */ 2412 public static final UnicodeBlock KAITHI = 2413 new UnicodeBlock("KAITHI"); 2414 2415 /** 2416 * Constant for the "Cuneiform" Unicode character block. 2417 * @since 1.7 2418 */ 2419 public static final UnicodeBlock CUNEIFORM = 2420 new UnicodeBlock("CUNEIFORM"); 2421 2422 /** 2423 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2424 * character block. 2425 * @since 1.7 2426 */ 2427 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2428 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2429 "CUNEIFORM NUMBERS AND PUNCTUATION", 2430 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2431 2432 /** 2433 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2434 * @since 1.7 2435 */ 2436 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2437 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2438 "EGYPTIAN HIEROGLYPHS", 2439 "EGYPTIANHIEROGLYPHS"); 2440 2441 /** 2442 * Constant for the "Bamum Supplement" Unicode character block. 
2443 * @since 1.7 2444 */ 2445 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2446 new UnicodeBlock("BAMUM_SUPPLEMENT", 2447 "BAMUM SUPPLEMENT", 2448 "BAMUMSUPPLEMENT"); 2449 2450 /** 2451 * Constant for the "Kana Supplement" Unicode character block. 2452 * @since 1.7 2453 */ 2454 public static final UnicodeBlock KANA_SUPPLEMENT = 2455 new UnicodeBlock("KANA_SUPPLEMENT", 2456 "KANA SUPPLEMENT", 2457 "KANASUPPLEMENT"); 2458 2459 /** 2460 * Constant for the "Ancient Greek Musical Notation" Unicode character 2461 * block. 2462 * @since 1.7 2463 */ 2464 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2465 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2466 "ANCIENT GREEK MUSICAL NOTATION", 2467 "ANCIENTGREEKMUSICALNOTATION"); 2468 2469 /** 2470 * Constant for the "Counting Rod Numerals" Unicode character block. 2471 * @since 1.7 2472 */ 2473 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2474 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2475 "COUNTING ROD NUMERALS", 2476 "COUNTINGRODNUMERALS"); 2477 2478 /** 2479 * Constant for the "Mahjong Tiles" Unicode character block. 2480 * @since 1.7 2481 */ 2482 public static final UnicodeBlock MAHJONG_TILES = 2483 new UnicodeBlock("MAHJONG_TILES", 2484 "MAHJONG TILES", 2485 "MAHJONGTILES"); 2486 2487 /** 2488 * Constant for the "Domino Tiles" Unicode character block. 2489 * @since 1.7 2490 */ 2491 public static final UnicodeBlock DOMINO_TILES = 2492 new UnicodeBlock("DOMINO_TILES", 2493 "DOMINO TILES", 2494 "DOMINOTILES"); 2495 2496 /** 2497 * Constant for the "Playing Cards" Unicode character block. 2498 * @since 1.7 2499 */ 2500 public static final UnicodeBlock PLAYING_CARDS = 2501 new UnicodeBlock("PLAYING_CARDS", 2502 "PLAYING CARDS", 2503 "PLAYINGCARDS"); 2504 2505 /** 2506 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2507 * block. 
2508 * @since 1.7 2509 */ 2510 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2511 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2512 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2513 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2514 2515 /** 2516 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2517 * block. 2518 * @since 1.7 2519 */ 2520 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2521 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2522 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2523 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2524 2525 /** 2526 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2527 * character block. 2528 * @since 1.7 2529 */ 2530 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2531 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2532 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2533 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2534 2535 /** 2536 * Constant for the "Emoticons" Unicode character block. 2537 * @since 1.7 2538 */ 2539 public static final UnicodeBlock EMOTICONS = 2540 new UnicodeBlock("EMOTICONS"); 2541 2542 /** 2543 * Constant for the "Transport And Map Symbols" Unicode character block. 2544 * @since 1.7 2545 */ 2546 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2547 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2548 "TRANSPORT AND MAP SYMBOLS", 2549 "TRANSPORTANDMAPSYMBOLS"); 2550 2551 /** 2552 * Constant for the "Alchemical Symbols" Unicode character block. 2553 * @since 1.7 2554 */ 2555 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2556 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2557 "ALCHEMICAL SYMBOLS", 2558 "ALCHEMICALSYMBOLS"); 2559 2560 /** 2561 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2562 * character block. 
2563 * @since 1.7 2564 */ 2565 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2566 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2567 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2568 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2569 2570 /** 2571 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2572 * character block. 2573 * @since 1.7 2574 */ 2575 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2576 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2577 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2578 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2579 2580 /** 2581 * Constant for the "Arabic Extended-A" Unicode character block. 2582 * @since 1.8 2583 */ 2584 public static final UnicodeBlock ARABIC_EXTENDED_A = 2585 new UnicodeBlock("ARABIC_EXTENDED_A", 2586 "ARABIC EXTENDED-A", 2587 "ARABICEXTENDED-A"); 2588 2589 /** 2590 * Constant for the "Sundanese Supplement" Unicode character block. 2591 * @since 1.8 2592 */ 2593 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2594 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2595 "SUNDANESE SUPPLEMENT", 2596 "SUNDANESESUPPLEMENT"); 2597 2598 /** 2599 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2600 * @since 1.8 2601 */ 2602 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2603 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2604 "MEETEI MAYEK EXTENSIONS", 2605 "MEETEIMAYEKEXTENSIONS"); 2606 2607 /** 2608 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2609 * @since 1.8 2610 */ 2611 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2612 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2613 "MEROITIC HIEROGLYPHS", 2614 "MEROITICHIEROGLYPHS"); 2615 2616 /** 2617 * Constant for the "Meroitic Cursive" Unicode character block. 
2618 * @since 1.8 2619 */ 2620 public static final UnicodeBlock MEROITIC_CURSIVE = 2621 new UnicodeBlock("MEROITIC_CURSIVE", 2622 "MEROITIC CURSIVE", 2623 "MEROITICCURSIVE"); 2624 2625 /** 2626 * Constant for the "Sora Sompeng" Unicode character block. 2627 * @since 1.8 2628 */ 2629 public static final UnicodeBlock SORA_SOMPENG = 2630 new UnicodeBlock("SORA_SOMPENG", 2631 "SORA SOMPENG", 2632 "SORASOMPENG"); 2633 2634 /** 2635 * Constant for the "Chakma" Unicode character block. 2636 * @since 1.8 2637 */ 2638 public static final UnicodeBlock CHAKMA = 2639 new UnicodeBlock("CHAKMA"); 2640 2641 /** 2642 * Constant for the "Sharada" Unicode character block. 2643 * @since 1.8 2644 */ 2645 public static final UnicodeBlock SHARADA = 2646 new UnicodeBlock("SHARADA"); 2647 2648 /** 2649 * Constant for the "Takri" Unicode character block. 2650 * @since 1.8 2651 */ 2652 public static final UnicodeBlock TAKRI = 2653 new UnicodeBlock("TAKRI"); 2654 2655 /** 2656 * Constant for the "Miao" Unicode character block. 2657 * @since 1.8 2658 */ 2659 public static final UnicodeBlock MIAO = 2660 new UnicodeBlock("MIAO"); 2661 2662 /** 2663 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2664 * character block. 2665 * @since 1.8 2666 */ 2667 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2668 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2669 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2670 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2671 2672 /** 2673 * Constant for the "Combining Diacritical Marks Extended" Unicode 2674 * character block. 2675 * @since 9 2676 */ 2677 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2678 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2679 "COMBINING DIACRITICAL MARKS EXTENDED", 2680 "COMBININGDIACRITICALMARKSEXTENDED"); 2681 2682 /** 2683 * Constant for the "Myanmar Extended-B" Unicode character block. 
2684 * @since 9 2685 */ 2686 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2687 new UnicodeBlock("MYANMAR_EXTENDED_B", 2688 "MYANMAR EXTENDED-B", 2689 "MYANMAREXTENDED-B"); 2690 2691 /** 2692 * Constant for the "Latin Extended-E" Unicode character block. 2693 * @since 9 2694 */ 2695 public static final UnicodeBlock LATIN_EXTENDED_E = 2696 new UnicodeBlock("LATIN_EXTENDED_E", 2697 "LATIN EXTENDED-E", 2698 "LATINEXTENDED-E"); 2699 2700 /** 2701 * Constant for the "Coptic Epact Numbers" Unicode character block. 2702 * @since 9 2703 */ 2704 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2705 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2706 "COPTIC EPACT NUMBERS", 2707 "COPTICEPACTNUMBERS"); 2708 2709 /** 2710 * Constant for the "Old Permic" Unicode character block. 2711 * @since 9 2712 */ 2713 public static final UnicodeBlock OLD_PERMIC = 2714 new UnicodeBlock("OLD_PERMIC", 2715 "OLD PERMIC", 2716 "OLDPERMIC"); 2717 2718 /** 2719 * Constant for the "Elbasan" Unicode character block. 2720 * @since 9 2721 */ 2722 public static final UnicodeBlock ELBASAN = 2723 new UnicodeBlock("ELBASAN"); 2724 2725 /** 2726 * Constant for the "Caucasian Albanian" Unicode character block. 2727 * @since 9 2728 */ 2729 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2730 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2731 "CAUCASIAN ALBANIAN", 2732 "CAUCASIANALBANIAN"); 2733 2734 /** 2735 * Constant for the "Linear A" Unicode character block. 2736 * @since 9 2737 */ 2738 public static final UnicodeBlock LINEAR_A = 2739 new UnicodeBlock("LINEAR_A", 2740 "LINEAR A", 2741 "LINEARA"); 2742 2743 /** 2744 * Constant for the "Palmyrene" Unicode character block. 2745 * @since 9 2746 */ 2747 public static final UnicodeBlock PALMYRENE = 2748 new UnicodeBlock("PALMYRENE"); 2749 2750 /** 2751 * Constant for the "Nabataean" Unicode character block. 
2752 * @since 9 2753 */ 2754 public static final UnicodeBlock NABATAEAN = 2755 new UnicodeBlock("NABATAEAN"); 2756 2757 /** 2758 * Constant for the "Old North Arabian" Unicode character block. 2759 * @since 9 2760 */ 2761 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2762 new UnicodeBlock("OLD_NORTH_ARABIAN", 2763 "OLD NORTH ARABIAN", 2764 "OLDNORTHARABIAN"); 2765 2766 /** 2767 * Constant for the "Manichaean" Unicode character block. 2768 * @since 9 2769 */ 2770 public static final UnicodeBlock MANICHAEAN = 2771 new UnicodeBlock("MANICHAEAN"); 2772 2773 /** 2774 * Constant for the "Psalter Pahlavi" Unicode character block. 2775 * @since 9 2776 */ 2777 public static final UnicodeBlock PSALTER_PAHLAVI = 2778 new UnicodeBlock("PSALTER_PAHLAVI", 2779 "PSALTER PAHLAVI", 2780 "PSALTERPAHLAVI"); 2781 2782 /** 2783 * Constant for the "Mahajani" Unicode character block. 2784 * @since 9 2785 */ 2786 public static final UnicodeBlock MAHAJANI = 2787 new UnicodeBlock("MAHAJANI"); 2788 2789 /** 2790 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2791 * @since 9 2792 */ 2793 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2794 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2795 "SINHALA ARCHAIC NUMBERS", 2796 "SINHALAARCHAICNUMBERS"); 2797 2798 /** 2799 * Constant for the "Khojki" Unicode character block. 2800 * @since 9 2801 */ 2802 public static final UnicodeBlock KHOJKI = 2803 new UnicodeBlock("KHOJKI"); 2804 2805 /** 2806 * Constant for the "Khudawadi" Unicode character block. 2807 * @since 9 2808 */ 2809 public static final UnicodeBlock KHUDAWADI = 2810 new UnicodeBlock("KHUDAWADI"); 2811 2812 /** 2813 * Constant for the "Grantha" Unicode character block. 2814 * @since 9 2815 */ 2816 public static final UnicodeBlock GRANTHA = 2817 new UnicodeBlock("GRANTHA"); 2818 2819 /** 2820 * Constant for the "Tirhuta" Unicode character block. 
2821 * @since 9 2822 */ 2823 public static final UnicodeBlock TIRHUTA = 2824 new UnicodeBlock("TIRHUTA"); 2825 2826 /** 2827 * Constant for the "Siddham" Unicode character block. 2828 * @since 9 2829 */ 2830 public static final UnicodeBlock SIDDHAM = 2831 new UnicodeBlock("SIDDHAM"); 2832 2833 /** 2834 * Constant for the "Modi" Unicode character block. 2835 * @since 9 2836 */ 2837 public static final UnicodeBlock MODI = 2838 new UnicodeBlock("MODI"); 2839 2840 /** 2841 * Constant for the "Warang Citi" Unicode character block. 2842 * @since 9 2843 */ 2844 public static final UnicodeBlock WARANG_CITI = 2845 new UnicodeBlock("WARANG_CITI", 2846 "WARANG CITI", 2847 "WARANGCITI"); 2848 2849 /** 2850 * Constant for the "Pau Cin Hau" Unicode character block. 2851 * @since 9 2852 */ 2853 public static final UnicodeBlock PAU_CIN_HAU = 2854 new UnicodeBlock("PAU_CIN_HAU", 2855 "PAU CIN HAU", 2856 "PAUCINHAU"); 2857 2858 /** 2859 * Constant for the "Mro" Unicode character block. 2860 * @since 9 2861 */ 2862 public static final UnicodeBlock MRO = 2863 new UnicodeBlock("MRO"); 2864 2865 /** 2866 * Constant for the "Bassa Vah" Unicode character block. 2867 * @since 9 2868 */ 2869 public static final UnicodeBlock BASSA_VAH = 2870 new UnicodeBlock("BASSA_VAH", 2871 "BASSA VAH", 2872 "BASSAVAH"); 2873 2874 /** 2875 * Constant for the "Pahawh Hmong" Unicode character block. 2876 * @since 9 2877 */ 2878 public static final UnicodeBlock PAHAWH_HMONG = 2879 new UnicodeBlock("PAHAWH_HMONG", 2880 "PAHAWH HMONG", 2881 "PAHAWHHMONG"); 2882 2883 /** 2884 * Constant for the "Duployan" Unicode character block. 2885 * @since 9 2886 */ 2887 public static final UnicodeBlock DUPLOYAN = 2888 new UnicodeBlock("DUPLOYAN"); 2889 2890 /** 2891 * Constant for the "Shorthand Format Controls" Unicode character block. 
2892 * @since 9 2893 */ 2894 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2895 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2896 "SHORTHAND FORMAT CONTROLS", 2897 "SHORTHANDFORMATCONTROLS"); 2898 2899 /** 2900 * Constant for the "Mende Kikakui" Unicode character block. 2901 * @since 9 2902 */ 2903 public static final UnicodeBlock MENDE_KIKAKUI = 2904 new UnicodeBlock("MENDE_KIKAKUI", 2905 "MENDE KIKAKUI", 2906 "MENDEKIKAKUI"); 2907 2908 /** 2909 * Constant for the "Ornamental Dingbats" Unicode character block. 2910 * @since 9 2911 */ 2912 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2913 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2914 "ORNAMENTAL DINGBATS", 2915 "ORNAMENTALDINGBATS"); 2916 2917 /** 2918 * Constant for the "Geometric Shapes Extended" Unicode character block. 2919 * @since 9 2920 */ 2921 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2922 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2923 "GEOMETRIC SHAPES EXTENDED", 2924 "GEOMETRICSHAPESEXTENDED"); 2925 2926 /** 2927 * Constant for the "Supplemental Arrows-C" Unicode character block. 2928 * @since 9 2929 */ 2930 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2931 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2932 "SUPPLEMENTAL ARROWS-C", 2933 "SUPPLEMENTALARROWS-C"); 2934 2935 /** 2936 * Constant for the "Cherokee Supplement" Unicode character block. 2937 * @since 9 2938 */ 2939 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2940 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2941 "CHEROKEE SUPPLEMENT", 2942 "CHEROKEESUPPLEMENT"); 2943 2944 /** 2945 * Constant for the "Hatran" Unicode character block. 2946 * @since 9 2947 */ 2948 public static final UnicodeBlock HATRAN = 2949 new UnicodeBlock("HATRAN"); 2950 2951 /** 2952 * Constant for the "Old Hungarian" Unicode character block. 
2953 * @since 9 2954 */ 2955 public static final UnicodeBlock OLD_HUNGARIAN = 2956 new UnicodeBlock("OLD_HUNGARIAN", 2957 "OLD HUNGARIAN", 2958 "OLDHUNGARIAN"); 2959 2960 /** 2961 * Constant for the "Multani" Unicode character block. 2962 * @since 9 2963 */ 2964 public static final UnicodeBlock MULTANI = 2965 new UnicodeBlock("MULTANI"); 2966 2967 /** 2968 * Constant for the "Ahom" Unicode character block. 2969 * @since 9 2970 */ 2971 public static final UnicodeBlock AHOM = 2972 new UnicodeBlock("AHOM"); 2973 2974 /** 2975 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2976 * @since 9 2977 */ 2978 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2979 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2980 "EARLY DYNASTIC CUNEIFORM", 2981 "EARLYDYNASTICCUNEIFORM"); 2982 2983 /** 2984 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2985 * @since 9 2986 */ 2987 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2988 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2989 "ANATOLIAN HIEROGLYPHS", 2990 "ANATOLIANHIEROGLYPHS"); 2991 2992 /** 2993 * Constant for the "Sutton SignWriting" Unicode character block. 2994 * @since 9 2995 */ 2996 public static final UnicodeBlock SUTTON_SIGNWRITING = 2997 new UnicodeBlock("SUTTON_SIGNWRITING", 2998 "SUTTON SIGNWRITING", 2999 "SUTTONSIGNWRITING"); 3000 3001 /** 3002 * Constant for the "Supplemental Symbols and Pictographs" Unicode 3003 * character block. 3004 * @since 9 3005 */ 3006 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 3007 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 3008 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 3009 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3010 3011 /** 3012 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3013 * character block. 
3014 * @since 9 3015 */ 3016 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3017 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3018 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3019 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3020 3021 /** 3022 * Constant for the "Syriac Supplement" Unicode 3023 * character block. 3024 * @since 11 3025 */ 3026 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3027 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3028 "SYRIAC SUPPLEMENT", 3029 "SYRIACSUPPLEMENT"); 3030 3031 /** 3032 * Constant for the "Cyrillic Extended-C" Unicode 3033 * character block. 3034 * @since 11 3035 */ 3036 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3037 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3038 "CYRILLIC EXTENDED-C", 3039 "CYRILLICEXTENDED-C"); 3040 3041 /** 3042 * Constant for the "Osage" Unicode 3043 * character block. 3044 * @since 11 3045 */ 3046 public static final UnicodeBlock OSAGE = 3047 new UnicodeBlock("OSAGE"); 3048 3049 /** 3050 * Constant for the "Newa" Unicode 3051 * character block. 3052 * @since 11 3053 */ 3054 public static final UnicodeBlock NEWA = 3055 new UnicodeBlock("NEWA"); 3056 3057 /** 3058 * Constant for the "Mongolian Supplement" Unicode 3059 * character block. 3060 * @since 11 3061 */ 3062 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3063 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3064 "MONGOLIAN SUPPLEMENT", 3065 "MONGOLIANSUPPLEMENT"); 3066 3067 /** 3068 * Constant for the "Marchen" Unicode 3069 * character block. 3070 * @since 11 3071 */ 3072 public static final UnicodeBlock MARCHEN = 3073 new UnicodeBlock("MARCHEN"); 3074 3075 /** 3076 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3077 * character block. 
3078 * @since 11 3079 */ 3080 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3081 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3082 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3083 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3084 3085 /** 3086 * Constant for the "Tangut" Unicode 3087 * character block. 3088 * @since 11 3089 */ 3090 public static final UnicodeBlock TANGUT = 3091 new UnicodeBlock("TANGUT"); 3092 3093 /** 3094 * Constant for the "Tangut Components" Unicode 3095 * character block. 3096 * @since 11 3097 */ 3098 public static final UnicodeBlock TANGUT_COMPONENTS = 3099 new UnicodeBlock("TANGUT_COMPONENTS", 3100 "TANGUT COMPONENTS", 3101 "TANGUTCOMPONENTS"); 3102 3103 /** 3104 * Constant for the "Kana Extended-A" Unicode 3105 * character block. 3106 * @since 11 3107 */ 3108 public static final UnicodeBlock KANA_EXTENDED_A = 3109 new UnicodeBlock("KANA_EXTENDED_A", 3110 "KANA EXTENDED-A", 3111 "KANAEXTENDED-A"); 3112 /** 3113 * Constant for the "Glagolitic Supplement" Unicode 3114 * character block. 3115 * @since 11 3116 */ 3117 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3118 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3119 "GLAGOLITIC SUPPLEMENT", 3120 "GLAGOLITICSUPPLEMENT"); 3121 /** 3122 * Constant for the "Adlam" Unicode 3123 * character block. 3124 * @since 11 3125 */ 3126 public static final UnicodeBlock ADLAM = 3127 new UnicodeBlock("ADLAM"); 3128 3129 /** 3130 * Constant for the "Masaram Gondi" Unicode 3131 * character block. 3132 * @since 11 3133 */ 3134 public static final UnicodeBlock MASARAM_GONDI = 3135 new UnicodeBlock("MASARAM_GONDI", 3136 "MASARAM GONDI", 3137 "MASARAMGONDI"); 3138 3139 /** 3140 * Constant for the "Zanabazar Square" Unicode 3141 * character block. 
3142 * @since 11 3143 */ 3144 public static final UnicodeBlock ZANABAZAR_SQUARE = 3145 new UnicodeBlock("ZANABAZAR_SQUARE", 3146 "ZANABAZAR SQUARE", 3147 "ZANABAZARSQUARE"); 3148 3149 /** 3150 * Constant for the "Nushu" Unicode 3151 * character block. 3152 * @since 11 3153 */ 3154 public static final UnicodeBlock NUSHU = 3155 new UnicodeBlock("NUSHU"); 3156 3157 /** 3158 * Constant for the "Soyombo" Unicode 3159 * character block. 3160 * @since 11 3161 */ 3162 public static final UnicodeBlock SOYOMBO = 3163 new UnicodeBlock("SOYOMBO"); 3164 3165 /** 3166 * Constant for the "Bhaiksuki" Unicode 3167 * character block. 3168 * @since 11 3169 */ 3170 public static final UnicodeBlock BHAIKSUKI = 3171 new UnicodeBlock("BHAIKSUKI"); 3172 3173 /** 3174 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3175 * character block. 3176 * @since 11 3177 */ 3178 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3179 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3180 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3181 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3182 /** 3183 * Constant for the "Georgian Extended" Unicode 3184 * character block. 3185 * @since 12 3186 */ 3187 public static final UnicodeBlock GEORGIAN_EXTENDED = 3188 new UnicodeBlock("GEORGIAN_EXTENDED", 3189 "GEORGIAN EXTENDED", 3190 "GEORGIANEXTENDED"); 3191 3192 /** 3193 * Constant for the "Hanifi Rohingya" Unicode 3194 * character block. 3195 * @since 12 3196 */ 3197 public static final UnicodeBlock HANIFI_ROHINGYA = 3198 new UnicodeBlock("HANIFI_ROHINGYA", 3199 "HANIFI ROHINGYA", 3200 "HANIFIROHINGYA"); 3201 3202 /** 3203 * Constant for the "Old Sogdian" Unicode 3204 * character block. 3205 * @since 12 3206 */ 3207 public static final UnicodeBlock OLD_SOGDIAN = 3208 new UnicodeBlock("OLD_SOGDIAN", 3209 "OLD SOGDIAN", 3210 "OLDSOGDIAN"); 3211 3212 /** 3213 * Constant for the "Sogdian" Unicode 3214 * character block. 
3215 * @since 12 3216 */ 3217 public static final UnicodeBlock SOGDIAN = 3218 new UnicodeBlock("SOGDIAN"); 3219 3220 /** 3221 * Constant for the "Dogra" Unicode 3222 * character block. 3223 * @since 12 3224 */ 3225 public static final UnicodeBlock DOGRA = 3226 new UnicodeBlock("DOGRA"); 3227 3228 /** 3229 * Constant for the "Gunjala Gondi" Unicode 3230 * character block. 3231 * @since 12 3232 */ 3233 public static final UnicodeBlock GUNJALA_GONDI = 3234 new UnicodeBlock("GUNJALA_GONDI", 3235 "GUNJALA GONDI", 3236 "GUNJALAGONDI"); 3237 3238 /** 3239 * Constant for the "Makasar" Unicode 3240 * character block. 3241 * @since 12 3242 */ 3243 public static final UnicodeBlock MAKASAR = 3244 new UnicodeBlock("MAKASAR"); 3245 3246 /** 3247 * Constant for the "Medefaidrin" Unicode 3248 * character block. 3249 * @since 12 3250 */ 3251 public static final UnicodeBlock MEDEFAIDRIN = 3252 new UnicodeBlock("MEDEFAIDRIN"); 3253 3254 /** 3255 * Constant for the "Mayan Numerals" Unicode 3256 * character block. 3257 * @since 12 3258 */ 3259 public static final UnicodeBlock MAYAN_NUMERALS = 3260 new UnicodeBlock("MAYAN_NUMERALS", 3261 "MAYAN NUMERALS", 3262 "MAYANNUMERALS"); 3263 3264 /** 3265 * Constant for the "Indic Siyaq Numbers" Unicode 3266 * character block. 3267 * @since 12 3268 */ 3269 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3270 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3271 "INDIC SIYAQ NUMBERS", 3272 "INDICSIYAQNUMBERS"); 3273 3274 /** 3275 * Constant for the "Chess Symbols" Unicode 3276 * character block. 3277 * @since 12 3278 */ 3279 public static final UnicodeBlock CHESS_SYMBOLS = 3280 new UnicodeBlock("CHESS_SYMBOLS", 3281 "CHESS SYMBOLS", 3282 "CHESSSYMBOLS"); 3283 3284 /** 3285 * Constant for the "Elymaic" Unicode 3286 * character block. 3287 * @since 13 3288 */ 3289 public static final UnicodeBlock ELYMAIC = 3290 new UnicodeBlock("ELYMAIC"); 3291 3292 /** 3293 * Constant for the "Nandinagari" Unicode 3294 * character block. 
3295 * @since 13 3296 */ 3297 public static final UnicodeBlock NANDINAGARI = 3298 new UnicodeBlock("NANDINAGARI"); 3299 3300 /** 3301 * Constant for the "Tamil Supplement" Unicode 3302 * character block. 3303 * @since 13 3304 */ 3305 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3306 new UnicodeBlock("TAMIL_SUPPLEMENT", 3307 "TAMIL SUPPLEMENT", 3308 "TAMILSUPPLEMENT"); 3309 3310 /** 3311 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3312 * character block. 3313 * @since 13 3314 */ 3315 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3316 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3317 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3318 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3319 3320 /** 3321 * Constant for the "Small Kana Extension" Unicode 3322 * character block. 3323 * @since 13 3324 */ 3325 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3326 new UnicodeBlock("SMALL_KANA_EXTENSION", 3327 "SMALL KANA EXTENSION", 3328 "SMALLKANAEXTENSION"); 3329 3330 /** 3331 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3332 * character block. 3333 * @since 13 3334 */ 3335 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3336 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3337 "NYIAKENG PUACHUE HMONG", 3338 "NYIAKENGPUACHUEHMONG"); 3339 3340 /** 3341 * Constant for the "Wancho" Unicode 3342 * character block. 3343 * @since 13 3344 */ 3345 public static final UnicodeBlock WANCHO = 3346 new UnicodeBlock("WANCHO"); 3347 3348 /** 3349 * Constant for the "Ottoman Siyaq Numbers" Unicode 3350 * character block. 3351 * @since 13 3352 */ 3353 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 3354 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", 3355 "OTTOMAN SIYAQ NUMBERS", 3356 "OTTOMANSIYAQNUMBERS"); 3357 3358 /** 3359 * Constant for the "Symbols and Pictographs Extended-A" Unicode 3360 * character block. 
3361 * @since 13 3362 */ 3363 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 3364 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", 3365 "SYMBOLS AND PICTOGRAPHS EXTENDED-A", 3366 "SYMBOLSANDPICTOGRAPHSEXTENDED-A"); 3367 3368 /** 3369 * Constant for the "Yezidi" Unicode 3370 * character block. 3371 * @since 15 3372 */ 3373 public static final UnicodeBlock YEZIDI = 3374 new UnicodeBlock("YEZIDI"); 3375 3376 /** 3377 * Constant for the "Chorasmian" Unicode 3378 * character block. 3379 * @since 15 3380 */ 3381 public static final UnicodeBlock CHORASMIAN = 3382 new UnicodeBlock("CHORASMIAN"); 3383 3384 /** 3385 * Constant for the "Dives Akuru" Unicode 3386 * character block. 3387 * @since 15 3388 */ 3389 public static final UnicodeBlock DIVES_AKURU = 3390 new UnicodeBlock("DIVES_AKURU", 3391 "DIVES AKURU", 3392 "DIVESAKURU"); 3393 3394 /** 3395 * Constant for the "Lisu Supplement" Unicode 3396 * character block. 3397 * @since 15 3398 */ 3399 public static final UnicodeBlock LISU_SUPPLEMENT = 3400 new UnicodeBlock("LISU_SUPPLEMENT", 3401 "LISU SUPPLEMENT", 3402 "LISUSUPPLEMENT"); 3403 3404 /** 3405 * Constant for the "Khitan Small Script" Unicode 3406 * character block. 3407 * @since 15 3408 */ 3409 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 3410 new UnicodeBlock("KHITAN_SMALL_SCRIPT", 3411 "KHITAN SMALL SCRIPT", 3412 "KHITANSMALLSCRIPT"); 3413 3414 /** 3415 * Constant for the "Tangut Supplement" Unicode 3416 * character block. 3417 * @since 15 3418 */ 3419 public static final UnicodeBlock TANGUT_SUPPLEMENT = 3420 new UnicodeBlock("TANGUT_SUPPLEMENT", 3421 "TANGUT SUPPLEMENT", 3422 "TANGUTSUPPLEMENT"); 3423 3424 /** 3425 * Constant for the "Symbols for Legacy Computing" Unicode 3426 * character block. 
3427 * @since 15 3428 */ 3429 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 3430 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", 3431 "SYMBOLS FOR LEGACY COMPUTING", 3432 "SYMBOLSFORLEGACYCOMPUTING"); 3433 3434 /** 3435 * Constant for the "CJK Unified Ideographs Extension G" Unicode 3436 * character block. 3437 * @since 15 3438 */ 3439 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 3440 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 3441 "CJK UNIFIED IDEOGRAPHS EXTENSION G", 3442 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG"); 3443 3444 /** 3445 * Constant for the "Arabic Extended-B" Unicode 3446 * character block. 3447 * @since 19 3448 */ 3449 public static final UnicodeBlock ARABIC_EXTENDED_B = 3450 new UnicodeBlock("ARABIC_EXTENDED_B", 3451 "ARABIC EXTENDED-B", 3452 "ARABICEXTENDED-B"); 3453 3454 /** 3455 * Constant for the "Vithkuqi" Unicode 3456 * character block. 3457 * @since 19 3458 */ 3459 public static final UnicodeBlock VITHKUQI = 3460 new UnicodeBlock("VITHKUQI"); 3461 3462 /** 3463 * Constant for the "Latin Extended-F" Unicode 3464 * character block. 3465 * @since 19 3466 */ 3467 public static final UnicodeBlock LATIN_EXTENDED_F = 3468 new UnicodeBlock("LATIN_EXTENDED_F", 3469 "LATIN EXTENDED-F", 3470 "LATINEXTENDED-F"); 3471 3472 /** 3473 * Constant for the "Old Uyghur" Unicode 3474 * character block. 3475 * @since 19 3476 */ 3477 public static final UnicodeBlock OLD_UYGHUR = 3478 new UnicodeBlock("OLD_UYGHUR", 3479 "OLD UYGHUR", 3480 "OLDUYGHUR"); 3481 3482 /** 3483 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode 3484 * character block. 
3485 * @since 19 3486 */ 3487 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 3488 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A", 3489 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A", 3490 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A"); 3491 3492 /** 3493 * Constant for the "Cypro-Minoan" Unicode 3494 * character block. 3495 * @since 19 3496 */ 3497 public static final UnicodeBlock CYPRO_MINOAN = 3498 new UnicodeBlock("CYPRO_MINOAN", 3499 "CYPRO-MINOAN", 3500 "CYPRO-MINOAN"); 3501 3502 /** 3503 * Constant for the "Tangsa" Unicode 3504 * character block. 3505 * @since 19 3506 */ 3507 public static final UnicodeBlock TANGSA = 3508 new UnicodeBlock("TANGSA"); 3509 3510 /** 3511 * Constant for the "Kana Extended-B" Unicode 3512 * character block. 3513 * @since 19 3514 */ 3515 public static final UnicodeBlock KANA_EXTENDED_B = 3516 new UnicodeBlock("KANA_EXTENDED_B", 3517 "KANA EXTENDED-B", 3518 "KANAEXTENDED-B"); 3519 3520 /** 3521 * Constant for the "Znamenny Musical Notation" Unicode 3522 * character block. 3523 * @since 19 3524 */ 3525 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION = 3526 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION", 3527 "ZNAMENNY MUSICAL NOTATION", 3528 "ZNAMENNYMUSICALNOTATION"); 3529 3530 /** 3531 * Constant for the "Latin Extended-G" Unicode 3532 * character block. 3533 * @since 19 3534 */ 3535 public static final UnicodeBlock LATIN_EXTENDED_G = 3536 new UnicodeBlock("LATIN_EXTENDED_G", 3537 "LATIN EXTENDED-G", 3538 "LATINEXTENDED-G"); 3539 3540 /** 3541 * Constant for the "Toto" Unicode 3542 * character block. 3543 * @since 19 3544 */ 3545 public static final UnicodeBlock TOTO = 3546 new UnicodeBlock("TOTO"); 3547 3548 /** 3549 * Constant for the "Ethiopic Extended-B" Unicode 3550 * character block. 
3551 * @since 19 3552 */ 3553 public static final UnicodeBlock ETHIOPIC_EXTENDED_B = 3554 new UnicodeBlock("ETHIOPIC_EXTENDED_B", 3555 "ETHIOPIC EXTENDED-B", 3556 "ETHIOPICEXTENDED-B"); 3557 3558 /** 3559 * Constant for the "Arabic Extended-C" Unicode 3560 * character block. 3561 * @since 20 3562 */ 3563 public static final UnicodeBlock ARABIC_EXTENDED_C = 3564 new UnicodeBlock("ARABIC_EXTENDED_C", 3565 "ARABIC EXTENDED-C", 3566 "ARABICEXTENDED-C"); 3567 3568 /** 3569 * Constant for the "Devanagari Extended-A" Unicode 3570 * character block. 3571 * @since 20 3572 */ 3573 public static final UnicodeBlock DEVANAGARI_EXTENDED_A = 3574 new UnicodeBlock("DEVANAGARI_EXTENDED_A", 3575 "DEVANAGARI EXTENDED-A", 3576 "DEVANAGARIEXTENDED-A"); 3577 3578 /** 3579 * Constant for the "Kawi" Unicode 3580 * character block. 3581 * @since 20 3582 */ 3583 public static final UnicodeBlock KAWI = 3584 new UnicodeBlock("KAWI"); 3585 3586 /** 3587 * Constant for the "Kaktovik Numerals" Unicode 3588 * character block. 3589 * @since 20 3590 */ 3591 public static final UnicodeBlock KAKTOVIK_NUMERALS = 3592 new UnicodeBlock("KAKTOVIK_NUMERALS", 3593 "KAKTOVIK NUMERALS", 3594 "KAKTOVIKNUMERALS"); 3595 3596 /** 3597 * Constant for the "Cyrillic Extended-D" Unicode 3598 * character block. 3599 * @since 20 3600 */ 3601 public static final UnicodeBlock CYRILLIC_EXTENDED_D = 3602 new UnicodeBlock("CYRILLIC_EXTENDED_D", 3603 "CYRILLIC EXTENDED-D", 3604 "CYRILLICEXTENDED-D"); 3605 3606 /** 3607 * Constant for the "Nag Mundari" Unicode 3608 * character block. 3609 * @since 20 3610 */ 3611 public static final UnicodeBlock NAG_MUNDARI = 3612 new UnicodeBlock("NAG_MUNDARI", 3613 "NAG MUNDARI", 3614 "NAGMUNDARI"); 3615 3616 /** 3617 * Constant for the "CJK Unified Ideographs Extension H" Unicode 3618 * character block. 
3619 * @since 20 3620 */ 3621 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 3622 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H", 3623 "CJK UNIFIED IDEOGRAPHS EXTENSION H", 3624 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH"); 3625 3626 /** 3627 * Constant for the "CJK Unified Ideographs Extension I" Unicode 3628 * character block. 3629 * @since 22 3630 */ 3631 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 3632 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I", 3633 "CJK UNIFIED IDEOGRAPHS EXTENSION I", 3634 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI"); 3635 3636 /** 3637 * Constant for the "Todhri" Unicode 3638 * character block. 3639 * @since 24 3640 */ 3641 public static final UnicodeBlock TODHRI = 3642 new UnicodeBlock("TODHRI"); 3643 3644 /** 3645 * Constant for the "Garay" Unicode 3646 * character block. 3647 * @since 24 3648 */ 3649 public static final UnicodeBlock GARAY = 3650 new UnicodeBlock("GARAY"); 3651 3652 /** 3653 * Constant for the "Tulu-Tigalari" Unicode 3654 * character block. 3655 * @since 24 3656 */ 3657 public static final UnicodeBlock TULU_TIGALARI = 3658 new UnicodeBlock("TULU_TIGALARI", 3659 "TULU-TIGALARI"); 3660 3661 /** 3662 * Constant for the "Myanmar Extended-C" Unicode 3663 * character block. 3664 * @since 24 3665 */ 3666 public static final UnicodeBlock MYANMAR_EXTENDED_C = 3667 new UnicodeBlock("MYANMAR_EXTENDED_C", 3668 "MYANMAR EXTENDED-C", 3669 "MYANMAREXTENDED-C"); 3670 3671 /** 3672 * Constant for the "Sunuwar" Unicode 3673 * character block. 3674 * @since 24 3675 */ 3676 public static final UnicodeBlock SUNUWAR = 3677 new UnicodeBlock("SUNUWAR"); 3678 3679 /** 3680 * Constant for the "Egyptian Hieroglyphs Extended-A" Unicode 3681 * character block. 
3682 * @since 24 3683 */ 3684 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS_EXTENDED_A = 3685 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS_EXTENDED_A", 3686 "EGYPTIAN HIEROGLYPHS EXTENDED-A", 3687 "EGYPTIANHIEROGLYPHSEXTENDED-A"); 3688 3689 /** 3690 * Constant for the "Gurung Khema" Unicode 3691 * character block. 3692 * @since 24 3693 */ 3694 public static final UnicodeBlock GURUNG_KHEMA = 3695 new UnicodeBlock("GURUNG_KHEMA", 3696 "GURUNG KHEMA", 3697 "GURUNGKHEMA"); 3698 3699 /** 3700 * Constant for the "Kirat Rai" Unicode 3701 * character block. 3702 * @since 24 3703 */ 3704 public static final UnicodeBlock KIRAT_RAI = 3705 new UnicodeBlock("KIRAT_RAI", 3706 "KIRAT RAI", 3707 "KIRATRAI"); 3708 3709 /** 3710 * Constant for the "Symbols for Legacy Computing Supplement" Unicode 3711 * character block. 3712 * @since 24 3713 */ 3714 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT = 3715 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT", 3716 "SYMBOLS FOR LEGACY COMPUTING SUPPLEMENT", 3717 "SYMBOLSFORLEGACYCOMPUTINGSUPPLEMENT"); 3718 3719 /** 3720 * Constant for the "Ol Onal" Unicode 3721 * character block. 
3722 * @since 24 3723 */ 3724 public static final UnicodeBlock OL_ONAL = 3725 new UnicodeBlock("OL_ONAL", 3726 "OL ONAL", 3727 "OLONAL"); 3728 /** Starting code points of consecutive code point ranges, in ascending order. Entry i pairs with blocks[i]; of(int) binary-searches this table and returns the entry of blocks at the index it finds, so the two arrays must stay the same length and in the same order. Ranges commented as "unassigned" belong to no block. */ 3729 private static final int[] blockStarts = { 3730 0x0000, // 0000..007F; Basic Latin 3731 0x0080, // 0080..00FF; Latin-1 Supplement 3732 0x0100, // 0100..017F; Latin Extended-A 3733 0x0180, // 0180..024F; Latin Extended-B 3734 0x0250, // 0250..02AF; IPA Extensions 3735 0x02B0, // 02B0..02FF; Spacing Modifier Letters 3736 0x0300, // 0300..036F; Combining Diacritical Marks 3737 0x0370, // 0370..03FF; Greek and Coptic 3738 0x0400, // 0400..04FF; Cyrillic 3739 0x0500, // 0500..052F; Cyrillic Supplement 3740 0x0530, // 0530..058F; Armenian 3741 0x0590, // 0590..05FF; Hebrew 3742 0x0600, // 0600..06FF; Arabic 3743 0x0700, // 0700..074F; Syriac 3744 0x0750, // 0750..077F; Arabic Supplement 3745 0x0780, // 0780..07BF; Thaana 3746 0x07C0, // 07C0..07FF; NKo 3747 0x0800, // 0800..083F; Samaritan 3748 0x0840, // 0840..085F; Mandaic 3749 0x0860, // 0860..086F; Syriac Supplement 3750 0x0870, // 0870..089F; Arabic Extended-B 3751 0x08A0, // 08A0..08FF; Arabic Extended-A 3752 0x0900, // 0900..097F; Devanagari 3753 0x0980, // 0980..09FF; Bengali 3754 0x0A00, // 0A00..0A7F; Gurmukhi 3755 0x0A80, // 0A80..0AFF; Gujarati 3756 0x0B00, // 0B00..0B7F; Oriya 3757 0x0B80, // 0B80..0BFF; Tamil 3758 0x0C00, // 0C00..0C7F; Telugu 3759 0x0C80, // 0C80..0CFF; Kannada 3760 0x0D00, // 0D00..0D7F; Malayalam 3761 0x0D80, // 0D80..0DFF; Sinhala 3762 0x0E00, // 0E00..0E7F; Thai 3763 0x0E80, // 0E80..0EFF; Lao 3764 0x0F00, // 0F00..0FFF; Tibetan 3765 0x1000, // 1000..109F; Myanmar 3766 0x10A0, // 10A0..10FF; Georgian 3767 0x1100, // 1100..11FF; Hangul Jamo 3768 0x1200, // 1200..137F; Ethiopic 3769 0x1380, // 1380..139F; Ethiopic Supplement 3770 0x13A0, // 13A0..13FF; Cherokee 3771 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 3772 0x1680, // 1680..169F; Ogham 3773 0x16A0, // 16A0..16FF; Runic 3774 0x1700, // 1700..171F; Tagalog 3775 0x1720, 
// 1720..173F; Hanunoo 3776 0x1740, // 1740..175F; Buhid 3777 0x1760, // 1760..177F; Tagbanwa 3778 0x1780, // 1780..17FF; Khmer 3779 0x1800, // 1800..18AF; Mongolian 3780 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3781 0x1900, // 1900..194F; Limbu 3782 0x1950, // 1950..197F; Tai Le 3783 0x1980, // 1980..19DF; New Tai Lue 3784 0x19E0, // 19E0..19FF; Khmer Symbols 3785 0x1A00, // 1A00..1A1F; Buginese 3786 0x1A20, // 1A20..1AAF; Tai Tham 3787 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3788 0x1B00, // 1B00..1B7F; Balinese 3789 0x1B80, // 1B80..1BBF; Sundanese 3790 0x1BC0, // 1BC0..1BFF; Batak 3791 0x1C00, // 1C00..1C4F; Lepcha 3792 0x1C50, // 1C50..1C7F; Ol Chiki 3793 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3794 0x1C90, // 1C90..1CBF; Georgian Extended 3795 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3796 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3797 0x1D00, // 1D00..1D7F; Phonetic Extensions 3798 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3799 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3800 0x1E00, // 1E00..1EFF; Latin Extended Additional 3801 0x1F00, // 1F00..1FFF; Greek Extended 3802 0x2000, // 2000..206F; General Punctuation 3803 0x2070, // 2070..209F; Superscripts and Subscripts 3804 0x20A0, // 20A0..20CF; Currency Symbols 3805 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3806 0x2100, // 2100..214F; Letterlike Symbols 3807 0x2150, // 2150..218F; Number Forms 3808 0x2190, // 2190..21FF; Arrows 3809 0x2200, // 2200..22FF; Mathematical Operators 3810 0x2300, // 2300..23FF; Miscellaneous Technical 3811 0x2400, // 2400..243F; Control Pictures 3812 0x2440, // 2440..245F; Optical Character Recognition 3813 0x2460, // 2460..24FF; Enclosed Alphanumerics 3814 0x2500, // 2500..257F; Box Drawing 3815 0x2580, // 2580..259F; Block Elements 3816 0x25A0, // 25A0..25FF; Geometric Shapes 3817 0x2600, // 2600..26FF; Miscellaneous Symbols 3818 0x2700, // 2700..27BF; Dingbats 3819 0x27C0, // 
27C0..27EF; Miscellaneous Mathematical Symbols-A 3820 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3821 0x2800, // 2800..28FF; Braille Patterns 3822 0x2900, // 2900..297F; Supplemental Arrows-B 3823 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3824 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3825 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3826 0x2C00, // 2C00..2C5F; Glagolitic 3827 0x2C60, // 2C60..2C7F; Latin Extended-C 3828 0x2C80, // 2C80..2CFF; Coptic 3829 0x2D00, // 2D00..2D2F; Georgian Supplement 3830 0x2D30, // 2D30..2D7F; Tifinagh 3831 0x2D80, // 2D80..2DDF; Ethiopic Extended 3832 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3833 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3834 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3835 0x2F00, // 2F00..2FDF; Kangxi Radicals 3836 0x2FE0, // unassigned 3837 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3838 0x3000, // 3000..303F; CJK Symbols and Punctuation 3839 0x3040, // 3040..309F; Hiragana 3840 0x30A0, // 30A0..30FF; Katakana 3841 0x3100, // 3100..312F; Bopomofo 3842 0x3130, // 3130..318F; Hangul Compatibility Jamo 3843 0x3190, // 3190..319F; Kanbun 3844 0x31A0, // 31A0..31BF; Bopomofo Extended 3845 0x31C0, // 31C0..31EF; CJK Strokes 3846 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 3847 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3848 0x3300, // 3300..33FF; CJK Compatibility 3849 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3850 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 3851 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3852 0xA000, // A000..A48F; Yi Syllables 3853 0xA490, // A490..A4CF; Yi Radicals 3854 0xA4D0, // A4D0..A4FF; Lisu 3855 0xA500, // A500..A63F; Vai 3856 0xA640, // A640..A69F; Cyrillic Extended-B 3857 0xA6A0, // A6A0..A6FF; Bamum 3858 0xA700, // A700..A71F; Modifier Tone Letters 3859 0xA720, // A720..A7FF; Latin Extended-D 3860 0xA800, // A800..A82F; Syloti Nagri 3861 0xA830, // A830..A83F; Common Indic 
Number Forms 3862 0xA840, // A840..A87F; Phags-pa 3863 0xA880, // A880..A8DF; Saurashtra 3864 0xA8E0, // A8E0..A8FF; Devanagari Extended 3865 0xA900, // A900..A92F; Kayah Li 3866 0xA930, // A930..A95F; Rejang 3867 0xA960, // A960..A97F; Hangul Jamo Extended-A 3868 0xA980, // A980..A9DF; Javanese 3869 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3870 0xAA00, // AA00..AA5F; Cham 3871 0xAA60, // AA60..AA7F; Myanmar Extended-A 3872 0xAA80, // AA80..AADF; Tai Viet 3873 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3874 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3875 0xAB30, // AB30..AB6F; Latin Extended-E 3876 0xAB70, // AB70..ABBF; Cherokee Supplement 3877 0xABC0, // ABC0..ABFF; Meetei Mayek 3878 0xAC00, // AC00..D7AF; Hangul Syllables 3879 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3880 0xD800, // D800..DB7F; High Surrogates 3881 0xDB80, // DB80..DBFF; High Private Use Surrogates 3882 0xDC00, // DC00..DFFF; Low Surrogates 3883 0xE000, // E000..F8FF; Private Use Area 3884 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3885 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3886 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3887 0xFE00, // FE00..FE0F; Variation Selectors 3888 0xFE10, // FE10..FE1F; Vertical Forms 3889 0xFE20, // FE20..FE2F; Combining Half Marks 3890 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3891 0xFE50, // FE50..FE6F; Small Form Variants 3892 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3893 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3894 0xFFF0, // FFF0..FFFF; Specials 3895 0x10000, // 10000..1007F; Linear B Syllabary 3896 0x10080, // 10080..100FF; Linear B Ideograms 3897 0x10100, // 10100..1013F; Aegean Numbers 3898 0x10140, // 10140..1018F; Ancient Greek Numbers 3899 0x10190, // 10190..101CF; Ancient Symbols 3900 0x101D0, // 101D0..101FF; Phaistos Disc 3901 0x10200, // unassigned 3902 0x10280, // 10280..1029F; Lycian 3903 0x102A0, // 102A0..102DF; Carian 3904 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3905 
0x10300, // 10300..1032F; Old Italic 3906 0x10330, // 10330..1034F; Gothic 3907 0x10350, // 10350..1037F; Old Permic 3908 0x10380, // 10380..1039F; Ugaritic 3909 0x103A0, // 103A0..103DF; Old Persian 3910 0x103E0, // unassigned 3911 0x10400, // 10400..1044F; Deseret 3912 0x10450, // 10450..1047F; Shavian 3913 0x10480, // 10480..104AF; Osmanya 3914 0x104B0, // 104B0..104FF; Osage 3915 0x10500, // 10500..1052F; Elbasan 3916 0x10530, // 10530..1056F; Caucasian Albanian 3917 0x10570, // 10570..105BF; Vithkuqi 3918 0x105C0, // 105C0..105FF; Todhri 3919 0x10600, // 10600..1077F; Linear A 3920 0x10780, // 10780..107BF; Latin Extended-F 3921 0x107C0, // unassigned 3922 0x10800, // 10800..1083F; Cypriot Syllabary 3923 0x10840, // 10840..1085F; Imperial Aramaic 3924 0x10860, // 10860..1087F; Palmyrene 3925 0x10880, // 10880..108AF; Nabataean 3926 0x108B0, // unassigned 3927 0x108E0, // 108E0..108FF; Hatran 3928 0x10900, // 10900..1091F; Phoenician 3929 0x10920, // 10920..1093F; Lydian 3930 0x10940, // unassigned 3931 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3932 0x109A0, // 109A0..109FF; Meroitic Cursive 3933 0x10A00, // 10A00..10A5F; Kharoshthi 3934 0x10A60, // 10A60..10A7F; Old South Arabian 3935 0x10A80, // 10A80..10A9F; Old North Arabian 3936 0x10AA0, // unassigned 3937 0x10AC0, // 10AC0..10AFF; Manichaean 3938 0x10B00, // 10B00..10B3F; Avestan 3939 0x10B40, // 10B40..10B5F; Inscriptional Parthian 3940 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3941 0x10B80, // 10B80..10BAF; Psalter Pahlavi 3942 0x10BB0, // unassigned 3943 0x10C00, // 10C00..10C4F; Old Turkic 3944 0x10C50, // unassigned 3945 0x10C80, // 10C80..10CFF; Old Hungarian 3946 0x10D00, // 10D00..10D3F; Hanifi Rohingya 3947 0x10D40, // 10D40..10D8F; Garay 3948 0x10D90, // unassigned 3949 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3950 0x10E80, // 10E80..10EBF; Yezidi 3951 0x10EC0, // 10EC0..10EFF; Arabic Extended-C 3952 0x10F00, // 10F00..10F2F; Old Sogdian 3953 0x10F30, // 10F30..10F6F; Sogdian 
3954 0x10F70, // 10F70..10FAF; Old Uyghur 3955 0x10FB0, // 10FB0..10FDF; Chorasmian 3956 0x10FE0, // 10FE0..10FFF; Elymaic 3957 0x11000, // 11000..1107F; Brahmi 3958 0x11080, // 11080..110CF; Kaithi 3959 0x110D0, // 110D0..110FF; Sora Sompeng 3960 0x11100, // 11100..1114F; Chakma 3961 0x11150, // 11150..1117F; Mahajani 3962 0x11180, // 11180..111DF; Sharada 3963 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3964 0x11200, // 11200..1124F; Khojki 3965 0x11250, // unassigned 3966 0x11280, // 11280..112AF; Multani 3967 0x112B0, // 112B0..112FF; Khudawadi 3968 0x11300, // 11300..1137F; Grantha 3969 0x11380, // 11380..113FF; Tulu-Tigalari 3970 0x11400, // 11400..1147F; Newa 3971 0x11480, // 11480..114DF; Tirhuta 3972 0x114E0, // unassigned 3973 0x11580, // 11580..115FF; Siddham 3974 0x11600, // 11600..1165F; Modi 3975 0x11660, // 11660..1167F; Mongolian Supplement 3976 0x11680, // 11680..116CF; Takri 3977 0x116D0, // 116D0..116FF; Myanmar Extended-C 3978 0x11700, // 11700..1174F; Ahom 3979 0x11750, // unassigned 3980 0x11800, // 11800..1184F; Dogra 3981 0x11850, // unassigned 3982 0x118A0, // 118A0..118FF; Warang Citi 3983 0x11900, // 11900..1195F; Dives Akuru 3984 0x11960, // unassigned 3985 0x119A0, // 119A0..119FF; Nandinagari 3986 0x11A00, // 11A00..11A4F; Zanabazar Square 3987 0x11A50, // 11A50..11AAF; Soyombo 3988 0x11AB0, // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 3989 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3990 0x11B00, // 11B00..11B5F; Devanagari Extended-A 3991 0x11B60, // unassigned 3992 0x11BC0, // 11BC0..11BFF; Sunuwar 3993 0x11C00, // 11C00..11C6F; Bhaiksuki 3994 0x11C70, // 11C70..11CBF; Marchen 3995 0x11CC0, // unassigned 3996 0x11D00, // 11D00..11D5F; Masaram Gondi 3997 0x11D60, // 11D60..11DAF; Gunjala Gondi 3998 0x11DB0, // unassigned 3999 0x11EE0, // 11EE0..11EFF; Makasar 4000 0x11F00, // 11F00..11F5F; Kawi 4001 0x11F60, // unassigned 4002 0x11FB0, // 11FB0..11FBF; Lisu Supplement 4003 0x11FC0, // 11FC0..11FFF; Tamil 
Supplement 4004 0x12000, // 12000..123FF; Cuneiform 4005 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 4006 0x12480, // 12480..1254F; Early Dynastic Cuneiform 4007 0x12550, // unassigned 4008 0x12F90, // 12F90..12FFF; Cypro-Minoan 4009 0x13000, // 13000..1342F; Egyptian Hieroglyphs 4010 0x13430, // 13430..1345F; Egyptian Hieroglyph Format Controls 4011 0x13460, // 13460..143FF; Egyptian Hieroglyphs Extended-A 4012 0x14400, // 14400..1467F; Anatolian Hieroglyphs 4013 0x14680, // unassigned 4014 0x16100, // 16100..1613F; Gurung Khema 4015 0x16140, // unassigned 4016 0x16800, // 16800..16A3F; Bamum Supplement 4017 0x16A40, // 16A40..16A6F; Mro 4018 0x16A70, // 16A70..16ACF; Tangsa 4019 0x16AD0, // 16AD0..16AFF; Bassa Vah 4020 0x16B00, // 16B00..16B8F; Pahawh Hmong 4021 0x16B90, // unassigned 4022 0x16D40, // 16D40..16D7F; Kirat Rai 4023 0x16D80, // unassigned 4024 0x16E40, // 16E40..16E9F; Medefaidrin 4025 0x16EA0, // unassigned 4026 0x16F00, // 16F00..16F9F; Miao 4027 0x16FA0, // unassigned 4028 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 4029 0x17000, // 17000..187FF; Tangut 4030 0x18800, // 18800..18AFF; Tangut Components 4031 0x18B00, // 18B00..18CFF; Khitan Small Script 4032 0x18D00, // 18D00..18D7F; Tangut Supplement 4033 0x18D80, // unassigned 4034 0x1AFF0, // 1AFF0..1AFFF; Kana Extended-B 4035 0x1B000, // 1B000..1B0FF; Kana Supplement 4036 0x1B100, // 1B100..1B12F; Kana Extended-A 4037 0x1B130, // 1B130..1B16F; Small Kana Extension 4038 0x1B170, // 1B170..1B2FF; Nushu 4039 0x1B300, // unassigned 4040 0x1BC00, // 1BC00..1BC9F; Duployan 4041 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 4042 0x1BCB0, // unassigned 4043 0x1CC00, // 1CC00..1CEBF; Symbols for Legacy Computing Supplement 4044 0x1CEC0, // unassigned 4045 0x1CF00, // 1CF00..1CFCF; Znamenny Musical Notation 4046 0x1CFD0, // unassigned 4047 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 4048 0x1D100, // 1D100..1D1FF; Musical Symbols 4049 0x1D200, // 
1D200..1D24F; Ancient Greek Musical Notation 4050 0x1D250, // unassigned 4051 0x1D2C0, // 1D2C0..1D2DF; Kaktovik Numerals 4052 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 4053 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 4054 0x1D360, // 1D360..1D37F; Counting Rod Numerals 4055 0x1D380, // unassigned 4056 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 4057 0x1D800, // 1D800..1DAAF; Sutton SignWriting 4058 0x1DAB0, // unassigned 4059 0x1DF00, // 1DF00..1DFFF; Latin Extended-G 4060 0x1E000, // 1E000..1E02F; Glagolitic Supplement 4061 0x1E030, // 1E030..1E08F; Cyrillic Extended-D 4062 0x1E090, // unassigned 4063 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong 4064 0x1E150, // unassigned 4065 0x1E290, // 1E290..1E2BF; Toto 4066 0x1E2C0, // 1E2C0..1E2FF; Wancho 4067 0x1E300, // unassigned 4068 0x1E4D0, // 1E4D0..1E4FF; Nag Mundari 4069 0x1E500, // unassigned 4070 0x1E5D0, // 1E5D0..1E5FF; Ol Onal 4071 0x1E600, // unassigned 4072 0x1E7E0, // 1E7E0..1E7FF; Ethiopic Extended-B 4073 0x1E800, // 1E800..1E8DF; Mende Kikakui 4074 0x1E8E0, // unassigned 4075 0x1E900, // 1E900..1E95F; Adlam 4076 0x1E960, // unassigned 4077 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 4078 0x1ECC0, // unassigned 4079 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers 4080 0x1ED50, // unassigned 4081 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 4082 0x1EF00, // unassigned 4083 0x1F000, // 1F000..1F02F; Mahjong Tiles 4084 0x1F030, // 1F030..1F09F; Domino Tiles 4085 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 4086 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 4087 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 4088 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 4089 0x1F600, // 1F600..1F64F; Emoticons 4090 0x1F650, // 1F650..1F67F; Ornamental Dingbats 4091 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 4092 0x1F700, // 1F700..1F77F; Alchemical Symbols 4093 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended 4094 0x1F800, // 
1F800..1F8FF; Supplemental Arrows-C 4095 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 4096 0x1FA00, // 1FA00..1FA6F; Chess Symbols 4097 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A 4098 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing 4099 0x1FC00, // unassigned 4100 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 4101 0x2A6E0, // unassigned 4102 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 4103 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 4104 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E 4105 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F 4106 0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I 4107 0x2EE60, // unassigned 4108 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 4109 0x2FA20, // unassigned 4110 0x30000, // 30000..3134F; CJK Unified Ideographs Extension G 4111 0x31350, // 31350..323AF; CJK Unified Ideographs Extension H 4112 0x323B0, // unassigned 4113 0xE0000, // E0000..E007F; Tags 4114 0xE0080, // unassigned 4115 0xE0100, // E0100..E01EF; Variation Selectors Supplement 4116 0xE01F0, // unassigned 4117 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 4118 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B 4119 }; 4120 /** Block for each range listed in blockStarts, index for index: blocks[i] is the block whose range starts at blockStarts[i]. A null entry stands for a range that blockStarts comments as unassigned; of(int) returns the entry at the index it finds, so it returns null for code points in such a range. */ 4121 private static final UnicodeBlock[] blocks = { 4122 BASIC_LATIN, 4123 LATIN_1_SUPPLEMENT, 4124 LATIN_EXTENDED_A, 4125 LATIN_EXTENDED_B, 4126 IPA_EXTENSIONS, 4127 SPACING_MODIFIER_LETTERS, 4128 COMBINING_DIACRITICAL_MARKS, 4129 GREEK, 4130 CYRILLIC, 4131 CYRILLIC_SUPPLEMENTARY, 4132 ARMENIAN, 4133 HEBREW, 4134 ARABIC, 4135 SYRIAC, 4136 ARABIC_SUPPLEMENT, 4137 THAANA, 4138 NKO, 4139 SAMARITAN, 4140 MANDAIC, 4141 SYRIAC_SUPPLEMENT, 4142 ARABIC_EXTENDED_B, 4143 ARABIC_EXTENDED_A, 4144 DEVANAGARI, 4145 BENGALI, 4146 GURMUKHI, 4147 GUJARATI, 4148 ORIYA, 4149 TAMIL, 4150 TELUGU, 4151 KANNADA, 4152 MALAYALAM, 4153 SINHALA, 4154 THAI, 4155 LAO, 4156 TIBETAN, 4157 MYANMAR, 4158 
GEORGIAN, 4159 HANGUL_JAMO, 4160 ETHIOPIC, 4161 ETHIOPIC_SUPPLEMENT, 4162 CHEROKEE, 4163 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 4164 OGHAM, 4165 RUNIC, 4166 TAGALOG, 4167 HANUNOO, 4168 BUHID, 4169 TAGBANWA, 4170 KHMER, 4171 MONGOLIAN, 4172 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 4173 LIMBU, 4174 TAI_LE, 4175 NEW_TAI_LUE, 4176 KHMER_SYMBOLS, 4177 BUGINESE, 4178 TAI_THAM, 4179 COMBINING_DIACRITICAL_MARKS_EXTENDED, 4180 BALINESE, 4181 SUNDANESE, 4182 BATAK, 4183 LEPCHA, 4184 OL_CHIKI, 4185 CYRILLIC_EXTENDED_C, 4186 GEORGIAN_EXTENDED, 4187 SUNDANESE_SUPPLEMENT, 4188 VEDIC_EXTENSIONS, 4189 PHONETIC_EXTENSIONS, 4190 PHONETIC_EXTENSIONS_SUPPLEMENT, 4191 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 4192 LATIN_EXTENDED_ADDITIONAL, 4193 GREEK_EXTENDED, 4194 GENERAL_PUNCTUATION, 4195 SUPERSCRIPTS_AND_SUBSCRIPTS, 4196 CURRENCY_SYMBOLS, 4197 COMBINING_MARKS_FOR_SYMBOLS, 4198 LETTERLIKE_SYMBOLS, 4199 NUMBER_FORMS, 4200 ARROWS, 4201 MATHEMATICAL_OPERATORS, 4202 MISCELLANEOUS_TECHNICAL, 4203 CONTROL_PICTURES, 4204 OPTICAL_CHARACTER_RECOGNITION, 4205 ENCLOSED_ALPHANUMERICS, 4206 BOX_DRAWING, 4207 BLOCK_ELEMENTS, 4208 GEOMETRIC_SHAPES, 4209 MISCELLANEOUS_SYMBOLS, 4210 DINGBATS, 4211 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 4212 SUPPLEMENTAL_ARROWS_A, 4213 BRAILLE_PATTERNS, 4214 SUPPLEMENTAL_ARROWS_B, 4215 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 4216 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 4217 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 4218 GLAGOLITIC, 4219 LATIN_EXTENDED_C, 4220 COPTIC, 4221 GEORGIAN_SUPPLEMENT, 4222 TIFINAGH, 4223 ETHIOPIC_EXTENDED, 4224 CYRILLIC_EXTENDED_A, 4225 SUPPLEMENTAL_PUNCTUATION, 4226 CJK_RADICALS_SUPPLEMENT, 4227 KANGXI_RADICALS, 4228 null, 4229 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 4230 CJK_SYMBOLS_AND_PUNCTUATION, 4231 HIRAGANA, 4232 KATAKANA, 4233 BOPOMOFO, 4234 HANGUL_COMPATIBILITY_JAMO, 4235 KANBUN, 4236 BOPOMOFO_EXTENDED, 4237 CJK_STROKES, 4238 KATAKANA_PHONETIC_EXTENSIONS, 4239 ENCLOSED_CJK_LETTERS_AND_MONTHS, 4240 CJK_COMPATIBILITY, 4241 
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 4242 YIJING_HEXAGRAM_SYMBOLS, 4243 CJK_UNIFIED_IDEOGRAPHS, 4244 YI_SYLLABLES, 4245 YI_RADICALS, 4246 LISU, 4247 VAI, 4248 CYRILLIC_EXTENDED_B, 4249 BAMUM, 4250 MODIFIER_TONE_LETTERS, 4251 LATIN_EXTENDED_D, 4252 SYLOTI_NAGRI, 4253 COMMON_INDIC_NUMBER_FORMS, 4254 PHAGS_PA, 4255 SAURASHTRA, 4256 DEVANAGARI_EXTENDED, 4257 KAYAH_LI, 4258 REJANG, 4259 HANGUL_JAMO_EXTENDED_A, 4260 JAVANESE, 4261 MYANMAR_EXTENDED_B, 4262 CHAM, 4263 MYANMAR_EXTENDED_A, 4264 TAI_VIET, 4265 MEETEI_MAYEK_EXTENSIONS, 4266 ETHIOPIC_EXTENDED_A, 4267 LATIN_EXTENDED_E, 4268 CHEROKEE_SUPPLEMENT, 4269 MEETEI_MAYEK, 4270 HANGUL_SYLLABLES, 4271 HANGUL_JAMO_EXTENDED_B, 4272 HIGH_SURROGATES, 4273 HIGH_PRIVATE_USE_SURROGATES, 4274 LOW_SURROGATES, 4275 PRIVATE_USE_AREA, 4276 CJK_COMPATIBILITY_IDEOGRAPHS, 4277 ALPHABETIC_PRESENTATION_FORMS, 4278 ARABIC_PRESENTATION_FORMS_A, 4279 VARIATION_SELECTORS, 4280 VERTICAL_FORMS, 4281 COMBINING_HALF_MARKS, 4282 CJK_COMPATIBILITY_FORMS, 4283 SMALL_FORM_VARIANTS, 4284 ARABIC_PRESENTATION_FORMS_B, 4285 HALFWIDTH_AND_FULLWIDTH_FORMS, 4286 SPECIALS, 4287 LINEAR_B_SYLLABARY, 4288 LINEAR_B_IDEOGRAMS, 4289 AEGEAN_NUMBERS, 4290 ANCIENT_GREEK_NUMBERS, 4291 ANCIENT_SYMBOLS, 4292 PHAISTOS_DISC, 4293 null, 4294 LYCIAN, 4295 CARIAN, 4296 COPTIC_EPACT_NUMBERS, 4297 OLD_ITALIC, 4298 GOTHIC, 4299 OLD_PERMIC, 4300 UGARITIC, 4301 OLD_PERSIAN, 4302 null, 4303 DESERET, 4304 SHAVIAN, 4305 OSMANYA, 4306 OSAGE, 4307 ELBASAN, 4308 CAUCASIAN_ALBANIAN, 4309 VITHKUQI, 4310 TODHRI, 4311 LINEAR_A, 4312 LATIN_EXTENDED_F, 4313 null, 4314 CYPRIOT_SYLLABARY, 4315 IMPERIAL_ARAMAIC, 4316 PALMYRENE, 4317 NABATAEAN, 4318 null, 4319 HATRAN, 4320 PHOENICIAN, 4321 LYDIAN, 4322 null, 4323 MEROITIC_HIEROGLYPHS, 4324 MEROITIC_CURSIVE, 4325 KHAROSHTHI, 4326 OLD_SOUTH_ARABIAN, 4327 OLD_NORTH_ARABIAN, 4328 null, 4329 MANICHAEAN, 4330 AVESTAN, 4331 INSCRIPTIONAL_PARTHIAN, 4332 INSCRIPTIONAL_PAHLAVI, 4333 PSALTER_PAHLAVI, 4334 null, 4335 OLD_TURKIC, 4336 null, 4337 OLD_HUNGARIAN, 
4338 HANIFI_ROHINGYA, 4339 GARAY, 4340 null, 4341 RUMI_NUMERAL_SYMBOLS, 4342 YEZIDI, 4343 ARABIC_EXTENDED_C, 4344 OLD_SOGDIAN, 4345 SOGDIAN, 4346 OLD_UYGHUR, 4347 CHORASMIAN, 4348 ELYMAIC, 4349 BRAHMI, 4350 KAITHI, 4351 SORA_SOMPENG, 4352 CHAKMA, 4353 MAHAJANI, 4354 SHARADA, 4355 SINHALA_ARCHAIC_NUMBERS, 4356 KHOJKI, 4357 null, 4358 MULTANI, 4359 KHUDAWADI, 4360 GRANTHA, 4361 TULU_TIGALARI, 4362 NEWA, 4363 TIRHUTA, 4364 null, 4365 SIDDHAM, 4366 MODI, 4367 MONGOLIAN_SUPPLEMENT, 4368 TAKRI, 4369 MYANMAR_EXTENDED_C, 4370 AHOM, 4371 null, 4372 DOGRA, 4373 null, 4374 WARANG_CITI, 4375 DIVES_AKURU, 4376 null, 4377 NANDINAGARI, 4378 ZANABAZAR_SQUARE, 4379 SOYOMBO, 4380 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A, 4381 PAU_CIN_HAU, 4382 DEVANAGARI_EXTENDED_A, 4383 null, 4384 SUNUWAR, 4385 BHAIKSUKI, 4386 MARCHEN, 4387 null, 4388 MASARAM_GONDI, 4389 GUNJALA_GONDI, 4390 null, 4391 MAKASAR, 4392 KAWI, 4393 null, 4394 LISU_SUPPLEMENT, 4395 TAMIL_SUPPLEMENT, 4396 CUNEIFORM, 4397 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 4398 EARLY_DYNASTIC_CUNEIFORM, 4399 null, 4400 CYPRO_MINOAN, 4401 EGYPTIAN_HIEROGLYPHS, 4402 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, 4403 EGYPTIAN_HIEROGLYPHS_EXTENDED_A, 4404 ANATOLIAN_HIEROGLYPHS, 4405 null, 4406 GURUNG_KHEMA, 4407 null, 4408 BAMUM_SUPPLEMENT, 4409 MRO, 4410 TANGSA, 4411 BASSA_VAH, 4412 PAHAWH_HMONG, 4413 null, 4414 KIRAT_RAI, 4415 null, 4416 MEDEFAIDRIN, 4417 null, 4418 MIAO, 4419 null, 4420 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, 4421 TANGUT, 4422 TANGUT_COMPONENTS, 4423 KHITAN_SMALL_SCRIPT, 4424 TANGUT_SUPPLEMENT, 4425 null, 4426 KANA_EXTENDED_B, 4427 KANA_SUPPLEMENT, 4428 KANA_EXTENDED_A, 4429 SMALL_KANA_EXTENSION, 4430 NUSHU, 4431 null, 4432 DUPLOYAN, 4433 SHORTHAND_FORMAT_CONTROLS, 4434 null, 4435 SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT, 4436 null, 4437 ZNAMENNY_MUSICAL_NOTATION, 4438 null, 4439 BYZANTINE_MUSICAL_SYMBOLS, 4440 MUSICAL_SYMBOLS, 4441 ANCIENT_GREEK_MUSICAL_NOTATION, 4442 null, 4443 KAKTOVIK_NUMERALS, 4444 MAYAN_NUMERALS, 4445 
TAI_XUAN_JING_SYMBOLS, 4446 COUNTING_ROD_NUMERALS, 4447 null, 4448 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 4449 SUTTON_SIGNWRITING, 4450 null, 4451 LATIN_EXTENDED_G, 4452 GLAGOLITIC_SUPPLEMENT, 4453 CYRILLIC_EXTENDED_D, 4454 null, 4455 NYIAKENG_PUACHUE_HMONG, 4456 null, 4457 TOTO, 4458 WANCHO, 4459 null, 4460 NAG_MUNDARI, 4461 null, 4462 OL_ONAL, 4463 null, 4464 ETHIOPIC_EXTENDED_B, 4465 MENDE_KIKAKUI, 4466 null, 4467 ADLAM, 4468 null, 4469 INDIC_SIYAQ_NUMBERS, 4470 null, 4471 OTTOMAN_SIYAQ_NUMBERS, 4472 null, 4473 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 4474 null, 4475 MAHJONG_TILES, 4476 DOMINO_TILES, 4477 PLAYING_CARDS, 4478 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 4479 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 4480 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 4481 EMOTICONS, 4482 ORNAMENTAL_DINGBATS, 4483 TRANSPORT_AND_MAP_SYMBOLS, 4484 ALCHEMICAL_SYMBOLS, 4485 GEOMETRIC_SHAPES_EXTENDED, 4486 SUPPLEMENTAL_ARROWS_C, 4487 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 4488 CHESS_SYMBOLS, 4489 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, 4490 SYMBOLS_FOR_LEGACY_COMPUTING, 4491 null, 4492 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 4493 null, 4494 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 4495 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 4496 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 4497 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 4498 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I, 4499 null, 4500 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 4501 null, 4502 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, 4503 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H, 4504 null, 4505 TAGS, 4506 null, 4507 VARIATION_SELECTORS_SUPPLEMENT, 4508 null, 4509 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 4510 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 4511 }; 4512 4513 4514 /** 4515 * Returns the object representing the Unicode block containing the 4516 * given character, or {@code null} if the character is not a 4517 * member of a defined block. 4518 * 4519 * <p><b>Note:</b> This method cannot handle 4520 * <a href="Character.html#supplementary"> supplementary 4521 * characters</a>. 
To support all Unicode characters, including 4522 * supplementary characters, use the {@link #of(int)} method. 4523 * 4524 * @param c The character in question 4525 * @return The {@code UnicodeBlock} instance representing the 4526 * Unicode block of which this character is a member, or 4527 * {@code null} if the character is not a member of any 4528 * Unicode block 4529 */ 4530 public static UnicodeBlock of(char c) { 4531 return of((int)c); 4532 } 4533 4534 /** 4535 * Returns the object representing the Unicode block 4536 * containing the given character (Unicode code point), or 4537 * {@code null} if the character is not a member of a 4538 * defined block. 4539 * 4540 * @param codePoint the character (Unicode code point) in question. 4541 * @return The {@code UnicodeBlock} instance representing the 4542 * Unicode block of which this character is a member, or 4543 * {@code null} if the character is not a member of any 4544 * Unicode block 4545 * @throws IllegalArgumentException if the specified 4546 * {@code codePoint} is an invalid Unicode code point. 4547 * @see Character#isValidCodePoint(int) 4548 * @since 1.5 4549 */ 4550 public static UnicodeBlock of(int codePoint) { 4551 if (!isValidCodePoint(codePoint)) { 4552 throw new IllegalArgumentException( 4553 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4554 } 4555 4556 int top, bottom, current; 4557 bottom = 0; 4558 top = blockStarts.length; 4559 current = top/2; 4560 4561 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4562 while (top - bottom > 1) { 4563 if (codePoint >= blockStarts[current]) { 4564 bottom = current; 4565 } else { 4566 top = current; 4567 } 4568 current = (top + bottom) / 2; 4569 } 4570 return blocks[current]; 4571 } 4572 4573 /** 4574 * Returns the UnicodeBlock with the given name. Block 4575 * names are determined by The Unicode Standard. The file 4576 * {@code Blocks.txt} defines blocks for a particular 4577 * version of the standard. 
The {@link Character} class specifies 4578 * the version of the standard that it supports. 4579 * <p> 4580 * This method accepts block names in the following forms: 4581 * <ol> 4582 * <li> Canonical block names as defined by the Unicode Standard. 4583 * For example, the standard defines a "Basic Latin" block. Therefore, this 4584 * method accepts "Basic Latin" as a valid block name. The documentation of 4585 * each UnicodeBlock provides the canonical name. 4586 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4587 * is a valid block name for the "Basic Latin" block. 4588 * <li>The text representation of each constant UnicodeBlock identifier. 4589 * For example, this method will return the {@link #BASIC_LATIN} block if 4590 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4591 * hyphens in the canonical name with underscores. 4592 * </ol> 4593 * Finally, character case is ignored for all of the valid block name forms. 4594 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4595 * The en_US locale's case mapping rules are used to provide case-insensitive 4596 * string comparisons for block name validation. 4597 * <p> 4598 * If the Unicode Standard changes block names, both the previous and 4599 * current names will be accepted. 4600 * 4601 * @param blockName A {@code UnicodeBlock} name. 
4602 * @return The {@code UnicodeBlock} instance identified 4603 * by {@code blockName} 4604 * @throws IllegalArgumentException if {@code blockName} is an 4605 * invalid name 4606 * @throws NullPointerException if {@code blockName} is null 4607 * @since 1.5 4608 */ 4609 public static final UnicodeBlock forName(String blockName) { 4610 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4611 if (block == null) { 4612 throw new IllegalArgumentException("Not a valid block name: " 4613 + blockName); 4614 } 4615 return block; 4616 } 4617 } 4618 4619 4620 /** 4621 * A family of character subsets representing the character scripts 4622 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4623 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4624 * character is assigned to a single Unicode script, either a specific 4625 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4626 * one of the following three special values, 4627 * {@link Character.UnicodeScript#INHERITED Inherited}, 4628 * {@link Character.UnicodeScript#COMMON Common} or 4629 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4630 * 4631 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4632 * @since 1.7 4633 */ 4634 public static enum UnicodeScript { 4635 4636 /** 4637 * Unicode script "Common". 4638 */ 4639 COMMON, 4640 4641 /** 4642 * Unicode script "Latin". 4643 */ 4644 LATIN, 4645 4646 /** 4647 * Unicode script "Greek". 4648 */ 4649 GREEK, 4650 4651 /** 4652 * Unicode script "Cyrillic". 4653 */ 4654 CYRILLIC, 4655 4656 /** 4657 * Unicode script "Armenian". 4658 */ 4659 ARMENIAN, 4660 4661 /** 4662 * Unicode script "Hebrew". 4663 */ 4664 HEBREW, 4665 4666 /** 4667 * Unicode script "Arabic". 4668 */ 4669 ARABIC, 4670 4671 /** 4672 * Unicode script "Syriac". 4673 */ 4674 SYRIAC, 4675 4676 /** 4677 * Unicode script "Thaana". 4678 */ 4679 THAANA, 4680 4681 /** 4682 * Unicode script "Devanagari". 
4683 */ 4684 DEVANAGARI, 4685 4686 /** 4687 * Unicode script "Bengali". 4688 */ 4689 BENGALI, 4690 4691 /** 4692 * Unicode script "Gurmukhi". 4693 */ 4694 GURMUKHI, 4695 4696 /** 4697 * Unicode script "Gujarati". 4698 */ 4699 GUJARATI, 4700 4701 /** 4702 * Unicode script "Oriya". 4703 */ 4704 ORIYA, 4705 4706 /** 4707 * Unicode script "Tamil". 4708 */ 4709 TAMIL, 4710 4711 /** 4712 * Unicode script "Telugu". 4713 */ 4714 TELUGU, 4715 4716 /** 4717 * Unicode script "Kannada". 4718 */ 4719 KANNADA, 4720 4721 /** 4722 * Unicode script "Malayalam". 4723 */ 4724 MALAYALAM, 4725 4726 /** 4727 * Unicode script "Sinhala". 4728 */ 4729 SINHALA, 4730 4731 /** 4732 * Unicode script "Thai". 4733 */ 4734 THAI, 4735 4736 /** 4737 * Unicode script "Lao". 4738 */ 4739 LAO, 4740 4741 /** 4742 * Unicode script "Tibetan". 4743 */ 4744 TIBETAN, 4745 4746 /** 4747 * Unicode script "Myanmar". 4748 */ 4749 MYANMAR, 4750 4751 /** 4752 * Unicode script "Georgian". 4753 */ 4754 GEORGIAN, 4755 4756 /** 4757 * Unicode script "Hangul". 4758 */ 4759 HANGUL, 4760 4761 /** 4762 * Unicode script "Ethiopic". 4763 */ 4764 ETHIOPIC, 4765 4766 /** 4767 * Unicode script "Cherokee". 4768 */ 4769 CHEROKEE, 4770 4771 /** 4772 * Unicode script "Canadian_Aboriginal". 4773 */ 4774 CANADIAN_ABORIGINAL, 4775 4776 /** 4777 * Unicode script "Ogham". 4778 */ 4779 OGHAM, 4780 4781 /** 4782 * Unicode script "Runic". 4783 */ 4784 RUNIC, 4785 4786 /** 4787 * Unicode script "Khmer". 4788 */ 4789 KHMER, 4790 4791 /** 4792 * Unicode script "Mongolian". 4793 */ 4794 MONGOLIAN, 4795 4796 /** 4797 * Unicode script "Hiragana". 4798 */ 4799 HIRAGANA, 4800 4801 /** 4802 * Unicode script "Katakana". 4803 */ 4804 KATAKANA, 4805 4806 /** 4807 * Unicode script "Bopomofo". 4808 */ 4809 BOPOMOFO, 4810 4811 /** 4812 * Unicode script "Han". 4813 */ 4814 HAN, 4815 4816 /** 4817 * Unicode script "Yi". 4818 */ 4819 YI, 4820 4821 /** 4822 * Unicode script "Old_Italic". 
4823 */ 4824 OLD_ITALIC, 4825 4826 /** 4827 * Unicode script "Gothic". 4828 */ 4829 GOTHIC, 4830 4831 /** 4832 * Unicode script "Deseret". 4833 */ 4834 DESERET, 4835 4836 /** 4837 * Unicode script "Inherited". 4838 */ 4839 INHERITED, 4840 4841 /** 4842 * Unicode script "Tagalog". 4843 */ 4844 TAGALOG, 4845 4846 /** 4847 * Unicode script "Hanunoo". 4848 */ 4849 HANUNOO, 4850 4851 /** 4852 * Unicode script "Buhid". 4853 */ 4854 BUHID, 4855 4856 /** 4857 * Unicode script "Tagbanwa". 4858 */ 4859 TAGBANWA, 4860 4861 /** 4862 * Unicode script "Limbu". 4863 */ 4864 LIMBU, 4865 4866 /** 4867 * Unicode script "Tai_Le". 4868 */ 4869 TAI_LE, 4870 4871 /** 4872 * Unicode script "Linear_B". 4873 */ 4874 LINEAR_B, 4875 4876 /** 4877 * Unicode script "Ugaritic". 4878 */ 4879 UGARITIC, 4880 4881 /** 4882 * Unicode script "Shavian". 4883 */ 4884 SHAVIAN, 4885 4886 /** 4887 * Unicode script "Osmanya". 4888 */ 4889 OSMANYA, 4890 4891 /** 4892 * Unicode script "Cypriot". 4893 */ 4894 CYPRIOT, 4895 4896 /** 4897 * Unicode script "Braille". 4898 */ 4899 BRAILLE, 4900 4901 /** 4902 * Unicode script "Buginese". 4903 */ 4904 BUGINESE, 4905 4906 /** 4907 * Unicode script "Coptic". 4908 */ 4909 COPTIC, 4910 4911 /** 4912 * Unicode script "New_Tai_Lue". 4913 */ 4914 NEW_TAI_LUE, 4915 4916 /** 4917 * Unicode script "Glagolitic". 4918 */ 4919 GLAGOLITIC, 4920 4921 /** 4922 * Unicode script "Tifinagh". 4923 */ 4924 TIFINAGH, 4925 4926 /** 4927 * Unicode script "Syloti_Nagri". 4928 */ 4929 SYLOTI_NAGRI, 4930 4931 /** 4932 * Unicode script "Old_Persian". 4933 */ 4934 OLD_PERSIAN, 4935 4936 /** 4937 * Unicode script "Kharoshthi". 4938 */ 4939 KHAROSHTHI, 4940 4941 /** 4942 * Unicode script "Balinese". 4943 */ 4944 BALINESE, 4945 4946 /** 4947 * Unicode script "Cuneiform". 4948 */ 4949 CUNEIFORM, 4950 4951 /** 4952 * Unicode script "Phoenician". 4953 */ 4954 PHOENICIAN, 4955 4956 /** 4957 * Unicode script "Phags_Pa". 4958 */ 4959 PHAGS_PA, 4960 4961 /** 4962 * Unicode script "Nko". 
4963 */ 4964 NKO, 4965 4966 /** 4967 * Unicode script "Sundanese". 4968 */ 4969 SUNDANESE, 4970 4971 /** 4972 * Unicode script "Batak". 4973 */ 4974 BATAK, 4975 4976 /** 4977 * Unicode script "Lepcha". 4978 */ 4979 LEPCHA, 4980 4981 /** 4982 * Unicode script "Ol_Chiki". 4983 */ 4984 OL_CHIKI, 4985 4986 /** 4987 * Unicode script "Vai". 4988 */ 4989 VAI, 4990 4991 /** 4992 * Unicode script "Saurashtra". 4993 */ 4994 SAURASHTRA, 4995 4996 /** 4997 * Unicode script "Kayah_Li". 4998 */ 4999 KAYAH_LI, 5000 5001 /** 5002 * Unicode script "Rejang". 5003 */ 5004 REJANG, 5005 5006 /** 5007 * Unicode script "Lycian". 5008 */ 5009 LYCIAN, 5010 5011 /** 5012 * Unicode script "Carian". 5013 */ 5014 CARIAN, 5015 5016 /** 5017 * Unicode script "Lydian". 5018 */ 5019 LYDIAN, 5020 5021 /** 5022 * Unicode script "Cham". 5023 */ 5024 CHAM, 5025 5026 /** 5027 * Unicode script "Tai_Tham". 5028 */ 5029 TAI_THAM, 5030 5031 /** 5032 * Unicode script "Tai_Viet". 5033 */ 5034 TAI_VIET, 5035 5036 /** 5037 * Unicode script "Avestan". 5038 */ 5039 AVESTAN, 5040 5041 /** 5042 * Unicode script "Egyptian_Hieroglyphs". 5043 */ 5044 EGYPTIAN_HIEROGLYPHS, 5045 5046 /** 5047 * Unicode script "Samaritan". 5048 */ 5049 SAMARITAN, 5050 5051 /** 5052 * Unicode script "Mandaic". 5053 */ 5054 MANDAIC, 5055 5056 /** 5057 * Unicode script "Lisu". 5058 */ 5059 LISU, 5060 5061 /** 5062 * Unicode script "Bamum". 5063 */ 5064 BAMUM, 5065 5066 /** 5067 * Unicode script "Javanese". 5068 */ 5069 JAVANESE, 5070 5071 /** 5072 * Unicode script "Meetei_Mayek". 5073 */ 5074 MEETEI_MAYEK, 5075 5076 /** 5077 * Unicode script "Imperial_Aramaic". 5078 */ 5079 IMPERIAL_ARAMAIC, 5080 5081 /** 5082 * Unicode script "Old_South_Arabian". 5083 */ 5084 OLD_SOUTH_ARABIAN, 5085 5086 /** 5087 * Unicode script "Inscriptional_Parthian". 5088 */ 5089 INSCRIPTIONAL_PARTHIAN, 5090 5091 /** 5092 * Unicode script "Inscriptional_Pahlavi". 5093 */ 5094 INSCRIPTIONAL_PAHLAVI, 5095 5096 /** 5097 * Unicode script "Old_Turkic". 
5098 */ 5099 OLD_TURKIC, 5100 5101 /** 5102 * Unicode script "Brahmi". 5103 */ 5104 BRAHMI, 5105 5106 /** 5107 * Unicode script "Kaithi". 5108 */ 5109 KAITHI, 5110 5111 /** 5112 * Unicode script "Meroitic Hieroglyphs". 5113 * @since 1.8 5114 */ 5115 MEROITIC_HIEROGLYPHS, 5116 5117 /** 5118 * Unicode script "Meroitic Cursive". 5119 * @since 1.8 5120 */ 5121 MEROITIC_CURSIVE, 5122 5123 /** 5124 * Unicode script "Sora Sompeng". 5125 * @since 1.8 5126 */ 5127 SORA_SOMPENG, 5128 5129 /** 5130 * Unicode script "Chakma". 5131 * @since 1.8 5132 */ 5133 CHAKMA, 5134 5135 /** 5136 * Unicode script "Sharada". 5137 * @since 1.8 5138 */ 5139 SHARADA, 5140 5141 /** 5142 * Unicode script "Takri". 5143 * @since 1.8 5144 */ 5145 TAKRI, 5146 5147 /** 5148 * Unicode script "Miao". 5149 * @since 1.8 5150 */ 5151 MIAO, 5152 5153 /** 5154 * Unicode script "Caucasian Albanian". 5155 * @since 9 5156 */ 5157 CAUCASIAN_ALBANIAN, 5158 5159 /** 5160 * Unicode script "Bassa Vah". 5161 * @since 9 5162 */ 5163 BASSA_VAH, 5164 5165 /** 5166 * Unicode script "Duployan". 5167 * @since 9 5168 */ 5169 DUPLOYAN, 5170 5171 /** 5172 * Unicode script "Elbasan". 5173 * @since 9 5174 */ 5175 ELBASAN, 5176 5177 /** 5178 * Unicode script "Grantha". 5179 * @since 9 5180 */ 5181 GRANTHA, 5182 5183 /** 5184 * Unicode script "Pahawh Hmong". 5185 * @since 9 5186 */ 5187 PAHAWH_HMONG, 5188 5189 /** 5190 * Unicode script "Khojki". 5191 * @since 9 5192 */ 5193 KHOJKI, 5194 5195 /** 5196 * Unicode script "Linear A". 5197 * @since 9 5198 */ 5199 LINEAR_A, 5200 5201 /** 5202 * Unicode script "Mahajani". 5203 * @since 9 5204 */ 5205 MAHAJANI, 5206 5207 /** 5208 * Unicode script "Manichaean". 5209 * @since 9 5210 */ 5211 MANICHAEAN, 5212 5213 /** 5214 * Unicode script "Mende Kikakui". 5215 * @since 9 5216 */ 5217 MENDE_KIKAKUI, 5218 5219 /** 5220 * Unicode script "Modi". 5221 * @since 9 5222 */ 5223 MODI, 5224 5225 /** 5226 * Unicode script "Mro". 
5227 * @since 9 5228 */ 5229 MRO, 5230 5231 /** 5232 * Unicode script "Old North Arabian". 5233 * @since 9 5234 */ 5235 OLD_NORTH_ARABIAN, 5236 5237 /** 5238 * Unicode script "Nabataean". 5239 * @since 9 5240 */ 5241 NABATAEAN, 5242 5243 /** 5244 * Unicode script "Palmyrene". 5245 * @since 9 5246 */ 5247 PALMYRENE, 5248 5249 /** 5250 * Unicode script "Pau Cin Hau". 5251 * @since 9 5252 */ 5253 PAU_CIN_HAU, 5254 5255 /** 5256 * Unicode script "Old Permic". 5257 * @since 9 5258 */ 5259 OLD_PERMIC, 5260 5261 /** 5262 * Unicode script "Psalter Pahlavi". 5263 * @since 9 5264 */ 5265 PSALTER_PAHLAVI, 5266 5267 /** 5268 * Unicode script "Siddham". 5269 * @since 9 5270 */ 5271 SIDDHAM, 5272 5273 /** 5274 * Unicode script "Khudawadi". 5275 * @since 9 5276 */ 5277 KHUDAWADI, 5278 5279 /** 5280 * Unicode script "Tirhuta". 5281 * @since 9 5282 */ 5283 TIRHUTA, 5284 5285 /** 5286 * Unicode script "Warang Citi". 5287 * @since 9 5288 */ 5289 WARANG_CITI, 5290 5291 /** 5292 * Unicode script "Ahom". 5293 * @since 9 5294 */ 5295 AHOM, 5296 5297 /** 5298 * Unicode script "Anatolian Hieroglyphs". 5299 * @since 9 5300 */ 5301 ANATOLIAN_HIEROGLYPHS, 5302 5303 /** 5304 * Unicode script "Hatran". 5305 * @since 9 5306 */ 5307 HATRAN, 5308 5309 /** 5310 * Unicode script "Multani". 5311 * @since 9 5312 */ 5313 MULTANI, 5314 5315 /** 5316 * Unicode script "Old Hungarian". 5317 * @since 9 5318 */ 5319 OLD_HUNGARIAN, 5320 5321 /** 5322 * Unicode script "SignWriting". 5323 * @since 9 5324 */ 5325 SIGNWRITING, 5326 5327 /** 5328 * Unicode script "Adlam". 5329 * @since 11 5330 */ 5331 ADLAM, 5332 5333 /** 5334 * Unicode script "Bhaiksuki". 5335 * @since 11 5336 */ 5337 BHAIKSUKI, 5338 5339 /** 5340 * Unicode script "Marchen". 5341 * @since 11 5342 */ 5343 MARCHEN, 5344 5345 /** 5346 * Unicode script "Newa". 5347 * @since 11 5348 */ 5349 NEWA, 5350 5351 /** 5352 * Unicode script "Osage". 5353 * @since 11 5354 */ 5355 OSAGE, 5356 5357 /** 5358 * Unicode script "Tangut". 
5359 * @since 11 5360 */ 5361 TANGUT, 5362 5363 /** 5364 * Unicode script "Masaram Gondi". 5365 * @since 11 5366 */ 5367 MASARAM_GONDI, 5368 5369 /** 5370 * Unicode script "Nushu". 5371 * @since 11 5372 */ 5373 NUSHU, 5374 5375 /** 5376 * Unicode script "Soyombo". 5377 * @since 11 5378 */ 5379 SOYOMBO, 5380 5381 /** 5382 * Unicode script "Zanabazar Square". 5383 * @since 11 5384 */ 5385 ZANABAZAR_SQUARE, 5386 5387 /** 5388 * Unicode script "Hanifi Rohingya". 5389 * @since 12 5390 */ 5391 HANIFI_ROHINGYA, 5392 5393 /** 5394 * Unicode script "Old Sogdian". 5395 * @since 12 5396 */ 5397 OLD_SOGDIAN, 5398 5399 /** 5400 * Unicode script "Sogdian". 5401 * @since 12 5402 */ 5403 SOGDIAN, 5404 5405 /** 5406 * Unicode script "Dogra". 5407 * @since 12 5408 */ 5409 DOGRA, 5410 5411 /** 5412 * Unicode script "Gunjala Gondi". 5413 * @since 12 5414 */ 5415 GUNJALA_GONDI, 5416 5417 /** 5418 * Unicode script "Makasar". 5419 * @since 12 5420 */ 5421 MAKASAR, 5422 5423 /** 5424 * Unicode script "Medefaidrin". 5425 * @since 12 5426 */ 5427 MEDEFAIDRIN, 5428 5429 /** 5430 * Unicode script "Elymaic". 5431 * @since 13 5432 */ 5433 ELYMAIC, 5434 5435 /** 5436 * Unicode script "Nandinagari". 5437 * @since 13 5438 */ 5439 NANDINAGARI, 5440 5441 /** 5442 * Unicode script "Nyiakeng Puachue Hmong". 5443 * @since 13 5444 */ 5445 NYIAKENG_PUACHUE_HMONG, 5446 5447 /** 5448 * Unicode script "Wancho". 5449 * @since 13 5450 */ 5451 WANCHO, 5452 5453 /** 5454 * Unicode script "Yezidi". 5455 * @since 15 5456 */ 5457 YEZIDI, 5458 5459 /** 5460 * Unicode script "Chorasmian". 5461 * @since 15 5462 */ 5463 CHORASMIAN, 5464 5465 /** 5466 * Unicode script "Dives Akuru". 5467 * @since 15 5468 */ 5469 DIVES_AKURU, 5470 5471 /** 5472 * Unicode script "Khitan Small Script". 5473 * @since 15 5474 */ 5475 KHITAN_SMALL_SCRIPT, 5476 5477 /** 5478 * Unicode script "Vithkuqi". 5479 * @since 19 5480 */ 5481 VITHKUQI, 5482 5483 /** 5484 * Unicode script "Old Uyghur". 
5485 * @since 19 5486 */ 5487 OLD_UYGHUR, 5488 5489 /** 5490 * Unicode script "Cypro Minoan". 5491 * @since 19 5492 */ 5493 CYPRO_MINOAN, 5494 5495 /** 5496 * Unicode script "Tangsa". 5497 * @since 19 5498 */ 5499 TANGSA, 5500 5501 /** 5502 * Unicode script "Toto". 5503 * @since 19 5504 */ 5505 TOTO, 5506 5507 /** 5508 * Unicode script "Kawi". 5509 * @since 20 5510 */ 5511 KAWI, 5512 5513 /** 5514 * Unicode script "Nag Mundari". 5515 * @since 20 5516 */ 5517 NAG_MUNDARI, 5518 5519 /** 5520 * Unicode script "Todhri". 5521 * @since 24 5522 */ 5523 TODHRI, 5524 5525 /** 5526 * Unicode script "Garay". 5527 * @since 24 5528 */ 5529 GARAY, 5530 5531 /** 5532 * Unicode script "Tulu Tigalari". 5533 * @since 24 5534 */ 5535 TULU_TIGALARI, 5536 5537 /** 5538 * Unicode script "Sunuwar". 5539 * @since 24 5540 */ 5541 SUNUWAR, 5542 5543 /** 5544 * Unicode script "Gurung Khema". 5545 * @since 24 5546 */ 5547 GURUNG_KHEMA, 5548 5549 /** 5550 * Unicode script "Kirat Rai". 5551 * @since 24 5552 */ 5553 KIRAT_RAI, 5554 5555 /** 5556 * Unicode script "Ol Onal". 5557 * @since 24 5558 */ 5559 OL_ONAL, 5560 5561 /** 5562 * Unicode script "Unknown". 5563 */ 5564 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 
5565 5566 private static final int[] scriptStarts = { 5567 0x0000, // 0000..0040; COMMON 5568 0x0041, // 0041..005A; LATIN 5569 0x005B, // 005B..0060; COMMON 5570 0x0061, // 0061..007A; LATIN 5571 0x007B, // 007B..00A9; COMMON 5572 0x00AA, // 00AA ; LATIN 5573 0x00AB, // 00AB..00B9; COMMON 5574 0x00BA, // 00BA ; LATIN 5575 0x00BB, // 00BB..00BF; COMMON 5576 0x00C0, // 00C0..00D6; LATIN 5577 0x00D7, // 00D7 ; COMMON 5578 0x00D8, // 00D8..00F6; LATIN 5579 0x00F7, // 00F7 ; COMMON 5580 0x00F8, // 00F8..02B8; LATIN 5581 0x02B9, // 02B9..02DF; COMMON 5582 0x02E0, // 02E0..02E4; LATIN 5583 0x02E5, // 02E5..02E9; COMMON 5584 0x02EA, // 02EA..02EB; BOPOMOFO 5585 0x02EC, // 02EC..02FF; COMMON 5586 0x0300, // 0300..036F; INHERITED 5587 0x0370, // 0370..0373; GREEK 5588 0x0374, // 0374 ; COMMON 5589 0x0375, // 0375..0377; GREEK 5590 0x0378, // 0378..0379; UNKNOWN 5591 0x037A, // 037A..037D; GREEK 5592 0x037E, // 037E ; COMMON 5593 0x037F, // 037F ; GREEK 5594 0x0380, // 0380..0383; UNKNOWN 5595 0x0384, // 0384 ; GREEK 5596 0x0385, // 0385 ; COMMON 5597 0x0386, // 0386 ; GREEK 5598 0x0387, // 0387 ; COMMON 5599 0x0388, // 0388..038A; GREEK 5600 0x038B, // 038B ; UNKNOWN 5601 0x038C, // 038C ; GREEK 5602 0x038D, // 038D ; UNKNOWN 5603 0x038E, // 038E..03A1; GREEK 5604 0x03A2, // 03A2 ; UNKNOWN 5605 0x03A3, // 03A3..03E1; GREEK 5606 0x03E2, // 03E2..03EF; COPTIC 5607 0x03F0, // 03F0..03FF; GREEK 5608 0x0400, // 0400..0484; CYRILLIC 5609 0x0485, // 0485..0486; INHERITED 5610 0x0487, // 0487..052F; CYRILLIC 5611 0x0530, // 0530 ; UNKNOWN 5612 0x0531, // 0531..0556; ARMENIAN 5613 0x0557, // 0557..0558; UNKNOWN 5614 0x0559, // 0559..058A; ARMENIAN 5615 0x058B, // 058B..058C; UNKNOWN 5616 0x058D, // 058D..058F; ARMENIAN 5617 0x0590, // 0590 ; UNKNOWN 5618 0x0591, // 0591..05C7; HEBREW 5619 0x05C8, // 05C8..05CF; UNKNOWN 5620 0x05D0, // 05D0..05EA; HEBREW 5621 0x05EB, // 05EB..05EE; UNKNOWN 5622 0x05EF, // 05EF..05F4; HEBREW 5623 0x05F5, // 05F5..05FF; UNKNOWN 5624 0x0600, // 
0600..0604; ARABIC 5625 0x0605, // 0605 ; COMMON 5626 0x0606, // 0606..060B; ARABIC 5627 0x060C, // 060C ; COMMON 5628 0x060D, // 060D..061A; ARABIC 5629 0x061B, // 061B ; COMMON 5630 0x061C, // 061C..061E; ARABIC 5631 0x061F, // 061F ; COMMON 5632 0x0620, // 0620..063F; ARABIC 5633 0x0640, // 0640 ; COMMON 5634 0x0641, // 0641..064A; ARABIC 5635 0x064B, // 064B..0655; INHERITED 5636 0x0656, // 0656..066F; ARABIC 5637 0x0670, // 0670 ; INHERITED 5638 0x0671, // 0671..06DC; ARABIC 5639 0x06DD, // 06DD ; COMMON 5640 0x06DE, // 06DE..06FF; ARABIC 5641 0x0700, // 0700..070D; SYRIAC 5642 0x070E, // 070E ; UNKNOWN 5643 0x070F, // 070F..074A; SYRIAC 5644 0x074B, // 074B..074C; UNKNOWN 5645 0x074D, // 074D..074F; SYRIAC 5646 0x0750, // 0750..077F; ARABIC 5647 0x0780, // 0780..07B1; THAANA 5648 0x07B2, // 07B2..07BF; UNKNOWN 5649 0x07C0, // 07C0..07FA; NKO 5650 0x07FB, // 07FB..07FC; UNKNOWN 5651 0x07FD, // 07FD..07FF; NKO 5652 0x0800, // 0800..082D; SAMARITAN 5653 0x082E, // 082E..082F; UNKNOWN 5654 0x0830, // 0830..083E; SAMARITAN 5655 0x083F, // 083F ; UNKNOWN 5656 0x0840, // 0840..085B; MANDAIC 5657 0x085C, // 085C..085D; UNKNOWN 5658 0x085E, // 085E ; MANDAIC 5659 0x085F, // 085F ; UNKNOWN 5660 0x0860, // 0860..086A; SYRIAC 5661 0x086B, // 086B..086F; UNKNOWN 5662 0x0870, // 0870..088E; ARABIC 5663 0x088F, // 088F ; UNKNOWN 5664 0x0890, // 0890..0891; ARABIC 5665 0x0892, // 0892..0896; UNKNOWN 5666 0x0897, // 0897..08E1; ARABIC 5667 0x08E2, // 08E2 ; COMMON 5668 0x08E3, // 08E3..08FF; ARABIC 5669 0x0900, // 0900..0950; DEVANAGARI 5670 0x0951, // 0951..0954; INHERITED 5671 0x0955, // 0955..0963; DEVANAGARI 5672 0x0964, // 0964..0965; COMMON 5673 0x0966, // 0966..097F; DEVANAGARI 5674 0x0980, // 0980..0983; BENGALI 5675 0x0984, // 0984 ; UNKNOWN 5676 0x0985, // 0985..098C; BENGALI 5677 0x098D, // 098D..098E; UNKNOWN 5678 0x098F, // 098F..0990; BENGALI 5679 0x0991, // 0991..0992; UNKNOWN 5680 0x0993, // 0993..09A8; BENGALI 5681 0x09A9, // 09A9 ; UNKNOWN 5682 0x09AA, // 
09AA..09B0; BENGALI 5683 0x09B1, // 09B1 ; UNKNOWN 5684 0x09B2, // 09B2 ; BENGALI 5685 0x09B3, // 09B3..09B5; UNKNOWN 5686 0x09B6, // 09B6..09B9; BENGALI 5687 0x09BA, // 09BA..09BB; UNKNOWN 5688 0x09BC, // 09BC..09C4; BENGALI 5689 0x09C5, // 09C5..09C6; UNKNOWN 5690 0x09C7, // 09C7..09C8; BENGALI 5691 0x09C9, // 09C9..09CA; UNKNOWN 5692 0x09CB, // 09CB..09CE; BENGALI 5693 0x09CF, // 09CF..09D6; UNKNOWN 5694 0x09D7, // 09D7 ; BENGALI 5695 0x09D8, // 09D8..09DB; UNKNOWN 5696 0x09DC, // 09DC..09DD; BENGALI 5697 0x09DE, // 09DE ; UNKNOWN 5698 0x09DF, // 09DF..09E3; BENGALI 5699 0x09E4, // 09E4..09E5; UNKNOWN 5700 0x09E6, // 09E6..09FE; BENGALI 5701 0x09FF, // 09FF..0A00; UNKNOWN 5702 0x0A01, // 0A01..0A03; GURMUKHI 5703 0x0A04, // 0A04 ; UNKNOWN 5704 0x0A05, // 0A05..0A0A; GURMUKHI 5705 0x0A0B, // 0A0B..0A0E; UNKNOWN 5706 0x0A0F, // 0A0F..0A10; GURMUKHI 5707 0x0A11, // 0A11..0A12; UNKNOWN 5708 0x0A13, // 0A13..0A28; GURMUKHI 5709 0x0A29, // 0A29 ; UNKNOWN 5710 0x0A2A, // 0A2A..0A30; GURMUKHI 5711 0x0A31, // 0A31 ; UNKNOWN 5712 0x0A32, // 0A32..0A33; GURMUKHI 5713 0x0A34, // 0A34 ; UNKNOWN 5714 0x0A35, // 0A35..0A36; GURMUKHI 5715 0x0A37, // 0A37 ; UNKNOWN 5716 0x0A38, // 0A38..0A39; GURMUKHI 5717 0x0A3A, // 0A3A..0A3B; UNKNOWN 5718 0x0A3C, // 0A3C ; GURMUKHI 5719 0x0A3D, // 0A3D ; UNKNOWN 5720 0x0A3E, // 0A3E..0A42; GURMUKHI 5721 0x0A43, // 0A43..0A46; UNKNOWN 5722 0x0A47, // 0A47..0A48; GURMUKHI 5723 0x0A49, // 0A49..0A4A; UNKNOWN 5724 0x0A4B, // 0A4B..0A4D; GURMUKHI 5725 0x0A4E, // 0A4E..0A50; UNKNOWN 5726 0x0A51, // 0A51 ; GURMUKHI 5727 0x0A52, // 0A52..0A58; UNKNOWN 5728 0x0A59, // 0A59..0A5C; GURMUKHI 5729 0x0A5D, // 0A5D ; UNKNOWN 5730 0x0A5E, // 0A5E ; GURMUKHI 5731 0x0A5F, // 0A5F..0A65; UNKNOWN 5732 0x0A66, // 0A66..0A76; GURMUKHI 5733 0x0A77, // 0A77..0A80; UNKNOWN 5734 0x0A81, // 0A81..0A83; GUJARATI 5735 0x0A84, // 0A84 ; UNKNOWN 5736 0x0A85, // 0A85..0A8D; GUJARATI 5737 0x0A8E, // 0A8E ; UNKNOWN 5738 0x0A8F, // 0A8F..0A91; GUJARATI 5739 0x0A92, // 0A92 ; 
UNKNOWN 5740 0x0A93, // 0A93..0AA8; GUJARATI 5741 0x0AA9, // 0AA9 ; UNKNOWN 5742 0x0AAA, // 0AAA..0AB0; GUJARATI 5743 0x0AB1, // 0AB1 ; UNKNOWN 5744 0x0AB2, // 0AB2..0AB3; GUJARATI 5745 0x0AB4, // 0AB4 ; UNKNOWN 5746 0x0AB5, // 0AB5..0AB9; GUJARATI 5747 0x0ABA, // 0ABA..0ABB; UNKNOWN 5748 0x0ABC, // 0ABC..0AC5; GUJARATI 5749 0x0AC6, // 0AC6 ; UNKNOWN 5750 0x0AC7, // 0AC7..0AC9; GUJARATI 5751 0x0ACA, // 0ACA ; UNKNOWN 5752 0x0ACB, // 0ACB..0ACD; GUJARATI 5753 0x0ACE, // 0ACE..0ACF; UNKNOWN 5754 0x0AD0, // 0AD0 ; GUJARATI 5755 0x0AD1, // 0AD1..0ADF; UNKNOWN 5756 0x0AE0, // 0AE0..0AE3; GUJARATI 5757 0x0AE4, // 0AE4..0AE5; UNKNOWN 5758 0x0AE6, // 0AE6..0AF1; GUJARATI 5759 0x0AF2, // 0AF2..0AF8; UNKNOWN 5760 0x0AF9, // 0AF9..0AFF; GUJARATI 5761 0x0B00, // 0B00 ; UNKNOWN 5762 0x0B01, // 0B01..0B03; ORIYA 5763 0x0B04, // 0B04 ; UNKNOWN 5764 0x0B05, // 0B05..0B0C; ORIYA 5765 0x0B0D, // 0B0D..0B0E; UNKNOWN 5766 0x0B0F, // 0B0F..0B10; ORIYA 5767 0x0B11, // 0B11..0B12; UNKNOWN 5768 0x0B13, // 0B13..0B28; ORIYA 5769 0x0B29, // 0B29 ; UNKNOWN 5770 0x0B2A, // 0B2A..0B30; ORIYA 5771 0x0B31, // 0B31 ; UNKNOWN 5772 0x0B32, // 0B32..0B33; ORIYA 5773 0x0B34, // 0B34 ; UNKNOWN 5774 0x0B35, // 0B35..0B39; ORIYA 5775 0x0B3A, // 0B3A..0B3B; UNKNOWN 5776 0x0B3C, // 0B3C..0B44; ORIYA 5777 0x0B45, // 0B45..0B46; UNKNOWN 5778 0x0B47, // 0B47..0B48; ORIYA 5779 0x0B49, // 0B49..0B4A; UNKNOWN 5780 0x0B4B, // 0B4B..0B4D; ORIYA 5781 0x0B4E, // 0B4E..0B54; UNKNOWN 5782 0x0B55, // 0B55..0B57; ORIYA 5783 0x0B58, // 0B58..0B5B; UNKNOWN 5784 0x0B5C, // 0B5C..0B5D; ORIYA 5785 0x0B5E, // 0B5E ; UNKNOWN 5786 0x0B5F, // 0B5F..0B63; ORIYA 5787 0x0B64, // 0B64..0B65; UNKNOWN 5788 0x0B66, // 0B66..0B77; ORIYA 5789 0x0B78, // 0B78..0B81; UNKNOWN 5790 0x0B82, // 0B82..0B83; TAMIL 5791 0x0B84, // 0B84 ; UNKNOWN 5792 0x0B85, // 0B85..0B8A; TAMIL 5793 0x0B8B, // 0B8B..0B8D; UNKNOWN 5794 0x0B8E, // 0B8E..0B90; TAMIL 5795 0x0B91, // 0B91 ; UNKNOWN 5796 0x0B92, // 0B92..0B95; TAMIL 5797 0x0B96, // 0B96..0B98; 
UNKNOWN 5798 0x0B99, // 0B99..0B9A; TAMIL 5799 0x0B9B, // 0B9B ; UNKNOWN 5800 0x0B9C, // 0B9C ; TAMIL 5801 0x0B9D, // 0B9D ; UNKNOWN 5802 0x0B9E, // 0B9E..0B9F; TAMIL 5803 0x0BA0, // 0BA0..0BA2; UNKNOWN 5804 0x0BA3, // 0BA3..0BA4; TAMIL 5805 0x0BA5, // 0BA5..0BA7; UNKNOWN 5806 0x0BA8, // 0BA8..0BAA; TAMIL 5807 0x0BAB, // 0BAB..0BAD; UNKNOWN 5808 0x0BAE, // 0BAE..0BB9; TAMIL 5809 0x0BBA, // 0BBA..0BBD; UNKNOWN 5810 0x0BBE, // 0BBE..0BC2; TAMIL 5811 0x0BC3, // 0BC3..0BC5; UNKNOWN 5812 0x0BC6, // 0BC6..0BC8; TAMIL 5813 0x0BC9, // 0BC9 ; UNKNOWN 5814 0x0BCA, // 0BCA..0BCD; TAMIL 5815 0x0BCE, // 0BCE..0BCF; UNKNOWN 5816 0x0BD0, // 0BD0 ; TAMIL 5817 0x0BD1, // 0BD1..0BD6; UNKNOWN 5818 0x0BD7, // 0BD7 ; TAMIL 5819 0x0BD8, // 0BD8..0BE5; UNKNOWN 5820 0x0BE6, // 0BE6..0BFA; TAMIL 5821 0x0BFB, // 0BFB..0BFF; UNKNOWN 5822 0x0C00, // 0C00..0C0C; TELUGU 5823 0x0C0D, // 0C0D ; UNKNOWN 5824 0x0C0E, // 0C0E..0C10; TELUGU 5825 0x0C11, // 0C11 ; UNKNOWN 5826 0x0C12, // 0C12..0C28; TELUGU 5827 0x0C29, // 0C29 ; UNKNOWN 5828 0x0C2A, // 0C2A..0C39; TELUGU 5829 0x0C3A, // 0C3A..0C3B; UNKNOWN 5830 0x0C3C, // 0C3C..0C44; TELUGU 5831 0x0C45, // 0C45 ; UNKNOWN 5832 0x0C46, // 0C46..0C48; TELUGU 5833 0x0C49, // 0C49 ; UNKNOWN 5834 0x0C4A, // 0C4A..0C4D; TELUGU 5835 0x0C4E, // 0C4E..0C54; UNKNOWN 5836 0x0C55, // 0C55..0C56; TELUGU 5837 0x0C57, // 0C57 ; UNKNOWN 5838 0x0C58, // 0C58..0C5A; TELUGU 5839 0x0C5B, // 0C5B..0C5C; UNKNOWN 5840 0x0C5D, // 0C5D ; TELUGU 5841 0x0C5E, // 0C5E..0C5F; UNKNOWN 5842 0x0C60, // 0C60..0C63; TELUGU 5843 0x0C64, // 0C64..0C65; UNKNOWN 5844 0x0C66, // 0C66..0C6F; TELUGU 5845 0x0C70, // 0C70..0C76; UNKNOWN 5846 0x0C77, // 0C77..0C7F; TELUGU 5847 0x0C80, // 0C80..0C8C; KANNADA 5848 0x0C8D, // 0C8D ; UNKNOWN 5849 0x0C8E, // 0C8E..0C90; KANNADA 5850 0x0C91, // 0C91 ; UNKNOWN 5851 0x0C92, // 0C92..0CA8; KANNADA 5852 0x0CA9, // 0CA9 ; UNKNOWN 5853 0x0CAA, // 0CAA..0CB3; KANNADA 5854 0x0CB4, // 0CB4 ; UNKNOWN 5855 0x0CB5, // 0CB5..0CB9; KANNADA 5856 0x0CBA, // 
0CBA..0CBB; UNKNOWN 5857 0x0CBC, // 0CBC..0CC4; KANNADA 5858 0x0CC5, // 0CC5 ; UNKNOWN 5859 0x0CC6, // 0CC6..0CC8; KANNADA 5860 0x0CC9, // 0CC9 ; UNKNOWN 5861 0x0CCA, // 0CCA..0CCD; KANNADA 5862 0x0CCE, // 0CCE..0CD4; UNKNOWN 5863 0x0CD5, // 0CD5..0CD6; KANNADA 5864 0x0CD7, // 0CD7..0CDC; UNKNOWN 5865 0x0CDD, // 0CDD..0CDE; KANNADA 5866 0x0CDF, // 0CDF ; UNKNOWN 5867 0x0CE0, // 0CE0..0CE3; KANNADA 5868 0x0CE4, // 0CE4..0CE5; UNKNOWN 5869 0x0CE6, // 0CE6..0CEF; KANNADA 5870 0x0CF0, // 0CF0 ; UNKNOWN 5871 0x0CF1, // 0CF1..0CF3; KANNADA 5872 0x0CF4, // 0CF4..0CFF; UNKNOWN 5873 0x0D00, // 0D00..0D0C; MALAYALAM 5874 0x0D0D, // 0D0D ; UNKNOWN 5875 0x0D0E, // 0D0E..0D10; MALAYALAM 5876 0x0D11, // 0D11 ; UNKNOWN 5877 0x0D12, // 0D12..0D44; MALAYALAM 5878 0x0D45, // 0D45 ; UNKNOWN 5879 0x0D46, // 0D46..0D48; MALAYALAM 5880 0x0D49, // 0D49 ; UNKNOWN 5881 0x0D4A, // 0D4A..0D4F; MALAYALAM 5882 0x0D50, // 0D50..0D53; UNKNOWN 5883 0x0D54, // 0D54..0D63; MALAYALAM 5884 0x0D64, // 0D64..0D65; UNKNOWN 5885 0x0D66, // 0D66..0D7F; MALAYALAM 5886 0x0D80, // 0D80 ; UNKNOWN 5887 0x0D81, // 0D81..0D83; SINHALA 5888 0x0D84, // 0D84 ; UNKNOWN 5889 0x0D85, // 0D85..0D96; SINHALA 5890 0x0D97, // 0D97..0D99; UNKNOWN 5891 0x0D9A, // 0D9A..0DB1; SINHALA 5892 0x0DB2, // 0DB2 ; UNKNOWN 5893 0x0DB3, // 0DB3..0DBB; SINHALA 5894 0x0DBC, // 0DBC ; UNKNOWN 5895 0x0DBD, // 0DBD ; SINHALA 5896 0x0DBE, // 0DBE..0DBF; UNKNOWN 5897 0x0DC0, // 0DC0..0DC6; SINHALA 5898 0x0DC7, // 0DC7..0DC9; UNKNOWN 5899 0x0DCA, // 0DCA ; SINHALA 5900 0x0DCB, // 0DCB..0DCE; UNKNOWN 5901 0x0DCF, // 0DCF..0DD4; SINHALA 5902 0x0DD5, // 0DD5 ; UNKNOWN 5903 0x0DD6, // 0DD6 ; SINHALA 5904 0x0DD7, // 0DD7 ; UNKNOWN 5905 0x0DD8, // 0DD8..0DDF; SINHALA 5906 0x0DE0, // 0DE0..0DE5; UNKNOWN 5907 0x0DE6, // 0DE6..0DEF; SINHALA 5908 0x0DF0, // 0DF0..0DF1; UNKNOWN 5909 0x0DF2, // 0DF2..0DF4; SINHALA 5910 0x0DF5, // 0DF5..0E00; UNKNOWN 5911 0x0E01, // 0E01..0E3A; THAI 5912 0x0E3B, // 0E3B..0E3E; UNKNOWN 5913 0x0E3F, // 0E3F ; COMMON 5914 
0x0E40, // 0E40..0E5B; THAI 5915 0x0E5C, // 0E5C..0E80; UNKNOWN 5916 0x0E81, // 0E81..0E82; LAO 5917 0x0E83, // 0E83 ; UNKNOWN 5918 0x0E84, // 0E84 ; LAO 5919 0x0E85, // 0E85 ; UNKNOWN 5920 0x0E86, // 0E86..0E8A; LAO 5921 0x0E8B, // 0E8B ; UNKNOWN 5922 0x0E8C, // 0E8C..0EA3; LAO 5923 0x0EA4, // 0EA4 ; UNKNOWN 5924 0x0EA5, // 0EA5 ; LAO 5925 0x0EA6, // 0EA6 ; UNKNOWN 5926 0x0EA7, // 0EA7..0EBD; LAO 5927 0x0EBE, // 0EBE..0EBF; UNKNOWN 5928 0x0EC0, // 0EC0..0EC4; LAO 5929 0x0EC5, // 0EC5 ; UNKNOWN 5930 0x0EC6, // 0EC6 ; LAO 5931 0x0EC7, // 0EC7 ; UNKNOWN 5932 0x0EC8, // 0EC8..0ECE; LAO 5933 0x0ECF, // 0ECF ; UNKNOWN 5934 0x0ED0, // 0ED0..0ED9; LAO 5935 0x0EDA, // 0EDA..0EDB; UNKNOWN 5936 0x0EDC, // 0EDC..0EDF; LAO 5937 0x0EE0, // 0EE0..0EFF; UNKNOWN 5938 0x0F00, // 0F00..0F47; TIBETAN 5939 0x0F48, // 0F48 ; UNKNOWN 5940 0x0F49, // 0F49..0F6C; TIBETAN 5941 0x0F6D, // 0F6D..0F70; UNKNOWN 5942 0x0F71, // 0F71..0F97; TIBETAN 5943 0x0F98, // 0F98 ; UNKNOWN 5944 0x0F99, // 0F99..0FBC; TIBETAN 5945 0x0FBD, // 0FBD ; UNKNOWN 5946 0x0FBE, // 0FBE..0FCC; TIBETAN 5947 0x0FCD, // 0FCD ; UNKNOWN 5948 0x0FCE, // 0FCE..0FD4; TIBETAN 5949 0x0FD5, // 0FD5..0FD8; COMMON 5950 0x0FD9, // 0FD9..0FDA; TIBETAN 5951 0x0FDB, // 0FDB..0FFF; UNKNOWN 5952 0x1000, // 1000..109F; MYANMAR 5953 0x10A0, // 10A0..10C5; GEORGIAN 5954 0x10C6, // 10C6 ; UNKNOWN 5955 0x10C7, // 10C7 ; GEORGIAN 5956 0x10C8, // 10C8..10CC; UNKNOWN 5957 0x10CD, // 10CD ; GEORGIAN 5958 0x10CE, // 10CE..10CF; UNKNOWN 5959 0x10D0, // 10D0..10FA; GEORGIAN 5960 0x10FB, // 10FB ; COMMON 5961 0x10FC, // 10FC..10FF; GEORGIAN 5962 0x1100, // 1100..11FF; HANGUL 5963 0x1200, // 1200..1248; ETHIOPIC 5964 0x1249, // 1249 ; UNKNOWN 5965 0x124A, // 124A..124D; ETHIOPIC 5966 0x124E, // 124E..124F; UNKNOWN 5967 0x1250, // 1250..1256; ETHIOPIC 5968 0x1257, // 1257 ; UNKNOWN 5969 0x1258, // 1258 ; ETHIOPIC 5970 0x1259, // 1259 ; UNKNOWN 5971 0x125A, // 125A..125D; ETHIOPIC 5972 0x125E, // 125E..125F; UNKNOWN 5973 0x1260, // 1260..1288; 
ETHIOPIC 5974 0x1289, // 1289 ; UNKNOWN 5975 0x128A, // 128A..128D; ETHIOPIC 5976 0x128E, // 128E..128F; UNKNOWN 5977 0x1290, // 1290..12B0; ETHIOPIC 5978 0x12B1, // 12B1 ; UNKNOWN 5979 0x12B2, // 12B2..12B5; ETHIOPIC 5980 0x12B6, // 12B6..12B7; UNKNOWN 5981 0x12B8, // 12B8..12BE; ETHIOPIC 5982 0x12BF, // 12BF ; UNKNOWN 5983 0x12C0, // 12C0 ; ETHIOPIC 5984 0x12C1, // 12C1 ; UNKNOWN 5985 0x12C2, // 12C2..12C5; ETHIOPIC 5986 0x12C6, // 12C6..12C7; UNKNOWN 5987 0x12C8, // 12C8..12D6; ETHIOPIC 5988 0x12D7, // 12D7 ; UNKNOWN 5989 0x12D8, // 12D8..1310; ETHIOPIC 5990 0x1311, // 1311 ; UNKNOWN 5991 0x1312, // 1312..1315; ETHIOPIC 5992 0x1316, // 1316..1317; UNKNOWN 5993 0x1318, // 1318..135A; ETHIOPIC 5994 0x135B, // 135B..135C; UNKNOWN 5995 0x135D, // 135D..137C; ETHIOPIC 5996 0x137D, // 137D..137F; UNKNOWN 5997 0x1380, // 1380..1399; ETHIOPIC 5998 0x139A, // 139A..139F; UNKNOWN 5999 0x13A0, // 13A0..13F5; CHEROKEE 6000 0x13F6, // 13F6..13F7; UNKNOWN 6001 0x13F8, // 13F8..13FD; CHEROKEE 6002 0x13FE, // 13FE..13FF; UNKNOWN 6003 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 6004 0x1680, // 1680..169C; OGHAM 6005 0x169D, // 169D..169F; UNKNOWN 6006 0x16A0, // 16A0..16EA; RUNIC 6007 0x16EB, // 16EB..16ED; COMMON 6008 0x16EE, // 16EE..16F8; RUNIC 6009 0x16F9, // 16F9..16FF; UNKNOWN 6010 0x1700, // 1700..1715; TAGALOG 6011 0x1716, // 1716..171E; UNKNOWN 6012 0x171F, // 171F ; TAGALOG 6013 0x1720, // 1720..1734; HANUNOO 6014 0x1735, // 1735..1736; COMMON 6015 0x1737, // 1737..173F; UNKNOWN 6016 0x1740, // 1740..1753; BUHID 6017 0x1754, // 1754..175F; UNKNOWN 6018 0x1760, // 1760..176C; TAGBANWA 6019 0x176D, // 176D ; UNKNOWN 6020 0x176E, // 176E..1770; TAGBANWA 6021 0x1771, // 1771 ; UNKNOWN 6022 0x1772, // 1772..1773; TAGBANWA 6023 0x1774, // 1774..177F; UNKNOWN 6024 0x1780, // 1780..17DD; KHMER 6025 0x17DE, // 17DE..17DF; UNKNOWN 6026 0x17E0, // 17E0..17E9; KHMER 6027 0x17EA, // 17EA..17EF; UNKNOWN 6028 0x17F0, // 17F0..17F9; KHMER 6029 0x17FA, // 17FA..17FF; UNKNOWN 6030 
0x1800, // 1800..1801; MONGOLIAN 6031 0x1802, // 1802..1803; COMMON 6032 0x1804, // 1804 ; MONGOLIAN 6033 0x1805, // 1805 ; COMMON 6034 0x1806, // 1806..1819; MONGOLIAN 6035 0x181A, // 181A..181F; UNKNOWN 6036 0x1820, // 1820..1878; MONGOLIAN 6037 0x1879, // 1879..187F; UNKNOWN 6038 0x1880, // 1880..18AA; MONGOLIAN 6039 0x18AB, // 18AB..18AF; UNKNOWN 6040 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 6041 0x18F6, // 18F6..18FF; UNKNOWN 6042 0x1900, // 1900..191E; LIMBU 6043 0x191F, // 191F ; UNKNOWN 6044 0x1920, // 1920..192B; LIMBU 6045 0x192C, // 192C..192F; UNKNOWN 6046 0x1930, // 1930..193B; LIMBU 6047 0x193C, // 193C..193F; UNKNOWN 6048 0x1940, // 1940 ; LIMBU 6049 0x1941, // 1941..1943; UNKNOWN 6050 0x1944, // 1944..194F; LIMBU 6051 0x1950, // 1950..196D; TAI_LE 6052 0x196E, // 196E..196F; UNKNOWN 6053 0x1970, // 1970..1974; TAI_LE 6054 0x1975, // 1975..197F; UNKNOWN 6055 0x1980, // 1980..19AB; NEW_TAI_LUE 6056 0x19AC, // 19AC..19AF; UNKNOWN 6057 0x19B0, // 19B0..19C9; NEW_TAI_LUE 6058 0x19CA, // 19CA..19CF; UNKNOWN 6059 0x19D0, // 19D0..19DA; NEW_TAI_LUE 6060 0x19DB, // 19DB..19DD; UNKNOWN 6061 0x19DE, // 19DE..19DF; NEW_TAI_LUE 6062 0x19E0, // 19E0..19FF; KHMER 6063 0x1A00, // 1A00..1A1B; BUGINESE 6064 0x1A1C, // 1A1C..1A1D; UNKNOWN 6065 0x1A1E, // 1A1E..1A1F; BUGINESE 6066 0x1A20, // 1A20..1A5E; TAI_THAM 6067 0x1A5F, // 1A5F ; UNKNOWN 6068 0x1A60, // 1A60..1A7C; TAI_THAM 6069 0x1A7D, // 1A7D..1A7E; UNKNOWN 6070 0x1A7F, // 1A7F..1A89; TAI_THAM 6071 0x1A8A, // 1A8A..1A8F; UNKNOWN 6072 0x1A90, // 1A90..1A99; TAI_THAM 6073 0x1A9A, // 1A9A..1A9F; UNKNOWN 6074 0x1AA0, // 1AA0..1AAD; TAI_THAM 6075 0x1AAE, // 1AAE..1AAF; UNKNOWN 6076 0x1AB0, // 1AB0..1ACE; INHERITED 6077 0x1ACF, // 1ACF..1AFF; UNKNOWN 6078 0x1B00, // 1B00..1B4C; BALINESE 6079 0x1B4D, // 1B4D ; UNKNOWN 6080 0x1B4E, // 1B4E..1B7F; BALINESE 6081 0x1B80, // 1B80..1BBF; SUNDANESE 6082 0x1BC0, // 1BC0..1BF3; BATAK 6083 0x1BF4, // 1BF4..1BFB; UNKNOWN 6084 0x1BFC, // 1BFC..1BFF; BATAK 6085 0x1C00, // 
1C00..1C37; LEPCHA 6086 0x1C38, // 1C38..1C3A; UNKNOWN 6087 0x1C3B, // 1C3B..1C49; LEPCHA 6088 0x1C4A, // 1C4A..1C4C; UNKNOWN 6089 0x1C4D, // 1C4D..1C4F; LEPCHA 6090 0x1C50, // 1C50..1C7F; OL_CHIKI 6091 0x1C80, // 1C80..1C8A; CYRILLIC 6092 0x1C8B, // 1C8B..1C8F; UNKNOWN 6093 0x1C90, // 1C90..1CBA; GEORGIAN 6094 0x1CBB, // 1CBB..1CBC; UNKNOWN 6095 0x1CBD, // 1CBD..1CBF; GEORGIAN 6096 0x1CC0, // 1CC0..1CC7; SUNDANESE 6097 0x1CC8, // 1CC8..1CCF; UNKNOWN 6098 0x1CD0, // 1CD0..1CD2; INHERITED 6099 0x1CD3, // 1CD3 ; COMMON 6100 0x1CD4, // 1CD4..1CE0; INHERITED 6101 0x1CE1, // 1CE1 ; COMMON 6102 0x1CE2, // 1CE2..1CE8; INHERITED 6103 0x1CE9, // 1CE9..1CEC; COMMON 6104 0x1CED, // 1CED ; INHERITED 6105 0x1CEE, // 1CEE..1CF3; COMMON 6106 0x1CF4, // 1CF4 ; INHERITED 6107 0x1CF5, // 1CF5..1CF7; COMMON 6108 0x1CF8, // 1CF8..1CF9; INHERITED 6109 0x1CFA, // 1CFA ; COMMON 6110 0x1CFB, // 1CFB..1CFF; UNKNOWN 6111 0x1D00, // 1D00..1D25; LATIN 6112 0x1D26, // 1D26..1D2A; GREEK 6113 0x1D2B, // 1D2B ; CYRILLIC 6114 0x1D2C, // 1D2C..1D5C; LATIN 6115 0x1D5D, // 1D5D..1D61; GREEK 6116 0x1D62, // 1D62..1D65; LATIN 6117 0x1D66, // 1D66..1D6A; GREEK 6118 0x1D6B, // 1D6B..1D77; LATIN 6119 0x1D78, // 1D78 ; CYRILLIC 6120 0x1D79, // 1D79..1DBE; LATIN 6121 0x1DBF, // 1DBF ; GREEK 6122 0x1DC0, // 1DC0..1DFF; INHERITED 6123 0x1E00, // 1E00..1EFF; LATIN 6124 0x1F00, // 1F00..1F15; GREEK 6125 0x1F16, // 1F16..1F17; UNKNOWN 6126 0x1F18, // 1F18..1F1D; GREEK 6127 0x1F1E, // 1F1E..1F1F; UNKNOWN 6128 0x1F20, // 1F20..1F45; GREEK 6129 0x1F46, // 1F46..1F47; UNKNOWN 6130 0x1F48, // 1F48..1F4D; GREEK 6131 0x1F4E, // 1F4E..1F4F; UNKNOWN 6132 0x1F50, // 1F50..1F57; GREEK 6133 0x1F58, // 1F58 ; UNKNOWN 6134 0x1F59, // 1F59 ; GREEK 6135 0x1F5A, // 1F5A ; UNKNOWN 6136 0x1F5B, // 1F5B ; GREEK 6137 0x1F5C, // 1F5C ; UNKNOWN 6138 0x1F5D, // 1F5D ; GREEK 6139 0x1F5E, // 1F5E ; UNKNOWN 6140 0x1F5F, // 1F5F..1F7D; GREEK 6141 0x1F7E, // 1F7E..1F7F; UNKNOWN 6142 0x1F80, // 1F80..1FB4; GREEK 6143 0x1FB5, // 1FB5 ; 
UNKNOWN 6144 0x1FB6, // 1FB6..1FC4; GREEK 6145 0x1FC5, // 1FC5 ; UNKNOWN 6146 0x1FC6, // 1FC6..1FD3; GREEK 6147 0x1FD4, // 1FD4..1FD5; UNKNOWN 6148 0x1FD6, // 1FD6..1FDB; GREEK 6149 0x1FDC, // 1FDC ; UNKNOWN 6150 0x1FDD, // 1FDD..1FEF; GREEK 6151 0x1FF0, // 1FF0..1FF1; UNKNOWN 6152 0x1FF2, // 1FF2..1FF4; GREEK 6153 0x1FF5, // 1FF5 ; UNKNOWN 6154 0x1FF6, // 1FF6..1FFE; GREEK 6155 0x1FFF, // 1FFF ; UNKNOWN 6156 0x2000, // 2000..200B; COMMON 6157 0x200C, // 200C..200D; INHERITED 6158 0x200E, // 200E..2064; COMMON 6159 0x2065, // 2065 ; UNKNOWN 6160 0x2066, // 2066..2070; COMMON 6161 0x2071, // 2071 ; LATIN 6162 0x2072, // 2072..2073; UNKNOWN 6163 0x2074, // 2074..207E; COMMON 6164 0x207F, // 207F ; LATIN 6165 0x2080, // 2080..208E; COMMON 6166 0x208F, // 208F ; UNKNOWN 6167 0x2090, // 2090..209C; LATIN 6168 0x209D, // 209D..209F; UNKNOWN 6169 0x20A0, // 20A0..20C0; COMMON 6170 0x20C1, // 20C1..20CF; UNKNOWN 6171 0x20D0, // 20D0..20F0; INHERITED 6172 0x20F1, // 20F1..20FF; UNKNOWN 6173 0x2100, // 2100..2125; COMMON 6174 0x2126, // 2126 ; GREEK 6175 0x2127, // 2127..2129; COMMON 6176 0x212A, // 212A..212B; LATIN 6177 0x212C, // 212C..2131; COMMON 6178 0x2132, // 2132 ; LATIN 6179 0x2133, // 2133..214D; COMMON 6180 0x214E, // 214E ; LATIN 6181 0x214F, // 214F..215F; COMMON 6182 0x2160, // 2160..2188; LATIN 6183 0x2189, // 2189..218B; COMMON 6184 0x218C, // 218C..218F; UNKNOWN 6185 0x2190, // 2190..2429; COMMON 6186 0x242A, // 242A..243F; UNKNOWN 6187 0x2440, // 2440..244A; COMMON 6188 0x244B, // 244B..245F; UNKNOWN 6189 0x2460, // 2460..27FF; COMMON 6190 0x2800, // 2800..28FF; BRAILLE 6191 0x2900, // 2900..2B73; COMMON 6192 0x2B74, // 2B74..2B75; UNKNOWN 6193 0x2B76, // 2B76..2B95; COMMON 6194 0x2B96, // 2B96 ; UNKNOWN 6195 0x2B97, // 2B97..2BFF; COMMON 6196 0x2C00, // 2C00..2C5F; GLAGOLITIC 6197 0x2C60, // 2C60..2C7F; LATIN 6198 0x2C80, // 2C80..2CF3; COPTIC 6199 0x2CF4, // 2CF4..2CF8; UNKNOWN 6200 0x2CF9, // 2CF9..2CFF; COPTIC 6201 0x2D00, // 2D00..2D25; GEORGIAN 6202 
0x2D26, // 2D26 ; UNKNOWN 6203 0x2D27, // 2D27 ; GEORGIAN 6204 0x2D28, // 2D28..2D2C; UNKNOWN 6205 0x2D2D, // 2D2D ; GEORGIAN 6206 0x2D2E, // 2D2E..2D2F; UNKNOWN 6207 0x2D30, // 2D30..2D67; TIFINAGH 6208 0x2D68, // 2D68..2D6E; UNKNOWN 6209 0x2D6F, // 2D6F..2D70; TIFINAGH 6210 0x2D71, // 2D71..2D7E; UNKNOWN 6211 0x2D7F, // 2D7F ; TIFINAGH 6212 0x2D80, // 2D80..2D96; ETHIOPIC 6213 0x2D97, // 2D97..2D9F; UNKNOWN 6214 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6215 0x2DA7, // 2DA7 ; UNKNOWN 6216 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6217 0x2DAF, // 2DAF ; UNKNOWN 6218 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6219 0x2DB7, // 2DB7 ; UNKNOWN 6220 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6221 0x2DBF, // 2DBF ; UNKNOWN 6222 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6223 0x2DC7, // 2DC7 ; UNKNOWN 6224 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6225 0x2DCF, // 2DCF ; UNKNOWN 6226 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6227 0x2DD7, // 2DD7 ; UNKNOWN 6228 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6229 0x2DDF, // 2DDF ; UNKNOWN 6230 0x2DE0, // 2DE0..2DFF; CYRILLIC 6231 0x2E00, // 2E00..2E5D; COMMON 6232 0x2E5E, // 2E5E..2E7F; UNKNOWN 6233 0x2E80, // 2E80..2E99; HAN 6234 0x2E9A, // 2E9A ; UNKNOWN 6235 0x2E9B, // 2E9B..2EF3; HAN 6236 0x2EF4, // 2EF4..2EFF; UNKNOWN 6237 0x2F00, // 2F00..2FD5; HAN 6238 0x2FD6, // 2FD6..2FEF; UNKNOWN 6239 0x2FF0, // 2FF0..3004; COMMON 6240 0x3005, // 3005 ; HAN 6241 0x3006, // 3006 ; COMMON 6242 0x3007, // 3007 ; HAN 6243 0x3008, // 3008..3020; COMMON 6244 0x3021, // 3021..3029; HAN 6245 0x302A, // 302A..302D; INHERITED 6246 0x302E, // 302E..302F; HANGUL 6247 0x3030, // 3030..3037; COMMON 6248 0x3038, // 3038..303B; HAN 6249 0x303C, // 303C..303F; COMMON 6250 0x3040, // 3040 ; UNKNOWN 6251 0x3041, // 3041..3096; HIRAGANA 6252 0x3097, // 3097..3098; UNKNOWN 6253 0x3099, // 3099..309A; INHERITED 6254 0x309B, // 309B..309C; COMMON 6255 0x309D, // 309D..309F; HIRAGANA 6256 0x30A0, // 30A0 ; COMMON 6257 0x30A1, // 30A1..30FA; KATAKANA 6258 0x30FB, // 30FB..30FC; COMMON 6259 0x30FD, // 30FD..30FF; KATAKANA 6260 0x3100, // 
3100..3104; UNKNOWN 6261 0x3105, // 3105..312F; BOPOMOFO 6262 0x3130, // 3130 ; UNKNOWN 6263 0x3131, // 3131..318E; HANGUL 6264 0x318F, // 318F ; UNKNOWN 6265 0x3190, // 3190..319F; COMMON 6266 0x31A0, // 31A0..31BF; BOPOMOFO 6267 0x31C0, // 31C0..31E5; COMMON 6268 0x31E6, // 31E6..31EE; UNKNOWN 6269 0x31EF, // 31EF ; COMMON 6270 0x31F0, // 31F0..31FF; KATAKANA 6271 0x3200, // 3200..321E; HANGUL 6272 0x321F, // 321F ; UNKNOWN 6273 0x3220, // 3220..325F; COMMON 6274 0x3260, // 3260..327E; HANGUL 6275 0x327F, // 327F..32CF; COMMON 6276 0x32D0, // 32D0..32FE; KATAKANA 6277 0x32FF, // 32FF ; COMMON 6278 0x3300, // 3300..3357; KATAKANA 6279 0x3358, // 3358..33FF; COMMON 6280 0x3400, // 3400..4DBF; HAN 6281 0x4DC0, // 4DC0..4DFF; COMMON 6282 0x4E00, // 4E00..9FFF; HAN 6283 0xA000, // A000..A48C; YI 6284 0xA48D, // A48D..A48F; UNKNOWN 6285 0xA490, // A490..A4C6; YI 6286 0xA4C7, // A4C7..A4CF; UNKNOWN 6287 0xA4D0, // A4D0..A4FF; LISU 6288 0xA500, // A500..A62B; VAI 6289 0xA62C, // A62C..A63F; UNKNOWN 6290 0xA640, // A640..A69F; CYRILLIC 6291 0xA6A0, // A6A0..A6F7; BAMUM 6292 0xA6F8, // A6F8..A6FF; UNKNOWN 6293 0xA700, // A700..A721; COMMON 6294 0xA722, // A722..A787; LATIN 6295 0xA788, // A788..A78A; COMMON 6296 0xA78B, // A78B..A7CD; LATIN 6297 0xA7CE, // A7CE..A7CF; UNKNOWN 6298 0xA7D0, // A7D0..A7D1; LATIN 6299 0xA7D2, // A7D2 ; UNKNOWN 6300 0xA7D3, // A7D3 ; LATIN 6301 0xA7D4, // A7D4 ; UNKNOWN 6302 0xA7D5, // A7D5..A7DC; LATIN 6303 0xA7DD, // A7DD..A7F1; UNKNOWN 6304 0xA7F2, // A7F2..A7FF; LATIN 6305 0xA800, // A800..A82C; SYLOTI_NAGRI 6306 0xA82D, // A82D..A82F; UNKNOWN 6307 0xA830, // A830..A839; COMMON 6308 0xA83A, // A83A..A83F; UNKNOWN 6309 0xA840, // A840..A877; PHAGS_PA 6310 0xA878, // A878..A87F; UNKNOWN 6311 0xA880, // A880..A8C5; SAURASHTRA 6312 0xA8C6, // A8C6..A8CD; UNKNOWN 6313 0xA8CE, // A8CE..A8D9; SAURASHTRA 6314 0xA8DA, // A8DA..A8DF; UNKNOWN 6315 0xA8E0, // A8E0..A8FF; DEVANAGARI 6316 0xA900, // A900..A92D; KAYAH_LI 6317 0xA92E, // A92E ; COMMON 6318 
0xA92F, // A92F ; KAYAH_LI 6319 0xA930, // A930..A953; REJANG 6320 0xA954, // A954..A95E; UNKNOWN 6321 0xA95F, // A95F ; REJANG 6322 0xA960, // A960..A97C; HANGUL 6323 0xA97D, // A97D..A97F; UNKNOWN 6324 0xA980, // A980..A9CD; JAVANESE 6325 0xA9CE, // A9CE ; UNKNOWN 6326 0xA9CF, // A9CF ; COMMON 6327 0xA9D0, // A9D0..A9D9; JAVANESE 6328 0xA9DA, // A9DA..A9DD; UNKNOWN 6329 0xA9DE, // A9DE..A9DF; JAVANESE 6330 0xA9E0, // A9E0..A9FE; MYANMAR 6331 0xA9FF, // A9FF ; UNKNOWN 6332 0xAA00, // AA00..AA36; CHAM 6333 0xAA37, // AA37..AA3F; UNKNOWN 6334 0xAA40, // AA40..AA4D; CHAM 6335 0xAA4E, // AA4E..AA4F; UNKNOWN 6336 0xAA50, // AA50..AA59; CHAM 6337 0xAA5A, // AA5A..AA5B; UNKNOWN 6338 0xAA5C, // AA5C..AA5F; CHAM 6339 0xAA60, // AA60..AA7F; MYANMAR 6340 0xAA80, // AA80..AAC2; TAI_VIET 6341 0xAAC3, // AAC3..AADA; UNKNOWN 6342 0xAADB, // AADB..AADF; TAI_VIET 6343 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6344 0xAAF7, // AAF7..AB00; UNKNOWN 6345 0xAB01, // AB01..AB06; ETHIOPIC 6346 0xAB07, // AB07..AB08; UNKNOWN 6347 0xAB09, // AB09..AB0E; ETHIOPIC 6348 0xAB0F, // AB0F..AB10; UNKNOWN 6349 0xAB11, // AB11..AB16; ETHIOPIC 6350 0xAB17, // AB17..AB1F; UNKNOWN 6351 0xAB20, // AB20..AB26; ETHIOPIC 6352 0xAB27, // AB27 ; UNKNOWN 6353 0xAB28, // AB28..AB2E; ETHIOPIC 6354 0xAB2F, // AB2F ; UNKNOWN 6355 0xAB30, // AB30..AB5A; LATIN 6356 0xAB5B, // AB5B ; COMMON 6357 0xAB5C, // AB5C..AB64; LATIN 6358 0xAB65, // AB65 ; GREEK 6359 0xAB66, // AB66..AB69; LATIN 6360 0xAB6A, // AB6A..AB6B; COMMON 6361 0xAB6C, // AB6C..AB6F; UNKNOWN 6362 0xAB70, // AB70..ABBF; CHEROKEE 6363 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6364 0xABEE, // ABEE..ABEF; UNKNOWN 6365 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6366 0xABFA, // ABFA..ABFF; UNKNOWN 6367 0xAC00, // AC00..D7A3; HANGUL 6368 0xD7A4, // D7A4..D7AF; UNKNOWN 6369 0xD7B0, // D7B0..D7C6; HANGUL 6370 0xD7C7, // D7C7..D7CA; UNKNOWN 6371 0xD7CB, // D7CB..D7FB; HANGUL 6372 0xD7FC, // D7FC..F8FF; UNKNOWN 6373 0xF900, // F900..FA6D; HAN 6374 0xFA6E, // FA6E..FA6F; UNKNOWN 
6375 0xFA70, // FA70..FAD9; HAN 6376 0xFADA, // FADA..FAFF; UNKNOWN 6377 0xFB00, // FB00..FB06; LATIN 6378 0xFB07, // FB07..FB12; UNKNOWN 6379 0xFB13, // FB13..FB17; ARMENIAN 6380 0xFB18, // FB18..FB1C; UNKNOWN 6381 0xFB1D, // FB1D..FB36; HEBREW 6382 0xFB37, // FB37 ; UNKNOWN 6383 0xFB38, // FB38..FB3C; HEBREW 6384 0xFB3D, // FB3D ; UNKNOWN 6385 0xFB3E, // FB3E ; HEBREW 6386 0xFB3F, // FB3F ; UNKNOWN 6387 0xFB40, // FB40..FB41; HEBREW 6388 0xFB42, // FB42 ; UNKNOWN 6389 0xFB43, // FB43..FB44; HEBREW 6390 0xFB45, // FB45 ; UNKNOWN 6391 0xFB46, // FB46..FB4F; HEBREW 6392 0xFB50, // FB50..FBC2; ARABIC 6393 0xFBC3, // FBC3..FBD2; UNKNOWN 6394 0xFBD3, // FBD3..FD3D; ARABIC 6395 0xFD3E, // FD3E..FD3F; COMMON 6396 0xFD40, // FD40..FD8F; ARABIC 6397 0xFD90, // FD90..FD91; UNKNOWN 6398 0xFD92, // FD92..FDC7; ARABIC 6399 0xFDC8, // FDC8..FDCE; UNKNOWN 6400 0xFDCF, // FDCF ; ARABIC 6401 0xFDD0, // FDD0..FDEF; UNKNOWN 6402 0xFDF0, // FDF0..FDFF; ARABIC 6403 0xFE00, // FE00..FE0F; INHERITED 6404 0xFE10, // FE10..FE19; COMMON 6405 0xFE1A, // FE1A..FE1F; UNKNOWN 6406 0xFE20, // FE20..FE2D; INHERITED 6407 0xFE2E, // FE2E..FE2F; CYRILLIC 6408 0xFE30, // FE30..FE52; COMMON 6409 0xFE53, // FE53 ; UNKNOWN 6410 0xFE54, // FE54..FE66; COMMON 6411 0xFE67, // FE67 ; UNKNOWN 6412 0xFE68, // FE68..FE6B; COMMON 6413 0xFE6C, // FE6C..FE6F; UNKNOWN 6414 0xFE70, // FE70..FE74; ARABIC 6415 0xFE75, // FE75 ; UNKNOWN 6416 0xFE76, // FE76..FEFC; ARABIC 6417 0xFEFD, // FEFD..FEFE; UNKNOWN 6418 0xFEFF, // FEFF ; COMMON 6419 0xFF00, // FF00 ; UNKNOWN 6420 0xFF01, // FF01..FF20; COMMON 6421 0xFF21, // FF21..FF3A; LATIN 6422 0xFF3B, // FF3B..FF40; COMMON 6423 0xFF41, // FF41..FF5A; LATIN 6424 0xFF5B, // FF5B..FF65; COMMON 6425 0xFF66, // FF66..FF6F; KATAKANA 6426 0xFF70, // FF70 ; COMMON 6427 0xFF71, // FF71..FF9D; KATAKANA 6428 0xFF9E, // FF9E..FF9F; COMMON 6429 0xFFA0, // FFA0..FFBE; HANGUL 6430 0xFFBF, // FFBF..FFC1; UNKNOWN 6431 0xFFC2, // FFC2..FFC7; HANGUL 6432 0xFFC8, // FFC8..FFC9; UNKNOWN 6433 
0xFFCA, // FFCA..FFCF; HANGUL 6434 0xFFD0, // FFD0..FFD1; UNKNOWN 6435 0xFFD2, // FFD2..FFD7; HANGUL 6436 0xFFD8, // FFD8..FFD9; UNKNOWN 6437 0xFFDA, // FFDA..FFDC; HANGUL 6438 0xFFDD, // FFDD..FFDF; UNKNOWN 6439 0xFFE0, // FFE0..FFE6; COMMON 6440 0xFFE7, // FFE7 ; UNKNOWN 6441 0xFFE8, // FFE8..FFEE; COMMON 6442 0xFFEF, // FFEF..FFF8; UNKNOWN 6443 0xFFF9, // FFF9..FFFD; COMMON 6444 0xFFFE, // FFFE..FFFF; UNKNOWN 6445 0x10000, // 10000..1000B; LINEAR_B 6446 0x1000C, // 1000C ; UNKNOWN 6447 0x1000D, // 1000D..10026; LINEAR_B 6448 0x10027, // 10027 ; UNKNOWN 6449 0x10028, // 10028..1003A; LINEAR_B 6450 0x1003B, // 1003B ; UNKNOWN 6451 0x1003C, // 1003C..1003D; LINEAR_B 6452 0x1003E, // 1003E ; UNKNOWN 6453 0x1003F, // 1003F..1004D; LINEAR_B 6454 0x1004E, // 1004E..1004F; UNKNOWN 6455 0x10050, // 10050..1005D; LINEAR_B 6456 0x1005E, // 1005E..1007F; UNKNOWN 6457 0x10080, // 10080..100FA; LINEAR_B 6458 0x100FB, // 100FB..100FF; UNKNOWN 6459 0x10100, // 10100..10102; COMMON 6460 0x10103, // 10103..10106; UNKNOWN 6461 0x10107, // 10107..10133; COMMON 6462 0x10134, // 10134..10136; UNKNOWN 6463 0x10137, // 10137..1013F; COMMON 6464 0x10140, // 10140..1018E; GREEK 6465 0x1018F, // 1018F ; UNKNOWN 6466 0x10190, // 10190..1019C; COMMON 6467 0x1019D, // 1019D..1019F; UNKNOWN 6468 0x101A0, // 101A0 ; GREEK 6469 0x101A1, // 101A1..101CF; UNKNOWN 6470 0x101D0, // 101D0..101FC; COMMON 6471 0x101FD, // 101FD ; INHERITED 6472 0x101FE, // 101FE..1027F; UNKNOWN 6473 0x10280, // 10280..1029C; LYCIAN 6474 0x1029D, // 1029D..1029F; UNKNOWN 6475 0x102A0, // 102A0..102D0; CARIAN 6476 0x102D1, // 102D1..102DF; UNKNOWN 6477 0x102E0, // 102E0 ; INHERITED 6478 0x102E1, // 102E1..102FB; COMMON 6479 0x102FC, // 102FC..102FF; UNKNOWN 6480 0x10300, // 10300..10323; OLD_ITALIC 6481 0x10324, // 10324..1032C; UNKNOWN 6482 0x1032D, // 1032D..1032F; OLD_ITALIC 6483 0x10330, // 10330..1034A; GOTHIC 6484 0x1034B, // 1034B..1034F; UNKNOWN 6485 0x10350, // 10350..1037A; OLD_PERMIC 6486 0x1037B, // 
1037B..1037F; UNKNOWN 6487 0x10380, // 10380..1039D; UGARITIC 6488 0x1039E, // 1039E ; UNKNOWN 6489 0x1039F, // 1039F ; UGARITIC 6490 0x103A0, // 103A0..103C3; OLD_PERSIAN 6491 0x103C4, // 103C4..103C7; UNKNOWN 6492 0x103C8, // 103C8..103D5; OLD_PERSIAN 6493 0x103D6, // 103D6..103FF; UNKNOWN 6494 0x10400, // 10400..1044F; DESERET 6495 0x10450, // 10450..1047F; SHAVIAN 6496 0x10480, // 10480..1049D; OSMANYA 6497 0x1049E, // 1049E..1049F; UNKNOWN 6498 0x104A0, // 104A0..104A9; OSMANYA 6499 0x104AA, // 104AA..104AF; UNKNOWN 6500 0x104B0, // 104B0..104D3; OSAGE 6501 0x104D4, // 104D4..104D7; UNKNOWN 6502 0x104D8, // 104D8..104FB; OSAGE 6503 0x104FC, // 104FC..104FF; UNKNOWN 6504 0x10500, // 10500..10527; ELBASAN 6505 0x10528, // 10528..1052F; UNKNOWN 6506 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6507 0x10564, // 10564..1056E; UNKNOWN 6508 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6509 0x10570, // 10570..1057A; VITHKUQI 6510 0x1057B, // 1057B ; UNKNOWN 6511 0x1057C, // 1057C..1058A; VITHKUQI 6512 0x1058B, // 1058B ; UNKNOWN 6513 0x1058C, // 1058C..10592; VITHKUQI 6514 0x10593, // 10593 ; UNKNOWN 6515 0x10594, // 10594..10595; VITHKUQI 6516 0x10596, // 10596 ; UNKNOWN 6517 0x10597, // 10597..105A1; VITHKUQI 6518 0x105A2, // 105A2 ; UNKNOWN 6519 0x105A3, // 105A3..105B1; VITHKUQI 6520 0x105B2, // 105B2 ; UNKNOWN 6521 0x105B3, // 105B3..105B9; VITHKUQI 6522 0x105BA, // 105BA ; UNKNOWN 6523 0x105BB, // 105BB..105BC; VITHKUQI 6524 0x105BD, // 105BD..105BF; UNKNOWN 6525 0x105C0, // 105C0..105F3; TODHRI 6526 0x105F4, // 105F4..105FF; UNKNOWN 6527 0x10600, // 10600..10736; LINEAR_A 6528 0x10737, // 10737..1073F; UNKNOWN 6529 0x10740, // 10740..10755; LINEAR_A 6530 0x10756, // 10756..1075F; UNKNOWN 6531 0x10760, // 10760..10767; LINEAR_A 6532 0x10768, // 10768..1077F; UNKNOWN 6533 0x10780, // 10780..10785; LATIN 6534 0x10786, // 10786 ; UNKNOWN 6535 0x10787, // 10787..107B0; LATIN 6536 0x107B1, // 107B1 ; UNKNOWN 6537 0x107B2, // 107B2..107BA; LATIN 6538 0x107BB, // 
107BB..107FF; UNKNOWN 6539 0x10800, // 10800..10805; CYPRIOT 6540 0x10806, // 10806..10807; UNKNOWN 6541 0x10808, // 10808 ; CYPRIOT 6542 0x10809, // 10809 ; UNKNOWN 6543 0x1080A, // 1080A..10835; CYPRIOT 6544 0x10836, // 10836 ; UNKNOWN 6545 0x10837, // 10837..10838; CYPRIOT 6546 0x10839, // 10839..1083B; UNKNOWN 6547 0x1083C, // 1083C ; CYPRIOT 6548 0x1083D, // 1083D..1083E; UNKNOWN 6549 0x1083F, // 1083F ; CYPRIOT 6550 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6551 0x10856, // 10856 ; UNKNOWN 6552 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6553 0x10860, // 10860..1087F; PALMYRENE 6554 0x10880, // 10880..1089E; NABATAEAN 6555 0x1089F, // 1089F..108A6; UNKNOWN 6556 0x108A7, // 108A7..108AF; NABATAEAN 6557 0x108B0, // 108B0..108DF; UNKNOWN 6558 0x108E0, // 108E0..108F2; HATRAN 6559 0x108F3, // 108F3 ; UNKNOWN 6560 0x108F4, // 108F4..108F5; HATRAN 6561 0x108F6, // 108F6..108FA; UNKNOWN 6562 0x108FB, // 108FB..108FF; HATRAN 6563 0x10900, // 10900..1091B; PHOENICIAN 6564 0x1091C, // 1091C..1091E; UNKNOWN 6565 0x1091F, // 1091F ; PHOENICIAN 6566 0x10920, // 10920..10939; LYDIAN 6567 0x1093A, // 1093A..1093E; UNKNOWN 6568 0x1093F, // 1093F ; LYDIAN 6569 0x10940, // 10940..1097F; UNKNOWN 6570 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6571 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6572 0x109B8, // 109B8..109BB; UNKNOWN 6573 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6574 0x109D0, // 109D0..109D1; UNKNOWN 6575 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 6576 0x10A00, // 10A00..10A03; KHAROSHTHI 6577 0x10A04, // 10A04 ; UNKNOWN 6578 0x10A05, // 10A05..10A06; KHAROSHTHI 6579 0x10A07, // 10A07..10A0B; UNKNOWN 6580 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6581 0x10A14, // 10A14 ; UNKNOWN 6582 0x10A15, // 10A15..10A17; KHAROSHTHI 6583 0x10A18, // 10A18 ; UNKNOWN 6584 0x10A19, // 10A19..10A35; KHAROSHTHI 6585 0x10A36, // 10A36..10A37; UNKNOWN 6586 0x10A38, // 10A38..10A3A; KHAROSHTHI 6587 0x10A3B, // 10A3B..10A3E; UNKNOWN 6588 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6589 0x10A49, 
// 10A49..10A4F; UNKNOWN 6590 0x10A50, // 10A50..10A58; KHAROSHTHI 6591 0x10A59, // 10A59..10A5F; UNKNOWN 6592 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6593 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6594 0x10AA0, // 10AA0..10ABF; UNKNOWN 6595 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6596 0x10AE7, // 10AE7..10AEA; UNKNOWN 6597 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6598 0x10AF7, // 10AF7..10AFF; UNKNOWN 6599 0x10B00, // 10B00..10B35; AVESTAN 6600 0x10B36, // 10B36..10B38; UNKNOWN 6601 0x10B39, // 10B39..10B3F; AVESTAN 6602 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6603 0x10B56, // 10B56..10B57; UNKNOWN 6604 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6605 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6606 0x10B73, // 10B73..10B77; UNKNOWN 6607 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6608 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6609 0x10B92, // 10B92..10B98; UNKNOWN 6610 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6611 0x10B9D, // 10B9D..10BA8; UNKNOWN 6612 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6613 0x10BB0, // 10BB0..10BFF; UNKNOWN 6614 0x10C00, // 10C00..10C48; OLD_TURKIC 6615 0x10C49, // 10C49..10C7F; UNKNOWN 6616 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6617 0x10CB3, // 10CB3..10CBF; UNKNOWN 6618 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6619 0x10CF3, // 10CF3..10CF9; UNKNOWN 6620 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6621 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6622 0x10D28, // 10D28..10D2F; UNKNOWN 6623 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6624 0x10D3A, // 10D3A..10D3F; UNKNOWN 6625 0x10D40, // 10D40..10D65; GARAY 6626 0x10D66, // 10D66..10D68; UNKNOWN 6627 0x10D69, // 10D69..10D85; GARAY 6628 0x10D86, // 10D86..10D8D; UNKNOWN 6629 0x10D8E, // 10D8E..10D8F; GARAY 6630 0x10D90, // 10D90..10E5F; UNKNOWN 6631 0x10E60, // 10E60..10E7E; ARABIC 6632 0x10E7F, // 10E7F ; UNKNOWN 6633 0x10E80, // 10E80..10EA9; YEZIDI 6634 0x10EAA, // 10EAA ; UNKNOWN 6635 0x10EAB, // 10EAB..10EAD; YEZIDI 6636 0x10EAE, // 10EAE..10EAF; UNKNOWN 6637 0x10EB0, 
// 10EB0..10EB1; YEZIDI 6638 0x10EB2, // 10EB2..10EC1; UNKNOWN 6639 0x10EC2, // 10EC2..10EC4; ARABIC 6640 0x10EC5, // 10EC5..10EFB; UNKNOWN 6641 0x10EFC, // 10EFC..10EFF; ARABIC 6642 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6643 0x10F28, // 10F28..10F2F; UNKNOWN 6644 0x10F30, // 10F30..10F59; SOGDIAN 6645 0x10F5A, // 10F5A..10F6F; UNKNOWN 6646 0x10F70, // 10F70..10F89; OLD_UYGHUR 6647 0x10F8A, // 10F8A..10FAF; UNKNOWN 6648 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6649 0x10FCC, // 10FCC..10FDF; UNKNOWN 6650 0x10FE0, // 10FE0..10FF6; ELYMAIC 6651 0x10FF7, // 10FF7..10FFF; UNKNOWN 6652 0x11000, // 11000..1104D; BRAHMI 6653 0x1104E, // 1104E..11051; UNKNOWN 6654 0x11052, // 11052..11075; BRAHMI 6655 0x11076, // 11076..1107E; UNKNOWN 6656 0x1107F, // 1107F ; BRAHMI 6657 0x11080, // 11080..110C2; KAITHI 6658 0x110C3, // 110C3..110CC; UNKNOWN 6659 0x110CD, // 110CD ; KAITHI 6660 0x110CE, // 110CE..110CF; UNKNOWN 6661 0x110D0, // 110D0..110E8; SORA_SOMPENG 6662 0x110E9, // 110E9..110EF; UNKNOWN 6663 0x110F0, // 110F0..110F9; SORA_SOMPENG 6664 0x110FA, // 110FA..110FF; UNKNOWN 6665 0x11100, // 11100..11134; CHAKMA 6666 0x11135, // 11135 ; UNKNOWN 6667 0x11136, // 11136..11147; CHAKMA 6668 0x11148, // 11148..1114F; UNKNOWN 6669 0x11150, // 11150..11176; MAHAJANI 6670 0x11177, // 11177..1117F; UNKNOWN 6671 0x11180, // 11180..111DF; SHARADA 6672 0x111E0, // 111E0 ; UNKNOWN 6673 0x111E1, // 111E1..111F4; SINHALA 6674 0x111F5, // 111F5..111FF; UNKNOWN 6675 0x11200, // 11200..11211; KHOJKI 6676 0x11212, // 11212 ; UNKNOWN 6677 0x11213, // 11213..11241; KHOJKI 6678 0x11242, // 11242..1127F; UNKNOWN 6679 0x11280, // 11280..11286; MULTANI 6680 0x11287, // 11287 ; UNKNOWN 6681 0x11288, // 11288 ; MULTANI 6682 0x11289, // 11289 ; UNKNOWN 6683 0x1128A, // 1128A..1128D; MULTANI 6684 0x1128E, // 1128E ; UNKNOWN 6685 0x1128F, // 1128F..1129D; MULTANI 6686 0x1129E, // 1129E ; UNKNOWN 6687 0x1129F, // 1129F..112A9; MULTANI 6688 0x112AA, // 112AA..112AF; UNKNOWN 6689 0x112B0, // 112B0..112EA; 
KHUDAWADI 6690 0x112EB, // 112EB..112EF; UNKNOWN 6691 0x112F0, // 112F0..112F9; KHUDAWADI 6692 0x112FA, // 112FA..112FF; UNKNOWN 6693 0x11300, // 11300..11303; GRANTHA 6694 0x11304, // 11304 ; UNKNOWN 6695 0x11305, // 11305..1130C; GRANTHA 6696 0x1130D, // 1130D..1130E; UNKNOWN 6697 0x1130F, // 1130F..11310; GRANTHA 6698 0x11311, // 11311..11312; UNKNOWN 6699 0x11313, // 11313..11328; GRANTHA 6700 0x11329, // 11329 ; UNKNOWN 6701 0x1132A, // 1132A..11330; GRANTHA 6702 0x11331, // 11331 ; UNKNOWN 6703 0x11332, // 11332..11333; GRANTHA 6704 0x11334, // 11334 ; UNKNOWN 6705 0x11335, // 11335..11339; GRANTHA 6706 0x1133A, // 1133A ; UNKNOWN 6707 0x1133B, // 1133B ; INHERITED 6708 0x1133C, // 1133C..11344; GRANTHA 6709 0x11345, // 11345..11346; UNKNOWN 6710 0x11347, // 11347..11348; GRANTHA 6711 0x11349, // 11349..1134A; UNKNOWN 6712 0x1134B, // 1134B..1134D; GRANTHA 6713 0x1134E, // 1134E..1134F; UNKNOWN 6714 0x11350, // 11350 ; GRANTHA 6715 0x11351, // 11351..11356; UNKNOWN 6716 0x11357, // 11357 ; GRANTHA 6717 0x11358, // 11358..1135C; UNKNOWN 6718 0x1135D, // 1135D..11363; GRANTHA 6719 0x11364, // 11364..11365; UNKNOWN 6720 0x11366, // 11366..1136C; GRANTHA 6721 0x1136D, // 1136D..1136F; UNKNOWN 6722 0x11370, // 11370..11374; GRANTHA 6723 0x11375, // 11375..1137F; UNKNOWN 6724 0x11380, // 11380..11389; TULU_TIGALARI 6725 0x1138A, // 1138A ; UNKNOWN 6726 0x1138B, // 1138B ; TULU_TIGALARI 6727 0x1138C, // 1138C..1138D; UNKNOWN 6728 0x1138E, // 1138E ; TULU_TIGALARI 6729 0x1138F, // 1138F ; UNKNOWN 6730 0x11390, // 11390..113B5; TULU_TIGALARI 6731 0x113B6, // 113B6 ; UNKNOWN 6732 0x113B7, // 113B7..113C0; TULU_TIGALARI 6733 0x113C1, // 113C1 ; UNKNOWN 6734 0x113C2, // 113C2 ; TULU_TIGALARI 6735 0x113C3, // 113C3..113C4; UNKNOWN 6736 0x113C5, // 113C5 ; TULU_TIGALARI 6737 0x113C6, // 113C6 ; UNKNOWN 6738 0x113C7, // 113C7..113CA; TULU_TIGALARI 6739 0x113CB, // 113CB ; UNKNOWN 6740 0x113CC, // 113CC..113D5; TULU_TIGALARI 6741 0x113D6, // 113D6 ; UNKNOWN 6742 0x113D7, // 
113D7..113D8; TULU_TIGALARI 6743 0x113D9, // 113D9..113E0; UNKNOWN 6744 0x113E1, // 113E1..113E2; TULU_TIGALARI 6745 0x113E3, // 113E3..113FF; UNKNOWN 6746 0x11400, // 11400..1145B; NEWA 6747 0x1145C, // 1145C ; UNKNOWN 6748 0x1145D, // 1145D..11461; NEWA 6749 0x11462, // 11462..1147F; UNKNOWN 6750 0x11480, // 11480..114C7; TIRHUTA 6751 0x114C8, // 114C8..114CF; UNKNOWN 6752 0x114D0, // 114D0..114D9; TIRHUTA 6753 0x114DA, // 114DA..1157F; UNKNOWN 6754 0x11580, // 11580..115B5; SIDDHAM 6755 0x115B6, // 115B6..115B7; UNKNOWN 6756 0x115B8, // 115B8..115DD; SIDDHAM 6757 0x115DE, // 115DE..115FF; UNKNOWN 6758 0x11600, // 11600..11644; MODI 6759 0x11645, // 11645..1164F; UNKNOWN 6760 0x11650, // 11650..11659; MODI 6761 0x1165A, // 1165A..1165F; UNKNOWN 6762 0x11660, // 11660..1166C; MONGOLIAN 6763 0x1166D, // 1166D..1167F; UNKNOWN 6764 0x11680, // 11680..116B9; TAKRI 6765 0x116BA, // 116BA..116BF; UNKNOWN 6766 0x116C0, // 116C0..116C9; TAKRI 6767 0x116CA, // 116CA..116CF; UNKNOWN 6768 0x116D0, // 116D0..116E3; MYANMAR 6769 0x116E4, // 116E4..116FF; UNKNOWN 6770 0x11700, // 11700..1171A; AHOM 6771 0x1171B, // 1171B..1171C; UNKNOWN 6772 0x1171D, // 1171D..1172B; AHOM 6773 0x1172C, // 1172C..1172F; UNKNOWN 6774 0x11730, // 11730..11746; AHOM 6775 0x11747, // 11747..117FF; UNKNOWN 6776 0x11800, // 11800..1183B; DOGRA 6777 0x1183C, // 1183C..1189F; UNKNOWN 6778 0x118A0, // 118A0..118F2; WARANG_CITI 6779 0x118F3, // 118F3..118FE; UNKNOWN 6780 0x118FF, // 118FF ; WARANG_CITI 6781 0x11900, // 11900..11906; DIVES_AKURU 6782 0x11907, // 11907..11908; UNKNOWN 6783 0x11909, // 11909 ; DIVES_AKURU 6784 0x1190A, // 1190A..1190B; UNKNOWN 6785 0x1190C, // 1190C..11913; DIVES_AKURU 6786 0x11914, // 11914 ; UNKNOWN 6787 0x11915, // 11915..11916; DIVES_AKURU 6788 0x11917, // 11917 ; UNKNOWN 6789 0x11918, // 11918..11935; DIVES_AKURU 6790 0x11936, // 11936 ; UNKNOWN 6791 0x11937, // 11937..11938; DIVES_AKURU 6792 0x11939, // 11939..1193A; UNKNOWN 6793 0x1193B, // 1193B..11946; DIVES_AKURU 
6794 0x11947, // 11947..1194F; UNKNOWN 6795 0x11950, // 11950..11959; DIVES_AKURU 6796 0x1195A, // 1195A..1199F; UNKNOWN 6797 0x119A0, // 119A0..119A7; NANDINAGARI 6798 0x119A8, // 119A8..119A9; UNKNOWN 6799 0x119AA, // 119AA..119D7; NANDINAGARI 6800 0x119D8, // 119D8..119D9; UNKNOWN 6801 0x119DA, // 119DA..119E4; NANDINAGARI 6802 0x119E5, // 119E5..119FF; UNKNOWN 6803 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6804 0x11A48, // 11A48..11A4F; UNKNOWN 6805 0x11A50, // 11A50..11AA2; SOYOMBO 6806 0x11AA3, // 11AA3..11AAF; UNKNOWN 6807 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6808 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6809 0x11AF9, // 11AF9..11AFF; UNKNOWN 6810 0x11B00, // 11B00..11B09; DEVANAGARI 6811 0x11B0A, // 11B0A..11BBF; UNKNOWN 6812 0x11BC0, // 11BC0..11BE1; SUNUWAR 6813 0x11BE2, // 11BE2..11BEF; UNKNOWN 6814 0x11BF0, // 11BF0..11BF9; SUNUWAR 6815 0x11BFA, // 11BFA..11BFF; UNKNOWN 6816 0x11C00, // 11C00..11C08; BHAIKSUKI 6817 0x11C09, // 11C09 ; UNKNOWN 6818 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6819 0x11C37, // 11C37 ; UNKNOWN 6820 0x11C38, // 11C38..11C45; BHAIKSUKI 6821 0x11C46, // 11C46..11C4F; UNKNOWN 6822 0x11C50, // 11C50..11C6C; BHAIKSUKI 6823 0x11C6D, // 11C6D..11C6F; UNKNOWN 6824 0x11C70, // 11C70..11C8F; MARCHEN 6825 0x11C90, // 11C90..11C91; UNKNOWN 6826 0x11C92, // 11C92..11CA7; MARCHEN 6827 0x11CA8, // 11CA8 ; UNKNOWN 6828 0x11CA9, // 11CA9..11CB6; MARCHEN 6829 0x11CB7, // 11CB7..11CFF; UNKNOWN 6830 0x11D00, // 11D00..11D06; MASARAM_GONDI 6831 0x11D07, // 11D07 ; UNKNOWN 6832 0x11D08, // 11D08..11D09; MASARAM_GONDI 6833 0x11D0A, // 11D0A ; UNKNOWN 6834 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6835 0x11D37, // 11D37..11D39; UNKNOWN 6836 0x11D3A, // 11D3A ; MASARAM_GONDI 6837 0x11D3B, // 11D3B ; UNKNOWN 6838 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6839 0x11D3E, // 11D3E ; UNKNOWN 6840 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6841 0x11D48, // 11D48..11D4F; UNKNOWN 6842 0x11D50, // 11D50..11D59; MASARAM_GONDI 6843 0x11D5A, // 11D5A..11D5F; UNKNOWN 
6844 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6845 0x11D66, // 11D66 ; UNKNOWN 6846 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6847 0x11D69, // 11D69 ; UNKNOWN 6848 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6849 0x11D8F, // 11D8F ; UNKNOWN 6850 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6851 0x11D92, // 11D92 ; UNKNOWN 6852 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6853 0x11D99, // 11D99..11D9F; UNKNOWN 6854 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6855 0x11DAA, // 11DAA..11EDF; UNKNOWN 6856 0x11EE0, // 11EE0..11EF8; MAKASAR 6857 0x11EF9, // 11EF9..11EFF; UNKNOWN 6858 0x11F00, // 11F00..11F10; KAWI 6859 0x11F11, // 11F11 ; UNKNOWN 6860 0x11F12, // 11F12..11F3A; KAWI 6861 0x11F3B, // 11F3B..11F3D; UNKNOWN 6862 0x11F3E, // 11F3E..11F5A; KAWI 6863 0x11F5B, // 11F5B..11FAF; UNKNOWN 6864 0x11FB0, // 11FB0 ; LISU 6865 0x11FB1, // 11FB1..11FBF; UNKNOWN 6866 0x11FC0, // 11FC0..11FF1; TAMIL 6867 0x11FF2, // 11FF2..11FFE; UNKNOWN 6868 0x11FFF, // 11FFF ; TAMIL 6869 0x12000, // 12000..12399; CUNEIFORM 6870 0x1239A, // 1239A..123FF; UNKNOWN 6871 0x12400, // 12400..1246E; CUNEIFORM 6872 0x1246F, // 1246F ; UNKNOWN 6873 0x12470, // 12470..12474; CUNEIFORM 6874 0x12475, // 12475..1247F; UNKNOWN 6875 0x12480, // 12480..12543; CUNEIFORM 6876 0x12544, // 12544..12F8F; UNKNOWN 6877 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6878 0x12FF3, // 12FF3..12FFF; UNKNOWN 6879 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6880 0x13456, // 13456..1345F; UNKNOWN 6881 0x13460, // 13460..143FA; EGYPTIAN_HIEROGLYPHS 6882 0x143FB, // 143FB..143FF; UNKNOWN 6883 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6884 0x14647, // 14647..160FF; UNKNOWN 6885 0x16100, // 16100..16139; GURUNG_KHEMA 6886 0x1613A, // 1613A..167FF; UNKNOWN 6887 0x16800, // 16800..16A38; BAMUM 6888 0x16A39, // 16A39..16A3F; UNKNOWN 6889 0x16A40, // 16A40..16A5E; MRO 6890 0x16A5F, // 16A5F ; UNKNOWN 6891 0x16A60, // 16A60..16A69; MRO 6892 0x16A6A, // 16A6A..16A6D; UNKNOWN 6893 0x16A6E, // 16A6E..16A6F; MRO 6894 0x16A70, // 16A70..16ABE; TANGSA 
6895 0x16ABF, // 16ABF ; UNKNOWN 6896 0x16AC0, // 16AC0..16AC9; TANGSA 6897 0x16ACA, // 16ACA..16ACF; UNKNOWN 6898 0x16AD0, // 16AD0..16AED; BASSA_VAH 6899 0x16AEE, // 16AEE..16AEF; UNKNOWN 6900 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6901 0x16AF6, // 16AF6..16AFF; UNKNOWN 6902 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6903 0x16B46, // 16B46..16B4F; UNKNOWN 6904 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6905 0x16B5A, // 16B5A ; UNKNOWN 6906 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6907 0x16B62, // 16B62 ; UNKNOWN 6908 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6909 0x16B78, // 16B78..16B7C; UNKNOWN 6910 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6911 0x16B90, // 16B90..16D3F; UNKNOWN 6912 0x16D40, // 16D40..16D79; KIRAT_RAI 6913 0x16D7A, // 16D7A..16E3F; UNKNOWN 6914 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6915 0x16E9B, // 16E9B..16EFF; UNKNOWN 6916 0x16F00, // 16F00..16F4A; MIAO 6917 0x16F4B, // 16F4B..16F4E; UNKNOWN 6918 0x16F4F, // 16F4F..16F87; MIAO 6919 0x16F88, // 16F88..16F8E; UNKNOWN 6920 0x16F8F, // 16F8F..16F9F; MIAO 6921 0x16FA0, // 16FA0..16FDF; UNKNOWN 6922 0x16FE0, // 16FE0 ; TANGUT 6923 0x16FE1, // 16FE1 ; NUSHU 6924 0x16FE2, // 16FE2..16FE3; HAN 6925 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6926 0x16FE5, // 16FE5..16FEF; UNKNOWN 6927 0x16FF0, // 16FF0..16FF1; HAN 6928 0x16FF2, // 16FF2..16FFF; UNKNOWN 6929 0x17000, // 17000..187F7; TANGUT 6930 0x187F8, // 187F8..187FF; UNKNOWN 6931 0x18800, // 18800..18AFF; TANGUT 6932 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6933 0x18CD6, // 18CD6..18CFE; UNKNOWN 6934 0x18CFF, // 18CFF ; KHITAN_SMALL_SCRIPT 6935 0x18D00, // 18D00..18D08; TANGUT 6936 0x18D09, // 18D09..1AFEF; UNKNOWN 6937 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 6938 0x1AFF4, // 1AFF4 ; UNKNOWN 6939 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 6940 0x1AFFC, // 1AFFC ; UNKNOWN 6941 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 6942 0x1AFFF, // 1AFFF ; UNKNOWN 6943 0x1B000, // 1B000 ; KATAKANA 6944 0x1B001, // 1B001..1B11F; HIRAGANA 6945 0x1B120, // 1B120..1B122; KATAKANA 6946 0x1B123, // 
1B123..1B131; UNKNOWN 6947 0x1B132, // 1B132 ; HIRAGANA 6948 0x1B133, // 1B133..1B14F; UNKNOWN 6949 0x1B150, // 1B150..1B152; HIRAGANA 6950 0x1B153, // 1B153..1B154; UNKNOWN 6951 0x1B155, // 1B155 ; KATAKANA 6952 0x1B156, // 1B156..1B163; UNKNOWN 6953 0x1B164, // 1B164..1B167; KATAKANA 6954 0x1B168, // 1B168..1B16F; UNKNOWN 6955 0x1B170, // 1B170..1B2FB; NUSHU 6956 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6957 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6958 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6959 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6960 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6961 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6962 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6963 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6964 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6965 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6966 0x1BCA0, // 1BCA0..1BCA3; COMMON 6967 0x1BCA4, // 1BCA4..1CBFF; UNKNOWN 6968 0x1CC00, // 1CC00..1CCF9; COMMON 6969 0x1CCFA, // 1CCFA..1CCFF; UNKNOWN 6970 0x1CD00, // 1CD00..1CEB3; COMMON 6971 0x1CEB4, // 1CEB4..1CEFF; UNKNOWN 6972 0x1CF00, // 1CF00..1CF2D; INHERITED 6973 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 6974 0x1CF30, // 1CF30..1CF46; INHERITED 6975 0x1CF47, // 1CF47..1CF4F; UNKNOWN 6976 0x1CF50, // 1CF50..1CFC3; COMMON 6977 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 6978 0x1D000, // 1D000..1D0F5; COMMON 6979 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6980 0x1D100, // 1D100..1D126; COMMON 6981 0x1D127, // 1D127..1D128; UNKNOWN 6982 0x1D129, // 1D129..1D166; COMMON 6983 0x1D167, // 1D167..1D169; INHERITED 6984 0x1D16A, // 1D16A..1D17A; COMMON 6985 0x1D17B, // 1D17B..1D182; INHERITED 6986 0x1D183, // 1D183..1D184; COMMON 6987 0x1D185, // 1D185..1D18B; INHERITED 6988 0x1D18C, // 1D18C..1D1A9; COMMON 6989 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6990 0x1D1AE, // 1D1AE..1D1EA; COMMON 6991 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 6992 0x1D200, // 1D200..1D245; GREEK 6993 0x1D246, // 1D246..1D2BF; UNKNOWN 6994 0x1D2C0, // 1D2C0..1D2D3; COMMON 6995 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 6996 0x1D2E0, // 1D2E0..1D2F3; COMMON 6997 0x1D2F4, // 1D2F4..1D2FF; 
UNKNOWN 6998 0x1D300, // 1D300..1D356; COMMON 6999 0x1D357, // 1D357..1D35F; UNKNOWN 7000 0x1D360, // 1D360..1D378; COMMON 7001 0x1D379, // 1D379..1D3FF; UNKNOWN 7002 0x1D400, // 1D400..1D454; COMMON 7003 0x1D455, // 1D455 ; UNKNOWN 7004 0x1D456, // 1D456..1D49C; COMMON 7005 0x1D49D, // 1D49D ; UNKNOWN 7006 0x1D49E, // 1D49E..1D49F; COMMON 7007 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 7008 0x1D4A2, // 1D4A2 ; COMMON 7009 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 7010 0x1D4A5, // 1D4A5..1D4A6; COMMON 7011 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 7012 0x1D4A9, // 1D4A9..1D4AC; COMMON 7013 0x1D4AD, // 1D4AD ; UNKNOWN 7014 0x1D4AE, // 1D4AE..1D4B9; COMMON 7015 0x1D4BA, // 1D4BA ; UNKNOWN 7016 0x1D4BB, // 1D4BB ; COMMON 7017 0x1D4BC, // 1D4BC ; UNKNOWN 7018 0x1D4BD, // 1D4BD..1D4C3; COMMON 7019 0x1D4C4, // 1D4C4 ; UNKNOWN 7020 0x1D4C5, // 1D4C5..1D505; COMMON 7021 0x1D506, // 1D506 ; UNKNOWN 7022 0x1D507, // 1D507..1D50A; COMMON 7023 0x1D50B, // 1D50B..1D50C; UNKNOWN 7024 0x1D50D, // 1D50D..1D514; COMMON 7025 0x1D515, // 1D515 ; UNKNOWN 7026 0x1D516, // 1D516..1D51C; COMMON 7027 0x1D51D, // 1D51D ; UNKNOWN 7028 0x1D51E, // 1D51E..1D539; COMMON 7029 0x1D53A, // 1D53A ; UNKNOWN 7030 0x1D53B, // 1D53B..1D53E; COMMON 7031 0x1D53F, // 1D53F ; UNKNOWN 7032 0x1D540, // 1D540..1D544; COMMON 7033 0x1D545, // 1D545 ; UNKNOWN 7034 0x1D546, // 1D546 ; COMMON 7035 0x1D547, // 1D547..1D549; UNKNOWN 7036 0x1D54A, // 1D54A..1D550; COMMON 7037 0x1D551, // 1D551 ; UNKNOWN 7038 0x1D552, // 1D552..1D6A5; COMMON 7039 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 7040 0x1D6A8, // 1D6A8..1D7CB; COMMON 7041 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 7042 0x1D7CE, // 1D7CE..1D7FF; COMMON 7043 0x1D800, // 1D800..1DA8B; SIGNWRITING 7044 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 7045 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 7046 0x1DAA0, // 1DAA0 ; UNKNOWN 7047 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 7048 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN 7049 0x1DF00, // 1DF00..1DF1E; LATIN 7050 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 7051 0x1DF25, // 1DF25..1DF2A; LATIN 
7052 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 7053 0x1E000, // 1E000..1E006; GLAGOLITIC 7054 0x1E007, // 1E007 ; UNKNOWN 7055 0x1E008, // 1E008..1E018; GLAGOLITIC 7056 0x1E019, // 1E019..1E01A; UNKNOWN 7057 0x1E01B, // 1E01B..1E021; GLAGOLITIC 7058 0x1E022, // 1E022 ; UNKNOWN 7059 0x1E023, // 1E023..1E024; GLAGOLITIC 7060 0x1E025, // 1E025 ; UNKNOWN 7061 0x1E026, // 1E026..1E02A; GLAGOLITIC 7062 0x1E02B, // 1E02B..1E02F; UNKNOWN 7063 0x1E030, // 1E030..1E06D; CYRILLIC 7064 0x1E06E, // 1E06E..1E08E; UNKNOWN 7065 0x1E08F, // 1E08F ; CYRILLIC 7066 0x1E090, // 1E090..1E0FF; UNKNOWN 7067 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 7068 0x1E12D, // 1E12D..1E12F; UNKNOWN 7069 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 7070 0x1E13E, // 1E13E..1E13F; UNKNOWN 7071 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 7072 0x1E14A, // 1E14A..1E14D; UNKNOWN 7073 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 7074 0x1E150, // 1E150..1E28F; UNKNOWN 7075 0x1E290, // 1E290..1E2AE; TOTO 7076 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 7077 0x1E2C0, // 1E2C0..1E2F9; WANCHO 7078 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 7079 0x1E2FF, // 1E2FF ; WANCHO 7080 0x1E300, // 1E300..1E4CF; UNKNOWN 7081 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 7082 0x1E4FA, // 1E4FA..1E5CF; UNKNOWN 7083 0x1E5D0, // 1E5D0..1E5FA; OL_ONAL 7084 0x1E5FB, // 1E5FB..1E5FE; UNKNOWN 7085 0x1E5FF, // 1E5FF ; OL_ONAL 7086 0x1E600, // 1E600..1E7DF; UNKNOWN 7087 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 7088 0x1E7E7, // 1E7E7 ; UNKNOWN 7089 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 7090 0x1E7EC, // 1E7EC ; UNKNOWN 7091 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 7092 0x1E7EF, // 1E7EF ; UNKNOWN 7093 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 7094 0x1E7FF, // 1E7FF ; UNKNOWN 7095 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 7096 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 7097 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 7098 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 7099 0x1E900, // 1E900..1E94B; ADLAM 7100 0x1E94C, // 1E94C..1E94F; UNKNOWN 7101 0x1E950, // 1E950..1E959; ADLAM 7102 0x1E95A, // 
1E95A..1E95D; UNKNOWN 7103 0x1E95E, // 1E95E..1E95F; ADLAM 7104 0x1E960, // 1E960..1EC70; UNKNOWN 7105 0x1EC71, // 1EC71..1ECB4; COMMON 7106 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 7107 0x1ED01, // 1ED01..1ED3D; COMMON 7108 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 7109 0x1EE00, // 1EE00..1EE03; ARABIC 7110 0x1EE04, // 1EE04 ; UNKNOWN 7111 0x1EE05, // 1EE05..1EE1F; ARABIC 7112 0x1EE20, // 1EE20 ; UNKNOWN 7113 0x1EE21, // 1EE21..1EE22; ARABIC 7114 0x1EE23, // 1EE23 ; UNKNOWN 7115 0x1EE24, // 1EE24 ; ARABIC 7116 0x1EE25, // 1EE25..1EE26; UNKNOWN 7117 0x1EE27, // 1EE27 ; ARABIC 7118 0x1EE28, // 1EE28 ; UNKNOWN 7119 0x1EE29, // 1EE29..1EE32; ARABIC 7120 0x1EE33, // 1EE33 ; UNKNOWN 7121 0x1EE34, // 1EE34..1EE37; ARABIC 7122 0x1EE38, // 1EE38 ; UNKNOWN 7123 0x1EE39, // 1EE39 ; ARABIC 7124 0x1EE3A, // 1EE3A ; UNKNOWN 7125 0x1EE3B, // 1EE3B ; ARABIC 7126 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 7127 0x1EE42, // 1EE42 ; ARABIC 7128 0x1EE43, // 1EE43..1EE46; UNKNOWN 7129 0x1EE47, // 1EE47 ; ARABIC 7130 0x1EE48, // 1EE48 ; UNKNOWN 7131 0x1EE49, // 1EE49 ; ARABIC 7132 0x1EE4A, // 1EE4A ; UNKNOWN 7133 0x1EE4B, // 1EE4B ; ARABIC 7134 0x1EE4C, // 1EE4C ; UNKNOWN 7135 0x1EE4D, // 1EE4D..1EE4F; ARABIC 7136 0x1EE50, // 1EE50 ; UNKNOWN 7137 0x1EE51, // 1EE51..1EE52; ARABIC 7138 0x1EE53, // 1EE53 ; UNKNOWN 7139 0x1EE54, // 1EE54 ; ARABIC 7140 0x1EE55, // 1EE55..1EE56; UNKNOWN 7141 0x1EE57, // 1EE57 ; ARABIC 7142 0x1EE58, // 1EE58 ; UNKNOWN 7143 0x1EE59, // 1EE59 ; ARABIC 7144 0x1EE5A, // 1EE5A ; UNKNOWN 7145 0x1EE5B, // 1EE5B ; ARABIC 7146 0x1EE5C, // 1EE5C ; UNKNOWN 7147 0x1EE5D, // 1EE5D ; ARABIC 7148 0x1EE5E, // 1EE5E ; UNKNOWN 7149 0x1EE5F, // 1EE5F ; ARABIC 7150 0x1EE60, // 1EE60 ; UNKNOWN 7151 0x1EE61, // 1EE61..1EE62; ARABIC 7152 0x1EE63, // 1EE63 ; UNKNOWN 7153 0x1EE64, // 1EE64 ; ARABIC 7154 0x1EE65, // 1EE65..1EE66; UNKNOWN 7155 0x1EE67, // 1EE67..1EE6A; ARABIC 7156 0x1EE6B, // 1EE6B ; UNKNOWN 7157 0x1EE6C, // 1EE6C..1EE72; ARABIC 7158 0x1EE73, // 1EE73 ; UNKNOWN 7159 0x1EE74, // 1EE74..1EE77; 
ARABIC 7160 0x1EE78, // 1EE78 ; UNKNOWN 7161 0x1EE79, // 1EE79..1EE7C; ARABIC 7162 0x1EE7D, // 1EE7D ; UNKNOWN 7163 0x1EE7E, // 1EE7E ; ARABIC 7164 0x1EE7F, // 1EE7F ; UNKNOWN 7165 0x1EE80, // 1EE80..1EE89; ARABIC 7166 0x1EE8A, // 1EE8A ; UNKNOWN 7167 0x1EE8B, // 1EE8B..1EE9B; ARABIC 7168 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 7169 0x1EEA1, // 1EEA1..1EEA3; ARABIC 7170 0x1EEA4, // 1EEA4 ; UNKNOWN 7171 0x1EEA5, // 1EEA5..1EEA9; ARABIC 7172 0x1EEAA, // 1EEAA ; UNKNOWN 7173 0x1EEAB, // 1EEAB..1EEBB; ARABIC 7174 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 7175 0x1EEF0, // 1EEF0..1EEF1; ARABIC 7176 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 7177 0x1F000, // 1F000..1F02B; COMMON 7178 0x1F02C, // 1F02C..1F02F; UNKNOWN 7179 0x1F030, // 1F030..1F093; COMMON 7180 0x1F094, // 1F094..1F09F; UNKNOWN 7181 0x1F0A0, // 1F0A0..1F0AE; COMMON 7182 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 7183 0x1F0B1, // 1F0B1..1F0BF; COMMON 7184 0x1F0C0, // 1F0C0 ; UNKNOWN 7185 0x1F0C1, // 1F0C1..1F0CF; COMMON 7186 0x1F0D0, // 1F0D0 ; UNKNOWN 7187 0x1F0D1, // 1F0D1..1F0F5; COMMON 7188 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 7189 0x1F100, // 1F100..1F1AD; COMMON 7190 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 7191 0x1F1E6, // 1F1E6..1F1FF; COMMON 7192 0x1F200, // 1F200 ; HIRAGANA 7193 0x1F201, // 1F201..1F202; COMMON 7194 0x1F203, // 1F203..1F20F; UNKNOWN 7195 0x1F210, // 1F210..1F23B; COMMON 7196 0x1F23C, // 1F23C..1F23F; UNKNOWN 7197 0x1F240, // 1F240..1F248; COMMON 7198 0x1F249, // 1F249..1F24F; UNKNOWN 7199 0x1F250, // 1F250..1F251; COMMON 7200 0x1F252, // 1F252..1F25F; UNKNOWN 7201 0x1F260, // 1F260..1F265; COMMON 7202 0x1F266, // 1F266..1F2FF; UNKNOWN 7203 0x1F300, // 1F300..1F6D7; COMMON 7204 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN 7205 0x1F6DC, // 1F6DC..1F6EC; COMMON 7206 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 7207 0x1F6F0, // 1F6F0..1F6FC; COMMON 7208 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 7209 0x1F700, // 1F700..1F776; COMMON 7210 0x1F777, // 1F777..1F77A; UNKNOWN 7211 0x1F77B, // 1F77B..1F7D9; COMMON 7212 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 7213 
0x1F7E0, // 1F7E0..1F7EB; COMMON 7214 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 7215 0x1F7F0, // 1F7F0 ; COMMON 7216 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 7217 0x1F800, // 1F800..1F80B; COMMON 7218 0x1F80C, // 1F80C..1F80F; UNKNOWN 7219 0x1F810, // 1F810..1F847; COMMON 7220 0x1F848, // 1F848..1F84F; UNKNOWN 7221 0x1F850, // 1F850..1F859; COMMON 7222 0x1F85A, // 1F85A..1F85F; UNKNOWN 7223 0x1F860, // 1F860..1F887; COMMON 7224 0x1F888, // 1F888..1F88F; UNKNOWN 7225 0x1F890, // 1F890..1F8AD; COMMON 7226 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7227 0x1F8B0, // 1F8B0..1F8BB; COMMON 7228 0x1F8BC, // 1F8BC..1F8BF; UNKNOWN 7229 0x1F8C0, // 1F8C0..1F8C1; COMMON 7230 0x1F8C2, // 1F8C2..1F8FF; UNKNOWN 7231 0x1F900, // 1F900..1FA53; COMMON 7232 0x1FA54, // 1FA54..1FA5F; UNKNOWN 7233 0x1FA60, // 1FA60..1FA6D; COMMON 7234 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7235 0x1FA70, // 1FA70..1FA7C; COMMON 7236 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7237 0x1FA80, // 1FA80..1FA89; COMMON 7238 0x1FA8A, // 1FA8A..1FA8E; UNKNOWN 7239 0x1FA8F, // 1FA8F..1FAC6; COMMON 7240 0x1FAC7, // 1FAC7..1FACD; UNKNOWN 7241 0x1FACE, // 1FACE..1FADC; COMMON 7242 0x1FADD, // 1FADD..1FADE; UNKNOWN 7243 0x1FADF, // 1FADF..1FAE9; COMMON 7244 0x1FAEA, // 1FAEA..1FAEF; UNKNOWN 7245 0x1FAF0, // 1FAF0..1FAF8; COMMON 7246 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7247 0x1FB00, // 1FB00..1FB92; COMMON 7248 0x1FB93, // 1FB93 ; UNKNOWN 7249 0x1FB94, // 1FB94..1FBF9; COMMON 7250 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 7251 0x20000, // 20000..2A6DF; HAN 7252 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7253 0x2A700, // 2A700..2B739; HAN 7254 0x2B73A, // 2B73A..2B73F; UNKNOWN 7255 0x2B740, // 2B740..2B81D; HAN 7256 0x2B81E, // 2B81E..2B81F; UNKNOWN 7257 0x2B820, // 2B820..2CEA1; HAN 7258 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 7259 0x2CEB0, // 2CEB0..2EBE0; HAN 7260 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN 7261 0x2EBF0, // 2EBF0..2EE5D; HAN 7262 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN 7263 0x2F800, // 2F800..2FA1D; HAN 7264 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7265 0x30000, // 30000..3134A; HAN 
7266 0x3134B, // 3134B..3134F; UNKNOWN 7267 0x31350, // 31350..323AF; HAN 7268 0x323B0, // 323B0..E0000; UNKNOWN 7269 0xE0001, // E0001 ; COMMON 7270 0xE0002, // E0002..E001F; UNKNOWN 7271 0xE0020, // E0020..E007F; COMMON 7272 0xE0080, // E0080..E00FF; UNKNOWN 7273 0xE0100, // E0100..E01EF; INHERITED 7274 0xE01F0, // E01F0..10FFFF; UNKNOWN 7275 }; 7276 7277 private static final UnicodeScript[] scripts = { 7278 COMMON, // 0000..0040 7279 LATIN, // 0041..005A 7280 COMMON, // 005B..0060 7281 LATIN, // 0061..007A 7282 COMMON, // 007B..00A9 7283 LATIN, // 00AA 7284 COMMON, // 00AB..00B9 7285 LATIN, // 00BA 7286 COMMON, // 00BB..00BF 7287 LATIN, // 00C0..00D6 7288 COMMON, // 00D7 7289 LATIN, // 00D8..00F6 7290 COMMON, // 00F7 7291 LATIN, // 00F8..02B8 7292 COMMON, // 02B9..02DF 7293 LATIN, // 02E0..02E4 7294 COMMON, // 02E5..02E9 7295 BOPOMOFO, // 02EA..02EB 7296 COMMON, // 02EC..02FF 7297 INHERITED, // 0300..036F 7298 GREEK, // 0370..0373 7299 COMMON, // 0374 7300 GREEK, // 0375..0377 7301 UNKNOWN, // 0378..0379 7302 GREEK, // 037A..037D 7303 COMMON, // 037E 7304 GREEK, // 037F 7305 UNKNOWN, // 0380..0383 7306 GREEK, // 0384 7307 COMMON, // 0385 7308 GREEK, // 0386 7309 COMMON, // 0387 7310 GREEK, // 0388..038A 7311 UNKNOWN, // 038B 7312 GREEK, // 038C 7313 UNKNOWN, // 038D 7314 GREEK, // 038E..03A1 7315 UNKNOWN, // 03A2 7316 GREEK, // 03A3..03E1 7317 COPTIC, // 03E2..03EF 7318 GREEK, // 03F0..03FF 7319 CYRILLIC, // 0400..0484 7320 INHERITED, // 0485..0486 7321 CYRILLIC, // 0487..052F 7322 UNKNOWN, // 0530 7323 ARMENIAN, // 0531..0556 7324 UNKNOWN, // 0557..0558 7325 ARMENIAN, // 0559..058A 7326 UNKNOWN, // 058B..058C 7327 ARMENIAN, // 058D..058F 7328 UNKNOWN, // 0590 7329 HEBREW, // 0591..05C7 7330 UNKNOWN, // 05C8..05CF 7331 HEBREW, // 05D0..05EA 7332 UNKNOWN, // 05EB..05EE 7333 HEBREW, // 05EF..05F4 7334 UNKNOWN, // 05F5..05FF 7335 ARABIC, // 0600..0604 7336 COMMON, // 0605 7337 ARABIC, // 0606..060B 7338 COMMON, // 060C 7339 ARABIC, // 060D..061A 7340 COMMON, // 
061B 7341 ARABIC, // 061C..061E 7342 COMMON, // 061F 7343 ARABIC, // 0620..063F 7344 COMMON, // 0640 7345 ARABIC, // 0641..064A 7346 INHERITED, // 064B..0655 7347 ARABIC, // 0656..066F 7348 INHERITED, // 0670 7349 ARABIC, // 0671..06DC 7350 COMMON, // 06DD 7351 ARABIC, // 06DE..06FF 7352 SYRIAC, // 0700..070D 7353 UNKNOWN, // 070E 7354 SYRIAC, // 070F..074A 7355 UNKNOWN, // 074B..074C 7356 SYRIAC, // 074D..074F 7357 ARABIC, // 0750..077F 7358 THAANA, // 0780..07B1 7359 UNKNOWN, // 07B2..07BF 7360 NKO, // 07C0..07FA 7361 UNKNOWN, // 07FB..07FC 7362 NKO, // 07FD..07FF 7363 SAMARITAN, // 0800..082D 7364 UNKNOWN, // 082E..082F 7365 SAMARITAN, // 0830..083E 7366 UNKNOWN, // 083F 7367 MANDAIC, // 0840..085B 7368 UNKNOWN, // 085C..085D 7369 MANDAIC, // 085E 7370 UNKNOWN, // 085F 7371 SYRIAC, // 0860..086A 7372 UNKNOWN, // 086B..086F 7373 ARABIC, // 0870..088E 7374 UNKNOWN, // 088F 7375 ARABIC, // 0890..0891 7376 UNKNOWN, // 0892..0896 7377 ARABIC, // 0897..08E1 7378 COMMON, // 08E2 7379 ARABIC, // 08E3..08FF 7380 DEVANAGARI, // 0900..0950 7381 INHERITED, // 0951..0954 7382 DEVANAGARI, // 0955..0963 7383 COMMON, // 0964..0965 7384 DEVANAGARI, // 0966..097F 7385 BENGALI, // 0980..0983 7386 UNKNOWN, // 0984 7387 BENGALI, // 0985..098C 7388 UNKNOWN, // 098D..098E 7389 BENGALI, // 098F..0990 7390 UNKNOWN, // 0991..0992 7391 BENGALI, // 0993..09A8 7392 UNKNOWN, // 09A9 7393 BENGALI, // 09AA..09B0 7394 UNKNOWN, // 09B1 7395 BENGALI, // 09B2 7396 UNKNOWN, // 09B3..09B5 7397 BENGALI, // 09B6..09B9 7398 UNKNOWN, // 09BA..09BB 7399 BENGALI, // 09BC..09C4 7400 UNKNOWN, // 09C5..09C6 7401 BENGALI, // 09C7..09C8 7402 UNKNOWN, // 09C9..09CA 7403 BENGALI, // 09CB..09CE 7404 UNKNOWN, // 09CF..09D6 7405 BENGALI, // 09D7 7406 UNKNOWN, // 09D8..09DB 7407 BENGALI, // 09DC..09DD 7408 UNKNOWN, // 09DE 7409 BENGALI, // 09DF..09E3 7410 UNKNOWN, // 09E4..09E5 7411 BENGALI, // 09E6..09FE 7412 UNKNOWN, // 09FF..0A00 7413 GURMUKHI, // 0A01..0A03 7414 UNKNOWN, // 0A04 7415 GURMUKHI, // 0A05..0A0A 7416 
UNKNOWN, // 0A0B..0A0E 7417 GURMUKHI, // 0A0F..0A10 7418 UNKNOWN, // 0A11..0A12 7419 GURMUKHI, // 0A13..0A28 7420 UNKNOWN, // 0A29 7421 GURMUKHI, // 0A2A..0A30 7422 UNKNOWN, // 0A31 7423 GURMUKHI, // 0A32..0A33 7424 UNKNOWN, // 0A34 7425 GURMUKHI, // 0A35..0A36 7426 UNKNOWN, // 0A37 7427 GURMUKHI, // 0A38..0A39 7428 UNKNOWN, // 0A3A..0A3B 7429 GURMUKHI, // 0A3C 7430 UNKNOWN, // 0A3D 7431 GURMUKHI, // 0A3E..0A42 7432 UNKNOWN, // 0A43..0A46 7433 GURMUKHI, // 0A47..0A48 7434 UNKNOWN, // 0A49..0A4A 7435 GURMUKHI, // 0A4B..0A4D 7436 UNKNOWN, // 0A4E..0A50 7437 GURMUKHI, // 0A51 7438 UNKNOWN, // 0A52..0A58 7439 GURMUKHI, // 0A59..0A5C 7440 UNKNOWN, // 0A5D 7441 GURMUKHI, // 0A5E 7442 UNKNOWN, // 0A5F..0A65 7443 GURMUKHI, // 0A66..0A76 7444 UNKNOWN, // 0A77..0A80 7445 GUJARATI, // 0A81..0A83 7446 UNKNOWN, // 0A84 7447 GUJARATI, // 0A85..0A8D 7448 UNKNOWN, // 0A8E 7449 GUJARATI, // 0A8F..0A91 7450 UNKNOWN, // 0A92 7451 GUJARATI, // 0A93..0AA8 7452 UNKNOWN, // 0AA9 7453 GUJARATI, // 0AAA..0AB0 7454 UNKNOWN, // 0AB1 7455 GUJARATI, // 0AB2..0AB3 7456 UNKNOWN, // 0AB4 7457 GUJARATI, // 0AB5..0AB9 7458 UNKNOWN, // 0ABA..0ABB 7459 GUJARATI, // 0ABC..0AC5 7460 UNKNOWN, // 0AC6 7461 GUJARATI, // 0AC7..0AC9 7462 UNKNOWN, // 0ACA 7463 GUJARATI, // 0ACB..0ACD 7464 UNKNOWN, // 0ACE..0ACF 7465 GUJARATI, // 0AD0 7466 UNKNOWN, // 0AD1..0ADF 7467 GUJARATI, // 0AE0..0AE3 7468 UNKNOWN, // 0AE4..0AE5 7469 GUJARATI, // 0AE6..0AF1 7470 UNKNOWN, // 0AF2..0AF8 7471 GUJARATI, // 0AF9..0AFF 7472 UNKNOWN, // 0B00 7473 ORIYA, // 0B01..0B03 7474 UNKNOWN, // 0B04 7475 ORIYA, // 0B05..0B0C 7476 UNKNOWN, // 0B0D..0B0E 7477 ORIYA, // 0B0F..0B10 7478 UNKNOWN, // 0B11..0B12 7479 ORIYA, // 0B13..0B28 7480 UNKNOWN, // 0B29 7481 ORIYA, // 0B2A..0B30 7482 UNKNOWN, // 0B31 7483 ORIYA, // 0B32..0B33 7484 UNKNOWN, // 0B34 7485 ORIYA, // 0B35..0B39 7486 UNKNOWN, // 0B3A..0B3B 7487 ORIYA, // 0B3C..0B44 7488 UNKNOWN, // 0B45..0B46 7489 ORIYA, // 0B47..0B48 7490 UNKNOWN, // 0B49..0B4A 7491 ORIYA, // 0B4B..0B4D 7492 
UNKNOWN, // 0B4E..0B54 7493 ORIYA, // 0B55..0B57 7494 UNKNOWN, // 0B58..0B5B 7495 ORIYA, // 0B5C..0B5D 7496 UNKNOWN, // 0B5E 7497 ORIYA, // 0B5F..0B63 7498 UNKNOWN, // 0B64..0B65 7499 ORIYA, // 0B66..0B77 7500 UNKNOWN, // 0B78..0B81 7501 TAMIL, // 0B82..0B83 7502 UNKNOWN, // 0B84 7503 TAMIL, // 0B85..0B8A 7504 UNKNOWN, // 0B8B..0B8D 7505 TAMIL, // 0B8E..0B90 7506 UNKNOWN, // 0B91 7507 TAMIL, // 0B92..0B95 7508 UNKNOWN, // 0B96..0B98 7509 TAMIL, // 0B99..0B9A 7510 UNKNOWN, // 0B9B 7511 TAMIL, // 0B9C 7512 UNKNOWN, // 0B9D 7513 TAMIL, // 0B9E..0B9F 7514 UNKNOWN, // 0BA0..0BA2 7515 TAMIL, // 0BA3..0BA4 7516 UNKNOWN, // 0BA5..0BA7 7517 TAMIL, // 0BA8..0BAA 7518 UNKNOWN, // 0BAB..0BAD 7519 TAMIL, // 0BAE..0BB9 7520 UNKNOWN, // 0BBA..0BBD 7521 TAMIL, // 0BBE..0BC2 7522 UNKNOWN, // 0BC3..0BC5 7523 TAMIL, // 0BC6..0BC8 7524 UNKNOWN, // 0BC9 7525 TAMIL, // 0BCA..0BCD 7526 UNKNOWN, // 0BCE..0BCF 7527 TAMIL, // 0BD0 7528 UNKNOWN, // 0BD1..0BD6 7529 TAMIL, // 0BD7 7530 UNKNOWN, // 0BD8..0BE5 7531 TAMIL, // 0BE6..0BFA 7532 UNKNOWN, // 0BFB..0BFF 7533 TELUGU, // 0C00..0C0C 7534 UNKNOWN, // 0C0D 7535 TELUGU, // 0C0E..0C10 7536 UNKNOWN, // 0C11 7537 TELUGU, // 0C12..0C28 7538 UNKNOWN, // 0C29 7539 TELUGU, // 0C2A..0C39 7540 UNKNOWN, // 0C3A..0C3B 7541 TELUGU, // 0C3C..0C44 7542 UNKNOWN, // 0C45 7543 TELUGU, // 0C46..0C48 7544 UNKNOWN, // 0C49 7545 TELUGU, // 0C4A..0C4D 7546 UNKNOWN, // 0C4E..0C54 7547 TELUGU, // 0C55..0C56 7548 UNKNOWN, // 0C57 7549 TELUGU, // 0C58..0C5A 7550 UNKNOWN, // 0C5B..0C5C 7551 TELUGU, // 0C5D 7552 UNKNOWN, // 0C5E..0C5F 7553 TELUGU, // 0C60..0C63 7554 UNKNOWN, // 0C64..0C65 7555 TELUGU, // 0C66..0C6F 7556 UNKNOWN, // 0C70..0C76 7557 TELUGU, // 0C77..0C7F 7558 KANNADA, // 0C80..0C8C 7559 UNKNOWN, // 0C8D 7560 KANNADA, // 0C8E..0C90 7561 UNKNOWN, // 0C91 7562 KANNADA, // 0C92..0CA8 7563 UNKNOWN, // 0CA9 7564 KANNADA, // 0CAA..0CB3 7565 UNKNOWN, // 0CB4 7566 KANNADA, // 0CB5..0CB9 7567 UNKNOWN, // 0CBA..0CBB 7568 KANNADA, // 0CBC..0CC4 7569 UNKNOWN, // 0CC5 
7570 KANNADA, // 0CC6..0CC8 7571 UNKNOWN, // 0CC9 7572 KANNADA, // 0CCA..0CCD 7573 UNKNOWN, // 0CCE..0CD4 7574 KANNADA, // 0CD5..0CD6 7575 UNKNOWN, // 0CD7..0CDC 7576 KANNADA, // 0CDD..0CDE 7577 UNKNOWN, // 0CDF 7578 KANNADA, // 0CE0..0CE3 7579 UNKNOWN, // 0CE4..0CE5 7580 KANNADA, // 0CE6..0CEF 7581 UNKNOWN, // 0CF0 7582 KANNADA, // 0CF1..0CF3 7583 UNKNOWN, // 0CF4..0CFF 7584 MALAYALAM, // 0D00..0D0C 7585 UNKNOWN, // 0D0D 7586 MALAYALAM, // 0D0E..0D10 7587 UNKNOWN, // 0D11 7588 MALAYALAM, // 0D12..0D44 7589 UNKNOWN, // 0D45 7590 MALAYALAM, // 0D46..0D48 7591 UNKNOWN, // 0D49 7592 MALAYALAM, // 0D4A..0D4F 7593 UNKNOWN, // 0D50..0D53 7594 MALAYALAM, // 0D54..0D63 7595 UNKNOWN, // 0D64..0D65 7596 MALAYALAM, // 0D66..0D7F 7597 UNKNOWN, // 0D80 7598 SINHALA, // 0D81..0D83 7599 UNKNOWN, // 0D84 7600 SINHALA, // 0D85..0D96 7601 UNKNOWN, // 0D97..0D99 7602 SINHALA, // 0D9A..0DB1 7603 UNKNOWN, // 0DB2 7604 SINHALA, // 0DB3..0DBB 7605 UNKNOWN, // 0DBC 7606 SINHALA, // 0DBD 7607 UNKNOWN, // 0DBE..0DBF 7608 SINHALA, // 0DC0..0DC6 7609 UNKNOWN, // 0DC7..0DC9 7610 SINHALA, // 0DCA 7611 UNKNOWN, // 0DCB..0DCE 7612 SINHALA, // 0DCF..0DD4 7613 UNKNOWN, // 0DD5 7614 SINHALA, // 0DD6 7615 UNKNOWN, // 0DD7 7616 SINHALA, // 0DD8..0DDF 7617 UNKNOWN, // 0DE0..0DE5 7618 SINHALA, // 0DE6..0DEF 7619 UNKNOWN, // 0DF0..0DF1 7620 SINHALA, // 0DF2..0DF4 7621 UNKNOWN, // 0DF5..0E00 7622 THAI, // 0E01..0E3A 7623 UNKNOWN, // 0E3B..0E3E 7624 COMMON, // 0E3F 7625 THAI, // 0E40..0E5B 7626 UNKNOWN, // 0E5C..0E80 7627 LAO, // 0E81..0E82 7628 UNKNOWN, // 0E83 7629 LAO, // 0E84 7630 UNKNOWN, // 0E85 7631 LAO, // 0E86..0E8A 7632 UNKNOWN, // 0E8B 7633 LAO, // 0E8C..0EA3 7634 UNKNOWN, // 0EA4 7635 LAO, // 0EA5 7636 UNKNOWN, // 0EA6 7637 LAO, // 0EA7..0EBD 7638 UNKNOWN, // 0EBE..0EBF 7639 LAO, // 0EC0..0EC4 7640 UNKNOWN, // 0EC5 7641 LAO, // 0EC6 7642 UNKNOWN, // 0EC7 7643 LAO, // 0EC8..0ECE 7644 UNKNOWN, // 0ECF 7645 LAO, // 0ED0..0ED9 7646 UNKNOWN, // 0EDA..0EDB 7647 LAO, // 0EDC..0EDF 7648 UNKNOWN, // 
0EE0..0EFF 7649 TIBETAN, // 0F00..0F47 7650 UNKNOWN, // 0F48 7651 TIBETAN, // 0F49..0F6C 7652 UNKNOWN, // 0F6D..0F70 7653 TIBETAN, // 0F71..0F97 7654 UNKNOWN, // 0F98 7655 TIBETAN, // 0F99..0FBC 7656 UNKNOWN, // 0FBD 7657 TIBETAN, // 0FBE..0FCC 7658 UNKNOWN, // 0FCD 7659 TIBETAN, // 0FCE..0FD4 7660 COMMON, // 0FD5..0FD8 7661 TIBETAN, // 0FD9..0FDA 7662 UNKNOWN, // 0FDB..0FFF 7663 MYANMAR, // 1000..109F 7664 GEORGIAN, // 10A0..10C5 7665 UNKNOWN, // 10C6 7666 GEORGIAN, // 10C7 7667 UNKNOWN, // 10C8..10CC 7668 GEORGIAN, // 10CD 7669 UNKNOWN, // 10CE..10CF 7670 GEORGIAN, // 10D0..10FA 7671 COMMON, // 10FB 7672 GEORGIAN, // 10FC..10FF 7673 HANGUL, // 1100..11FF 7674 ETHIOPIC, // 1200..1248 7675 UNKNOWN, // 1249 7676 ETHIOPIC, // 124A..124D 7677 UNKNOWN, // 124E..124F 7678 ETHIOPIC, // 1250..1256 7679 UNKNOWN, // 1257 7680 ETHIOPIC, // 1258 7681 UNKNOWN, // 1259 7682 ETHIOPIC, // 125A..125D 7683 UNKNOWN, // 125E..125F 7684 ETHIOPIC, // 1260..1288 7685 UNKNOWN, // 1289 7686 ETHIOPIC, // 128A..128D 7687 UNKNOWN, // 128E..128F 7688 ETHIOPIC, // 1290..12B0 7689 UNKNOWN, // 12B1 7690 ETHIOPIC, // 12B2..12B5 7691 UNKNOWN, // 12B6..12B7 7692 ETHIOPIC, // 12B8..12BE 7693 UNKNOWN, // 12BF 7694 ETHIOPIC, // 12C0 7695 UNKNOWN, // 12C1 7696 ETHIOPIC, // 12C2..12C5 7697 UNKNOWN, // 12C6..12C7 7698 ETHIOPIC, // 12C8..12D6 7699 UNKNOWN, // 12D7 7700 ETHIOPIC, // 12D8..1310 7701 UNKNOWN, // 1311 7702 ETHIOPIC, // 1312..1315 7703 UNKNOWN, // 1316..1317 7704 ETHIOPIC, // 1318..135A 7705 UNKNOWN, // 135B..135C 7706 ETHIOPIC, // 135D..137C 7707 UNKNOWN, // 137D..137F 7708 ETHIOPIC, // 1380..1399 7709 UNKNOWN, // 139A..139F 7710 CHEROKEE, // 13A0..13F5 7711 UNKNOWN, // 13F6..13F7 7712 CHEROKEE, // 13F8..13FD 7713 UNKNOWN, // 13FE..13FF 7714 CANADIAN_ABORIGINAL, // 1400..167F 7715 OGHAM, // 1680..169C 7716 UNKNOWN, // 169D..169F 7717 RUNIC, // 16A0..16EA 7718 COMMON, // 16EB..16ED 7719 RUNIC, // 16EE..16F8 7720 UNKNOWN, // 16F9..16FF 7721 TAGALOG, // 1700..1715 7722 UNKNOWN, // 1716..171E 
7723 TAGALOG, // 171F 7724 HANUNOO, // 1720..1734 7725 COMMON, // 1735..1736 7726 UNKNOWN, // 1737..173F 7727 BUHID, // 1740..1753 7728 UNKNOWN, // 1754..175F 7729 TAGBANWA, // 1760..176C 7730 UNKNOWN, // 176D 7731 TAGBANWA, // 176E..1770 7732 UNKNOWN, // 1771 7733 TAGBANWA, // 1772..1773 7734 UNKNOWN, // 1774..177F 7735 KHMER, // 1780..17DD 7736 UNKNOWN, // 17DE..17DF 7737 KHMER, // 17E0..17E9 7738 UNKNOWN, // 17EA..17EF 7739 KHMER, // 17F0..17F9 7740 UNKNOWN, // 17FA..17FF 7741 MONGOLIAN, // 1800..1801 7742 COMMON, // 1802..1803 7743 MONGOLIAN, // 1804 7744 COMMON, // 1805 7745 MONGOLIAN, // 1806..1819 7746 UNKNOWN, // 181A..181F 7747 MONGOLIAN, // 1820..1878 7748 UNKNOWN, // 1879..187F 7749 MONGOLIAN, // 1880..18AA 7750 UNKNOWN, // 18AB..18AF 7751 CANADIAN_ABORIGINAL, // 18B0..18F5 7752 UNKNOWN, // 18F6..18FF 7753 LIMBU, // 1900..191E 7754 UNKNOWN, // 191F 7755 LIMBU, // 1920..192B 7756 UNKNOWN, // 192C..192F 7757 LIMBU, // 1930..193B 7758 UNKNOWN, // 193C..193F 7759 LIMBU, // 1940 7760 UNKNOWN, // 1941..1943 7761 LIMBU, // 1944..194F 7762 TAI_LE, // 1950..196D 7763 UNKNOWN, // 196E..196F 7764 TAI_LE, // 1970..1974 7765 UNKNOWN, // 1975..197F 7766 NEW_TAI_LUE, // 1980..19AB 7767 UNKNOWN, // 19AC..19AF 7768 NEW_TAI_LUE, // 19B0..19C9 7769 UNKNOWN, // 19CA..19CF 7770 NEW_TAI_LUE, // 19D0..19DA 7771 UNKNOWN, // 19DB..19DD 7772 NEW_TAI_LUE, // 19DE..19DF 7773 KHMER, // 19E0..19FF 7774 BUGINESE, // 1A00..1A1B 7775 UNKNOWN, // 1A1C..1A1D 7776 BUGINESE, // 1A1E..1A1F 7777 TAI_THAM, // 1A20..1A5E 7778 UNKNOWN, // 1A5F 7779 TAI_THAM, // 1A60..1A7C 7780 UNKNOWN, // 1A7D..1A7E 7781 TAI_THAM, // 1A7F..1A89 7782 UNKNOWN, // 1A8A..1A8F 7783 TAI_THAM, // 1A90..1A99 7784 UNKNOWN, // 1A9A..1A9F 7785 TAI_THAM, // 1AA0..1AAD 7786 UNKNOWN, // 1AAE..1AAF 7787 INHERITED, // 1AB0..1ACE 7788 UNKNOWN, // 1ACF..1AFF 7789 BALINESE, // 1B00..1B4C 7790 UNKNOWN, // 1B4D 7791 BALINESE, // 1B4E..1B7F 7792 SUNDANESE, // 1B80..1BBF 7793 BATAK, // 1BC0..1BF3 7794 UNKNOWN, // 1BF4..1BFB 7795 
BATAK, // 1BFC..1BFF 7796 LEPCHA, // 1C00..1C37 7797 UNKNOWN, // 1C38..1C3A 7798 LEPCHA, // 1C3B..1C49 7799 UNKNOWN, // 1C4A..1C4C 7800 LEPCHA, // 1C4D..1C4F 7801 OL_CHIKI, // 1C50..1C7F 7802 CYRILLIC, // 1C80..1C8A 7803 UNKNOWN, // 1C8B..1C8F 7804 GEORGIAN, // 1C90..1CBA 7805 UNKNOWN, // 1CBB..1CBC 7806 GEORGIAN, // 1CBD..1CBF 7807 SUNDANESE, // 1CC0..1CC7 7808 UNKNOWN, // 1CC8..1CCF 7809 INHERITED, // 1CD0..1CD2 7810 COMMON, // 1CD3 7811 INHERITED, // 1CD4..1CE0 7812 COMMON, // 1CE1 7813 INHERITED, // 1CE2..1CE8 7814 COMMON, // 1CE9..1CEC 7815 INHERITED, // 1CED 7816 COMMON, // 1CEE..1CF3 7817 INHERITED, // 1CF4 7818 COMMON, // 1CF5..1CF7 7819 INHERITED, // 1CF8..1CF9 7820 COMMON, // 1CFA 7821 UNKNOWN, // 1CFB..1CFF 7822 LATIN, // 1D00..1D25 7823 GREEK, // 1D26..1D2A 7824 CYRILLIC, // 1D2B 7825 LATIN, // 1D2C..1D5C 7826 GREEK, // 1D5D..1D61 7827 LATIN, // 1D62..1D65 7828 GREEK, // 1D66..1D6A 7829 LATIN, // 1D6B..1D77 7830 CYRILLIC, // 1D78 7831 LATIN, // 1D79..1DBE 7832 GREEK, // 1DBF 7833 INHERITED, // 1DC0..1DFF 7834 LATIN, // 1E00..1EFF 7835 GREEK, // 1F00..1F15 7836 UNKNOWN, // 1F16..1F17 7837 GREEK, // 1F18..1F1D 7838 UNKNOWN, // 1F1E..1F1F 7839 GREEK, // 1F20..1F45 7840 UNKNOWN, // 1F46..1F47 7841 GREEK, // 1F48..1F4D 7842 UNKNOWN, // 1F4E..1F4F 7843 GREEK, // 1F50..1F57 7844 UNKNOWN, // 1F58 7845 GREEK, // 1F59 7846 UNKNOWN, // 1F5A 7847 GREEK, // 1F5B 7848 UNKNOWN, // 1F5C 7849 GREEK, // 1F5D 7850 UNKNOWN, // 1F5E 7851 GREEK, // 1F5F..1F7D 7852 UNKNOWN, // 1F7E..1F7F 7853 GREEK, // 1F80..1FB4 7854 UNKNOWN, // 1FB5 7855 GREEK, // 1FB6..1FC4 7856 UNKNOWN, // 1FC5 7857 GREEK, // 1FC6..1FD3 7858 UNKNOWN, // 1FD4..1FD5 7859 GREEK, // 1FD6..1FDB 7860 UNKNOWN, // 1FDC 7861 GREEK, // 1FDD..1FEF 7862 UNKNOWN, // 1FF0..1FF1 7863 GREEK, // 1FF2..1FF4 7864 UNKNOWN, // 1FF5 7865 GREEK, // 1FF6..1FFE 7866 UNKNOWN, // 1FFF 7867 COMMON, // 2000..200B 7868 INHERITED, // 200C..200D 7869 COMMON, // 200E..2064 7870 UNKNOWN, // 2065 7871 COMMON, // 2066..2070 7872 LATIN, // 
2071 7873 UNKNOWN, // 2072..2073 7874 COMMON, // 2074..207E 7875 LATIN, // 207F 7876 COMMON, // 2080..208E 7877 UNKNOWN, // 208F 7878 LATIN, // 2090..209C 7879 UNKNOWN, // 209D..209F 7880 COMMON, // 20A0..20C0 7881 UNKNOWN, // 20C1..20CF 7882 INHERITED, // 20D0..20F0 7883 UNKNOWN, // 20F1..20FF 7884 COMMON, // 2100..2125 7885 GREEK, // 2126 7886 COMMON, // 2127..2129 7887 LATIN, // 212A..212B 7888 COMMON, // 212C..2131 7889 LATIN, // 2132 7890 COMMON, // 2133..214D 7891 LATIN, // 214E 7892 COMMON, // 214F..215F 7893 LATIN, // 2160..2188 7894 COMMON, // 2189..218B 7895 UNKNOWN, // 218C..218F 7896 COMMON, // 2190..2429 7897 UNKNOWN, // 242A..243F 7898 COMMON, // 2440..244A 7899 UNKNOWN, // 244B..245F 7900 COMMON, // 2460..27FF 7901 BRAILLE, // 2800..28FF 7902 COMMON, // 2900..2B73 7903 UNKNOWN, // 2B74..2B75 7904 COMMON, // 2B76..2B95 7905 UNKNOWN, // 2B96 7906 COMMON, // 2B97..2BFF 7907 GLAGOLITIC, // 2C00..2C5F 7908 LATIN, // 2C60..2C7F 7909 COPTIC, // 2C80..2CF3 7910 UNKNOWN, // 2CF4..2CF8 7911 COPTIC, // 2CF9..2CFF 7912 GEORGIAN, // 2D00..2D25 7913 UNKNOWN, // 2D26 7914 GEORGIAN, // 2D27 7915 UNKNOWN, // 2D28..2D2C 7916 GEORGIAN, // 2D2D 7917 UNKNOWN, // 2D2E..2D2F 7918 TIFINAGH, // 2D30..2D67 7919 UNKNOWN, // 2D68..2D6E 7920 TIFINAGH, // 2D6F..2D70 7921 UNKNOWN, // 2D71..2D7E 7922 TIFINAGH, // 2D7F 7923 ETHIOPIC, // 2D80..2D96 7924 UNKNOWN, // 2D97..2D9F 7925 ETHIOPIC, // 2DA0..2DA6 7926 UNKNOWN, // 2DA7 7927 ETHIOPIC, // 2DA8..2DAE 7928 UNKNOWN, // 2DAF 7929 ETHIOPIC, // 2DB0..2DB6 7930 UNKNOWN, // 2DB7 7931 ETHIOPIC, // 2DB8..2DBE 7932 UNKNOWN, // 2DBF 7933 ETHIOPIC, // 2DC0..2DC6 7934 UNKNOWN, // 2DC7 7935 ETHIOPIC, // 2DC8..2DCE 7936 UNKNOWN, // 2DCF 7937 ETHIOPIC, // 2DD0..2DD6 7938 UNKNOWN, // 2DD7 7939 ETHIOPIC, // 2DD8..2DDE 7940 UNKNOWN, // 2DDF 7941 CYRILLIC, // 2DE0..2DFF 7942 COMMON, // 2E00..2E5D 7943 UNKNOWN, // 2E5E..2E7F 7944 HAN, // 2E80..2E99 7945 UNKNOWN, // 2E9A 7946 HAN, // 2E9B..2EF3 7947 UNKNOWN, // 2EF4..2EFF 7948 HAN, // 2F00..2FD5 7949 
UNKNOWN, // 2FD6..2FEF 7950 COMMON, // 2FF0..3004 7951 HAN, // 3005 7952 COMMON, // 3006 7953 HAN, // 3007 7954 COMMON, // 3008..3020 7955 HAN, // 3021..3029 7956 INHERITED, // 302A..302D 7957 HANGUL, // 302E..302F 7958 COMMON, // 3030..3037 7959 HAN, // 3038..303B 7960 COMMON, // 303C..303F 7961 UNKNOWN, // 3040 7962 HIRAGANA, // 3041..3096 7963 UNKNOWN, // 3097..3098 7964 INHERITED, // 3099..309A 7965 COMMON, // 309B..309C 7966 HIRAGANA, // 309D..309F 7967 COMMON, // 30A0 7968 KATAKANA, // 30A1..30FA 7969 COMMON, // 30FB..30FC 7970 KATAKANA, // 30FD..30FF 7971 UNKNOWN, // 3100..3104 7972 BOPOMOFO, // 3105..312F 7973 UNKNOWN, // 3130 7974 HANGUL, // 3131..318E 7975 UNKNOWN, // 318F 7976 COMMON, // 3190..319F 7977 BOPOMOFO, // 31A0..31BF 7978 COMMON, // 31C0..31E5 7979 UNKNOWN, // 31E6..31EE 7980 COMMON, // 31EF 7981 KATAKANA, // 31F0..31FF 7982 HANGUL, // 3200..321E 7983 UNKNOWN, // 321F 7984 COMMON, // 3220..325F 7985 HANGUL, // 3260..327E 7986 COMMON, // 327F..32CF 7987 KATAKANA, // 32D0..32FE 7988 COMMON, // 32FF 7989 KATAKANA, // 3300..3357 7990 COMMON, // 3358..33FF 7991 HAN, // 3400..4DBF 7992 COMMON, // 4DC0..4DFF 7993 HAN, // 4E00..9FFF 7994 YI, // A000..A48C 7995 UNKNOWN, // A48D..A48F 7996 YI, // A490..A4C6 7997 UNKNOWN, // A4C7..A4CF 7998 LISU, // A4D0..A4FF 7999 VAI, // A500..A62B 8000 UNKNOWN, // A62C..A63F 8001 CYRILLIC, // A640..A69F 8002 BAMUM, // A6A0..A6F7 8003 UNKNOWN, // A6F8..A6FF 8004 COMMON, // A700..A721 8005 LATIN, // A722..A787 8006 COMMON, // A788..A78A 8007 LATIN, // A78B..A7CD 8008 UNKNOWN, // A7CE..A7CF 8009 LATIN, // A7D0..A7D1 8010 UNKNOWN, // A7D2 8011 LATIN, // A7D3 8012 UNKNOWN, // A7D4 8013 LATIN, // A7D5..A7DC 8014 UNKNOWN, // A7DD..A7F1 8015 LATIN, // A7F2..A7FF 8016 SYLOTI_NAGRI, // A800..A82C 8017 UNKNOWN, // A82D..A82F 8018 COMMON, // A830..A839 8019 UNKNOWN, // A83A..A83F 8020 PHAGS_PA, // A840..A877 8021 UNKNOWN, // A878..A87F 8022 SAURASHTRA, // A880..A8C5 8023 UNKNOWN, // A8C6..A8CD 8024 SAURASHTRA, // A8CE..A8D9 8025 
UNKNOWN, // A8DA..A8DF 8026 DEVANAGARI, // A8E0..A8FF 8027 KAYAH_LI, // A900..A92D 8028 COMMON, // A92E 8029 KAYAH_LI, // A92F 8030 REJANG, // A930..A953 8031 UNKNOWN, // A954..A95E 8032 REJANG, // A95F 8033 HANGUL, // A960..A97C 8034 UNKNOWN, // A97D..A97F 8035 JAVANESE, // A980..A9CD 8036 UNKNOWN, // A9CE 8037 COMMON, // A9CF 8038 JAVANESE, // A9D0..A9D9 8039 UNKNOWN, // A9DA..A9DD 8040 JAVANESE, // A9DE..A9DF 8041 MYANMAR, // A9E0..A9FE 8042 UNKNOWN, // A9FF 8043 CHAM, // AA00..AA36 8044 UNKNOWN, // AA37..AA3F 8045 CHAM, // AA40..AA4D 8046 UNKNOWN, // AA4E..AA4F 8047 CHAM, // AA50..AA59 8048 UNKNOWN, // AA5A..AA5B 8049 CHAM, // AA5C..AA5F 8050 MYANMAR, // AA60..AA7F 8051 TAI_VIET, // AA80..AAC2 8052 UNKNOWN, // AAC3..AADA 8053 TAI_VIET, // AADB..AADF 8054 MEETEI_MAYEK, // AAE0..AAF6 8055 UNKNOWN, // AAF7..AB00 8056 ETHIOPIC, // AB01..AB06 8057 UNKNOWN, // AB07..AB08 8058 ETHIOPIC, // AB09..AB0E 8059 UNKNOWN, // AB0F..AB10 8060 ETHIOPIC, // AB11..AB16 8061 UNKNOWN, // AB17..AB1F 8062 ETHIOPIC, // AB20..AB26 8063 UNKNOWN, // AB27 8064 ETHIOPIC, // AB28..AB2E 8065 UNKNOWN, // AB2F 8066 LATIN, // AB30..AB5A 8067 COMMON, // AB5B 8068 LATIN, // AB5C..AB64 8069 GREEK, // AB65 8070 LATIN, // AB66..AB69 8071 COMMON, // AB6A..AB6B 8072 UNKNOWN, // AB6C..AB6F 8073 CHEROKEE, // AB70..ABBF 8074 MEETEI_MAYEK, // ABC0..ABED 8075 UNKNOWN, // ABEE..ABEF 8076 MEETEI_MAYEK, // ABF0..ABF9 8077 UNKNOWN, // ABFA..ABFF 8078 HANGUL, // AC00..D7A3 8079 UNKNOWN, // D7A4..D7AF 8080 HANGUL, // D7B0..D7C6 8081 UNKNOWN, // D7C7..D7CA 8082 HANGUL, // D7CB..D7FB 8083 UNKNOWN, // D7FC..F8FF 8084 HAN, // F900..FA6D 8085 UNKNOWN, // FA6E..FA6F 8086 HAN, // FA70..FAD9 8087 UNKNOWN, // FADA..FAFF 8088 LATIN, // FB00..FB06 8089 UNKNOWN, // FB07..FB12 8090 ARMENIAN, // FB13..FB17 8091 UNKNOWN, // FB18..FB1C 8092 HEBREW, // FB1D..FB36 8093 UNKNOWN, // FB37 8094 HEBREW, // FB38..FB3C 8095 UNKNOWN, // FB3D 8096 HEBREW, // FB3E 8097 UNKNOWN, // FB3F 8098 HEBREW, // FB40..FB41 8099 UNKNOWN, // FB42 8100 
HEBREW, // FB43..FB44 8101 UNKNOWN, // FB45 8102 HEBREW, // FB46..FB4F 8103 ARABIC, // FB50..FBC2 8104 UNKNOWN, // FBC3..FBD2 8105 ARABIC, // FBD3..FD3D 8106 COMMON, // FD3E..FD3F 8107 ARABIC, // FD40..FD8F 8108 UNKNOWN, // FD90..FD91 8109 ARABIC, // FD92..FDC7 8110 UNKNOWN, // FDC8..FDCE 8111 ARABIC, // FDCF 8112 UNKNOWN, // FDD0..FDEF 8113 ARABIC, // FDF0..FDFF 8114 INHERITED, // FE00..FE0F 8115 COMMON, // FE10..FE19 8116 UNKNOWN, // FE1A..FE1F 8117 INHERITED, // FE20..FE2D 8118 CYRILLIC, // FE2E..FE2F 8119 COMMON, // FE30..FE52 8120 UNKNOWN, // FE53 8121 COMMON, // FE54..FE66 8122 UNKNOWN, // FE67 8123 COMMON, // FE68..FE6B 8124 UNKNOWN, // FE6C..FE6F 8125 ARABIC, // FE70..FE74 8126 UNKNOWN, // FE75 8127 ARABIC, // FE76..FEFC 8128 UNKNOWN, // FEFD..FEFE 8129 COMMON, // FEFF 8130 UNKNOWN, // FF00 8131 COMMON, // FF01..FF20 8132 LATIN, // FF21..FF3A 8133 COMMON, // FF3B..FF40 8134 LATIN, // FF41..FF5A 8135 COMMON, // FF5B..FF65 8136 KATAKANA, // FF66..FF6F 8137 COMMON, // FF70 8138 KATAKANA, // FF71..FF9D 8139 COMMON, // FF9E..FF9F 8140 HANGUL, // FFA0..FFBE 8141 UNKNOWN, // FFBF..FFC1 8142 HANGUL, // FFC2..FFC7 8143 UNKNOWN, // FFC8..FFC9 8144 HANGUL, // FFCA..FFCF 8145 UNKNOWN, // FFD0..FFD1 8146 HANGUL, // FFD2..FFD7 8147 UNKNOWN, // FFD8..FFD9 8148 HANGUL, // FFDA..FFDC 8149 UNKNOWN, // FFDD..FFDF 8150 COMMON, // FFE0..FFE6 8151 UNKNOWN, // FFE7 8152 COMMON, // FFE8..FFEE 8153 UNKNOWN, // FFEF..FFF8 8154 COMMON, // FFF9..FFFD 8155 UNKNOWN, // FFFE..FFFF 8156 LINEAR_B, // 10000..1000B 8157 UNKNOWN, // 1000C 8158 LINEAR_B, // 1000D..10026 8159 UNKNOWN, // 10027 8160 LINEAR_B, // 10028..1003A 8161 UNKNOWN, // 1003B 8162 LINEAR_B, // 1003C..1003D 8163 UNKNOWN, // 1003E 8164 LINEAR_B, // 1003F..1004D 8165 UNKNOWN, // 1004E..1004F 8166 LINEAR_B, // 10050..1005D 8167 UNKNOWN, // 1005E..1007F 8168 LINEAR_B, // 10080..100FA 8169 UNKNOWN, // 100FB..100FF 8170 COMMON, // 10100..10102 8171 UNKNOWN, // 10103..10106 8172 COMMON, // 10107..10133 8173 UNKNOWN, // 10134..10136 
8174 COMMON, // 10137..1013F 8175 GREEK, // 10140..1018E 8176 UNKNOWN, // 1018F 8177 COMMON, // 10190..1019C 8178 UNKNOWN, // 1019D..1019F 8179 GREEK, // 101A0 8180 UNKNOWN, // 101A1..101CF 8181 COMMON, // 101D0..101FC 8182 INHERITED, // 101FD 8183 UNKNOWN, // 101FE..1027F 8184 LYCIAN, // 10280..1029C 8185 UNKNOWN, // 1029D..1029F 8186 CARIAN, // 102A0..102D0 8187 UNKNOWN, // 102D1..102DF 8188 INHERITED, // 102E0 8189 COMMON, // 102E1..102FB 8190 UNKNOWN, // 102FC..102FF 8191 OLD_ITALIC, // 10300..10323 8192 UNKNOWN, // 10324..1032C 8193 OLD_ITALIC, // 1032D..1032F 8194 GOTHIC, // 10330..1034A 8195 UNKNOWN, // 1034B..1034F 8196 OLD_PERMIC, // 10350..1037A 8197 UNKNOWN, // 1037B..1037F 8198 UGARITIC, // 10380..1039D 8199 UNKNOWN, // 1039E 8200 UGARITIC, // 1039F 8201 OLD_PERSIAN, // 103A0..103C3 8202 UNKNOWN, // 103C4..103C7 8203 OLD_PERSIAN, // 103C8..103D5 8204 UNKNOWN, // 103D6..103FF 8205 DESERET, // 10400..1044F 8206 SHAVIAN, // 10450..1047F 8207 OSMANYA, // 10480..1049D 8208 UNKNOWN, // 1049E..1049F 8209 OSMANYA, // 104A0..104A9 8210 UNKNOWN, // 104AA..104AF 8211 OSAGE, // 104B0..104D3 8212 UNKNOWN, // 104D4..104D7 8213 OSAGE, // 104D8..104FB 8214 UNKNOWN, // 104FC..104FF 8215 ELBASAN, // 10500..10527 8216 UNKNOWN, // 10528..1052F 8217 CAUCASIAN_ALBANIAN, // 10530..10563 8218 UNKNOWN, // 10564..1056E 8219 CAUCASIAN_ALBANIAN, // 1056F 8220 VITHKUQI, // 10570..1057A 8221 UNKNOWN, // 1057B 8222 VITHKUQI, // 1057C..1058A 8223 UNKNOWN, // 1058B 8224 VITHKUQI, // 1058C..10592 8225 UNKNOWN, // 10593 8226 VITHKUQI, // 10594..10595 8227 UNKNOWN, // 10596 8228 VITHKUQI, // 10597..105A1 8229 UNKNOWN, // 105A2 8230 VITHKUQI, // 105A3..105B1 8231 UNKNOWN, // 105B2 8232 VITHKUQI, // 105B3..105B9 8233 UNKNOWN, // 105BA 8234 VITHKUQI, // 105BB..105BC 8235 UNKNOWN, // 105BD..105BF 8236 TODHRI, // 105C0..105F3 8237 UNKNOWN, // 105F4..105FF 8238 LINEAR_A, // 10600..10736 8239 UNKNOWN, // 10737..1073F 8240 LINEAR_A, // 10740..10755 8241 UNKNOWN, // 10756..1075F 8242 LINEAR_A, // 
10760..10767 8243 UNKNOWN, // 10768..1077F 8244 LATIN, // 10780..10785 8245 UNKNOWN, // 10786 8246 LATIN, // 10787..107B0 8247 UNKNOWN, // 107B1 8248 LATIN, // 107B2..107BA 8249 UNKNOWN, // 107BB..107FF 8250 CYPRIOT, // 10800..10805 8251 UNKNOWN, // 10806..10807 8252 CYPRIOT, // 10808 8253 UNKNOWN, // 10809 8254 CYPRIOT, // 1080A..10835 8255 UNKNOWN, // 10836 8256 CYPRIOT, // 10837..10838 8257 UNKNOWN, // 10839..1083B 8258 CYPRIOT, // 1083C 8259 UNKNOWN, // 1083D..1083E 8260 CYPRIOT, // 1083F 8261 IMPERIAL_ARAMAIC, // 10840..10855 8262 UNKNOWN, // 10856 8263 IMPERIAL_ARAMAIC, // 10857..1085F 8264 PALMYRENE, // 10860..1087F 8265 NABATAEAN, // 10880..1089E 8266 UNKNOWN, // 1089F..108A6 8267 NABATAEAN, // 108A7..108AF 8268 UNKNOWN, // 108B0..108DF 8269 HATRAN, // 108E0..108F2 8270 UNKNOWN, // 108F3 8271 HATRAN, // 108F4..108F5 8272 UNKNOWN, // 108F6..108FA 8273 HATRAN, // 108FB..108FF 8274 PHOENICIAN, // 10900..1091B 8275 UNKNOWN, // 1091C..1091E 8276 PHOENICIAN, // 1091F 8277 LYDIAN, // 10920..10939 8278 UNKNOWN, // 1093A..1093E 8279 LYDIAN, // 1093F 8280 UNKNOWN, // 10940..1097F 8281 MEROITIC_HIEROGLYPHS, // 10980..1099F 8282 MEROITIC_CURSIVE, // 109A0..109B7 8283 UNKNOWN, // 109B8..109BB 8284 MEROITIC_CURSIVE, // 109BC..109CF 8285 UNKNOWN, // 109D0..109D1 8286 MEROITIC_CURSIVE, // 109D2..109FF 8287 KHAROSHTHI, // 10A00..10A03 8288 UNKNOWN, // 10A04 8289 KHAROSHTHI, // 10A05..10A06 8290 UNKNOWN, // 10A07..10A0B 8291 KHAROSHTHI, // 10A0C..10A13 8292 UNKNOWN, // 10A14 8293 KHAROSHTHI, // 10A15..10A17 8294 UNKNOWN, // 10A18 8295 KHAROSHTHI, // 10A19..10A35 8296 UNKNOWN, // 10A36..10A37 8297 KHAROSHTHI, // 10A38..10A3A 8298 UNKNOWN, // 10A3B..10A3E 8299 KHAROSHTHI, // 10A3F..10A48 8300 UNKNOWN, // 10A49..10A4F 8301 KHAROSHTHI, // 10A50..10A58 8302 UNKNOWN, // 10A59..10A5F 8303 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8304 OLD_NORTH_ARABIAN, // 10A80..10A9F 8305 UNKNOWN, // 10AA0..10ABF 8306 MANICHAEAN, // 10AC0..10AE6 8307 UNKNOWN, // 10AE7..10AEA 8308 MANICHAEAN, // 
10AEB..10AF6 8309 UNKNOWN, // 10AF7..10AFF 8310 AVESTAN, // 10B00..10B35 8311 UNKNOWN, // 10B36..10B38 8312 AVESTAN, // 10B39..10B3F 8313 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8314 UNKNOWN, // 10B56..10B57 8315 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8316 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8317 UNKNOWN, // 10B73..10B77 8318 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8319 PSALTER_PAHLAVI, // 10B80..10B91 8320 UNKNOWN, // 10B92..10B98 8321 PSALTER_PAHLAVI, // 10B99..10B9C 8322 UNKNOWN, // 10B9D..10BA8 8323 PSALTER_PAHLAVI, // 10BA9..10BAF 8324 UNKNOWN, // 10BB0..10BFF 8325 OLD_TURKIC, // 10C00..10C48 8326 UNKNOWN, // 10C49..10C7F 8327 OLD_HUNGARIAN, // 10C80..10CB2 8328 UNKNOWN, // 10CB3..10CBF 8329 OLD_HUNGARIAN, // 10CC0..10CF2 8330 UNKNOWN, // 10CF3..10CF9 8331 OLD_HUNGARIAN, // 10CFA..10CFF 8332 HANIFI_ROHINGYA, // 10D00..10D27 8333 UNKNOWN, // 10D28..10D2F 8334 HANIFI_ROHINGYA, // 10D30..10D39 8335 UNKNOWN, // 10D3A..10D3F 8336 GARAY, // 10D40..10D65 8337 UNKNOWN, // 10D66..10D68 8338 GARAY, // 10D69..10D85 8339 UNKNOWN, // 10D86..10D8D 8340 GARAY, // 10D8E..10D8F 8341 UNKNOWN, // 10D90..10E5F 8342 ARABIC, // 10E60..10E7E 8343 UNKNOWN, // 10E7F 8344 YEZIDI, // 10E80..10EA9 8345 UNKNOWN, // 10EAA 8346 YEZIDI, // 10EAB..10EAD 8347 UNKNOWN, // 10EAE..10EAF 8348 YEZIDI, // 10EB0..10EB1 8349 UNKNOWN, // 10EB2..10EC1 8350 ARABIC, // 10EC2..10EC4 8351 UNKNOWN, // 10EC5..10EFB 8352 ARABIC, // 10EFC..10EFF 8353 OLD_SOGDIAN, // 10F00..10F27 8354 UNKNOWN, // 10F28..10F2F 8355 SOGDIAN, // 10F30..10F59 8356 UNKNOWN, // 10F5A..10F6F 8357 OLD_UYGHUR, // 10F70..10F89 8358 UNKNOWN, // 10F8A..10FAF 8359 CHORASMIAN, // 10FB0..10FCB 8360 UNKNOWN, // 10FCC..10FDF 8361 ELYMAIC, // 10FE0..10FF6 8362 UNKNOWN, // 10FF7..10FFF 8363 BRAHMI, // 11000..1104D 8364 UNKNOWN, // 1104E..11051 8365 BRAHMI, // 11052..11075 8366 UNKNOWN, // 11076..1107E 8367 BRAHMI, // 1107F 8368 KAITHI, // 11080..110C2 8369 UNKNOWN, // 110C3..110CC 8370 KAITHI, // 110CD 8371 UNKNOWN, // 110CE..110CF 8372 
SORA_SOMPENG, // 110D0..110E8 8373 UNKNOWN, // 110E9..110EF 8374 SORA_SOMPENG, // 110F0..110F9 8375 UNKNOWN, // 110FA..110FF 8376 CHAKMA, // 11100..11134 8377 UNKNOWN, // 11135 8378 CHAKMA, // 11136..11147 8379 UNKNOWN, // 11148..1114F 8380 MAHAJANI, // 11150..11176 8381 UNKNOWN, // 11177..1117F 8382 SHARADA, // 11180..111DF 8383 UNKNOWN, // 111E0 8384 SINHALA, // 111E1..111F4 8385 UNKNOWN, // 111F5..111FF 8386 KHOJKI, // 11200..11211 8387 UNKNOWN, // 11212 8388 KHOJKI, // 11213..11241 8389 UNKNOWN, // 11242..1127F 8390 MULTANI, // 11280..11286 8391 UNKNOWN, // 11287 8392 MULTANI, // 11288 8393 UNKNOWN, // 11289 8394 MULTANI, // 1128A..1128D 8395 UNKNOWN, // 1128E 8396 MULTANI, // 1128F..1129D 8397 UNKNOWN, // 1129E 8398 MULTANI, // 1129F..112A9 8399 UNKNOWN, // 112AA..112AF 8400 KHUDAWADI, // 112B0..112EA 8401 UNKNOWN, // 112EB..112EF 8402 KHUDAWADI, // 112F0..112F9 8403 UNKNOWN, // 112FA..112FF 8404 GRANTHA, // 11300..11303 8405 UNKNOWN, // 11304 8406 GRANTHA, // 11305..1130C 8407 UNKNOWN, // 1130D..1130E 8408 GRANTHA, // 1130F..11310 8409 UNKNOWN, // 11311..11312 8410 GRANTHA, // 11313..11328 8411 UNKNOWN, // 11329 8412 GRANTHA, // 1132A..11330 8413 UNKNOWN, // 11331 8414 GRANTHA, // 11332..11333 8415 UNKNOWN, // 11334 8416 GRANTHA, // 11335..11339 8417 UNKNOWN, // 1133A 8418 INHERITED, // 1133B 8419 GRANTHA, // 1133C..11344 8420 UNKNOWN, // 11345..11346 8421 GRANTHA, // 11347..11348 8422 UNKNOWN, // 11349..1134A 8423 GRANTHA, // 1134B..1134D 8424 UNKNOWN, // 1134E..1134F 8425 GRANTHA, // 11350 8426 UNKNOWN, // 11351..11356 8427 GRANTHA, // 11357 8428 UNKNOWN, // 11358..1135C 8429 GRANTHA, // 1135D..11363 8430 UNKNOWN, // 11364..11365 8431 GRANTHA, // 11366..1136C 8432 UNKNOWN, // 1136D..1136F 8433 GRANTHA, // 11370..11374 8434 UNKNOWN, // 11375..1137F 8435 TULU_TIGALARI, // 11380..11389 8436 UNKNOWN, // 1138A 8437 TULU_TIGALARI, // 1138B 8438 UNKNOWN, // 1138C..1138D 8439 TULU_TIGALARI, // 1138E 8440 UNKNOWN, // 1138F 8441 TULU_TIGALARI, // 11390..113B5 8442 
UNKNOWN, // 113B6 8443 TULU_TIGALARI, // 113B7..113C0 8444 UNKNOWN, // 113C1 8445 TULU_TIGALARI, // 113C2 8446 UNKNOWN, // 113C3..113C4 8447 TULU_TIGALARI, // 113C5 8448 UNKNOWN, // 113C6 8449 TULU_TIGALARI, // 113C7..113CA 8450 UNKNOWN, // 113CB 8451 TULU_TIGALARI, // 113CC..113D5 8452 UNKNOWN, // 113D6 8453 TULU_TIGALARI, // 113D7..113D8 8454 UNKNOWN, // 113D9..113E0 8455 TULU_TIGALARI, // 113E1..113E2 8456 UNKNOWN, // 113E3..113FF 8457 NEWA, // 11400..1145B 8458 UNKNOWN, // 1145C 8459 NEWA, // 1145D..11461 8460 UNKNOWN, // 11462..1147F 8461 TIRHUTA, // 11480..114C7 8462 UNKNOWN, // 114C8..114CF 8463 TIRHUTA, // 114D0..114D9 8464 UNKNOWN, // 114DA..1157F 8465 SIDDHAM, // 11580..115B5 8466 UNKNOWN, // 115B6..115B7 8467 SIDDHAM, // 115B8..115DD 8468 UNKNOWN, // 115DE..115FF 8469 MODI, // 11600..11644 8470 UNKNOWN, // 11645..1164F 8471 MODI, // 11650..11659 8472 UNKNOWN, // 1165A..1165F 8473 MONGOLIAN, // 11660..1166C 8474 UNKNOWN, // 1166D..1167F 8475 TAKRI, // 11680..116B9 8476 UNKNOWN, // 116BA..116BF 8477 TAKRI, // 116C0..116C9 8478 UNKNOWN, // 116CA..116CF 8479 MYANMAR, // 116D0..116E3 8480 UNKNOWN, // 116E4..116FF 8481 AHOM, // 11700..1171A 8482 UNKNOWN, // 1171B..1171C 8483 AHOM, // 1171D..1172B 8484 UNKNOWN, // 1172C..1172F 8485 AHOM, // 11730..11746 8486 UNKNOWN, // 11747..117FF 8487 DOGRA, // 11800..1183B 8488 UNKNOWN, // 1183C..1189F 8489 WARANG_CITI, // 118A0..118F2 8490 UNKNOWN, // 118F3..118FE 8491 WARANG_CITI, // 118FF 8492 DIVES_AKURU, // 11900..11906 8493 UNKNOWN, // 11907..11908 8494 DIVES_AKURU, // 11909 8495 UNKNOWN, // 1190A..1190B 8496 DIVES_AKURU, // 1190C..11913 8497 UNKNOWN, // 11914 8498 DIVES_AKURU, // 11915..11916 8499 UNKNOWN, // 11917 8500 DIVES_AKURU, // 11918..11935 8501 UNKNOWN, // 11936 8502 DIVES_AKURU, // 11937..11938 8503 UNKNOWN, // 11939..1193A 8504 DIVES_AKURU, // 1193B..11946 8505 UNKNOWN, // 11947..1194F 8506 DIVES_AKURU, // 11950..11959 8507 UNKNOWN, // 1195A..1199F 8508 NANDINAGARI, // 119A0..119A7 8509 UNKNOWN, // 
119A8..119A9 8510 NANDINAGARI, // 119AA..119D7 8511 UNKNOWN, // 119D8..119D9 8512 NANDINAGARI, // 119DA..119E4 8513 UNKNOWN, // 119E5..119FF 8514 ZANABAZAR_SQUARE, // 11A00..11A47 8515 UNKNOWN, // 11A48..11A4F 8516 SOYOMBO, // 11A50..11AA2 8517 UNKNOWN, // 11AA3..11AAF 8518 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8519 PAU_CIN_HAU, // 11AC0..11AF8 8520 UNKNOWN, // 11AF9..11AFF 8521 DEVANAGARI, // 11B00..11B09 8522 UNKNOWN, // 11B0A..11BBF 8523 SUNUWAR, // 11BC0..11BE1 8524 UNKNOWN, // 11BE2..11BEF 8525 SUNUWAR, // 11BF0..11BF9 8526 UNKNOWN, // 11BFA..11BFF 8527 BHAIKSUKI, // 11C00..11C08 8528 UNKNOWN, // 11C09 8529 BHAIKSUKI, // 11C0A..11C36 8530 UNKNOWN, // 11C37 8531 BHAIKSUKI, // 11C38..11C45 8532 UNKNOWN, // 11C46..11C4F 8533 BHAIKSUKI, // 11C50..11C6C 8534 UNKNOWN, // 11C6D..11C6F 8535 MARCHEN, // 11C70..11C8F 8536 UNKNOWN, // 11C90..11C91 8537 MARCHEN, // 11C92..11CA7 8538 UNKNOWN, // 11CA8 8539 MARCHEN, // 11CA9..11CB6 8540 UNKNOWN, // 11CB7..11CFF 8541 MASARAM_GONDI, // 11D00..11D06 8542 UNKNOWN, // 11D07 8543 MASARAM_GONDI, // 11D08..11D09 8544 UNKNOWN, // 11D0A 8545 MASARAM_GONDI, // 11D0B..11D36 8546 UNKNOWN, // 11D37..11D39 8547 MASARAM_GONDI, // 11D3A 8548 UNKNOWN, // 11D3B 8549 MASARAM_GONDI, // 11D3C..11D3D 8550 UNKNOWN, // 11D3E 8551 MASARAM_GONDI, // 11D3F..11D47 8552 UNKNOWN, // 11D48..11D4F 8553 MASARAM_GONDI, // 11D50..11D59 8554 UNKNOWN, // 11D5A..11D5F 8555 GUNJALA_GONDI, // 11D60..11D65 8556 UNKNOWN, // 11D66 8557 GUNJALA_GONDI, // 11D67..11D68 8558 UNKNOWN, // 11D69 8559 GUNJALA_GONDI, // 11D6A..11D8E 8560 UNKNOWN, // 11D8F 8561 GUNJALA_GONDI, // 11D90..11D91 8562 UNKNOWN, // 11D92 8563 GUNJALA_GONDI, // 11D93..11D98 8564 UNKNOWN, // 11D99..11D9F 8565 GUNJALA_GONDI, // 11DA0..11DA9 8566 UNKNOWN, // 11DAA..11EDF 8567 MAKASAR, // 11EE0..11EF8 8568 UNKNOWN, // 11EF9..11EFF 8569 KAWI, // 11F00..11F10 8570 UNKNOWN, // 11F11 8571 KAWI, // 11F12..11F3A 8572 UNKNOWN, // 11F3B..11F3D 8573 KAWI, // 11F3E..11F5A 8574 UNKNOWN, // 11F5B..11FAF 8575 LISU, // 
11FB0 8576 UNKNOWN, // 11FB1..11FBF 8577 TAMIL, // 11FC0..11FF1 8578 UNKNOWN, // 11FF2..11FFE 8579 TAMIL, // 11FFF 8580 CUNEIFORM, // 12000..12399 8581 UNKNOWN, // 1239A..123FF 8582 CUNEIFORM, // 12400..1246E 8583 UNKNOWN, // 1246F 8584 CUNEIFORM, // 12470..12474 8585 UNKNOWN, // 12475..1247F 8586 CUNEIFORM, // 12480..12543 8587 UNKNOWN, // 12544..12F8F 8588 CYPRO_MINOAN, // 12F90..12FF2 8589 UNKNOWN, // 12FF3..12FFF 8590 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8591 UNKNOWN, // 13456..1345F 8592 EGYPTIAN_HIEROGLYPHS, // 13460..143FA 8593 UNKNOWN, // 143FB..143FF 8594 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8595 UNKNOWN, // 14647..160FF 8596 GURUNG_KHEMA, // 16100..16139 8597 UNKNOWN, // 1613A..167FF 8598 BAMUM, // 16800..16A38 8599 UNKNOWN, // 16A39..16A3F 8600 MRO, // 16A40..16A5E 8601 UNKNOWN, // 16A5F 8602 MRO, // 16A60..16A69 8603 UNKNOWN, // 16A6A..16A6D 8604 MRO, // 16A6E..16A6F 8605 TANGSA, // 16A70..16ABE 8606 UNKNOWN, // 16ABF 8607 TANGSA, // 16AC0..16AC9 8608 UNKNOWN, // 16ACA..16ACF 8609 BASSA_VAH, // 16AD0..16AED 8610 UNKNOWN, // 16AEE..16AEF 8611 BASSA_VAH, // 16AF0..16AF5 8612 UNKNOWN, // 16AF6..16AFF 8613 PAHAWH_HMONG, // 16B00..16B45 8614 UNKNOWN, // 16B46..16B4F 8615 PAHAWH_HMONG, // 16B50..16B59 8616 UNKNOWN, // 16B5A 8617 PAHAWH_HMONG, // 16B5B..16B61 8618 UNKNOWN, // 16B62 8619 PAHAWH_HMONG, // 16B63..16B77 8620 UNKNOWN, // 16B78..16B7C 8621 PAHAWH_HMONG, // 16B7D..16B8F 8622 UNKNOWN, // 16B90..16D3F 8623 KIRAT_RAI, // 16D40..16D79 8624 UNKNOWN, // 16D7A..16E3F 8625 MEDEFAIDRIN, // 16E40..16E9A 8626 UNKNOWN, // 16E9B..16EFF 8627 MIAO, // 16F00..16F4A 8628 UNKNOWN, // 16F4B..16F4E 8629 MIAO, // 16F4F..16F87 8630 UNKNOWN, // 16F88..16F8E 8631 MIAO, // 16F8F..16F9F 8632 UNKNOWN, // 16FA0..16FDF 8633 TANGUT, // 16FE0 8634 NUSHU, // 16FE1 8635 HAN, // 16FE2..16FE3 8636 KHITAN_SMALL_SCRIPT, // 16FE4 8637 UNKNOWN, // 16FE5..16FEF 8638 HAN, // 16FF0..16FF1 8639 UNKNOWN, // 16FF2..16FFF 8640 TANGUT, // 17000..187F7 8641 UNKNOWN, // 187F8..187FF 8642 TANGUT, 
// 18800..18AFF 8643 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8644 UNKNOWN, // 18CD6..18CFE 8645 KHITAN_SMALL_SCRIPT, // 18CFF 8646 TANGUT, // 18D00..18D08 8647 UNKNOWN, // 18D09..1AFEF 8648 KATAKANA, // 1AFF0..1AFF3 8649 UNKNOWN, // 1AFF4 8650 KATAKANA, // 1AFF5..1AFFB 8651 UNKNOWN, // 1AFFC 8652 KATAKANA, // 1AFFD..1AFFE 8653 UNKNOWN, // 1AFFF 8654 KATAKANA, // 1B000 8655 HIRAGANA, // 1B001..1B11F 8656 KATAKANA, // 1B120..1B122 8657 UNKNOWN, // 1B123..1B131 8658 HIRAGANA, // 1B132 8659 UNKNOWN, // 1B133..1B14F 8660 HIRAGANA, // 1B150..1B152 8661 UNKNOWN, // 1B153..1B154 8662 KATAKANA, // 1B155 8663 UNKNOWN, // 1B156..1B163 8664 KATAKANA, // 1B164..1B167 8665 UNKNOWN, // 1B168..1B16F 8666 NUSHU, // 1B170..1B2FB 8667 UNKNOWN, // 1B2FC..1BBFF 8668 DUPLOYAN, // 1BC00..1BC6A 8669 UNKNOWN, // 1BC6B..1BC6F 8670 DUPLOYAN, // 1BC70..1BC7C 8671 UNKNOWN, // 1BC7D..1BC7F 8672 DUPLOYAN, // 1BC80..1BC88 8673 UNKNOWN, // 1BC89..1BC8F 8674 DUPLOYAN, // 1BC90..1BC99 8675 UNKNOWN, // 1BC9A..1BC9B 8676 DUPLOYAN, // 1BC9C..1BC9F 8677 COMMON, // 1BCA0..1BCA3 8678 UNKNOWN, // 1BCA4..1CBFF 8679 COMMON, // 1CC00..1CCF9 8680 UNKNOWN, // 1CCFA..1CCFF 8681 COMMON, // 1CD00..1CEB3 8682 UNKNOWN, // 1CEB4..1CEFF 8683 INHERITED, // 1CF00..1CF2D 8684 UNKNOWN, // 1CF2E..1CF2F 8685 INHERITED, // 1CF30..1CF46 8686 UNKNOWN, // 1CF47..1CF4F 8687 COMMON, // 1CF50..1CFC3 8688 UNKNOWN, // 1CFC4..1CFFF 8689 COMMON, // 1D000..1D0F5 8690 UNKNOWN, // 1D0F6..1D0FF 8691 COMMON, // 1D100..1D126 8692 UNKNOWN, // 1D127..1D128 8693 COMMON, // 1D129..1D166 8694 INHERITED, // 1D167..1D169 8695 COMMON, // 1D16A..1D17A 8696 INHERITED, // 1D17B..1D182 8697 COMMON, // 1D183..1D184 8698 INHERITED, // 1D185..1D18B 8699 COMMON, // 1D18C..1D1A9 8700 INHERITED, // 1D1AA..1D1AD 8701 COMMON, // 1D1AE..1D1EA 8702 UNKNOWN, // 1D1EB..1D1FF 8703 GREEK, // 1D200..1D245 8704 UNKNOWN, // 1D246..1D2BF 8705 COMMON, // 1D2C0..1D2D3 8706 UNKNOWN, // 1D2D4..1D2DF 8707 COMMON, // 1D2E0..1D2F3 8708 UNKNOWN, // 1D2F4..1D2FF 8709 COMMON, // 
1D300..1D356 8710 UNKNOWN, // 1D357..1D35F 8711 COMMON, // 1D360..1D378 8712 UNKNOWN, // 1D379..1D3FF 8713 COMMON, // 1D400..1D454 8714 UNKNOWN, // 1D455 8715 COMMON, // 1D456..1D49C 8716 UNKNOWN, // 1D49D 8717 COMMON, // 1D49E..1D49F 8718 UNKNOWN, // 1D4A0..1D4A1 8719 COMMON, // 1D4A2 8720 UNKNOWN, // 1D4A3..1D4A4 8721 COMMON, // 1D4A5..1D4A6 8722 UNKNOWN, // 1D4A7..1D4A8 8723 COMMON, // 1D4A9..1D4AC 8724 UNKNOWN, // 1D4AD 8725 COMMON, // 1D4AE..1D4B9 8726 UNKNOWN, // 1D4BA 8727 COMMON, // 1D4BB 8728 UNKNOWN, // 1D4BC 8729 COMMON, // 1D4BD..1D4C3 8730 UNKNOWN, // 1D4C4 8731 COMMON, // 1D4C5..1D505 8732 UNKNOWN, // 1D506 8733 COMMON, // 1D507..1D50A 8734 UNKNOWN, // 1D50B..1D50C 8735 COMMON, // 1D50D..1D514 8736 UNKNOWN, // 1D515 8737 COMMON, // 1D516..1D51C 8738 UNKNOWN, // 1D51D 8739 COMMON, // 1D51E..1D539 8740 UNKNOWN, // 1D53A 8741 COMMON, // 1D53B..1D53E 8742 UNKNOWN, // 1D53F 8743 COMMON, // 1D540..1D544 8744 UNKNOWN, // 1D545 8745 COMMON, // 1D546 8746 UNKNOWN, // 1D547..1D549 8747 COMMON, // 1D54A..1D550 8748 UNKNOWN, // 1D551 8749 COMMON, // 1D552..1D6A5 8750 UNKNOWN, // 1D6A6..1D6A7 8751 COMMON, // 1D6A8..1D7CB 8752 UNKNOWN, // 1D7CC..1D7CD 8753 COMMON, // 1D7CE..1D7FF 8754 SIGNWRITING, // 1D800..1DA8B 8755 UNKNOWN, // 1DA8C..1DA9A 8756 SIGNWRITING, // 1DA9B..1DA9F 8757 UNKNOWN, // 1DAA0 8758 SIGNWRITING, // 1DAA1..1DAAF 8759 UNKNOWN, // 1DAB0..1DEFF 8760 LATIN, // 1DF00..1DF1E 8761 UNKNOWN, // 1DF1F..1DF24 8762 LATIN, // 1DF25..1DF2A 8763 UNKNOWN, // 1DF2B..1DFFF 8764 GLAGOLITIC, // 1E000..1E006 8765 UNKNOWN, // 1E007 8766 GLAGOLITIC, // 1E008..1E018 8767 UNKNOWN, // 1E019..1E01A 8768 GLAGOLITIC, // 1E01B..1E021 8769 UNKNOWN, // 1E022 8770 GLAGOLITIC, // 1E023..1E024 8771 UNKNOWN, // 1E025 8772 GLAGOLITIC, // 1E026..1E02A 8773 UNKNOWN, // 1E02B..1E02F 8774 CYRILLIC, // 1E030..1E06D 8775 UNKNOWN, // 1E06E..1E08E 8776 CYRILLIC, // 1E08F 8777 UNKNOWN, // 1E090..1E0FF 8778 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8779 UNKNOWN, // 1E12D..1E12F 8780 
NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8781 UNKNOWN, // 1E13E..1E13F 8782 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8783 UNKNOWN, // 1E14A..1E14D 8784 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8785 UNKNOWN, // 1E150..1E28F 8786 TOTO, // 1E290..1E2AE 8787 UNKNOWN, // 1E2AF..1E2BF 8788 WANCHO, // 1E2C0..1E2F9 8789 UNKNOWN, // 1E2FA..1E2FE 8790 WANCHO, // 1E2FF 8791 UNKNOWN, // 1E300..1E4CF 8792 NAG_MUNDARI, // 1E4D0..1E4F9 8793 UNKNOWN, // 1E4FA..1E5CF 8794 OL_ONAL, // 1E5D0..1E5FA 8795 UNKNOWN, // 1E5FB..1E5FE 8796 OL_ONAL, // 1E5FF 8797 UNKNOWN, // 1E600..1E7DF 8798 ETHIOPIC, // 1E7E0..1E7E6 8799 UNKNOWN, // 1E7E7 8800 ETHIOPIC, // 1E7E8..1E7EB 8801 UNKNOWN, // 1E7EC 8802 ETHIOPIC, // 1E7ED..1E7EE 8803 UNKNOWN, // 1E7EF 8804 ETHIOPIC, // 1E7F0..1E7FE 8805 UNKNOWN, // 1E7FF 8806 MENDE_KIKAKUI, // 1E800..1E8C4 8807 UNKNOWN, // 1E8C5..1E8C6 8808 MENDE_KIKAKUI, // 1E8C7..1E8D6 8809 UNKNOWN, // 1E8D7..1E8FF 8810 ADLAM, // 1E900..1E94B 8811 UNKNOWN, // 1E94C..1E94F 8812 ADLAM, // 1E950..1E959 8813 UNKNOWN, // 1E95A..1E95D 8814 ADLAM, // 1E95E..1E95F 8815 UNKNOWN, // 1E960..1EC70 8816 COMMON, // 1EC71..1ECB4 8817 UNKNOWN, // 1ECB5..1ED00 8818 COMMON, // 1ED01..1ED3D 8819 UNKNOWN, // 1ED3E..1EDFF 8820 ARABIC, // 1EE00..1EE03 8821 UNKNOWN, // 1EE04 8822 ARABIC, // 1EE05..1EE1F 8823 UNKNOWN, // 1EE20 8824 ARABIC, // 1EE21..1EE22 8825 UNKNOWN, // 1EE23 8826 ARABIC, // 1EE24 8827 UNKNOWN, // 1EE25..1EE26 8828 ARABIC, // 1EE27 8829 UNKNOWN, // 1EE28 8830 ARABIC, // 1EE29..1EE32 8831 UNKNOWN, // 1EE33 8832 ARABIC, // 1EE34..1EE37 8833 UNKNOWN, // 1EE38 8834 ARABIC, // 1EE39 8835 UNKNOWN, // 1EE3A 8836 ARABIC, // 1EE3B 8837 UNKNOWN, // 1EE3C..1EE41 8838 ARABIC, // 1EE42 8839 UNKNOWN, // 1EE43..1EE46 8840 ARABIC, // 1EE47 8841 UNKNOWN, // 1EE48 8842 ARABIC, // 1EE49 8843 UNKNOWN, // 1EE4A 8844 ARABIC, // 1EE4B 8845 UNKNOWN, // 1EE4C 8846 ARABIC, // 1EE4D..1EE4F 8847 UNKNOWN, // 1EE50 8848 ARABIC, // 1EE51..1EE52 8849 UNKNOWN, // 1EE53 8850 ARABIC, // 1EE54 8851 UNKNOWN, // 
1EE55..1EE56 8852 ARABIC, // 1EE57 8853 UNKNOWN, // 1EE58 8854 ARABIC, // 1EE59 8855 UNKNOWN, // 1EE5A 8856 ARABIC, // 1EE5B 8857 UNKNOWN, // 1EE5C 8858 ARABIC, // 1EE5D 8859 UNKNOWN, // 1EE5E 8860 ARABIC, // 1EE5F 8861 UNKNOWN, // 1EE60 8862 ARABIC, // 1EE61..1EE62 8863 UNKNOWN, // 1EE63 8864 ARABIC, // 1EE64 8865 UNKNOWN, // 1EE65..1EE66 8866 ARABIC, // 1EE67..1EE6A 8867 UNKNOWN, // 1EE6B 8868 ARABIC, // 1EE6C..1EE72 8869 UNKNOWN, // 1EE73 8870 ARABIC, // 1EE74..1EE77 8871 UNKNOWN, // 1EE78 8872 ARABIC, // 1EE79..1EE7C 8873 UNKNOWN, // 1EE7D 8874 ARABIC, // 1EE7E 8875 UNKNOWN, // 1EE7F 8876 ARABIC, // 1EE80..1EE89 8877 UNKNOWN, // 1EE8A 8878 ARABIC, // 1EE8B..1EE9B 8879 UNKNOWN, // 1EE9C..1EEA0 8880 ARABIC, // 1EEA1..1EEA3 8881 UNKNOWN, // 1EEA4 8882 ARABIC, // 1EEA5..1EEA9 8883 UNKNOWN, // 1EEAA 8884 ARABIC, // 1EEAB..1EEBB 8885 UNKNOWN, // 1EEBC..1EEEF 8886 ARABIC, // 1EEF0..1EEF1 8887 UNKNOWN, // 1EEF2..1EFFF 8888 COMMON, // 1F000..1F02B 8889 UNKNOWN, // 1F02C..1F02F 8890 COMMON, // 1F030..1F093 8891 UNKNOWN, // 1F094..1F09F 8892 COMMON, // 1F0A0..1F0AE 8893 UNKNOWN, // 1F0AF..1F0B0 8894 COMMON, // 1F0B1..1F0BF 8895 UNKNOWN, // 1F0C0 8896 COMMON, // 1F0C1..1F0CF 8897 UNKNOWN, // 1F0D0 8898 COMMON, // 1F0D1..1F0F5 8899 UNKNOWN, // 1F0F6..1F0FF 8900 COMMON, // 1F100..1F1AD 8901 UNKNOWN, // 1F1AE..1F1E5 8902 COMMON, // 1F1E6..1F1FF 8903 HIRAGANA, // 1F200 8904 COMMON, // 1F201..1F202 8905 UNKNOWN, // 1F203..1F20F 8906 COMMON, // 1F210..1F23B 8907 UNKNOWN, // 1F23C..1F23F 8908 COMMON, // 1F240..1F248 8909 UNKNOWN, // 1F249..1F24F 8910 COMMON, // 1F250..1F251 8911 UNKNOWN, // 1F252..1F25F 8912 COMMON, // 1F260..1F265 8913 UNKNOWN, // 1F266..1F2FF 8914 COMMON, // 1F300..1F6D7 8915 UNKNOWN, // 1F6D8..1F6DB 8916 COMMON, // 1F6DC..1F6EC 8917 UNKNOWN, // 1F6ED..1F6EF 8918 COMMON, // 1F6F0..1F6FC 8919 UNKNOWN, // 1F6FD..1F6FF 8920 COMMON, // 1F700..1F776 8921 UNKNOWN, // 1F777..1F77A 8922 COMMON, // 1F77B..1F7D9 8923 UNKNOWN, // 1F7DA..1F7DF 8924 COMMON, // 1F7E0..1F7EB 
8925 UNKNOWN, // 1F7EC..1F7EF 8926 COMMON, // 1F7F0 8927 UNKNOWN, // 1F7F1..1F7FF 8928 COMMON, // 1F800..1F80B 8929 UNKNOWN, // 1F80C..1F80F 8930 COMMON, // 1F810..1F847 8931 UNKNOWN, // 1F848..1F84F 8932 COMMON, // 1F850..1F859 8933 UNKNOWN, // 1F85A..1F85F 8934 COMMON, // 1F860..1F887 8935 UNKNOWN, // 1F888..1F88F 8936 COMMON, // 1F890..1F8AD 8937 UNKNOWN, // 1F8AE..1F8AF 8938 COMMON, // 1F8B0..1F8BB 8939 UNKNOWN, // 1F8BC..1F8BF 8940 COMMON, // 1F8C0..1F8C1 8941 UNKNOWN, // 1F8C2..1F8FF 8942 COMMON, // 1F900..1FA53 8943 UNKNOWN, // 1FA54..1FA5F 8944 COMMON, // 1FA60..1FA6D 8945 UNKNOWN, // 1FA6E..1FA6F 8946 COMMON, // 1FA70..1FA7C 8947 UNKNOWN, // 1FA7D..1FA7F 8948 COMMON, // 1FA80..1FA89 8949 UNKNOWN, // 1FA8A..1FA8E 8950 COMMON, // 1FA8F..1FAC6 8951 UNKNOWN, // 1FAC7..1FACD 8952 COMMON, // 1FACE..1FADC 8953 UNKNOWN, // 1FADD..1FADE 8954 COMMON, // 1FADF..1FAE9 8955 UNKNOWN, // 1FAEA..1FAEF 8956 COMMON, // 1FAF0..1FAF8 8957 UNKNOWN, // 1FAF9..1FAFF 8958 COMMON, // 1FB00..1FB92 8959 UNKNOWN, // 1FB93 8960 COMMON, // 1FB94..1FBF9 8961 UNKNOWN, // 1FBFA..1FFFF 8962 HAN, // 20000..2A6DF 8963 UNKNOWN, // 2A6E0..2A6FF 8964 HAN, // 2A700..2B739 8965 UNKNOWN, // 2B73A..2B73F 8966 HAN, // 2B740..2B81D 8967 UNKNOWN, // 2B81E..2B81F 8968 HAN, // 2B820..2CEA1 8969 UNKNOWN, // 2CEA2..2CEAF 8970 HAN, // 2CEB0..2EBE0 8971 UNKNOWN, // 2EBE1..2EBEF 8972 HAN, // 2EBF0..2EE5D 8973 UNKNOWN, // 2EE5E..2F7FF 8974 HAN, // 2F800..2FA1D 8975 UNKNOWN, // 2FA1E..2FFFF 8976 HAN, // 30000..3134A 8977 UNKNOWN, // 3134B..3134F 8978 HAN, // 31350..323AF 8979 UNKNOWN, // 323B0..E0000 8980 COMMON, // E0001 8981 UNKNOWN, // E0002..E001F 8982 COMMON, // E0020..E007F 8983 UNKNOWN, // E0080..E00FF 8984 INHERITED, // E0100..E01EF 8985 UNKNOWN, // E01F0..10FFFF 8986 }; 8987 8988 private static final HashMap<String, Character.UnicodeScript> aliases; 8989 static { 8990 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 8991 aliases.put("ADLM", ADLAM); 8992 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 
8993 aliases.put("AHOM", AHOM); 8994 aliases.put("ARAB", ARABIC); 8995 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8996 aliases.put("ARMN", ARMENIAN); 8997 aliases.put("AVST", AVESTAN); 8998 aliases.put("BALI", BALINESE); 8999 aliases.put("BAMU", BAMUM); 9000 aliases.put("BASS", BASSA_VAH); 9001 aliases.put("BATK", BATAK); 9002 aliases.put("BENG", BENGALI); 9003 aliases.put("BHKS", BHAIKSUKI); 9004 aliases.put("BOPO", BOPOMOFO); 9005 aliases.put("BRAH", BRAHMI); 9006 aliases.put("BRAI", BRAILLE); 9007 aliases.put("BUGI", BUGINESE); 9008 aliases.put("BUHD", BUHID); 9009 aliases.put("CAKM", CHAKMA); 9010 aliases.put("CANS", CANADIAN_ABORIGINAL); 9011 aliases.put("CARI", CARIAN); 9012 aliases.put("CHAM", CHAM); 9013 aliases.put("CHER", CHEROKEE); 9014 aliases.put("CHRS", CHORASMIAN); 9015 aliases.put("COPT", COPTIC); 9016 aliases.put("CPMN", CYPRO_MINOAN); 9017 aliases.put("CPRT", CYPRIOT); 9018 aliases.put("CYRL", CYRILLIC); 9019 aliases.put("DEVA", DEVANAGARI); 9020 aliases.put("DIAK", DIVES_AKURU); 9021 aliases.put("DOGR", DOGRA); 9022 aliases.put("DSRT", DESERET); 9023 aliases.put("DUPL", DUPLOYAN); 9024 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 9025 aliases.put("ELBA", ELBASAN); 9026 aliases.put("ELYM", ELYMAIC); 9027 aliases.put("ETHI", ETHIOPIC); 9028 aliases.put("GARA", GARAY); 9029 aliases.put("GEOR", GEORGIAN); 9030 aliases.put("GLAG", GLAGOLITIC); 9031 aliases.put("GONG", GUNJALA_GONDI); 9032 aliases.put("GONM", MASARAM_GONDI); 9033 aliases.put("GOTH", GOTHIC); 9034 aliases.put("GRAN", GRANTHA); 9035 aliases.put("GREK", GREEK); 9036 aliases.put("GUJR", GUJARATI); 9037 aliases.put("GUKH", GURUNG_KHEMA); 9038 aliases.put("GURU", GURMUKHI); 9039 aliases.put("HANG", HANGUL); 9040 aliases.put("HANI", HAN); 9041 aliases.put("HANO", HANUNOO); 9042 aliases.put("HATR", HATRAN); 9043 aliases.put("HEBR", HEBREW); 9044 aliases.put("HIRA", HIRAGANA); 9045 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 9046 aliases.put("HMNG", PAHAWH_HMONG); 9047 aliases.put("HMNP", 
NYIAKENG_PUACHUE_HMONG); 9048 aliases.put("HUNG", OLD_HUNGARIAN); 9049 aliases.put("ITAL", OLD_ITALIC); 9050 aliases.put("JAVA", JAVANESE); 9051 aliases.put("KALI", KAYAH_LI); 9052 aliases.put("KANA", KATAKANA); 9053 aliases.put("KAWI", KAWI); 9054 aliases.put("KHAR", KHAROSHTHI); 9055 aliases.put("KHMR", KHMER); 9056 aliases.put("KHOJ", KHOJKI); 9057 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 9058 aliases.put("KNDA", KANNADA); 9059 aliases.put("KRAI", KIRAT_RAI); 9060 aliases.put("KTHI", KAITHI); 9061 aliases.put("LANA", TAI_THAM); 9062 aliases.put("LAOO", LAO); 9063 aliases.put("LATN", LATIN); 9064 aliases.put("LEPC", LEPCHA); 9065 aliases.put("LIMB", LIMBU); 9066 aliases.put("LINA", LINEAR_A); 9067 aliases.put("LINB", LINEAR_B); 9068 aliases.put("LISU", LISU); 9069 aliases.put("LYCI", LYCIAN); 9070 aliases.put("LYDI", LYDIAN); 9071 aliases.put("MAHJ", MAHAJANI); 9072 aliases.put("MAKA", MAKASAR); 9073 aliases.put("MAND", MANDAIC); 9074 aliases.put("MANI", MANICHAEAN); 9075 aliases.put("MARC", MARCHEN); 9076 aliases.put("MEDF", MEDEFAIDRIN); 9077 aliases.put("MEND", MENDE_KIKAKUI); 9078 aliases.put("MERC", MEROITIC_CURSIVE); 9079 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 9080 aliases.put("MLYM", MALAYALAM); 9081 aliases.put("MODI", MODI); 9082 aliases.put("MONG", MONGOLIAN); 9083 aliases.put("MROO", MRO); 9084 aliases.put("MTEI", MEETEI_MAYEK); 9085 aliases.put("MULT", MULTANI); 9086 aliases.put("MYMR", MYANMAR); 9087 aliases.put("NAGM", NAG_MUNDARI); 9088 aliases.put("NAND", NANDINAGARI); 9089 aliases.put("NARB", OLD_NORTH_ARABIAN); 9090 aliases.put("NBAT", NABATAEAN); 9091 aliases.put("NEWA", NEWA); 9092 aliases.put("NKOO", NKO); 9093 aliases.put("NSHU", NUSHU); 9094 aliases.put("OGAM", OGHAM); 9095 aliases.put("OLCK", OL_CHIKI); 9096 aliases.put("ONAO", OL_ONAL); 9097 aliases.put("ORKH", OLD_TURKIC); 9098 aliases.put("ORYA", ORIYA); 9099 aliases.put("OSGE", OSAGE); 9100 aliases.put("OSMA", OSMANYA); 9101 aliases.put("OUGR", OLD_UYGHUR); 9102 
aliases.put("PALM", PALMYRENE); 9103 aliases.put("PAUC", PAU_CIN_HAU); 9104 aliases.put("PERM", OLD_PERMIC); 9105 aliases.put("PHAG", PHAGS_PA); 9106 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 9107 aliases.put("PHLP", PSALTER_PAHLAVI); 9108 aliases.put("PHNX", PHOENICIAN); 9109 aliases.put("PLRD", MIAO); 9110 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 9111 aliases.put("RJNG", REJANG); 9112 aliases.put("ROHG", HANIFI_ROHINGYA); 9113 aliases.put("RUNR", RUNIC); 9114 aliases.put("SAMR", SAMARITAN); 9115 aliases.put("SARB", OLD_SOUTH_ARABIAN); 9116 aliases.put("SAUR", SAURASHTRA); 9117 aliases.put("SGNW", SIGNWRITING); 9118 aliases.put("SHAW", SHAVIAN); 9119 aliases.put("SHRD", SHARADA); 9120 aliases.put("SIDD", SIDDHAM); 9121 aliases.put("SIND", KHUDAWADI); 9122 aliases.put("SINH", SINHALA); 9123 aliases.put("SOGD", SOGDIAN); 9124 aliases.put("SOGO", OLD_SOGDIAN); 9125 aliases.put("SORA", SORA_SOMPENG); 9126 aliases.put("SOYO", SOYOMBO); 9127 aliases.put("SUND", SUNDANESE); 9128 aliases.put("SUNU", SUNUWAR); 9129 aliases.put("SYLO", SYLOTI_NAGRI); 9130 aliases.put("SYRC", SYRIAC); 9131 aliases.put("TAGB", TAGBANWA); 9132 aliases.put("TAKR", TAKRI); 9133 aliases.put("TALE", TAI_LE); 9134 aliases.put("TALU", NEW_TAI_LUE); 9135 aliases.put("TAML", TAMIL); 9136 aliases.put("TANG", TANGUT); 9137 aliases.put("TAVT", TAI_VIET); 9138 aliases.put("TELU", TELUGU); 9139 aliases.put("TFNG", TIFINAGH); 9140 aliases.put("TGLG", TAGALOG); 9141 aliases.put("THAA", THAANA); 9142 aliases.put("THAI", THAI); 9143 aliases.put("TIBT", TIBETAN); 9144 aliases.put("TIRH", TIRHUTA); 9145 aliases.put("TNSA", TANGSA); 9146 aliases.put("TODR", TODHRI); 9147 aliases.put("TOTO", TOTO); 9148 aliases.put("TUTG", TULU_TIGALARI); 9149 aliases.put("UGAR", UGARITIC); 9150 aliases.put("VAII", VAI); 9151 aliases.put("VITH", VITHKUQI); 9152 aliases.put("WARA", WARANG_CITI); 9153 aliases.put("WCHO", WANCHO); 9154 aliases.put("XPEO", OLD_PERSIAN); 9155 aliases.put("XSUX", CUNEIFORM); 9156 
aliases.put("YEZI", YEZIDI); 9157 aliases.put("YIII", YI); 9158 aliases.put("ZANB", ZANABAZAR_SQUARE); 9159 aliases.put("ZINH", INHERITED); 9160 aliases.put("ZYYY", COMMON); 9161 aliases.put("ZZZZ", UNKNOWN); 9162 } 9163 9164 /** 9165 * Returns the enum constant representing the Unicode script of which 9166 * the given character (Unicode code point) is assigned to. 9167 * 9168 * @param codePoint the character (Unicode code point) in question. 9169 * @return The {@code UnicodeScript} constant representing the 9170 * Unicode script of which this character is assigned to. 9171 * 9172 * @throws IllegalArgumentException if the specified 9173 * {@code codePoint} is an invalid Unicode code point. 9174 * @see Character#isValidCodePoint(int) 9175 * 9176 */ 9177 public static UnicodeScript of(int codePoint) { 9178 if (!isValidCodePoint(codePoint)) 9179 throw new IllegalArgumentException( 9180 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9181 int type = getType(codePoint); 9182 // leave SURROGATE and PRIVATE_USE for table lookup 9183 if (type == UNASSIGNED) 9184 return UNKNOWN; 9185 int index = Arrays.binarySearch(scriptStarts, codePoint); 9186 if (index < 0) 9187 index = -index - 2; 9188 return scripts[index]; 9189 } 9190 9191 /** 9192 * Returns the UnicodeScript constant with the given Unicode script 9193 * name or the script name alias. Script names and their aliases are 9194 * determined by The Unicode Standard. The files {@code Scripts.txt} 9195 * and {@code PropertyValueAliases.txt} define script names 9196 * and the script name aliases for a particular version of the 9197 * standard. The {@link Character} class specifies the version of 9198 * the standard that it supports. 9199 * <p> 9200 * Character case is ignored for all of the valid script names. 9201 * The en_US locale's case mapping rules are used to provide 9202 * case-insensitive string comparisons for script name validation. 9203 * 9204 * @param scriptName A {@code UnicodeScript} name. 
9205 * @return The {@code UnicodeScript} constant identified 9206 * by {@code scriptName} 9207 * @throws IllegalArgumentException if {@code scriptName} is an 9208 * invalid name 9209 * @throws NullPointerException if {@code scriptName} is null 9210 */ 9211 public static final UnicodeScript forName(String scriptName) { 9212 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 9213 //.replace(' ', '_')); 9214 UnicodeScript sc = aliases.get(scriptName); 9215 if (sc != null) 9216 return sc; 9217 return valueOf(scriptName); 9218 } 9219 } 9220 9221 /** 9222 * The value of the {@code Character}. 9223 * 9224 * @serial 9225 */ 9226 private final char value; 9227 9228 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 9229 @java.io.Serial 9230 private static final long serialVersionUID = 3786198910865385080L; 9231 9232 /** 9233 * Constructs a newly allocated {@code Character} object that 9234 * represents the specified {@code char} value. 9235 * 9236 * @param value the value to be represented by the 9237 * {@code Character} object. 9238 * 9239 * @deprecated 9240 * It is rarely appropriate to use this constructor. The static factory 9241 * {@link #valueOf(char)} is generally a better choice, as it is 9242 * likely to yield significantly better space and time performance. 
9243 */ 9244 @Deprecated(since="9", forRemoval = true) 9245 public Character(char value) { 9246 this.value = value; 9247 } 9248 9249 private static final class CharacterCache { 9250 private CharacterCache(){} 9251 9252 @Stable 9253 static final Character[] cache; 9254 static Character[] archivedCache; 9255 9256 static { 9257 int size = 127 + 1; 9258 9259 // Load and use the archived cache if it exists 9260 CDS.initializeFromArchive(CharacterCache.class); 9261 if (archivedCache == null) { 9262 Character[] c = new Character[size]; 9263 for (int i = 0; i < size; i++) { 9264 c[i] = new Character((char) i); 9265 } 9266 archivedCache = c; 9267 } 9268 cache = archivedCache; 9269 assert cache.length == size; 9270 } 9271 } 9272 9273 /** 9274 * Returns a {@code Character} instance representing the specified 9275 * {@code char} value. 9276 * If a new {@code Character} instance is not required, this method 9277 * should generally be used in preference to the constructor 9278 * {@link #Character(char)}, as this method is likely to yield 9279 * significantly better space and time performance by caching 9280 * frequently requested values. 9281 * 9282 * This method will always cache values in the range {@code 9283 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9284 * cache other values outside of this range. 9285 * 9286 * @param c a char value. 9287 * @return a {@code Character} instance representing {@code c}. 9288 * @since 1.5 9289 */ 9290 @IntrinsicCandidate 9291 @DeserializeConstructor 9292 public static Character valueOf(char c) { 9293 if (c <= 127) { // must cache 9294 return CharacterCache.cache[(int)c]; 9295 } 9296 return new Character(c); 9297 } 9298 9299 /** 9300 * Returns the value of this {@code Character} object. 9301 * @return the primitive {@code char} value represented by 9302 * this object. 
9303 */ 9304 @IntrinsicCandidate 9305 public char charValue() { 9306 return value; 9307 } 9308 9309 /** 9310 * Returns a hash code for this {@code Character}; equal to the result 9311 * of invoking {@code charValue()}. 9312 * 9313 * @return a hash code value for this {@code Character} 9314 */ 9315 @Override 9316 public int hashCode() { 9317 return Character.hashCode(value); 9318 } 9319 9320 /** 9321 * Returns a hash code for a {@code char} value; compatible with 9322 * {@code Character.hashCode()}. 9323 * 9324 * @since 1.8 9325 * 9326 * @param value The {@code char} for which to return a hash code. 9327 * @return a hash code value for a {@code char} value. 9328 */ 9329 public static int hashCode(char value) { 9330 return (int)value; 9331 } 9332 9333 /** 9334 * Compares this object against the specified object. 9335 * The result is {@code true} if and only if the argument is not 9336 * {@code null} and is a {@code Character} object that 9337 * represents the same {@code char} value as this object. 9338 * 9339 * @param obj the object to compare with. 9340 * @return {@code true} if the objects are the same; 9341 * {@code false} otherwise. 9342 */ 9343 public boolean equals(Object obj) { 9344 if (obj instanceof Character c) { 9345 return value == c.charValue(); 9346 } 9347 return false; 9348 } 9349 9350 /** 9351 * Returns a {@code String} object representing this 9352 * {@code Character}'s value. The result is a string of 9353 * length 1 whose sole component is the primitive 9354 * {@code char} value represented by this 9355 * {@code Character} object. 9356 * 9357 * @return a string representation of this object. 9358 */ 9359 @Override 9360 public String toString() { 9361 return String.valueOf(value); 9362 } 9363 9364 /** 9365 * Returns a {@code String} object representing the 9366 * specified {@code char}. The result is a string of length 9367 * 1 consisting solely of the specified {@code char}. 
9368 * 9369 * @apiNote This method cannot handle <a 9370 * href="#supplementary"> supplementary characters</a>. To support 9371 * all Unicode characters, including supplementary characters, use 9372 * the {@link #toString(int)} method. 9373 * 9374 * @param c the {@code char} to be converted 9375 * @return the string representation of the specified {@code char} 9376 * @since 1.4 9377 */ 9378 public static String toString(char c) { 9379 return String.valueOf(c); 9380 } 9381 9382 /** 9383 * Returns a {@code String} object representing the 9384 * specified character (Unicode code point). The result is a string of 9385 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9386 * 9387 * @param codePoint the {@code codePoint} to be converted 9388 * @return the string representation of the specified {@code codePoint} 9389 * @throws IllegalArgumentException if the specified 9390 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9391 * valid Unicode code point}. 9392 * @since 11 9393 */ 9394 public static String toString(int codePoint) { 9395 return String.valueOfCodePoint(codePoint); 9396 } 9397 9398 /** 9399 * Determines whether the specified code point is a valid 9400 * <a href="http://www.unicode.org/glossary/#code_point"> 9401 * Unicode code point value</a>. 9402 * 9403 * @param codePoint the Unicode code point to be tested 9404 * @return {@code true} if the specified code point value is between 9405 * {@link #MIN_CODE_POINT} and 9406 * {@link #MAX_CODE_POINT} inclusive; 9407 * {@code false} otherwise. 9408 * @since 1.5 9409 */ 9410 public static boolean isValidCodePoint(int codePoint) { 9411 // Optimized form of: 9412 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9413 int plane = codePoint >>> 16; 9414 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9415 } 9416 9417 /** 9418 * Determines whether the specified character (Unicode code point) 9419 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 
9420 * Such code points can be represented using a single {@code char}. 9421 * 9422 * @param codePoint the character (Unicode code point) to be tested 9423 * @return {@code true} if the specified code point is between 9424 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9425 * {@code false} otherwise. 9426 * @since 1.7 9427 */ 9428 public static boolean isBmpCodePoint(int codePoint) { 9429 return codePoint >>> 16 == 0; 9430 // Optimized form of: 9431 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9432 // We consistently use logical shift (>>>) to facilitate 9433 // additional runtime optimizations. 9434 } 9435 9436 /** 9437 * Determines whether the specified character (Unicode code point) 9438 * is in the <a href="#supplementary">supplementary character</a> range. 9439 * 9440 * @param codePoint the character (Unicode code point) to be tested 9441 * @return {@code true} if the specified code point is between 9442 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9443 * {@link #MAX_CODE_POINT} inclusive; 9444 * {@code false} otherwise. 9445 * @since 1.5 9446 */ 9447 public static boolean isSupplementaryCodePoint(int codePoint) { 9448 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9449 && codePoint < MAX_CODE_POINT + 1; 9450 } 9451 9452 /** 9453 * Determines if the given {@code char} value is a 9454 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9455 * Unicode high-surrogate code unit</a> 9456 * (also known as <i>leading-surrogate code unit</i>). 9457 * 9458 * <p>Such values do not represent characters by themselves, 9459 * but are used in the representation of 9460 * <a href="#supplementary">supplementary characters</a> 9461 * in the UTF-16 encoding. 9462 * 9463 * @param ch the {@code char} value to be tested. 9464 * @return {@code true} if the {@code char} value is between 9465 * {@link #MIN_HIGH_SURROGATE} and 9466 * {@link #MAX_HIGH_SURROGATE} inclusive; 9467 * {@code false} otherwise. 
9468 * @see Character#isLowSurrogate(char) 9469 * @see Character.UnicodeBlock#of(int) 9470 * @since 1.5 9471 */ 9472 public static boolean isHighSurrogate(char ch) { 9473 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9474 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9475 } 9476 9477 /** 9478 * Determines if the given {@code char} value is a 9479 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9480 * Unicode low-surrogate code unit</a> 9481 * (also known as <i>trailing-surrogate code unit</i>). 9482 * 9483 * <p>Such values do not represent characters by themselves, 9484 * but are used in the representation of 9485 * <a href="#supplementary">supplementary characters</a> 9486 * in the UTF-16 encoding. 9487 * 9488 * @param ch the {@code char} value to be tested. 9489 * @return {@code true} if the {@code char} value is between 9490 * {@link #MIN_LOW_SURROGATE} and 9491 * {@link #MAX_LOW_SURROGATE} inclusive; 9492 * {@code false} otherwise. 9493 * @see Character#isHighSurrogate(char) 9494 * @since 1.5 9495 */ 9496 public static boolean isLowSurrogate(char ch) { 9497 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9498 } 9499 9500 /** 9501 * Determines if the given {@code char} value is a Unicode 9502 * <i>surrogate code unit</i>. 9503 * 9504 * <p>Such values do not represent characters by themselves, 9505 * but are used in the representation of 9506 * <a href="#supplementary">supplementary characters</a> 9507 * in the UTF-16 encoding. 9508 * 9509 * <p>A char value is a surrogate code unit if and only if it is either 9510 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9511 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 9512 * 9513 * @param ch the {@code char} value to be tested. 9514 * @return {@code true} if the {@code char} value is between 9515 * {@link #MIN_SURROGATE} and 9516 * {@link #MAX_SURROGATE} inclusive; 9517 * {@code false} otherwise. 
9518 * @since 1.7 9519 */ 9520 public static boolean isSurrogate(char ch) { 9521 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9522 } 9523 9524 /** 9525 * Determines whether the specified pair of {@code char} 9526 * values is a valid 9527 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9528 * Unicode surrogate pair</a>. 9529 * 9530 * <p>This method is equivalent to the expression: 9531 * <blockquote><pre>{@code 9532 * isHighSurrogate(high) && isLowSurrogate(low) 9533 * }</pre></blockquote> 9534 * 9535 * @param high the high-surrogate code value to be tested 9536 * @param low the low-surrogate code value to be tested 9537 * @return {@code true} if the specified high and 9538 * low-surrogate code values represent a valid surrogate pair; 9539 * {@code false} otherwise. 9540 * @since 1.5 9541 */ 9542 public static boolean isSurrogatePair(char high, char low) { 9543 return isHighSurrogate(high) && isLowSurrogate(low); 9544 } 9545 9546 /** 9547 * Determines the number of {@code char} values needed to 9548 * represent the specified character (Unicode code point). If the 9549 * specified character is equal to or greater than 0x10000, then 9550 * the method returns 2. Otherwise, the method returns 1. 9551 * 9552 * <p>This method doesn't validate the specified character to be a 9553 * valid Unicode code point. The caller must validate the 9554 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9555 * if necessary. 9556 * 9557 * @param codePoint the character (Unicode code point) to be tested. 9558 * @return 2 if the character is a valid supplementary character; 1 otherwise. 9559 * @see Character#isSupplementaryCodePoint(int) 9560 * @since 1.5 9561 */ 9562 public static int charCount(int codePoint) { 9563 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9564 } 9565 9566 /** 9567 * Converts the specified surrogate pair to its supplementary code 9568 * point value. 
This method does not validate the specified 9569 * surrogate pair. The caller must validate it using {@link 9570 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9571 * 9572 * @param high the high-surrogate code unit 9573 * @param low the low-surrogate code unit 9574 * @return the supplementary code point composed from the 9575 * specified surrogate pair. 9576 * @since 1.5 9577 */ 9578 public static int toCodePoint(char high, char low) { 9579 // Optimized form of: 9580 // return ((high - MIN_HIGH_SURROGATE) << 10) 9581 // + (low - MIN_LOW_SURROGATE) 9582 // + MIN_SUPPLEMENTARY_CODE_POINT; 9583 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9584 - (MIN_HIGH_SURROGATE << 10) 9585 - MIN_LOW_SURROGATE); 9586 } 9587 9588 /** 9589 * Returns the code point at the given index of the 9590 * {@code CharSequence}. If the {@code char} value at 9591 * the given index in the {@code CharSequence} is in the 9592 * high-surrogate range, the following index is less than the 9593 * length of the {@code CharSequence}, and the 9594 * {@code char} value at the following index is in the 9595 * low-surrogate range, then the supplementary code point 9596 * corresponding to this surrogate pair is returned. Otherwise, 9597 * the {@code char} value at the given index is returned. 9598 * 9599 * @param seq a sequence of {@code char} values (Unicode code 9600 * units) 9601 * @param index the index to the {@code char} values (Unicode 9602 * code units) in {@code seq} to be converted 9603 * @return the Unicode code point at the given index 9604 * @throws NullPointerException if {@code seq} is null. 9605 * @throws IndexOutOfBoundsException if the value 9606 * {@code index} is negative or not less than 9607 * {@link CharSequence#length() seq.length()}. 
9608 * @since 1.5 9609 */ 9610 public static int codePointAt(CharSequence seq, int index) { 9611 char c1 = seq.charAt(index); 9612 if (isHighSurrogate(c1) && ++index < seq.length()) { 9613 char c2 = seq.charAt(index); 9614 if (isLowSurrogate(c2)) { 9615 return toCodePoint(c1, c2); 9616 } 9617 } 9618 return c1; 9619 } 9620 9621 /** 9622 * Returns the code point at the given index of the 9623 * {@code char} array. If the {@code char} value at 9624 * the given index in the {@code char} array is in the 9625 * high-surrogate range, the following index is less than the 9626 * length of the {@code char} array, and the 9627 * {@code char} value at the following index is in the 9628 * low-surrogate range, then the supplementary code point 9629 * corresponding to this surrogate pair is returned. Otherwise, 9630 * the {@code char} value at the given index is returned. 9631 * 9632 * @param a the {@code char} array 9633 * @param index the index to the {@code char} values (Unicode 9634 * code units) in the {@code char} array to be converted 9635 * @return the Unicode code point at the given index 9636 * @throws NullPointerException if {@code a} is null. 9637 * @throws IndexOutOfBoundsException if the value 9638 * {@code index} is negative or not less than 9639 * the length of the {@code char} array. 9640 * @since 1.5 9641 */ 9642 public static int codePointAt(char[] a, int index) { 9643 return codePointAtImpl(a, index, a.length); 9644 } 9645 9646 /** 9647 * Returns the code point at the given index of the 9648 * {@code char} array, where only array elements with 9649 * {@code index} less than {@code limit} can be used. If 9650 * the {@code char} value at the given index in the 9651 * {@code char} array is in the high-surrogate range, the 9652 * following index is less than the {@code limit}, and the 9653 * {@code char} value at the following index is in the 9654 * low-surrogate range, then the supplementary code point 9655 * corresponding to this surrogate pair is returned. 
Otherwise, 9656 * the {@code char} value at the given index is returned. 9657 * 9658 * @param a the {@code char} array 9659 * @param index the index to the {@code char} values (Unicode 9660 * code units) in the {@code char} array to be converted 9661 * @param limit the index after the last array element that 9662 * can be used in the {@code char} array 9663 * @return the Unicode code point at the given index 9664 * @throws NullPointerException if {@code a} is null. 9665 * @throws IndexOutOfBoundsException if the {@code index} 9666 * argument is negative or not less than the {@code limit} 9667 * argument, or if the {@code limit} argument is negative or 9668 * greater than the length of the {@code char} array. 9669 * @since 1.5 9670 */ 9671 public static int codePointAt(char[] a, int index, int limit) { 9672 if (index >= limit || index < 0 || limit > a.length) { 9673 throw new IndexOutOfBoundsException(); 9674 } 9675 return codePointAtImpl(a, index, limit); 9676 } 9677 9678 // throws ArrayIndexOutOfBoundsException if index out of bounds 9679 static int codePointAtImpl(char[] a, int index, int limit) { 9680 char c1 = a[index]; 9681 if (isHighSurrogate(c1) && ++index < limit) { 9682 char c2 = a[index]; 9683 if (isLowSurrogate(c2)) { 9684 return toCodePoint(c1, c2); 9685 } 9686 } 9687 return c1; 9688 } 9689 9690 /** 9691 * Returns the code point preceding the given index of the 9692 * {@code CharSequence}. If the {@code char} value at 9693 * {@code (index - 1)} in the {@code CharSequence} is in 9694 * the low-surrogate range, {@code (index - 2)} is not 9695 * negative, and the {@code char} value at {@code (index - 2)} 9696 * in the {@code CharSequence} is in the 9697 * high-surrogate range, then the supplementary code point 9698 * corresponding to this surrogate pair is returned. Otherwise, 9699 * the {@code char} value at {@code (index - 1)} is 9700 * returned. 
9701 * 9702 * @param seq the {@code CharSequence} instance 9703 * @param index the index following the code point that should be returned 9704 * @return the Unicode code point value before the given index. 9705 * @throws NullPointerException if {@code seq} is null. 9706 * @throws IndexOutOfBoundsException if the {@code index} 9707 * argument is less than 1 or greater than {@link 9708 * CharSequence#length() seq.length()}. 9709 * @since 1.5 9710 */ 9711 public static int codePointBefore(CharSequence seq, int index) { 9712 char c2 = seq.charAt(--index); 9713 if (isLowSurrogate(c2) && index > 0) { 9714 char c1 = seq.charAt(--index); 9715 if (isHighSurrogate(c1)) { 9716 return toCodePoint(c1, c2); 9717 } 9718 } 9719 return c2; 9720 } 9721 9722 /** 9723 * Returns the code point preceding the given index of the 9724 * {@code char} array. If the {@code char} value at 9725 * {@code (index - 1)} in the {@code char} array is in 9726 * the low-surrogate range, {@code (index - 2)} is not 9727 * negative, and the {@code char} value at {@code (index - 2)} 9728 * in the {@code char} array is in the 9729 * high-surrogate range, then the supplementary code point 9730 * corresponding to this surrogate pair is returned. Otherwise, 9731 * the {@code char} value at {@code (index - 1)} is 9732 * returned. 9733 * 9734 * @param a the {@code char} array 9735 * @param index the index following the code point that should be returned 9736 * @return the Unicode code point value before the given index. 9737 * @throws NullPointerException if {@code a} is null. 
9738 * @throws IndexOutOfBoundsException if the {@code index} 9739 * argument is less than 1 or greater than the length of the 9740 * {@code char} array 9741 * @since 1.5 9742 */ 9743 public static int codePointBefore(char[] a, int index) { 9744 return codePointBeforeImpl(a, index, 0); 9745 } 9746 9747 /** 9748 * Returns the code point preceding the given index of the 9749 * {@code char} array, where only array elements with 9750 * {@code index} greater than or equal to {@code start} 9751 * can be used. If the {@code char} value at {@code (index - 1)} 9752 * in the {@code char} array is in the 9753 * low-surrogate range, {@code (index - 2)} is not less than 9754 * {@code start}, and the {@code char} value at 9755 * {@code (index - 2)} in the {@code char} array is in 9756 * the high-surrogate range, then the supplementary code point 9757 * corresponding to this surrogate pair is returned. Otherwise, 9758 * the {@code char} value at {@code (index - 1)} is 9759 * returned. 9760 * 9761 * @param a the {@code char} array 9762 * @param index the index following the code point that should be returned 9763 * @param start the index of the first array element in the 9764 * {@code char} array 9765 * @return the Unicode code point value before the given index. 9766 * @throws NullPointerException if {@code a} is null. 9767 * @throws IndexOutOfBoundsException if the {@code index} 9768 * argument is not greater than the {@code start} argument or 9769 * is greater than the length of the {@code char} array, or 9770 * if the {@code start} argument is negative or not less than 9771 * the length of the {@code char} array. 
9772 * @since 1.5 9773 */ 9774 public static int codePointBefore(char[] a, int index, int start) { 9775 if (index <= start || start < 0 || index > a.length) { 9776 throw new IndexOutOfBoundsException(); 9777 } 9778 return codePointBeforeImpl(a, index, start); 9779 } 9780 9781 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 9782 static int codePointBeforeImpl(char[] a, int index, int start) { 9783 char c2 = a[--index]; 9784 if (isLowSurrogate(c2) && index > start) { 9785 char c1 = a[--index]; 9786 if (isHighSurrogate(c1)) { 9787 return toCodePoint(c1, c2); 9788 } 9789 } 9790 return c2; 9791 } 9792 9793 /** 9794 * Returns the leading surrogate (a 9795 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9796 * high surrogate code unit</a>) of the 9797 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9798 * surrogate pair</a> 9799 * representing the specified supplementary character (Unicode 9800 * code point) in the UTF-16 encoding. If the specified character 9801 * is not a 9802 * <a href="Character.html#supplementary">supplementary character</a>, 9803 * an unspecified {@code char} is returned. 9804 * 9805 * <p>If 9806 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9807 * is {@code true}, then 9808 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9809 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9810 * are also always {@code true}. 
9811 * 9812 * @param codePoint a supplementary character (Unicode code point) 9813 * @return the leading surrogate code unit used to represent the 9814 * character in the UTF-16 encoding 9815 * @since 1.7 9816 */ 9817 public static char highSurrogate(int codePoint) { 9818 return (char) ((codePoint >>> 10) 9819 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9820 } 9821 9822 /** 9823 * Returns the trailing surrogate (a 9824 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9825 * low surrogate code unit</a>) of the 9826 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9827 * surrogate pair</a> 9828 * representing the specified supplementary character (Unicode 9829 * code point) in the UTF-16 encoding. If the specified character 9830 * is not a 9831 * <a href="Character.html#supplementary">supplementary character</a>, 9832 * an unspecified {@code char} is returned. 9833 * 9834 * <p>If 9835 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9836 * is {@code true}, then 9837 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9838 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9839 * are also always {@code true}. 9840 * 9841 * @param codePoint a supplementary character (Unicode code point) 9842 * @return the trailing surrogate code unit used to represent the 9843 * character in the UTF-16 encoding 9844 * @since 1.7 9845 */ 9846 public static char lowSurrogate(int codePoint) { 9847 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9848 } 9849 9850 /** 9851 * Converts the specified character (Unicode code point) to its 9852 * UTF-16 representation. If the specified code point is a BMP 9853 * (Basic Multilingual Plane or Plane 0) value, the same value is 9854 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9855 * specified code point is a supplementary character, its 9856 * surrogate values are stored in {@code dst[dstIndex]} 9857 * (high-surrogate) and {@code dst[dstIndex+1]} 9858 * (low-surrogate), and 2 is returned. 9859 * 9860 * @param codePoint the character (Unicode code point) to be converted. 9861 * @param dst an array of {@code char} in which the 9862 * {@code codePoint}'s UTF-16 value is stored. 9863 * @param dstIndex the start index into the {@code dst} 9864 * array where the converted value is stored. 9865 * @return 1 if the code point is a BMP code point, 2 if the 9866 * code point is a supplementary code point. 9867 * @throws IllegalArgumentException if the specified 9868 * {@code codePoint} is not a valid Unicode code point. 9869 * @throws NullPointerException if the specified {@code dst} is null. 9870 * @throws IndexOutOfBoundsException if {@code dstIndex} 9871 * is negative or not less than {@code dst.length}, or if 9872 * {@code dst} at {@code dstIndex} doesn't have enough 9873 * array element(s) to store the resulting {@code char} 9874 * value(s). (If {@code dstIndex} is equal to 9875 * {@code dst.length-1} and the specified 9876 * {@code codePoint} is a supplementary character, the 9877 * high-surrogate value is not stored in 9878 * {@code dst[dstIndex]}.) 9879 * @since 1.5 9880 */ 9881 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9882 if (isBmpCodePoint(codePoint)) { 9883 dst[dstIndex] = (char) codePoint; 9884 return 1; 9885 } else if (isValidCodePoint(codePoint)) { 9886 toSurrogates(codePoint, dst, dstIndex); 9887 return 2; 9888 } else { 9889 throw new IllegalArgumentException( 9890 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9891 } 9892 } 9893 9894 /** 9895 * Converts the specified character (Unicode code point) to its 9896 * UTF-16 representation stored in a {@code char} array. 
If 9897 * the specified code point is a BMP (Basic Multilingual Plane or 9898 * Plane 0) value, the resulting {@code char} array has 9899 * the same value as {@code codePoint}. If the specified code 9900 * point is a supplementary code point, the resulting 9901 * {@code char} array has the corresponding surrogate pair. 9902 * 9903 * @param codePoint a Unicode code point 9904 * @return a {@code char} array having 9905 * {@code codePoint}'s UTF-16 representation. 9906 * @throws IllegalArgumentException if the specified 9907 * {@code codePoint} is not a valid Unicode code point. 9908 * @since 1.5 9909 */ 9910 public static char[] toChars(int codePoint) { 9911 if (isBmpCodePoint(codePoint)) { 9912 return new char[] { (char) codePoint }; 9913 } else if (isValidCodePoint(codePoint)) { 9914 char[] result = new char[2]; 9915 toSurrogates(codePoint, result, 0); 9916 return result; 9917 } else { 9918 throw new IllegalArgumentException( 9919 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9920 } 9921 } 9922 9923 static void toSurrogates(int codePoint, char[] dst, int index) { 9924 // We write elements "backwards" to guarantee all-or-nothing 9925 dst[index+1] = lowSurrogate(codePoint); 9926 dst[index] = highSurrogate(codePoint); 9927 } 9928 9929 /** 9930 * Returns the number of Unicode code points in the text range of 9931 * the specified char sequence. The text range begins at the 9932 * specified {@code beginIndex} and extends to the 9933 * {@code char} at index {@code endIndex - 1}. Thus the 9934 * length (in {@code char}s) of the text range is 9935 * {@code endIndex-beginIndex}. Unpaired surrogates within 9936 * the text range count as one code point each. 9937 * 9938 * @param seq the char sequence 9939 * @param beginIndex the index to the first {@code char} of 9940 * the text range. 9941 * @param endIndex the index after the last {@code char} of 9942 * the text range. 
9943 * @return the number of Unicode code points in the specified text 9944 * range 9945 * @throws NullPointerException if {@code seq} is null. 9946 * @throws IndexOutOfBoundsException if the 9947 * {@code beginIndex} is negative, or {@code endIndex} 9948 * is larger than the length of the given sequence, or 9949 * {@code beginIndex} is larger than {@code endIndex}. 9950 * @since 1.5 9951 */ 9952 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9953 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 9954 int n = endIndex - beginIndex; 9955 for (int i = beginIndex; i < endIndex; ) { 9956 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9957 isLowSurrogate(seq.charAt(i))) { 9958 n--; 9959 i++; 9960 } 9961 } 9962 return n; 9963 } 9964 9965 /** 9966 * Returns the number of Unicode code points in a subarray of the 9967 * {@code char} array argument. The {@code offset} 9968 * argument is the index of the first {@code char} of the 9969 * subarray and the {@code count} argument specifies the 9970 * length of the subarray in {@code char}s. Unpaired 9971 * surrogates within the subarray count as one code point each. 9972 * 9973 * @param a the {@code char} array 9974 * @param offset the index of the first {@code char} in the 9975 * given {@code char} array 9976 * @param count the length of the subarray in {@code char}s 9977 * @return the number of Unicode code points in the specified subarray 9978 * @throws NullPointerException if {@code a} is null. 9979 * @throws IndexOutOfBoundsException if {@code offset} or 9980 * {@code count} is negative, or if {@code offset + 9981 * count} is larger than the length of the given array. 
9982 * @since 1.5 9983 */ 9984 public static int codePointCount(char[] a, int offset, int count) { 9985 Objects.checkFromIndexSize(offset, count, a.length); 9986 return codePointCountImpl(a, offset, count); 9987 } 9988 9989 static int codePointCountImpl(char[] a, int offset, int count) { 9990 int endIndex = offset + count; 9991 int n = count; 9992 for (int i = offset; i < endIndex; ) { 9993 if (isHighSurrogate(a[i++]) && i < endIndex && 9994 isLowSurrogate(a[i])) { 9995 n--; 9996 i++; 9997 } 9998 } 9999 return n; 10000 } 10001 10002 /** 10003 * Returns the index within the given char sequence that is offset 10004 * from the given {@code index} by {@code codePointOffset} 10005 * code points. Unpaired surrogates within the text range given by 10006 * {@code index} and {@code codePointOffset} count as 10007 * one code point each. 10008 * 10009 * @param seq the char sequence 10010 * @param index the index to be offset 10011 * @param codePointOffset the offset in code points 10012 * @return the index within the char sequence 10013 * @throws NullPointerException if {@code seq} is null. 10014 * @throws IndexOutOfBoundsException if {@code index} 10015 * is negative or larger than the length of the char sequence, 10016 * or if {@code codePointOffset} is positive and the 10017 * subsequence starting with {@code index} has fewer than 10018 * {@code codePointOffset} code points, or if 10019 * {@code codePointOffset} is negative and the subsequence 10020 * before {@code index} has fewer than the absolute value 10021 * of {@code codePointOffset} code points. 
10022 * @since 1.5 10023 */ 10024 public static int offsetByCodePoints(CharSequence seq, int index, 10025 int codePointOffset) { 10026 int length = seq.length(); 10027 if (index < 0 || index > length) { 10028 throw new IndexOutOfBoundsException(); 10029 } 10030 10031 int x = index; 10032 if (codePointOffset >= 0) { 10033 int i; 10034 for (i = 0; x < length && i < codePointOffset; i++) { 10035 if (isHighSurrogate(seq.charAt(x++)) && x < length && 10036 isLowSurrogate(seq.charAt(x))) { 10037 x++; 10038 } 10039 } 10040 if (i < codePointOffset) { 10041 throw new IndexOutOfBoundsException(); 10042 } 10043 } else { 10044 int i; 10045 for (i = codePointOffset; x > 0 && i < 0; i++) { 10046 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 10047 isHighSurrogate(seq.charAt(x-1))) { 10048 x--; 10049 } 10050 } 10051 if (i < 0) { 10052 throw new IndexOutOfBoundsException(); 10053 } 10054 } 10055 return x; 10056 } 10057 10058 /** 10059 * Returns the index within the given {@code char} subarray 10060 * that is offset from the given {@code index} by 10061 * {@code codePointOffset} code points. The 10062 * {@code start} and {@code count} arguments specify a 10063 * subarray of the {@code char} array. Unpaired surrogates 10064 * within the text range given by {@code index} and 10065 * {@code codePointOffset} count as one code point each. 10066 * 10067 * @param a the {@code char} array 10068 * @param start the index of the first {@code char} of the 10069 * subarray 10070 * @param count the length of the subarray in {@code char}s 10071 * @param index the index to be offset 10072 * @param codePointOffset the offset in code points 10073 * @return the index within the subarray 10074 * @throws NullPointerException if {@code a} is null. 
10075 * @throws IndexOutOfBoundsException 10076 * if {@code start} or {@code count} is negative, 10077 * or if {@code start + count} is larger than the length of 10078 * the given array, 10079 * or if {@code index} is less than {@code start} or 10080 * larger then {@code start + count}, 10081 * or if {@code codePointOffset} is positive and the text range 10082 * starting with {@code index} and ending with {@code start + count - 1} 10083 * has fewer than {@code codePointOffset} code 10084 * points, 10085 * or if {@code codePointOffset} is negative and the text range 10086 * starting with {@code start} and ending with {@code index - 1} 10087 * has fewer than the absolute value of 10088 * {@code codePointOffset} code points. 10089 * @since 1.5 10090 */ 10091 public static int offsetByCodePoints(char[] a, int start, int count, 10092 int index, int codePointOffset) { 10093 if (count > a.length-start || start < 0 || count < 0 10094 || index < start || index > start+count) { 10095 throw new IndexOutOfBoundsException(); 10096 } 10097 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 10098 } 10099 10100 static int offsetByCodePointsImpl(char[]a, int start, int count, 10101 int index, int codePointOffset) { 10102 int x = index; 10103 if (codePointOffset >= 0) { 10104 int limit = start + count; 10105 int i; 10106 for (i = 0; x < limit && i < codePointOffset; i++) { 10107 if (isHighSurrogate(a[x++]) && x < limit && 10108 isLowSurrogate(a[x])) { 10109 x++; 10110 } 10111 } 10112 if (i < codePointOffset) { 10113 throw new IndexOutOfBoundsException(); 10114 } 10115 } else { 10116 int i; 10117 for (i = codePointOffset; x > start && i < 0; i++) { 10118 if (isLowSurrogate(a[--x]) && x > start && 10119 isHighSurrogate(a[x-1])) { 10120 x--; 10121 } 10122 } 10123 if (i < 0) { 10124 throw new IndexOutOfBoundsException(); 10125 } 10126 } 10127 return x; 10128 } 10129 10130 /** 10131 * Determines if the specified character is a lowercase character. 
10132 * <p> 10133 * A character is lowercase if its general category type, provided 10134 * by {@code Character.getType(ch)}, is 10135 * {@code LOWERCASE_LETTER}, or it has contributory property 10136 * Other_Lowercase as defined by the Unicode Standard. 10137 * <p> 10138 * The following are examples of lowercase characters: 10139 * <blockquote><pre> 10140 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10141 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10142 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10143 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10144 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10145 * </pre></blockquote> 10146 * <p> Many other Unicode characters are lowercase too. 10147 * 10148 * <p><b>Note:</b> This method cannot handle <a 10149 * href="#supplementary"> supplementary characters</a>. To support 10150 * all Unicode characters, including supplementary characters, use 10151 * the {@link #isLowerCase(int)} method. 10152 * 10153 * @param ch the character to be tested. 10154 * @return {@code true} if the character is lowercase; 10155 * {@code false} otherwise. 10156 * @see Character#isLowerCase(char) 10157 * @see Character#isTitleCase(char) 10158 * @see Character#toLowerCase(char) 10159 * @see Character#getType(char) 10160 */ 10161 public static boolean isLowerCase(char ch) { 10162 return isLowerCase((int)ch); 10163 } 10164 10165 /** 10166 * Determines if the specified character (Unicode code point) is a 10167 * lowercase character. 10168 * <p> 10169 * A character is lowercase if its general category type, provided 10170 * by {@link Character#getType getType(codePoint)}, is 10171 * {@code LOWERCASE_LETTER}, or it has contributory property 10172 * Other_Lowercase as defined by the Unicode Standard. 
10173 * <p> 10174 * The following are examples of lowercase characters: 10175 * <blockquote><pre> 10176 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10177 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10178 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10179 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10180 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10181 * </pre></blockquote> 10182 * <p> Many other Unicode characters are lowercase too. 10183 * 10184 * @param codePoint the character (Unicode code point) to be tested. 10185 * @return {@code true} if the character is lowercase; 10186 * {@code false} otherwise. 10187 * @see Character#isLowerCase(int) 10188 * @see Character#isTitleCase(int) 10189 * @see Character#toLowerCase(int) 10190 * @see Character#getType(int) 10191 * @since 1.5 10192 */ 10193 public static boolean isLowerCase(int codePoint) { 10194 return CharacterData.of(codePoint).isLowerCase(codePoint); 10195 } 10196 10197 /** 10198 * Determines if the specified character is an uppercase character. 10199 * <p> 10200 * A character is uppercase if its general category type, provided by 10201 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 10202 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10203 * <p> 10204 * The following are examples of uppercase characters: 10205 * <blockquote><pre> 10206 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10207 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10208 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10209 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10210 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10211 * </pre></blockquote> 10212 * <p> Many other Unicode characters are uppercase too. 
10213 * 10214 * <p><b>Note:</b> This method cannot handle <a 10215 * href="#supplementary"> supplementary characters</a>. To support 10216 * all Unicode characters, including supplementary characters, use 10217 * the {@link #isUpperCase(int)} method. 10218 * 10219 * @param ch the character to be tested. 10220 * @return {@code true} if the character is uppercase; 10221 * {@code false} otherwise. 10222 * @see Character#isLowerCase(char) 10223 * @see Character#isTitleCase(char) 10224 * @see Character#toUpperCase(char) 10225 * @see Character#getType(char) 10226 * @since 1.0 10227 */ 10228 public static boolean isUpperCase(char ch) { 10229 return isUpperCase((int)ch); 10230 } 10231 10232 /** 10233 * Determines if the specified character (Unicode code point) is an uppercase character. 10234 * <p> 10235 * A character is uppercase if its general category type, provided by 10236 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 10237 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10238 * <p> 10239 * The following are examples of uppercase characters: 10240 * <blockquote><pre> 10241 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10242 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10243 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10244 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10245 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10246 * </pre></blockquote> 10247 * <p> Many other Unicode characters are uppercase too. 10248 * 10249 * @param codePoint the character (Unicode code point) to be tested. 10250 * @return {@code true} if the character is uppercase; 10251 * {@code false} otherwise. 
10252 * @see Character#isLowerCase(int) 10253 * @see Character#isTitleCase(int) 10254 * @see Character#toUpperCase(int) 10255 * @see Character#getType(int) 10256 * @since 1.5 10257 */ 10258 public static boolean isUpperCase(int codePoint) { 10259 return CharacterData.of(codePoint).isUpperCase(codePoint); 10260 } 10261 10262 /** 10263 * Determines if the specified character is a titlecase character. 10264 * <p> 10265 * A character is a titlecase character if its general 10266 * category type, provided by {@code Character.getType(ch)}, 10267 * is {@code TITLECASE_LETTER}. 10268 * <p> 10269 * Some characters look like pairs of Latin letters. For example, there 10270 * is an uppercase letter that looks like "LJ" and has a corresponding 10271 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10272 * is the appropriate form to use when rendering a word in lowercase 10273 * with initial capitals, as for a book title. 10274 * <p> 10275 * These are some of the Unicode characters for which this method returns 10276 * {@code true}: 10277 * <ul> 10278 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10279 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10280 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10281 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10282 * </ul> 10283 * <p> Many other Unicode characters are titlecase too. 10284 * 10285 * <p><b>Note:</b> This method cannot handle <a 10286 * href="#supplementary"> supplementary characters</a>. To support 10287 * all Unicode characters, including supplementary characters, use 10288 * the {@link #isTitleCase(int)} method. 10289 * 10290 * @param ch the character to be tested. 10291 * @return {@code true} if the character is titlecase; 10292 * {@code false} otherwise. 
10293 * @see Character#isLowerCase(char) 10294 * @see Character#isUpperCase(char) 10295 * @see Character#toTitleCase(char) 10296 * @see Character#getType(char) 10297 * @since 1.0.2 10298 */ 10299 public static boolean isTitleCase(char ch) { 10300 return isTitleCase((int)ch); 10301 } 10302 10303 /** 10304 * Determines if the specified character (Unicode code point) is a titlecase character. 10305 * <p> 10306 * A character is a titlecase character if its general 10307 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10308 * is {@code TITLECASE_LETTER}. 10309 * <p> 10310 * Some characters look like pairs of Latin letters. For example, there 10311 * is an uppercase letter that looks like "LJ" and has a corresponding 10312 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10313 * is the appropriate form to use when rendering a word in lowercase 10314 * with initial capitals, as for a book title. 10315 * <p> 10316 * These are some of the Unicode characters for which this method returns 10317 * {@code true}: 10318 * <ul> 10319 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10320 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10321 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10322 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10323 * </ul> 10324 * <p> Many other Unicode characters are titlecase too. 10325 * 10326 * @param codePoint the character (Unicode code point) to be tested. 10327 * @return {@code true} if the character is titlecase; 10328 * {@code false} otherwise. 10329 * @see Character#isLowerCase(int) 10330 * @see Character#isUpperCase(int) 10331 * @see Character#toTitleCase(int) 10332 * @see Character#getType(int) 10333 * @since 1.5 10334 */ 10335 public static boolean isTitleCase(int codePoint) { 10336 return getType(codePoint) == Character.TITLECASE_LETTER; 10337 } 10338 10339 /** 10340 * Determines if the specified character is a digit. 
10341 * <p> 10342 * A character is a digit if its general category type, provided 10343 * by {@code Character.getType(ch)}, is 10344 * {@code DECIMAL_DIGIT_NUMBER}. 10345 * <p> 10346 * Some Unicode character ranges that contain digits: 10347 * <ul> 10348 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10349 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10350 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10351 * Arabic-Indic digits 10352 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10353 * Extended Arabic-Indic digits 10354 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10355 * Devanagari digits 10356 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10357 * Fullwidth digits 10358 * </ul> 10359 * 10360 * Many other character ranges contain digits as well. 10361 * 10362 * <p><b>Note:</b> This method cannot handle <a 10363 * href="#supplementary"> supplementary characters</a>. To support 10364 * all Unicode characters, including supplementary characters, use 10365 * the {@link #isDigit(int)} method. 10366 * 10367 * @param ch the character to be tested. 10368 * @return {@code true} if the character is a digit; 10369 * {@code false} otherwise. 10370 * @see Character#digit(char, int) 10371 * @see Character#forDigit(int, int) 10372 * @see Character#getType(char) 10373 */ 10374 public static boolean isDigit(char ch) { 10375 return isDigit((int)ch); 10376 } 10377 10378 /** 10379 * Determines if the specified character (Unicode code point) is a digit. 10380 * <p> 10381 * A character is a digit if its general category type, provided 10382 * by {@link Character#getType(int) getType(codePoint)}, is 10383 * {@code DECIMAL_DIGIT_NUMBER}. 
10384 * <p> 10385 * Some Unicode character ranges that contain digits: 10386 * <ul> 10387 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10388 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10389 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10390 * Arabic-Indic digits 10391 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10392 * Extended Arabic-Indic digits 10393 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10394 * Devanagari digits 10395 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10396 * Fullwidth digits 10397 * </ul> 10398 * 10399 * Many other character ranges contain digits as well. 10400 * 10401 * @param codePoint the character (Unicode code point) to be tested. 10402 * @return {@code true} if the character is a digit; 10403 * {@code false} otherwise. 10404 * @see Character#forDigit(int, int) 10405 * @see Character#getType(int) 10406 * @since 1.5 10407 */ 10408 public static boolean isDigit(int codePoint) { 10409 return CharacterData.of(codePoint).isDigit(codePoint); 10410 } 10411 10412 /** 10413 * Determines if a character is defined in Unicode. 10414 * <p> 10415 * A character is defined if at least one of the following is true: 10416 * <ul> 10417 * <li>It has an entry in the UnicodeData file. 10418 * <li>It has a value in a range defined by the UnicodeData file. 10419 * </ul> 10420 * 10421 * <p><b>Note:</b> This method cannot handle <a 10422 * href="#supplementary"> supplementary characters</a>. To support 10423 * all Unicode characters, including supplementary characters, use 10424 * the {@link #isDefined(int)} method. 10425 * 10426 * @param ch the character to be tested 10427 * @return {@code true} if the character has a defined meaning 10428 * in Unicode; {@code false} otherwise. 
10429 * @see Character#isDigit(char) 10430 * @see Character#isLetter(char) 10431 * @see Character#isLetterOrDigit(char) 10432 * @see Character#isLowerCase(char) 10433 * @see Character#isTitleCase(char) 10434 * @see Character#isUpperCase(char) 10435 * @since 1.0.2 10436 */ 10437 public static boolean isDefined(char ch) { 10438 return isDefined((int)ch); 10439 } 10440 10441 /** 10442 * Determines if a character (Unicode code point) is defined in Unicode. 10443 * <p> 10444 * A character is defined if at least one of the following is true: 10445 * <ul> 10446 * <li>It has an entry in the UnicodeData file. 10447 * <li>It has a value in a range defined by the UnicodeData file. 10448 * </ul> 10449 * 10450 * @param codePoint the character (Unicode code point) to be tested. 10451 * @return {@code true} if the character has a defined meaning 10452 * in Unicode; {@code false} otherwise. 10453 * @see Character#isDigit(int) 10454 * @see Character#isLetter(int) 10455 * @see Character#isLetterOrDigit(int) 10456 * @see Character#isLowerCase(int) 10457 * @see Character#isTitleCase(int) 10458 * @see Character#isUpperCase(int) 10459 * @since 1.5 10460 */ 10461 public static boolean isDefined(int codePoint) { 10462 return getType(codePoint) != Character.UNASSIGNED; 10463 } 10464 10465 /** 10466 * Determines if the specified character is a letter. 10467 * <p> 10468 * A character is considered to be a letter if its general 10469 * category type, provided by {@code Character.getType(ch)}, 10470 * is any of the following: 10471 * <ul> 10472 * <li> {@code UPPERCASE_LETTER} 10473 * <li> {@code LOWERCASE_LETTER} 10474 * <li> {@code TITLECASE_LETTER} 10475 * <li> {@code MODIFIER_LETTER} 10476 * <li> {@code OTHER_LETTER} 10477 * </ul> 10478 * 10479 * Not all letters have case. Many characters are 10480 * letters but are neither uppercase nor lowercase nor titlecase. 10481 * 10482 * <p><b>Note:</b> This method cannot handle <a 10483 * href="#supplementary"> supplementary characters</a>. 
To support 10484 * all Unicode characters, including supplementary characters, use 10485 * the {@link #isLetter(int)} method. 10486 * 10487 * @param ch the character to be tested. 10488 * @return {@code true} if the character is a letter; 10489 * {@code false} otherwise. 10490 * @see Character#isDigit(char) 10491 * @see Character#isJavaIdentifierStart(char) 10492 * @see Character#isJavaLetter(char) 10493 * @see Character#isJavaLetterOrDigit(char) 10494 * @see Character#isLetterOrDigit(char) 10495 * @see Character#isLowerCase(char) 10496 * @see Character#isTitleCase(char) 10497 * @see Character#isUnicodeIdentifierStart(char) 10498 * @see Character#isUpperCase(char) 10499 */ 10500 public static boolean isLetter(char ch) { 10501 return isLetter((int)ch); 10502 } 10503 10504 /** 10505 * Determines if the specified character (Unicode code point) is a letter. 10506 * <p> 10507 * A character is considered to be a letter if its general 10508 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10509 * is any of the following: 10510 * <ul> 10511 * <li> {@code UPPERCASE_LETTER} 10512 * <li> {@code LOWERCASE_LETTER} 10513 * <li> {@code TITLECASE_LETTER} 10514 * <li> {@code MODIFIER_LETTER} 10515 * <li> {@code OTHER_LETTER} 10516 * </ul> 10517 * 10518 * Not all letters have case. Many characters are 10519 * letters but are neither uppercase nor lowercase nor titlecase. 10520 * 10521 * @param codePoint the character (Unicode code point) to be tested. 10522 * @return {@code true} if the character is a letter; 10523 * {@code false} otherwise. 
10524 * @see Character#isDigit(int) 10525 * @see Character#isJavaIdentifierStart(int) 10526 * @see Character#isLetterOrDigit(int) 10527 * @see Character#isLowerCase(int) 10528 * @see Character#isTitleCase(int) 10529 * @see Character#isUnicodeIdentifierStart(int) 10530 * @see Character#isUpperCase(int) 10531 * @since 1.5 10532 */ 10533 public static boolean isLetter(int codePoint) { 10534 return ((((1 << Character.UPPERCASE_LETTER) | 10535 (1 << Character.LOWERCASE_LETTER) | 10536 (1 << Character.TITLECASE_LETTER) | 10537 (1 << Character.MODIFIER_LETTER) | 10538 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10539 != 0; 10540 } 10541 10542 /** 10543 * Determines if the specified character is a letter or digit. 10544 * <p> 10545 * A character is considered to be a letter or digit if either 10546 * {@code Character.isLetter(char ch)} or 10547 * {@code Character.isDigit(char ch)} returns 10548 * {@code true} for the character. 10549 * 10550 * <p><b>Note:</b> This method cannot handle <a 10551 * href="#supplementary"> supplementary characters</a>. To support 10552 * all Unicode characters, including supplementary characters, use 10553 * the {@link #isLetterOrDigit(int)} method. 10554 * 10555 * @param ch the character to be tested. 10556 * @return {@code true} if the character is a letter or digit; 10557 * {@code false} otherwise. 10558 * @see Character#isDigit(char) 10559 * @see Character#isJavaIdentifierPart(char) 10560 * @see Character#isJavaLetter(char) 10561 * @see Character#isJavaLetterOrDigit(char) 10562 * @see Character#isLetter(char) 10563 * @see Character#isUnicodeIdentifierPart(char) 10564 * @since 1.0.2 10565 */ 10566 public static boolean isLetterOrDigit(char ch) { 10567 return isLetterOrDigit((int)ch); 10568 } 10569 10570 /** 10571 * Determines if the specified character (Unicode code point) is a letter or digit. 
10572 * <p> 10573 * A character is considered to be a letter or digit if either 10574 * {@link #isLetter(int) isLetter(codePoint)} or 10575 * {@link #isDigit(int) isDigit(codePoint)} returns 10576 * {@code true} for the character. 10577 * 10578 * @param codePoint the character (Unicode code point) to be tested. 10579 * @return {@code true} if the character is a letter or digit; 10580 * {@code false} otherwise. 10581 * @see Character#isDigit(int) 10582 * @see Character#isJavaIdentifierPart(int) 10583 * @see Character#isLetter(int) 10584 * @see Character#isUnicodeIdentifierPart(int) 10585 * @since 1.5 10586 */ 10587 public static boolean isLetterOrDigit(int codePoint) { 10588 return ((((1 << Character.UPPERCASE_LETTER) | 10589 (1 << Character.LOWERCASE_LETTER) | 10590 (1 << Character.TITLECASE_LETTER) | 10591 (1 << Character.MODIFIER_LETTER) | 10592 (1 << Character.OTHER_LETTER) | 10593 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10594 != 0; 10595 } 10596 10597 /** 10598 * Determines if the specified character is permissible as the first 10599 * character in a Java identifier. 10600 * <p> 10601 * A character may start a Java identifier if and only if 10602 * one of the following conditions is true: 10603 * <ul> 10604 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10605 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10606 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10607 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10608 * </ul> 10609 * 10610 * @param ch the character to be tested. 10611 * @return {@code true} if the character may start a Java 10612 * identifier; {@code false} otherwise. 
10613 * @see Character#isJavaLetterOrDigit(char) 10614 * @see Character#isJavaIdentifierStart(char) 10615 * @see Character#isJavaIdentifierPart(char) 10616 * @see Character#isLetter(char) 10617 * @see Character#isLetterOrDigit(char) 10618 * @see Character#isUnicodeIdentifierStart(char) 10619 * @since 1.0.2 10620 * @deprecated Replaced by isJavaIdentifierStart(char). 10621 */ 10622 @Deprecated(since="1.1") 10623 public static boolean isJavaLetter(char ch) { 10624 return isJavaIdentifierStart(ch); 10625 } 10626 10627 /** 10628 * Determines if the specified character may be part of a Java 10629 * identifier as other than the first character. 10630 * <p> 10631 * A character may be part of a Java identifier if and only if one 10632 * of the following conditions is true: 10633 * <ul> 10634 * <li> it is a letter 10635 * <li> it is a currency symbol (such as {@code '$'}) 10636 * <li> it is a connecting punctuation character (such as {@code '_'}) 10637 * <li> it is a digit 10638 * <li> it is a numeric letter (such as a Roman numeral character) 10639 * <li> it is a combining mark 10640 * <li> it is a non-spacing mark 10641 * <li> {@code isIdentifierIgnorable} returns 10642 * {@code true} for the character. 10643 * </ul> 10644 * 10645 * @param ch the character to be tested. 10646 * @return {@code true} if the character may be part of a 10647 * Java identifier; {@code false} otherwise. 10648 * @see Character#isJavaLetter(char) 10649 * @see Character#isJavaIdentifierStart(char) 10650 * @see Character#isJavaIdentifierPart(char) 10651 * @see Character#isLetter(char) 10652 * @see Character#isLetterOrDigit(char) 10653 * @see Character#isUnicodeIdentifierPart(char) 10654 * @see Character#isIdentifierIgnorable(char) 10655 * @since 1.0.2 10656 * @deprecated Replaced by isJavaIdentifierPart(char). 
10657 */ 10658 @Deprecated(since="1.1") 10659 public static boolean isJavaLetterOrDigit(char ch) { 10660 return isJavaIdentifierPart(ch); 10661 } 10662 10663 /** 10664 * Determines if the specified character (Unicode code point) is alphabetic. 10665 * <p> 10666 * A character is considered to be alphabetic if its general category type, 10667 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10668 * the following: 10669 * <ul> 10670 * <li> {@code UPPERCASE_LETTER} 10671 * <li> {@code LOWERCASE_LETTER} 10672 * <li> {@code TITLECASE_LETTER} 10673 * <li> {@code MODIFIER_LETTER} 10674 * <li> {@code OTHER_LETTER} 10675 * <li> {@code LETTER_NUMBER} 10676 * </ul> 10677 * or it has contributory property Other_Alphabetic as defined by the 10678 * Unicode Standard. 10679 * 10680 * @param codePoint the character (Unicode code point) to be tested. 10681 * @return {@code true} if the character is a Unicode alphabet 10682 * character, {@code false} otherwise. 10683 * @since 1.7 10684 */ 10685 public static boolean isAlphabetic(int codePoint) { 10686 return (((((1 << Character.UPPERCASE_LETTER) | 10687 (1 << Character.LOWERCASE_LETTER) | 10688 (1 << Character.TITLECASE_LETTER) | 10689 (1 << Character.MODIFIER_LETTER) | 10690 (1 << Character.OTHER_LETTER) | 10691 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10692 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10693 } 10694 10695 /** 10696 * Determines if the specified character (Unicode code point) is a CJKV 10697 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10698 * the Unicode Standard. 10699 * 10700 * @param codePoint the character (Unicode code point) to be tested. 10701 * @return {@code true} if the character is a Unicode ideograph 10702 * character, {@code false} otherwise. 
10703 * @since 1.7 10704 */ 10705 public static boolean isIdeographic(int codePoint) { 10706 return CharacterData.of(codePoint).isIdeographic(codePoint); 10707 } 10708 10709 /** 10710 * Determines if the specified character is 10711 * permissible as the first character in a Java identifier. 10712 * <p> 10713 * A character may start a Java identifier if and only if 10714 * one of the following conditions is true: 10715 * <ul> 10716 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10717 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10718 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10719 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10720 * </ul> 10721 * 10722 * <p><b>Note:</b> This method cannot handle <a 10723 * href="#supplementary"> supplementary characters</a>. To support 10724 * all Unicode characters, including supplementary characters, use 10725 * the {@link #isJavaIdentifierStart(int)} method. 10726 * 10727 * @param ch the character to be tested. 10728 * @return {@code true} if the character may start a Java identifier; 10729 * {@code false} otherwise. 10730 * @see Character#isJavaIdentifierPart(char) 10731 * @see Character#isLetter(char) 10732 * @see Character#isUnicodeIdentifierStart(char) 10733 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10734 * @since 1.1 10735 */ 10736 @SuppressWarnings("doclint:reference") // cross-module links 10737 public static boolean isJavaIdentifierStart(char ch) { 10738 return isJavaIdentifierStart((int)ch); 10739 } 10740 10741 /** 10742 * Determines if the character (Unicode code point) is 10743 * permissible as the first character in a Java identifier. 
10744 * <p> 10745 * A character may start a Java identifier if and only if 10746 * one of the following conditions is true: 10747 * <ul> 10748 * <li> {@link #isLetter(int) isLetter(codePoint)} 10749 * returns {@code true} 10750 * <li> {@link #getType(int) getType(codePoint)} 10751 * returns {@code LETTER_NUMBER} 10752 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10753 * <li> the referenced character is a connecting punctuation character 10754 * (such as {@code '_'}). 10755 * </ul> 10756 * 10757 * @param codePoint the character (Unicode code point) to be tested. 10758 * @return {@code true} if the character may start a Java identifier; 10759 * {@code false} otherwise. 10760 * @see Character#isJavaIdentifierPart(int) 10761 * @see Character#isLetter(int) 10762 * @see Character#isUnicodeIdentifierStart(int) 10763 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10764 * @since 1.5 10765 */ 10766 @SuppressWarnings("doclint:reference") // cross-module links 10767 public static boolean isJavaIdentifierStart(int codePoint) { 10768 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10769 } 10770 10771 /** 10772 * Determines if the specified character may be part of a Java 10773 * identifier as other than the first character. 
10774 * <p> 10775 * A character may be part of a Java identifier if any of the following 10776 * conditions are true: 10777 * <ul> 10778 * <li> it is a letter 10779 * <li> it is a currency symbol (such as {@code '$'}) 10780 * <li> it is a connecting punctuation character (such as {@code '_'}) 10781 * <li> it is a digit 10782 * <li> it is a numeric letter (such as a Roman numeral character) 10783 * <li> it is a combining mark 10784 * <li> it is a non-spacing mark 10785 * <li> {@code isIdentifierIgnorable} returns 10786 * {@code true} for the character 10787 * </ul> 10788 * 10789 * <p><b>Note:</b> This method cannot handle <a 10790 * href="#supplementary"> supplementary characters</a>. To support 10791 * all Unicode characters, including supplementary characters, use 10792 * the {@link #isJavaIdentifierPart(int)} method. 10793 * 10794 * @param ch the character to be tested. 10795 * @return {@code true} if the character may be part of a 10796 * Java identifier; {@code false} otherwise. 10797 * @see Character#isIdentifierIgnorable(char) 10798 * @see Character#isJavaIdentifierStart(char) 10799 * @see Character#isLetterOrDigit(char) 10800 * @see Character#isUnicodeIdentifierPart(char) 10801 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10802 * @since 1.1 10803 */ 10804 @SuppressWarnings("doclint:reference") // cross-module links 10805 public static boolean isJavaIdentifierPart(char ch) { 10806 return isJavaIdentifierPart((int)ch); 10807 } 10808 10809 /** 10810 * Determines if the character (Unicode code point) may be part of a Java 10811 * identifier as other than the first character. 
10812 * <p> 10813 * A character may be part of a Java identifier if any of the following 10814 * conditions are true: 10815 * <ul> 10816 * <li> it is a letter 10817 * <li> it is a currency symbol (such as {@code '$'}) 10818 * <li> it is a connecting punctuation character (such as {@code '_'}) 10819 * <li> it is a digit 10820 * <li> it is a numeric letter (such as a Roman numeral character) 10821 * <li> it is a combining mark 10822 * <li> it is a non-spacing mark 10823 * <li> {@link #isIdentifierIgnorable(int) 10824 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10825 * the code point 10826 * </ul> 10827 * 10828 * @param codePoint the character (Unicode code point) to be tested. 10829 * @return {@code true} if the character may be part of a 10830 * Java identifier; {@code false} otherwise. 10831 * @see Character#isIdentifierIgnorable(int) 10832 * @see Character#isJavaIdentifierStart(int) 10833 * @see Character#isLetterOrDigit(int) 10834 * @see Character#isUnicodeIdentifierPart(int) 10835 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10836 * @since 1.5 10837 */ 10838 @SuppressWarnings("doclint:reference") // cross-module links 10839 public static boolean isJavaIdentifierPart(int codePoint) { 10840 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10841 } 10842 10843 /** 10844 * Determines if the specified character is permissible as the 10845 * first character in a Unicode identifier. 10846 * <p> 10847 * A character may start a Unicode identifier if and only if 10848 * one of the following conditions is true: 10849 * <ul> 10850 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10851 * <li> {@link #getType(char) getType(ch)} returns 10852 * {@code LETTER_NUMBER}. 10853 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10854 * {@code Other_ID_Start}</a> character. 
10855 * </ul> 10856 * <p> 10857 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10858 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10859 * with the following profile of UAX31: 10860 * <pre> 10861 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10862 * </pre> 10863 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10864 * compatibility. 10865 * 10866 * <p><b>Note:</b> This method cannot handle <a 10867 * href="#supplementary"> supplementary characters</a>. To support 10868 * all Unicode characters, including supplementary characters, use 10869 * the {@link #isUnicodeIdentifierStart(int)} method. 10870 * 10871 * @param ch the character to be tested. 10872 * @return {@code true} if the character may start a Unicode 10873 * identifier; {@code false} otherwise. 10874 * 10875 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10876 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10877 * @see Character#isJavaIdentifierStart(char) 10878 * @see Character#isLetter(char) 10879 * @see Character#isUnicodeIdentifierPart(char) 10880 * @since 1.1 10881 */ 10882 public static boolean isUnicodeIdentifierStart(char ch) { 10883 return isUnicodeIdentifierStart((int)ch); 10884 } 10885 10886 /** 10887 * Determines if the specified character (Unicode code point) is permissible as the 10888 * first character in a Unicode identifier. 10889 * <p> 10890 * A character may start a Unicode identifier if and only if 10891 * one of the following conditions is true: 10892 * <ul> 10893 * <li> {@link #isLetter(int) isLetter(codePoint)} 10894 * returns {@code true} 10895 * <li> {@link #getType(int) getType(codePoint)} 10896 * returns {@code LETTER_NUMBER}. 10897 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10898 * {@code Other_ID_Start}</a> character. 
10899 * </ul> 10900 * <p> 10901 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10902 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10903 * with the following profile of UAX31: 10904 * <pre> 10905 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10906 * </pre> 10907 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10908 * compatibility. 10909 * 10910 * @param codePoint the character (Unicode code point) to be tested. 10911 * @return {@code true} if the character may start a Unicode 10912 * identifier; {@code false} otherwise. 10913 * 10914 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10915 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10916 * @see Character#isJavaIdentifierStart(int) 10917 * @see Character#isLetter(int) 10918 * @see Character#isUnicodeIdentifierPart(int) 10919 * @since 1.5 10920 */ 10921 public static boolean isUnicodeIdentifierStart(int codePoint) { 10922 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10923 } 10924 10925 /** 10926 * Determines if the specified character may be part of a Unicode 10927 * identifier as other than the first character. 10928 * <p> 10929 * A character may be part of a Unicode identifier if and only if 10930 * one of the following statements is true: 10931 * <ul> 10932 * <li> it is a letter 10933 * <li> it is a connecting punctuation character (such as {@code '_'}) 10934 * <li> it is a digit 10935 * <li> it is a numeric letter (such as a Roman numeral character) 10936 * <li> it is a combining mark 10937 * <li> it is a non-spacing mark 10938 * <li> {@code isIdentifierIgnorable} returns 10939 * {@code true} for this character. 10940 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10941 * {@code Other_ID_Start}</a> character. 
10942 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10943 * {@code Other_ID_Continue}</a> character. 10944 * </ul> 10945 * <p> 10946 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10947 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10948 * with the following profile of UAX31: 10949 * <pre> 10950 * Continue := Start + ID_Continue + ignorable 10951 * Medial := empty 10952 * ignorable := isIdentifierIgnorable(char) returns true for the character 10953 * </pre> 10954 * {@code ignorable} is added to {@code Continue} for backward 10955 * compatibility. 10956 * 10957 * <p><b>Note:</b> This method cannot handle <a 10958 * href="#supplementary"> supplementary characters</a>. To support 10959 * all Unicode characters, including supplementary characters, use 10960 * the {@link #isUnicodeIdentifierPart(int)} method. 10961 * 10962 * @param ch the character to be tested. 10963 * @return {@code true} if the character may be part of a 10964 * Unicode identifier; {@code false} otherwise. 10965 * 10966 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10967 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10968 * @see Character#isIdentifierIgnorable(char) 10969 * @see Character#isJavaIdentifierPart(char) 10970 * @see Character#isLetterOrDigit(char) 10971 * @see Character#isUnicodeIdentifierStart(char) 10972 * @since 1.1 10973 */ 10974 public static boolean isUnicodeIdentifierPart(char ch) { 10975 return isUnicodeIdentifierPart((int)ch); 10976 } 10977 10978 /** 10979 * Determines if the specified character (Unicode code point) may be part of a Unicode 10980 * identifier as other than the first character. 
10981 * <p> 10982 * A character may be part of a Unicode identifier if and only if 10983 * one of the following statements is true: 10984 * <ul> 10985 * <li> it is a letter 10986 * <li> it is a connecting punctuation character (such as {@code '_'}) 10987 * <li> it is a digit 10988 * <li> it is a numeric letter (such as a Roman numeral character) 10989 * <li> it is a combining mark 10990 * <li> it is a non-spacing mark 10991 * <li> {@code isIdentifierIgnorable} returns 10992 * {@code true} for this character. 10993 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10994 * {@code Other_ID_Start}</a> character. 10995 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10996 * {@code Other_ID_Continue}</a> character. 10997 * </ul> 10998 * <p> 10999 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 11000 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 11001 * with the following profile of UAX31: 11002 * <pre> 11003 * Continue := Start + ID_Continue + ignorable 11004 * Medial := empty 11005 * ignorable := isIdentifierIgnorable(int) returns true for the character 11006 * </pre> 11007 * {@code ignorable} is added to {@code Continue} for backward 11008 * compatibility. 11009 * 11010 * @param codePoint the character (Unicode code point) to be tested. 11011 * @return {@code true} if the character may be part of a 11012 * Unicode identifier; {@code false} otherwise. 
11013 * 11014 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 11015 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 11016 * @see Character#isIdentifierIgnorable(int) 11017 * @see Character#isJavaIdentifierPart(int) 11018 * @see Character#isLetterOrDigit(int) 11019 * @see Character#isUnicodeIdentifierStart(int) 11020 * @since 1.5 11021 */ 11022 public static boolean isUnicodeIdentifierPart(int codePoint) { 11023 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 11024 } 11025 11026 /** 11027 * Determines if the specified character should be regarded as 11028 * an ignorable character in a Java identifier or a Unicode identifier. 11029 * <p> 11030 * The following Unicode characters are ignorable in a Java identifier 11031 * or a Unicode identifier: 11032 * <ul> 11033 * <li>ISO control characters that are not whitespace 11034 * <ul> 11035 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11036 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11037 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11038 * </ul> 11039 * 11040 * <li>all characters that have the {@code FORMAT} general 11041 * category value 11042 * </ul> 11043 * 11044 * <p><b>Note:</b> This method cannot handle <a 11045 * href="#supplementary"> supplementary characters</a>. To support 11046 * all Unicode characters, including supplementary characters, use 11047 * the {@link #isIdentifierIgnorable(int)} method. 11048 * 11049 * @param ch the character to be tested. 11050 * @return {@code true} if the character is an ignorable control 11051 * character that may be part of a Java or Unicode identifier; 11052 * {@code false} otherwise. 
11053 * @see Character#isJavaIdentifierPart(char) 11054 * @see Character#isUnicodeIdentifierPart(char) 11055 * @since 1.1 11056 */ 11057 public static boolean isIdentifierIgnorable(char ch) { 11058 return isIdentifierIgnorable((int)ch); 11059 } 11060 11061 /** 11062 * Determines if the specified character (Unicode code point) should be regarded as 11063 * an ignorable character in a Java identifier or a Unicode identifier. 11064 * <p> 11065 * The following Unicode characters are ignorable in a Java identifier 11066 * or a Unicode identifier: 11067 * <ul> 11068 * <li>ISO control characters that are not whitespace 11069 * <ul> 11070 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11071 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11072 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11073 * </ul> 11074 * 11075 * <li>all characters that have the {@code FORMAT} general 11076 * category value 11077 * </ul> 11078 * 11079 * @param codePoint the character (Unicode code point) to be tested. 11080 * @return {@code true} if the character is an ignorable control 11081 * character that may be part of a Java or Unicode identifier; 11082 * {@code false} otherwise. 11083 * @see Character#isJavaIdentifierPart(int) 11084 * @see Character#isUnicodeIdentifierPart(int) 11085 * @since 1.5 11086 */ 11087 public static boolean isIdentifierIgnorable(int codePoint) { 11088 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 11089 } 11090 11091 /** 11092 * Determines if the specified character (Unicode code point) is an Emoji. 11093 * <p> 11094 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 11095 * property, defined in 11096 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11097 * Unicode Emoji (Technical Standard #51)</a>. 11098 * 11099 * @param codePoint the character (Unicode code point) to be tested. 
11100 * @return {@code true} if the character is an Emoji; 11101 * {@code false} otherwise. 11102 * @since 21 11103 */ 11104 public static boolean isEmoji(int codePoint) { 11105 return CharacterData.of(codePoint).isEmoji(codePoint); 11106 } 11107 11108 /** 11109 * Determines if the specified character (Unicode code point) has the 11110 * Emoji Presentation property by default. 11111 * <p> 11112 * A character is considered to have the Emoji Presentation property if and 11113 * only if it has the {@code Emoji_Presentation} property, defined in 11114 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11115 * Unicode Emoji (Technical Standard #51)</a>. 11116 * 11117 * @param codePoint the character (Unicode code point) to be tested. 11118 * @return {@code true} if the character has the Emoji Presentation 11119 * property; {@code false} otherwise. 11120 * @since 21 11121 */ 11122 public static boolean isEmojiPresentation(int codePoint) { 11123 return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 11124 } 11125 11126 /** 11127 * Determines if the specified character (Unicode code point) is an 11128 * Emoji Modifier. 11129 * <p> 11130 * A character is considered to be an Emoji Modifier if and only if it has 11131 * the {@code Emoji_Modifier} property, defined in 11132 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11133 * Unicode Emoji (Technical Standard #51)</a>. 11134 * 11135 * @param codePoint the character (Unicode code point) to be tested. 11136 * @return {@code true} if the character is an Emoji Modifier; 11137 * {@code false} otherwise. 11138 * @since 21 11139 */ 11140 public static boolean isEmojiModifier(int codePoint) { 11141 return CharacterData.of(codePoint).isEmojiModifier(codePoint); 11142 } 11143 11144 /** 11145 * Determines if the specified character (Unicode code point) is an 11146 * Emoji Modifier Base. 
11147 * <p> 11148 * A character is considered to be an Emoji Modifier Base if and only if it has 11149 * the {@code Emoji_Modifier_Base} property, defined in 11150 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11151 * Unicode Emoji (Technical Standard #51)</a>. 11152 * 11153 * @param codePoint the character (Unicode code point) to be tested. 11154 * @return {@code true} if the character is an Emoji Modifier Base; 11155 * {@code false} otherwise. 11156 * @since 21 11157 */ 11158 public static boolean isEmojiModifierBase(int codePoint) { 11159 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 11160 } 11161 11162 /** 11163 * Determines if the specified character (Unicode code point) is an 11164 * Emoji Component. 11165 * <p> 11166 * A character is considered to be an Emoji Component if and only if it has 11167 * the {@code Emoji_Component} property, defined in 11168 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11169 * Unicode Emoji (Technical Standard #51)</a>. 11170 * 11171 * @param codePoint the character (Unicode code point) to be tested. 11172 * @return {@code true} if the character is an Emoji Component; 11173 * {@code false} otherwise. 11174 * @since 21 11175 */ 11176 public static boolean isEmojiComponent(int codePoint) { 11177 return CharacterData.of(codePoint).isEmojiComponent(codePoint); 11178 } 11179 11180 /** 11181 * Determines if the specified character (Unicode code point) is 11182 * an Extended Pictographic. 11183 * <p> 11184 * A character is considered to be an Extended Pictographic if and only if it has 11185 * the {@code Extended_Pictographic} property, defined in 11186 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11187 * Unicode Emoji (Technical Standard #51)</a>. 11188 * 11189 * @param codePoint the character (Unicode code point) to be tested. 
11190 * @return {@code true} if the character is an Extended Pictographic; 11191 * {@code false} otherwise. 11192 * @since 21 11193 */ 11194 public static boolean isExtendedPictographic(int codePoint) { 11195 return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 11196 } 11197 11198 /** 11199 * Converts the character argument to lowercase using case 11200 * mapping information from the UnicodeData file. 11201 * <p> 11202 * Note that 11203 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 11204 * does not always return {@code true} for some ranges of 11205 * characters, particularly those that are symbols or ideographs. 11206 * 11207 * <p>In general, {@link String#toLowerCase()} should be used to map 11208 * characters to lowercase. {@code String} case mapping methods 11209 * have several benefits over {@code Character} case mapping methods. 11210 * {@code String} case mapping methods can perform locale-sensitive 11211 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11212 * the {@code Character} case mapping methods cannot. 11213 * 11214 * <p><b>Note:</b> This method cannot handle <a 11215 * href="#supplementary"> supplementary characters</a>. To support 11216 * all Unicode characters, including supplementary characters, use 11217 * the {@link #toLowerCase(int)} method. 11218 * 11219 * @param ch the character to be converted. 11220 * @return the lowercase equivalent of the character, if any; 11221 * otherwise, the character itself. 11222 * @see Character#isLowerCase(char) 11223 * @see String#toLowerCase() 11224 */ 11225 public static char toLowerCase(char ch) { 11226 return (char)toLowerCase((int)ch); 11227 } 11228 11229 /** 11230 * Converts the character (Unicode code point) argument to 11231 * lowercase using case mapping information from the UnicodeData 11232 * file. 
11233 * 11234 * <p> Note that 11235 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 11236 * does not always return {@code true} for some ranges of 11237 * characters, particularly those that are symbols or ideographs. 11238 * 11239 * <p>In general, {@link String#toLowerCase()} should be used to map 11240 * characters to lowercase. {@code String} case mapping methods 11241 * have several benefits over {@code Character} case mapping methods. 11242 * {@code String} case mapping methods can perform locale-sensitive 11243 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11244 * the {@code Character} case mapping methods cannot. 11245 * 11246 * @param codePoint the character (Unicode code point) to be converted. 11247 * @return the lowercase equivalent of the character (Unicode code 11248 * point), if any; otherwise, the character itself. 11249 * @see Character#isLowerCase(int) 11250 * @see String#toLowerCase() 11251 * 11252 * @since 1.5 11253 */ 11254 public static int toLowerCase(int codePoint) { 11255 return CharacterData.of(codePoint).toLowerCase(codePoint); 11256 } 11257 11258 /** 11259 * Converts the character argument to uppercase using case mapping 11260 * information from the UnicodeData file. 11261 * <p> 11262 * Note that 11263 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 11264 * does not always return {@code true} for some ranges of 11265 * characters, particularly those that are symbols or ideographs. 11266 * 11267 * <p>In general, {@link String#toUpperCase()} should be used to map 11268 * characters to uppercase. {@code String} case mapping methods 11269 * have several benefits over {@code Character} case mapping methods. 11270 * {@code String} case mapping methods can perform locale-sensitive 11271 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11272 * the {@code Character} case mapping methods cannot. 
11273 * 11274 * <p><b>Note:</b> This method cannot handle <a 11275 * href="#supplementary"> supplementary characters</a>. To support 11276 * all Unicode characters, including supplementary characters, use 11277 * the {@link #toUpperCase(int)} method. 11278 * 11279 * @param ch the character to be converted. 11280 * @return the uppercase equivalent of the character, if any; 11281 * otherwise, the character itself. 11282 * @see Character#isUpperCase(char) 11283 * @see String#toUpperCase() 11284 */ 11285 public static char toUpperCase(char ch) { 11286 return (char)toUpperCase((int)ch); 11287 } 11288 11289 /** 11290 * Converts the character (Unicode code point) argument to 11291 * uppercase using case mapping information from the UnicodeData 11292 * file. 11293 * 11294 * <p>Note that 11295 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11296 * does not always return {@code true} for some ranges of 11297 * characters, particularly those that are symbols or ideographs. 11298 * 11299 * <p>In general, {@link String#toUpperCase()} should be used to map 11300 * characters to uppercase. {@code String} case mapping methods 11301 * have several benefits over {@code Character} case mapping methods. 11302 * {@code String} case mapping methods can perform locale-sensitive 11303 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11304 * the {@code Character} case mapping methods cannot. 11305 * 11306 * @param codePoint the character (Unicode code point) to be converted. 11307 * @return the uppercase equivalent of the character, if any; 11308 * otherwise, the character itself. 11309 * @see Character#isUpperCase(int) 11310 * @see String#toUpperCase() 11311 * 11312 * @since 1.5 11313 */ 11314 public static int toUpperCase(int codePoint) { 11315 return CharacterData.of(codePoint).toUpperCase(codePoint); 11316 } 11317 11318 /** 11319 * Converts the character argument to titlecase using case mapping 11320 * information from the UnicodeData file. 
If a character has no 11321 * explicit titlecase mapping and is not itself a titlecase char 11322 * according to UnicodeData, then the uppercase mapping is 11323 * returned as an equivalent titlecase mapping. If the 11324 * {@code char} argument is already a titlecase 11325 * {@code char}, the same {@code char} value will be 11326 * returned. 11327 * <p> 11328 * Note that 11329 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11330 * does not always return {@code true} for some ranges of 11331 * characters. 11332 * 11333 * <p><b>Note:</b> This method cannot handle <a 11334 * href="#supplementary"> supplementary characters</a>. To support 11335 * all Unicode characters, including supplementary characters, use 11336 * the {@link #toTitleCase(int)} method. 11337 * 11338 * @param ch the character to be converted. 11339 * @return the titlecase equivalent of the character, if any; 11340 * otherwise, the character itself. 11341 * @see Character#isTitleCase(char) 11342 * @see Character#toLowerCase(char) 11343 * @see Character#toUpperCase(char) 11344 * @since 1.0.2 11345 */ 11346 public static char toTitleCase(char ch) { 11347 return (char)toTitleCase((int)ch); 11348 } 11349 11350 /** 11351 * Converts the character (Unicode code point) argument to titlecase using case mapping 11352 * information from the UnicodeData file. If a character has no 11353 * explicit titlecase mapping and is not itself a titlecase char 11354 * according to UnicodeData, then the uppercase mapping is 11355 * returned as an equivalent titlecase mapping. If the 11356 * character argument is already a titlecase 11357 * character, the same character value will be 11358 * returned. 11359 * 11360 * <p>Note that 11361 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11362 * does not always return {@code true} for some ranges of 11363 * characters. 11364 * 11365 * @param codePoint the character (Unicode code point) to be converted. 
11366 * @return the titlecase equivalent of the character, if any; 11367 * otherwise, the character itself. 11368 * @see Character#isTitleCase(int) 11369 * @see Character#toLowerCase(int) 11370 * @see Character#toUpperCase(int) 11371 * @since 1.5 11372 */ 11373 public static int toTitleCase(int codePoint) { 11374 return CharacterData.of(codePoint).toTitleCase(codePoint); 11375 } 11376 11377 /** 11378 * Returns the numeric value of the character {@code ch} in the 11379 * specified radix. 11380 * <p> 11381 * If the radix is not in the range {@code MIN_RADIX} ≤ 11382 * {@code radix} ≤ {@code MAX_RADIX} or if the 11383 * value of {@code ch} is not a valid digit in the specified 11384 * radix, {@code -1} is returned. A character is a valid digit 11385 * if at least one of the following is true: 11386 * <ul> 11387 * <li>The method {@code isDigit} is {@code true} of the character 11388 * and the Unicode decimal digit value of the character (or its 11389 * single-character decomposition) is less than the specified radix. 11390 * In this case the decimal digit value is returned. 11391 * <li>The character is one of the uppercase Latin letters 11392 * {@code 'A'} through {@code 'Z'} and its code is less than 11393 * {@code radix + 'A' - 10}. 11394 * In this case, {@code ch - 'A' + 10} 11395 * is returned. 11396 * <li>The character is one of the lowercase Latin letters 11397 * {@code 'a'} through {@code 'z'} and its code is less than 11398 * {@code radix + 'a' - 10}. 11399 * In this case, {@code ch - 'a' + 10} 11400 * is returned. 11401 * <li>The character is one of the fullwidth uppercase Latin letters A 11402 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11403 * and its code is less than 11404 * {@code radix + '\u005CuFF21' - 10}. 11405 * In this case, {@code ch - '\u005CuFF21' + 10} 11406 * is returned. 
11407 * <li>The character is one of the fullwidth lowercase Latin letters a 11408 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11409 * and its code is less than 11410 * {@code radix + '\u005CuFF41' - 10}. 11411 * In this case, {@code ch - '\u005CuFF41' + 10} 11412 * is returned. 11413 * </ul> 11414 * 11415 * <p><b>Note:</b> This method cannot handle <a 11416 * href="#supplementary"> supplementary characters</a>. To support 11417 * all Unicode characters, including supplementary characters, use 11418 * the {@link #digit(int, int)} method. 11419 * 11420 * @param ch the character to be converted. 11421 * @param radix the radix. 11422 * @return the numeric value represented by the character in the 11423 * specified radix. 11424 * @see Character#forDigit(int, int) 11425 * @see Character#isDigit(char) 11426 */ 11427 public static int digit(char ch, int radix) { 11428 return digit((int)ch, radix); 11429 } 11430 11431 /** 11432 * Returns the numeric value of the specified character (Unicode 11433 * code point) in the specified radix. 11434 * 11435 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11436 * {@code radix} ≤ {@code MAX_RADIX} or if the 11437 * character is not a valid digit in the specified 11438 * radix, {@code -1} is returned. A character is a valid digit 11439 * if at least one of the following is true: 11440 * <ul> 11441 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11442 * and the Unicode decimal digit value of the character (or its 11443 * single-character decomposition) is less than the specified radix. 11444 * In this case the decimal digit value is returned. 11445 * <li>The character is one of the uppercase Latin letters 11446 * {@code 'A'} through {@code 'Z'} and its code is less than 11447 * {@code radix + 'A' - 10}. 11448 * In this case, {@code codePoint - 'A' + 10} 11449 * is returned. 
11450 * <li>The character is one of the lowercase Latin letters 11451 * {@code 'a'} through {@code 'z'} and its code is less than 11452 * {@code radix + 'a' - 10}. 11453 * In this case, {@code codePoint - 'a' + 10} 11454 * is returned. 11455 * <li>The character is one of the fullwidth uppercase Latin letters A 11456 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11457 * and its code is less than 11458 * {@code radix + '\u005CuFF21' - 10}. 11459 * In this case, 11460 * {@code codePoint - '\u005CuFF21' + 10} 11461 * is returned. 11462 * <li>The character is one of the fullwidth lowercase Latin letters a 11463 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11464 * and its code is less than 11465 * {@code radix + '\u005CuFF41'- 10}. 11466 * In this case, 11467 * {@code codePoint - '\u005CuFF41' + 10} 11468 * is returned. 11469 * </ul> 11470 * 11471 * @param codePoint the character (Unicode code point) to be converted. 11472 * @param radix the radix. 11473 * @return the numeric value represented by the character in the 11474 * specified radix. 11475 * @see Character#forDigit(int, int) 11476 * @see Character#isDigit(int) 11477 * @since 1.5 11478 */ 11479 public static int digit(int codePoint, int radix) { 11480 return CharacterData.of(codePoint).digit(codePoint, radix); 11481 } 11482 11483 /** 11484 * Returns the {@code int} value that the specified Unicode 11485 * character represents. For example, the character 11486 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11487 * an int with a value of 50. 11488 * <p> 11489 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11490 * {@code '\u005Cu005A'}), lowercase 11491 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11492 * full width variant ({@code '\u005CuFF21'} through 11493 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11494 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11495 * through 35. 
This is independent of the Unicode specification, 11496 * which does not assign numeric values to these {@code char} 11497 * values. 11498 * <p> 11499 * If the character does not have a numeric value, then -1 is returned. 11500 * If the character has a numeric value that cannot be represented as a 11501 * nonnegative integer (for example, a fractional value), then -2 11502 * is returned. 11503 * 11504 * <p><b>Note:</b> This method cannot handle <a 11505 * href="#supplementary"> supplementary characters</a>. To support 11506 * all Unicode characters, including supplementary characters, use 11507 * the {@link #getNumericValue(int)} method. 11508 * 11509 * @param ch the character to be converted. 11510 * @return the numeric value of the character, as a nonnegative {@code int} 11511 * value; -2 if the character has a numeric value but the value 11512 * can not be represented as a nonnegative {@code int} value; 11513 * -1 if the character has no numeric value. 11514 * @see Character#forDigit(int, int) 11515 * @see Character#isDigit(char) 11516 * @since 1.1 11517 */ 11518 public static int getNumericValue(char ch) { 11519 return getNumericValue((int)ch); 11520 } 11521 11522 /** 11523 * Returns the {@code int} value that the specified 11524 * character (Unicode code point) represents. For example, the character 11525 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11526 * an {@code int} with a value of 50. 11527 * <p> 11528 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11529 * {@code '\u005Cu005A'}), lowercase 11530 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11531 * full width variant ({@code '\u005CuFF21'} through 11532 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11533 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11534 * through 35. This is independent of the Unicode specification, 11535 * which does not assign numeric values to these {@code char} 11536 * values. 
11537 * <p> 11538 * If the character does not have a numeric value, then -1 is returned. 11539 * If the character has a numeric value that cannot be represented as a 11540 * nonnegative integer (for example, a fractional value), then -2 11541 * is returned. 11542 * 11543 * @param codePoint the character (Unicode code point) to be converted. 11544 * @return the numeric value of the character, as a nonnegative {@code int} 11545 * value; -2 if the character has a numeric value but the value 11546 * can not be represented as a nonnegative {@code int} value; 11547 * -1 if the character has no numeric value. 11548 * @see Character#forDigit(int, int) 11549 * @see Character#isDigit(int) 11550 * @since 1.5 11551 */ 11552 public static int getNumericValue(int codePoint) { 11553 return CharacterData.of(codePoint).getNumericValue(codePoint); 11554 } 11555 11556 /** 11557 * Determines if the specified character is ISO-LATIN-1 white space. 11558 * This method returns {@code true} for the following five 11559 * characters only: 11560 * <table class="striped"> 11561 * <caption style="display:none">truechars</caption> 11562 * <thead> 11563 * <tr><th scope="col">Character 11564 * <th scope="col">Code 11565 * <th scope="col">Name 11566 * </thead> 11567 * <tbody> 11568 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11569 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11570 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11571 * <td>{@code NEW LINE}</td></tr> 11572 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11573 * <td>{@code FORM FEED}</td></tr> 11574 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11575 * <td>{@code CARRIAGE RETURN}</td></tr> 11576 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11577 * <td>{@code SPACE}</td></tr> 11578 * </tbody> 11579 * </table> 11580 * 11581 * @param ch the character to be tested. 
11582 * @return {@code true} if the character is ISO-LATIN-1 white 11583 * space; {@code false} otherwise. 11584 * @see Character#isSpaceChar(char) 11585 * @see Character#isWhitespace(char) 11586 * @deprecated Replaced by isWhitespace(char). 11587 */ 11588 @Deprecated(since="1.1") 11589 public static boolean isSpace(char ch) { 11590 return (ch <= 0x0020) && 11591 (((((1L << 0x0009) | 11592 (1L << 0x000A) | 11593 (1L << 0x000C) | 11594 (1L << 0x000D) | 11595 (1L << 0x0020)) >> ch) & 1L) != 0); 11596 } 11597 11598 11599 /** 11600 * Determines if the specified character is a Unicode space character. 11601 * A character is considered to be a space character if and only if 11602 * it is specified to be a space character by the Unicode Standard. This 11603 * method returns true if the character's general category type is any of 11604 * the following: 11605 * <ul> 11606 * <li> {@code SPACE_SEPARATOR} 11607 * <li> {@code LINE_SEPARATOR} 11608 * <li> {@code PARAGRAPH_SEPARATOR} 11609 * </ul> 11610 * 11611 * <p><b>Note:</b> This method cannot handle <a 11612 * href="#supplementary"> supplementary characters</a>. To support 11613 * all Unicode characters, including supplementary characters, use 11614 * the {@link #isSpaceChar(int)} method. 11615 * 11616 * @param ch the character to be tested. 11617 * @return {@code true} if the character is a space character; 11618 * {@code false} otherwise. 11619 * @see Character#isWhitespace(char) 11620 * @since 1.1 11621 */ 11622 public static boolean isSpaceChar(char ch) { 11623 return isSpaceChar((int)ch); 11624 } 11625 11626 /** 11627 * Determines if the specified character (Unicode code point) is a 11628 * Unicode space character. A character is considered to be a 11629 * space character if and only if it is specified to be a space 11630 * character by the Unicode Standard. 
This method returns true if 11631 * the character's general category type is any of the following: 11632 * 11633 * <ul> 11634 * <li> {@link #SPACE_SEPARATOR} 11635 * <li> {@link #LINE_SEPARATOR} 11636 * <li> {@link #PARAGRAPH_SEPARATOR} 11637 * </ul> 11638 * 11639 * @param codePoint the character (Unicode code point) to be tested. 11640 * @return {@code true} if the character is a space character; 11641 * {@code false} otherwise. 11642 * @see Character#isWhitespace(int) 11643 * @since 1.5 11644 */ 11645 public static boolean isSpaceChar(int codePoint) { 11646 return ((((1 << Character.SPACE_SEPARATOR) | 11647 (1 << Character.LINE_SEPARATOR) | 11648 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11649 != 0; 11650 } 11651 11652 /** 11653 * Determines if the specified character is white space according to Java. 11654 * A character is a Java whitespace character if and only if it satisfies 11655 * one of the following criteria: 11656 * <ul> 11657 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11658 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11659 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11660 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11661 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11662 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11663 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11664 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11665 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11666 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11667 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11668 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11669 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11670 * </ul> 11671 * 11672 * <p><b>Note:</b> This method cannot handle <a 11673 * href="#supplementary"> supplementary characters</a>. 
To support 11674 * all Unicode characters, including supplementary characters, use 11675 * the {@link #isWhitespace(int)} method. 11676 * 11677 * @param ch the character to be tested. 11678 * @return {@code true} if the character is a Java whitespace 11679 * character; {@code false} otherwise. 11680 * @see Character#isSpaceChar(char) 11681 * @since 1.1 11682 */ 11683 public static boolean isWhitespace(char ch) { 11684 return isWhitespace((int)ch); 11685 } 11686 11687 /** 11688 * Determines if the specified character (Unicode code point) is 11689 * white space according to Java. A character is a Java 11690 * whitespace character if and only if it satisfies one of the 11691 * following criteria: 11692 * <ul> 11693 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11694 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11695 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11696 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11697 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11698 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11699 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11700 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11701 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11702 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11703 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11704 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11705 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11706 * </ul> 11707 * 11708 * @param codePoint the character (Unicode code point) to be tested. 11709 * @return {@code true} if the character is a Java whitespace 11710 * character; {@code false} otherwise. 
11711 * @see Character#isSpaceChar(int) 11712 * @since 1.5 11713 */ 11714 public static boolean isWhitespace(int codePoint) { 11715 return CharacterData.of(codePoint).isWhitespace(codePoint); 11716 } 11717 11718 /** 11719 * Determines if the specified character is an ISO control 11720 * character. A character is considered to be an ISO control 11721 * character if its code is in the range {@code '\u005Cu0000'} 11722 * through {@code '\u005Cu001F'} or in the range 11723 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11724 * 11725 * <p><b>Note:</b> This method cannot handle <a 11726 * href="#supplementary"> supplementary characters</a>. To support 11727 * all Unicode characters, including supplementary characters, use 11728 * the {@link #isISOControl(int)} method. 11729 * 11730 * @param ch the character to be tested. 11731 * @return {@code true} if the character is an ISO control character; 11732 * {@code false} otherwise. 11733 * 11734 * @see Character#isSpaceChar(char) 11735 * @see Character#isWhitespace(char) 11736 * @since 1.1 11737 */ 11738 public static boolean isISOControl(char ch) { 11739 return isISOControl((int)ch); 11740 } 11741 11742 /** 11743 * Determines if the referenced character (Unicode code point) is an ISO control 11744 * character. A character is considered to be an ISO control 11745 * character if its code is in the range {@code '\u005Cu0000'} 11746 * through {@code '\u005Cu001F'} or in the range 11747 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11748 * 11749 * @param codePoint the character (Unicode code point) to be tested. 11750 * @return {@code true} if the character is an ISO control character; 11751 * {@code false} otherwise. 
11752 * @see Character#isSpaceChar(int) 11753 * @see Character#isWhitespace(int) 11754 * @since 1.5 11755 */ 11756 public static boolean isISOControl(int codePoint) { 11757 // Optimized form of: 11758 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11759 // (codePoint >= 0x7F && codePoint <= 0x9F); 11760 return codePoint <= 0x9F && 11761 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11762 } 11763 11764 /** 11765 * Returns a value indicating a character's general category. 11766 * 11767 * <p><b>Note:</b> This method cannot handle <a 11768 * href="#supplementary"> supplementary characters</a>. To support 11769 * all Unicode characters, including supplementary characters, use 11770 * the {@link #getType(int)} method. 11771 * 11772 * @param ch the character to be tested. 11773 * @return a value of type {@code int} representing the 11774 * character's general category. 11775 * @see Character#COMBINING_SPACING_MARK 11776 * @see Character#CONNECTOR_PUNCTUATION 11777 * @see Character#CONTROL 11778 * @see Character#CURRENCY_SYMBOL 11779 * @see Character#DASH_PUNCTUATION 11780 * @see Character#DECIMAL_DIGIT_NUMBER 11781 * @see Character#ENCLOSING_MARK 11782 * @see Character#END_PUNCTUATION 11783 * @see Character#FINAL_QUOTE_PUNCTUATION 11784 * @see Character#FORMAT 11785 * @see Character#INITIAL_QUOTE_PUNCTUATION 11786 * @see Character#LETTER_NUMBER 11787 * @see Character#LINE_SEPARATOR 11788 * @see Character#LOWERCASE_LETTER 11789 * @see Character#MATH_SYMBOL 11790 * @see Character#MODIFIER_LETTER 11791 * @see Character#MODIFIER_SYMBOL 11792 * @see Character#NON_SPACING_MARK 11793 * @see Character#OTHER_LETTER 11794 * @see Character#OTHER_NUMBER 11795 * @see Character#OTHER_PUNCTUATION 11796 * @see Character#OTHER_SYMBOL 11797 * @see Character#PARAGRAPH_SEPARATOR 11798 * @see Character#PRIVATE_USE 11799 * @see Character#SPACE_SEPARATOR 11800 * @see Character#START_PUNCTUATION 11801 * @see Character#SURROGATE 11802 * @see Character#TITLECASE_LETTER 11803 * @see 
Character#UNASSIGNED 11804 * @see Character#UPPERCASE_LETTER 11805 * @since 1.1 11806 */ 11807 public static int getType(char ch) { 11808 return getType((int)ch); 11809 } 11810 11811 /** 11812 * Returns a value indicating a character's general category. 11813 * 11814 * @param codePoint the character (Unicode code point) to be tested. 11815 * @return a value of type {@code int} representing the 11816 * character's general category. 11817 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11818 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11819 * @see Character#CONTROL CONTROL 11820 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11821 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11822 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11823 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11824 * @see Character#END_PUNCTUATION END_PUNCTUATION 11825 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11826 * @see Character#FORMAT FORMAT 11827 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11828 * @see Character#LETTER_NUMBER LETTER_NUMBER 11829 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11830 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11831 * @see Character#MATH_SYMBOL MATH_SYMBOL 11832 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11833 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11834 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11835 * @see Character#OTHER_LETTER OTHER_LETTER 11836 * @see Character#OTHER_NUMBER OTHER_NUMBER 11837 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11838 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11839 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11840 * @see Character#PRIVATE_USE PRIVATE_USE 11841 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11842 * @see Character#START_PUNCTUATION START_PUNCTUATION 11843 * @see Character#SURROGATE SURROGATE 11844 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11845 * @see 
Character#UNASSIGNED UNASSIGNED 11846 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11847 * @since 1.5 11848 */ 11849 public static int getType(int codePoint) { 11850 return CharacterData.of(codePoint).getType(codePoint); 11851 } 11852 11853 /** 11854 * Determines the character representation for a specific digit in 11855 * the specified radix. If the value of {@code radix} is not a 11856 * valid radix, or the value of {@code digit} is not a valid 11857 * digit in the specified radix, the null character 11858 * ({@code '\u005Cu0000'}) is returned. 11859 * <p> 11860 * The {@code radix} argument is valid if it is greater than or 11861 * equal to {@code MIN_RADIX} and less than or equal to 11862 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11863 * {@code 0 <= digit < radix}. 11864 * <p> 11865 * If the digit is less than 10, then 11866 * {@code '0' + digit} is returned. Otherwise, the value 11867 * {@code 'a' + digit - 10} is returned. 11868 * 11869 * @param digit the number to convert to a character. 11870 * @param radix the radix. 11871 * @return the {@code char} representation of the specified digit 11872 * in the specified radix. 11873 * @see Character#MIN_RADIX 11874 * @see Character#MAX_RADIX 11875 * @see Character#digit(char, int) 11876 */ 11877 public static char forDigit(int digit, int radix) { 11878 if ((digit >= radix) || (digit < 0)) { 11879 return '\0'; 11880 } 11881 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11882 return '\0'; 11883 } 11884 if (digit < 10) { 11885 return (char)('0' + digit); 11886 } 11887 return (char)('a' - 10 + digit); 11888 } 11889 11890 /** 11891 * Returns the Unicode directionality property for the given 11892 * character. Character directionality is used to calculate the 11893 * visual ordering of text. The directionality value of undefined 11894 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 
11895 * 11896 * <p><b>Note:</b> This method cannot handle <a 11897 * href="#supplementary"> supplementary characters</a>. To support 11898 * all Unicode characters, including supplementary characters, use 11899 * the {@link #getDirectionality(int)} method. 11900 * 11901 * @param ch {@code char} for which the directionality property 11902 * is requested. 11903 * @return the directionality property of the {@code char} value. 11904 * 11905 * @see Character#DIRECTIONALITY_UNDEFINED 11906 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11907 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11908 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11909 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11910 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11911 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11912 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11913 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11914 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11915 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11916 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11917 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11918 * @see Character#DIRECTIONALITY_WHITESPACE 11919 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11920 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11921 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11922 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11923 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11924 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11925 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11926 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11927 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11928 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11929 * @since 1.4 11930 */ 11931 public static byte getDirectionality(char ch) { 11932 return getDirectionality((int)ch); 11933 } 11934 11935 /** 11936 * Returns the Unicode 
directionality property for the given 11937 * character (Unicode code point). Character directionality is 11938 * used to calculate the visual ordering of text. The 11939 * directionality value of undefined character is {@link 11940 * #DIRECTIONALITY_UNDEFINED}. 11941 * 11942 * @param codePoint the character (Unicode code point) for which 11943 * the directionality property is requested. 11944 * @return the directionality property of the character. 11945 * 11946 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11947 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11948 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11949 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11950 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11951 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11952 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11953 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11954 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11955 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11956 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11957 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11958 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11959 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11960 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11961 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11962 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11963 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11964 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11965 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11966 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11967 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11968 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11969 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11970 * @since 1.5 11971 */ 11972 public static byte getDirectionality(int codePoint) { 11973 return CharacterData.of(codePoint).getDirectionality(codePoint); 11974 } 11975 11976 /** 11977 * Determines whether the character is mirrored according to the 11978 * Unicode specification. Mirrored characters should have their 11979 * glyphs horizontally mirrored when displayed in text that is 11980 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11981 * PARENTHESIS is semantically defined to be an <i>opening 11982 * parenthesis</i>. This will appear as a "(" in text that is 11983 * left-to-right but as a ")" in text that is right-to-left. 11984 * 11985 * <p><b>Note:</b> This method cannot handle <a 11986 * href="#supplementary"> supplementary characters</a>. To support 11987 * all Unicode characters, including supplementary characters, use 11988 * the {@link #isMirrored(int)} method. 11989 * 11990 * @param ch {@code char} for which the mirrored property is requested 11991 * @return {@code true} if the char is mirrored, {@code false} 11992 * if the {@code char} is not mirrored or is not defined. 
11993 * @since 1.4 11994 */ 11995 public static boolean isMirrored(char ch) { 11996 return isMirrored((int)ch); 11997 } 11998 11999 /** 12000 * Determines whether the specified character (Unicode code point) 12001 * is mirrored according to the Unicode specification. Mirrored 12002 * characters should have their glyphs horizontally mirrored when 12003 * displayed in text that is right-to-left. For example, 12004 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 12005 * defined to be an <i>opening parenthesis</i>. This will appear 12006 * as a "(" in text that is left-to-right but as a ")" in text 12007 * that is right-to-left. 12008 * 12009 * @param codePoint the character (Unicode code point) to be tested. 12010 * @return {@code true} if the character is mirrored, {@code false} 12011 * if the character is not mirrored or is not defined. 12012 * @since 1.5 12013 */ 12014 public static boolean isMirrored(int codePoint) { 12015 return CharacterData.of(codePoint).isMirrored(codePoint); 12016 } 12017 12018 /** 12019 * Compares two {@code Character} objects numerically. 12020 * 12021 * @param anotherCharacter the {@code Character} to be compared. 12022 * @return the value {@code 0} if the argument {@code Character} 12023 * is equal to this {@code Character}; a value less than 12024 * {@code 0} if this {@code Character} is numerically less 12025 * than the {@code Character} argument; and a value greater than 12026 * {@code 0} if this {@code Character} is numerically greater 12027 * than the {@code Character} argument (unsigned comparison). 12028 * Note that this is strictly a numerical comparison; it is not 12029 * locale-dependent. 12030 * @since 1.2 12031 */ 12032 public int compareTo(Character anotherCharacter) { 12033 return compare(this.value, anotherCharacter.value); 12034 } 12035 12036 /** 12037 * Compares two {@code char} values numerically. 
12038 * The value returned is identical to what would be returned by: 12039 * <pre> 12040 * Character.valueOf(x).compareTo(Character.valueOf(y)) 12041 * </pre> 12042 * 12043 * @param x the first {@code char} to compare 12044 * @param y the second {@code char} to compare 12045 * @return the value {@code 0} if {@code x == y}; 12046 * a value less than {@code 0} if {@code x < y}; and 12047 * a value greater than {@code 0} if {@code x > y} 12048 * @since 1.7 12049 */ 12050 public static int compare(char x, char y) { 12051 return x - y; 12052 } 12053 12054 /** 12055 * Converts the character (Unicode code point) argument to uppercase using 12056 * information from the UnicodeData file. 12057 * 12058 * @param codePoint the character (Unicode code point) to be converted. 12059 * @return either the uppercase equivalent of the character, if 12060 * any, or an error flag ({@code Character.ERROR}) 12061 * that indicates that a 1:M {@code char} mapping exists. 12062 * @see Character#isLowerCase(char) 12063 * @see Character#isUpperCase(char) 12064 * @see Character#toLowerCase(char) 12065 * @see Character#toTitleCase(char) 12066 * @since 1.4 12067 */ 12068 static int toUpperCaseEx(int codePoint) { 12069 assert isValidCodePoint(codePoint); 12070 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 12071 } 12072 12073 /** 12074 * Converts the character (Unicode code point) argument to uppercase using case 12075 * mapping information from the SpecialCasing file in the Unicode 12076 * specification. If a character has no explicit uppercase 12077 * mapping, then the {@code char} itself is returned in the 12078 * {@code char[]}. 12079 * 12080 * @param codePoint the character (Unicode code point) to be converted. 12081 * @return a {@code char[]} with the uppercased character. 12082 * @since 1.4 12083 */ 12084 static char[] toUpperCaseCharArray(int codePoint) { 12085 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
12086 assert isBmpCodePoint(codePoint); 12087 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 12088 } 12089 12090 /** 12091 * The number of bits used to represent a {@code char} value in unsigned 12092 * binary form, constant {@code 16}. 12093 * 12094 * @since 1.5 12095 */ 12096 public static final int SIZE = 16; 12097 12098 /** 12099 * The number of bytes used to represent a {@code char} value in unsigned 12100 * binary form. 12101 * 12102 * @since 1.8 12103 */ 12104 public static final int BYTES = SIZE / Byte.SIZE; 12105 12106 /** 12107 * Returns the value obtained by reversing the order of the bytes in the 12108 * specified {@code char} value. 12109 * 12110 * @param ch The {@code char} of which to reverse the byte order. 12111 * @return the value obtained by reversing (or, equivalently, swapping) 12112 * the bytes in the specified {@code char} value. 12113 * @since 1.5 12114 */ 12115 @IntrinsicCandidate 12116 public static char reverseBytes(char ch) { 12117 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 12118 } 12119 12120 /** 12121 * Returns the name of the specified character 12122 * {@code codePoint}, or null if the code point is 12123 * {@link #UNASSIGNED unassigned}. 12124 * <p> 12125 * If the specified character is not assigned a name by 12126 * the <i>UnicodeData</i> file (part of the Unicode Character 12127 * Database maintained by the Unicode Consortium), the returned 12128 * name is the same as the result of the expression: 12129 * 12130 * <blockquote>{@code 12131 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12132 * + " " 12133 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12134 * 12135 * }</blockquote> 12136 * 12137 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 12138 * returned by this method follows the naming scheme in the 12139 * "Unicode Name Property" section of the Unicode Standard. 
For other 12140 * code points, such as Hangul/Ideographs, The name generation rule above 12141 * differs from the one defined in the Unicode Standard. 12142 * 12143 * @param codePoint the character (Unicode code point) 12144 * 12145 * @return the name of the specified character, or null if 12146 * the code point is unassigned. 12147 * 12148 * @throws IllegalArgumentException if the specified 12149 * {@code codePoint} is not a valid Unicode 12150 * code point. 12151 * 12152 * @since 1.7 12153 */ 12154 public static String getName(int codePoint) { 12155 if (!isValidCodePoint(codePoint)) { 12156 throw new IllegalArgumentException( 12157 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 12158 } 12159 String name = CharacterName.getInstance().getName(codePoint); 12160 if (name != null) 12161 return name; 12162 if (getType(codePoint) == UNASSIGNED) 12163 return null; 12164 UnicodeBlock block = UnicodeBlock.of(codePoint); 12165 if (block != null) 12166 return block.toString().replace('_', ' ') + " " 12167 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12168 // should never come here 12169 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12170 } 12171 12172 /** 12173 * Returns the code point value of the Unicode character specified by 12174 * the given character name. 12175 * <p> 12176 * If a character is not assigned a name by the <i>UnicodeData</i> 12177 * file (part of the Unicode Character Database maintained by the Unicode 12178 * Consortium), its name is defined as the result of the expression: 12179 * 12180 * <blockquote>{@code 12181 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12182 * + " " 12183 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12184 * 12185 * }</blockquote> 12186 * <p> 12187 * The {@code name} matching is case insensitive, with any leading and 12188 * trailing whitespace character removed. 
12189 * 12190 * For the code points in the <i>UnicodeData</i> file, this method 12191 * recognizes the name which conforms to the name defined in the 12192 * "Unicode Name Property" section in the Unicode Standard. For other 12193 * code points, this method recognizes the name generated with 12194 * {@link #getName(int)} method. 12195 * 12196 * @param name the character name 12197 * 12198 * @return the code point value of the character specified by its name. 12199 * 12200 * @throws IllegalArgumentException if the specified {@code name} 12201 * is not a valid character name. 12202 * @throws NullPointerException if {@code name} is {@code null} 12203 * 12204 * @since 9 12205 */ 12206 public static int codePointOf(String name) { 12207 name = name.trim().toUpperCase(Locale.ROOT); 12208 int cp = CharacterName.getInstance().getCodePoint(name); 12209 if (cp != -1) 12210 return cp; 12211 try { 12212 int off = name.lastIndexOf(' '); 12213 if (off != -1) { 12214 cp = Integer.parseInt(name, off + 1, name.length(), 16); 12215 if (isValidCodePoint(cp) && name.equals(getName(cp))) 12216 return cp; 12217 } 12218 } catch (Exception x) {} 12219 throw new IllegalArgumentException("Unrecognized character name :" + name); 12220 } 12221 }