1 /* 2 * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import jdk.internal.misc.CDS; 29 import jdk.internal.value.DeserializeConstructor; 30 import jdk.internal.vm.annotation.IntrinsicCandidate; 31 import jdk.internal.vm.annotation.Stable; 32 33 import java.lang.constant.Constable; 34 import java.lang.constant.DynamicConstantDesc; 35 import java.util.Arrays; 36 import java.util.HashMap; 37 import java.util.Locale; 38 import java.util.Map; 39 import java.util.Objects; 40 import java.util.Optional; 41 42 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 43 import static java.lang.constant.ConstantDescs.CD_char; 44 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 45 46 /** 47 * The {@code Character} class is the {@linkplain 48 * java.lang##wrapperClass wrapper class} for values of the primitive 49 * type {@code char}. An object of type {@code Character} contains a 50 * single field whose type is {@code char}. 51 * 52 * <p>In addition, this class provides a large number of static methods for 53 * determining a character's category (lowercase letter, digit, etc.) 54 * and for converting characters from uppercase to lowercase and vice 55 * versa. 56 * 57 * <h2><a id="conformance">Unicode Conformance</a></h2> 58 * <p> 59 * The fields and methods of class {@code Character} are defined in terms 60 * of character information from the Unicode Standard, specifically the 61 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 62 * This file specifies properties including name and category for every 63 * assigned Unicode code point or character range. The file is available 64 * from the Unicode Consortium at 65 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 66 * <p> 67 * Character information is based on the Unicode Standard, version 15.1. 68 * <p> 69 * The Java platform has supported different versions of the Unicode 70 * Standard over time. 
Upgrades to newer versions of the Unicode Standard 71 * occurred in the following Java releases, each indicating the new version: 72 * <table class="striped"> 73 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 74 * <thead> 75 * <tr><th scope="col">Java release</th> 76 * <th scope="col">Unicode version</th></tr> 77 * </thead> 78 * <tbody> 79 * <tr><th scope="row" style="text-align:left">Java SE 22</th> 80 * <td>Unicode 15.1</td></tr> 81 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 82 * <td>Unicode 15.0</td></tr> 83 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 84 * <td>Unicode 14.0</td></tr> 85 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 86 * <td>Unicode 13.0</td></tr> 87 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 88 * <td>Unicode 12.1</td></tr> 89 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 90 * <td>Unicode 11.0</td></tr> 91 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 92 * <td>Unicode 10.0</td></tr> 93 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 94 * <td>Unicode 8.0</td></tr> 95 * <tr><th scope="row" style="text-align:left">Java SE 8</th> 96 * <td>Unicode 6.2</td></tr> 97 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 98 * <td>Unicode 6.0</td></tr> 99 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 100 * <td>Unicode 4.0</td></tr> 101 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 102 * <td>Unicode 3.0</td></tr> 103 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 104 * <td>Unicode 2.0</td></tr> 105 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 106 * <td>Unicode 1.1.5</td></tr> 107 * </tbody> 108 * </table> 109 * Variations from these base Unicode versions, such as recognized appendixes, 110 * are documented elsewhere. 
111 * <h2><a id="unicode">Unicode Character Representations</a></h2> 112 * 113 * <p>The {@code char} data type (and therefore the value that a 114 * {@code Character} object encapsulates) are based on the 115 * original Unicode specification, which defined characters as 116 * fixed-width 16-bit entities. The Unicode Standard has since been 117 * changed to allow for characters whose representation requires more 118 * than 16 bits. The range of legal <em>code point</em>s is now 119 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 120 * (Refer to the <a 121 * href="http://www.unicode.org/reports/tr27/#notation"><i> 122 * definition</i></a> of the U+<i>n</i> notation in the Unicode 123 * Standard.) 124 * 125 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 126 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 127 * <a id="supplementary">Characters</a> whose code points are greater 128 * than U+FFFF are called <em>supplementary character</em>s. The Java 129 * platform uses the UTF-16 representation in {@code char} arrays and 130 * in the {@code String} and {@code StringBuffer} classes. In 131 * this representation, supplementary characters are represented as a pair 132 * of {@code char} values, the first from the <em>high-surrogates</em> 133 * range, (\uD800-\uDBFF), the second from the 134 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 135 * 136 * <p>A {@code char} value, therefore, represents Basic 137 * Multilingual Plane (BMP) code points, including the surrogate 138 * code points, or code units of the UTF-16 encoding. An 139 * {@code int} value represents all Unicode code points, 140 * including supplementary code points. The lower (least significant) 141 * 21 bits of {@code int} are used to represent Unicode code 142 * points and the upper (most significant) 11 bits must be zero. 
143 * Unless otherwise specified, the behavior with respect to 144 * supplementary characters and surrogate {@code char} values is 145 * as follows: 146 * 147 * <ul> 148 * <li>The methods that only accept a {@code char} value cannot support 149 * supplementary characters. They treat {@code char} values from the 150 * surrogate ranges as undefined characters. For example, 151 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 152 * this specific value if followed by any low-surrogate value in a string 153 * would represent a letter. 154 * 155 * <li>The methods that accept an {@code int} value support all 156 * Unicode characters, including supplementary characters. For 157 * example, {@code Character.isLetter(0x2F81A)} returns 158 * {@code true} because the code point value represents a letter 159 * (a CJK ideograph). 160 * </ul> 161 * 162 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 163 * used for character values in the range between U+0000 and U+10FFFF, 164 * and <em>Unicode code unit</em> is used for 16-bit 165 * {@code char} values that are code units of the <em>UTF-16</em> 166 * encoding. For more information on Unicode terminology, refer to the 167 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 168 * 169 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 170 * class; programmers should treat instances that are 171 * {@linkplain #equals(Object) equal} as interchangeable and should not 172 * use instances for synchronization, or unpredictable behavior may 173 * occur. For example, in a future release, synchronization may fail. 
174 * 175 * @spec https://www.unicode.org/reports/tr27 Unicode 3.1.0 176 * @author Lee Boynton 177 * @author Guy Steele 178 * @author Akira Tanaka 179 * @author Martin Buchholz 180 * @author Ulf Zibis 181 * @since 1.0 182 */ 183 @jdk.internal.MigratedValueClass 184 @jdk.internal.ValueBased 185 public final class Character implements java.io.Serializable, Comparable<Character>, Constable { 186 /** 187 * The minimum radix available for conversion to and from strings. 188 * The constant value of this field is the smallest value permitted 189 * for the radix argument in radix-conversion methods such as the 190 * {@code digit} method, the {@code forDigit} method, and the 191 * {@code toString} method of class {@code Integer}. 192 * 193 * @see Character#digit(char, int) 194 * @see Character#forDigit(int, int) 195 * @see Integer#toString(int, int) 196 * @see Integer#valueOf(String) 197 */ 198 public static final int MIN_RADIX = 2; 199 200 /** 201 * The maximum radix available for conversion to and from strings. 202 * The constant value of this field is the largest value permitted 203 * for the radix argument in radix-conversion methods such as the 204 * {@code digit} method, the {@code forDigit} method, and the 205 * {@code toString} method of class {@code Integer}. 206 * 207 * @see Character#digit(char, int) 208 * @see Character#forDigit(int, int) 209 * @see Integer#toString(int, int) 210 * @see Integer#valueOf(String) 211 */ 212 public static final int MAX_RADIX = 36; 213 214 /** 215 * The constant value of this field is the smallest value of type 216 * {@code char}, {@code '\u005Cu0000'}. 217 * 218 * @since 1.0.2 219 */ 220 public static final char MIN_VALUE = '\u0000'; 221 222 /** 223 * The constant value of this field is the largest value of type 224 * {@code char}, {@code '\u005CuFFFF'}. 225 * 226 * @since 1.0.2 227 */ 228 public static final char MAX_VALUE = '\uFFFF'; 229 230 /** 231 * The {@code Class} instance representing the primitive type 232 * {@code char}. 
233 * 234 * @since 1.1 235 */ 236 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 237 238 /* 239 * Normative general types 240 */ 241 242 /* 243 * General character types 244 */ 245 246 /** 247 * General category "Cn" in the Unicode specification. 248 * @since 1.1 249 */ 250 public static final byte UNASSIGNED = 0; 251 252 /** 253 * General category "Lu" in the Unicode specification. 254 * @since 1.1 255 */ 256 public static final byte UPPERCASE_LETTER = 1; 257 258 /** 259 * General category "Ll" in the Unicode specification. 260 * @since 1.1 261 */ 262 public static final byte LOWERCASE_LETTER = 2; 263 264 /** 265 * General category "Lt" in the Unicode specification. 266 * @since 1.1 267 */ 268 public static final byte TITLECASE_LETTER = 3; 269 270 /** 271 * General category "Lm" in the Unicode specification. 272 * @since 1.1 273 */ 274 public static final byte MODIFIER_LETTER = 4; 275 276 /** 277 * General category "Lo" in the Unicode specification. 278 * @since 1.1 279 */ 280 public static final byte OTHER_LETTER = 5; 281 282 /** 283 * General category "Mn" in the Unicode specification. 284 * @since 1.1 285 */ 286 public static final byte NON_SPACING_MARK = 6; 287 288 /** 289 * General category "Me" in the Unicode specification. 290 * @since 1.1 291 */ 292 public static final byte ENCLOSING_MARK = 7; 293 294 /** 295 * General category "Mc" in the Unicode specification. 296 * @since 1.1 297 */ 298 public static final byte COMBINING_SPACING_MARK = 8; 299 300 /** 301 * General category "Nd" in the Unicode specification. 302 * @since 1.1 303 */ 304 public static final byte DECIMAL_DIGIT_NUMBER = 9; 305 306 /** 307 * General category "Nl" in the Unicode specification. 308 * @since 1.1 309 */ 310 public static final byte LETTER_NUMBER = 10; 311 312 /** 313 * General category "No" in the Unicode specification. 
314 * @since 1.1 315 */ 316 public static final byte OTHER_NUMBER = 11; 317 318 /** 319 * General category "Zs" in the Unicode specification. 320 * @since 1.1 321 */ 322 public static final byte SPACE_SEPARATOR = 12; 323 324 /** 325 * General category "Zl" in the Unicode specification. 326 * @since 1.1 327 */ 328 public static final byte LINE_SEPARATOR = 13; 329 330 /** 331 * General category "Zp" in the Unicode specification. 332 * @since 1.1 333 */ 334 public static final byte PARAGRAPH_SEPARATOR = 14; 335 336 /** 337 * General category "Cc" in the Unicode specification. 338 * @since 1.1 339 */ 340 public static final byte CONTROL = 15; 341 342 /** 343 * General category "Cf" in the Unicode specification. 344 * @since 1.1 345 */ 346 public static final byte FORMAT = 16; 347 348 /** 349 * General category "Co" in the Unicode specification. 350 * @since 1.1 351 */ 352 public static final byte PRIVATE_USE = 18; 353 354 /** 355 * General category "Cs" in the Unicode specification. 356 * @since 1.1 357 */ 358 public static final byte SURROGATE = 19; 359 360 /** 361 * General category "Pd" in the Unicode specification. 362 * @since 1.1 363 */ 364 public static final byte DASH_PUNCTUATION = 20; 365 366 /** 367 * General category "Ps" in the Unicode specification. 368 * @since 1.1 369 */ 370 public static final byte START_PUNCTUATION = 21; 371 372 /** 373 * General category "Pe" in the Unicode specification. 374 * @since 1.1 375 */ 376 public static final byte END_PUNCTUATION = 22; 377 378 /** 379 * General category "Pc" in the Unicode specification. 380 * @since 1.1 381 */ 382 public static final byte CONNECTOR_PUNCTUATION = 23; 383 384 /** 385 * General category "Po" in the Unicode specification. 386 * @since 1.1 387 */ 388 public static final byte OTHER_PUNCTUATION = 24; 389 390 /** 391 * General category "Sm" in the Unicode specification. 
392 * @since 1.1 393 */ 394 public static final byte MATH_SYMBOL = 25; 395 396 /** 397 * General category "Sc" in the Unicode specification. 398 * @since 1.1 399 */ 400 public static final byte CURRENCY_SYMBOL = 26; 401 402 /** 403 * General category "Sk" in the Unicode specification. 404 * @since 1.1 405 */ 406 public static final byte MODIFIER_SYMBOL = 27; 407 408 /** 409 * General category "So" in the Unicode specification. 410 * @since 1.1 411 */ 412 public static final byte OTHER_SYMBOL = 28; 413 414 /** 415 * General category "Pi" in the Unicode specification. 416 * @since 1.4 417 */ 418 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 419 420 /** 421 * General category "Pf" in the Unicode specification. 422 * @since 1.4 423 */ 424 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 425 426 /** 427 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 428 */ 429 static final int ERROR = 0xFFFFFFFF; 430 431 432 /** 433 * Undefined bidirectional character type. Undefined {@code char} 434 * values have undefined directionality in the Unicode specification. 435 * @since 1.4 436 */ 437 public static final byte DIRECTIONALITY_UNDEFINED = -1; 438 439 /** 440 * Strong bidirectional character type "L" in the Unicode specification. 441 * @since 1.4 442 */ 443 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 444 445 /** 446 * Strong bidirectional character type "R" in the Unicode specification. 447 * @since 1.4 448 */ 449 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 450 451 /** 452 * Strong bidirectional character type "AL" in the Unicode specification. 453 * @since 1.4 454 */ 455 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 456 457 /** 458 * Weak bidirectional character type "EN" in the Unicode specification. 459 * @since 1.4 460 */ 461 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 462 463 /** 464 * Weak bidirectional character type "ES" in the Unicode specification. 
465 * @since 1.4 466 */ 467 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 468 469 /** 470 * Weak bidirectional character type "ET" in the Unicode specification. 471 * @since 1.4 472 */ 473 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 474 475 /** 476 * Weak bidirectional character type "AN" in the Unicode specification. 477 * @since 1.4 478 */ 479 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 480 481 /** 482 * Weak bidirectional character type "CS" in the Unicode specification. 483 * @since 1.4 484 */ 485 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 486 487 /** 488 * Weak bidirectional character type "NSM" in the Unicode specification. 489 * @since 1.4 490 */ 491 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 492 493 /** 494 * Weak bidirectional character type "BN" in the Unicode specification. 495 * @since 1.4 496 */ 497 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 498 499 /** 500 * Neutral bidirectional character type "B" in the Unicode specification. 501 * @since 1.4 502 */ 503 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 504 505 /** 506 * Neutral bidirectional character type "S" in the Unicode specification. 507 * @since 1.4 508 */ 509 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 510 511 /** 512 * Neutral bidirectional character type "WS" in the Unicode specification. 513 * @since 1.4 514 */ 515 public static final byte DIRECTIONALITY_WHITESPACE = 12; 516 517 /** 518 * Neutral bidirectional character type "ON" in the Unicode specification. 519 * @since 1.4 520 */ 521 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 522 523 /** 524 * Strong bidirectional character type "LRE" in the Unicode specification. 525 * @since 1.4 526 */ 527 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 528 529 /** 530 * Strong bidirectional character type "LRO" in the Unicode specification. 
531 * @since 1.4 532 */ 533 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 534 535 /** 536 * Strong bidirectional character type "RLE" in the Unicode specification. 537 * @since 1.4 538 */ 539 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 540 541 /** 542 * Strong bidirectional character type "RLO" in the Unicode specification. 543 * @since 1.4 544 */ 545 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 546 547 /** 548 * Weak bidirectional character type "PDF" in the Unicode specification. 549 * @since 1.4 550 */ 551 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 552 553 /** 554 * Weak bidirectional character type "LRI" in the Unicode specification. 555 * @since 9 556 */ 557 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 558 559 /** 560 * Weak bidirectional character type "RLI" in the Unicode specification. 561 * @since 9 562 */ 563 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 564 565 /** 566 * Weak bidirectional character type "FSI" in the Unicode specification. 567 * @since 9 568 */ 569 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 570 571 /** 572 * Weak bidirectional character type "PDI" in the Unicode specification. 573 * @since 9 574 */ 575 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 576 577 /** 578 * The minimum value of a 579 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 580 * Unicode high-surrogate code unit</a> 581 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 582 * A high-surrogate is also known as a <i>leading-surrogate</i>. 583 * 584 * @since 1.5 585 */ 586 public static final char MIN_HIGH_SURROGATE = '\uD800'; 587 588 /** 589 * The maximum value of a 590 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 591 * Unicode high-surrogate code unit</a> 592 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 
593 * A high-surrogate is also known as a <i>leading-surrogate</i>. 594 * 595 * @since 1.5 596 */ 597 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 598 599 /** 600 * The minimum value of a 601 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 602 * Unicode low-surrogate code unit</a> 603 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 604 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 605 * 606 * @since 1.5 607 */ 608 public static final char MIN_LOW_SURROGATE = '\uDC00'; 609 610 /** 611 * The maximum value of a 612 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 613 * Unicode low-surrogate code unit</a> 614 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 615 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 616 * 617 * @since 1.5 618 */ 619 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 620 621 /** 622 * The minimum value of a Unicode surrogate code unit in the 623 * UTF-16 encoding, constant {@code '\u005CuD800'}. 624 * 625 * @since 1.5 626 */ 627 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 628 629 /** 630 * The maximum value of a Unicode surrogate code unit in the 631 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 632 * 633 * @since 1.5 634 */ 635 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 636 637 /** 638 * The minimum value of a 639 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 640 * Unicode supplementary code point</a>, constant {@code U+10000}. 641 * 642 * @since 1.5 643 */ 644 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 645 646 /** 647 * The minimum value of a 648 * <a href="http://www.unicode.org/glossary/#code_point"> 649 * Unicode code point</a>, constant {@code U+0000}. 
650 * 651 * @since 1.5 652 */ 653 public static final int MIN_CODE_POINT = 0x000000; 654 655 /** 656 * The maximum value of a 657 * <a href="http://www.unicode.org/glossary/#code_point"> 658 * Unicode code point</a>, constant {@code U+10FFFF}. 659 * 660 * @since 1.5 661 */ 662 public static final int MAX_CODE_POINT = 0X10FFFF; 663 664 /** 665 * Returns an {@link Optional} containing the nominal descriptor for this 666 * instance. 667 * 668 * @return an {@link Optional} describing the {@linkplain Character} instance 669 * @since 15 670 */ 671 @Override 672 public Optional<DynamicConstantDesc<Character>> describeConstable() { 673 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 674 } 675 676 /** 677 * Instances of this class represent particular subsets of the Unicode 678 * character set. The only family of subsets defined in the 679 * {@code Character} class is {@link Character.UnicodeBlock}. 680 * Other portions of the Java API may define other subsets for their 681 * own purposes. 682 * 683 * @since 1.2 684 */ 685 public static class Subset { 686 687 private String name; 688 689 /** 690 * Constructs a new {@code Subset} instance. 691 * 692 * @param name The name of this subset 693 * @throws NullPointerException if name is {@code null} 694 */ 695 protected Subset(String name) { 696 if (name == null) { 697 throw new NullPointerException("name"); 698 } 699 this.name = name; 700 } 701 702 /** 703 * Compares two {@code Subset} objects for equality. 704 * This method returns {@code true} if and only if 705 * {@code this} and the argument refer to the same 706 * object; since this method is {@code final}, this 707 * guarantee holds for all subclasses. 708 */ 709 public final boolean equals(Object obj) { 710 return (this == obj); 711 } 712 713 /** 714 * Returns the standard hash code as defined by the 715 * {@link Object#hashCode} method. 
This method 716 * is {@code final} in order to ensure that the 717 * {@code equals} and {@code hashCode} methods will 718 * be consistent in all subclasses. 719 */ 720 public final int hashCode() { 721 return super.hashCode(); 722 } 723 724 /** 725 * Returns the name of this subset. 726 */ 727 public final String toString() { 728 return name; 729 } 730 } 731 732 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 733 // for the latest specification of Unicode Blocks. 734 735 /** 736 * A family of character subsets representing the character blocks in the 737 * Unicode specification. Character blocks generally define characters 738 * used for a specific script or purpose. A character is contained by 739 * at most one Unicode block. 740 * 741 * @since 1.2 742 */ 743 public static final class UnicodeBlock extends Subset { 744 /** 745 * NUM_ENTITIES should match the total number of UnicodeBlocks. 746 * It should be adjusted whenever the Unicode Character Database 747 * is upgraded. 748 */ 749 private static final int NUM_ENTITIES = 759; 750 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 751 752 /** 753 * Creates a UnicodeBlock with the given identifier name. 754 * This name must be the same as the block identifier. 755 */ 756 private UnicodeBlock(String idName) { 757 super(idName); 758 map.put(idName, this); 759 } 760 761 /** 762 * Creates a UnicodeBlock with the given identifier name and 763 * alias name. 764 */ 765 private UnicodeBlock(String idName, String alias) { 766 this(idName); 767 map.put(alias, this); 768 } 769 770 /** 771 * Creates a UnicodeBlock with the given identifier name and 772 * alias names. 773 */ 774 private UnicodeBlock(String idName, String... aliases) { 775 this(idName); 776 for (String alias : aliases) 777 map.put(alias, this); 778 } 779 780 /** 781 * Constant for the "Basic Latin" Unicode character block. 
782 * @since 1.2 783 */ 784 public static final UnicodeBlock BASIC_LATIN = 785 new UnicodeBlock("BASIC_LATIN", 786 "BASIC LATIN", 787 "BASICLATIN"); 788 789 /** 790 * Constant for the "Latin-1 Supplement" Unicode character block. 791 * @since 1.2 792 */ 793 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 794 new UnicodeBlock("LATIN_1_SUPPLEMENT", 795 "LATIN-1 SUPPLEMENT", 796 "LATIN-1SUPPLEMENT"); 797 798 /** 799 * Constant for the "Latin Extended-A" Unicode character block. 800 * @since 1.2 801 */ 802 public static final UnicodeBlock LATIN_EXTENDED_A = 803 new UnicodeBlock("LATIN_EXTENDED_A", 804 "LATIN EXTENDED-A", 805 "LATINEXTENDED-A"); 806 807 /** 808 * Constant for the "Latin Extended-B" Unicode character block. 809 * @since 1.2 810 */ 811 public static final UnicodeBlock LATIN_EXTENDED_B = 812 new UnicodeBlock("LATIN_EXTENDED_B", 813 "LATIN EXTENDED-B", 814 "LATINEXTENDED-B"); 815 816 /** 817 * Constant for the "IPA Extensions" Unicode character block. 818 * @since 1.2 819 */ 820 public static final UnicodeBlock IPA_EXTENSIONS = 821 new UnicodeBlock("IPA_EXTENSIONS", 822 "IPA EXTENSIONS", 823 "IPAEXTENSIONS"); 824 825 /** 826 * Constant for the "Spacing Modifier Letters" Unicode character block. 827 * @since 1.2 828 */ 829 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 830 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 831 "SPACING MODIFIER LETTERS", 832 "SPACINGMODIFIERLETTERS"); 833 834 /** 835 * Constant for the "Combining Diacritical Marks" Unicode character block. 836 * @since 1.2 837 */ 838 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 839 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 840 "COMBINING DIACRITICAL MARKS", 841 "COMBININGDIACRITICALMARKS"); 842 843 /** 844 * Constant for the "Greek and Coptic" Unicode character block. 845 * <p> 846 * This block was previously known as the "Greek" block. 
847 * 848 * @since 1.2 849 */ 850 public static final UnicodeBlock GREEK = 851 new UnicodeBlock("GREEK", 852 "GREEK AND COPTIC", 853 "GREEKANDCOPTIC"); 854 855 /** 856 * Constant for the "Cyrillic" Unicode character block. 857 * @since 1.2 858 */ 859 public static final UnicodeBlock CYRILLIC = 860 new UnicodeBlock("CYRILLIC"); 861 862 /** 863 * Constant for the "Armenian" Unicode character block. 864 * @since 1.2 865 */ 866 public static final UnicodeBlock ARMENIAN = 867 new UnicodeBlock("ARMENIAN"); 868 869 /** 870 * Constant for the "Hebrew" Unicode character block. 871 * @since 1.2 872 */ 873 public static final UnicodeBlock HEBREW = 874 new UnicodeBlock("HEBREW"); 875 876 /** 877 * Constant for the "Arabic" Unicode character block. 878 * @since 1.2 879 */ 880 public static final UnicodeBlock ARABIC = 881 new UnicodeBlock("ARABIC"); 882 883 /** 884 * Constant for the "Devanagari" Unicode character block. 885 * @since 1.2 886 */ 887 public static final UnicodeBlock DEVANAGARI = 888 new UnicodeBlock("DEVANAGARI"); 889 890 /** 891 * Constant for the "Bengali" Unicode character block. 892 * @since 1.2 893 */ 894 public static final UnicodeBlock BENGALI = 895 new UnicodeBlock("BENGALI"); 896 897 /** 898 * Constant for the "Gurmukhi" Unicode character block. 899 * @since 1.2 900 */ 901 public static final UnicodeBlock GURMUKHI = 902 new UnicodeBlock("GURMUKHI"); 903 904 /** 905 * Constant for the "Gujarati" Unicode character block. 906 * @since 1.2 907 */ 908 public static final UnicodeBlock GUJARATI = 909 new UnicodeBlock("GUJARATI"); 910 911 /** 912 * Constant for the "Oriya" Unicode character block. 913 * @since 1.2 914 */ 915 public static final UnicodeBlock ORIYA = 916 new UnicodeBlock("ORIYA"); 917 918 /** 919 * Constant for the "Tamil" Unicode character block. 920 * @since 1.2 921 */ 922 public static final UnicodeBlock TAMIL = 923 new UnicodeBlock("TAMIL"); 924 925 /** 926 * Constant for the "Telugu" Unicode character block. 
927 * @since 1.2 928 */ 929 public static final UnicodeBlock TELUGU = 930 new UnicodeBlock("TELUGU"); 931 932 /** 933 * Constant for the "Kannada" Unicode character block. 934 * @since 1.2 935 */ 936 public static final UnicodeBlock KANNADA = 937 new UnicodeBlock("KANNADA"); 938 939 /** 940 * Constant for the "Malayalam" Unicode character block. 941 * @since 1.2 942 */ 943 public static final UnicodeBlock MALAYALAM = 944 new UnicodeBlock("MALAYALAM"); 945 946 /** 947 * Constant for the "Thai" Unicode character block. 948 * @since 1.2 949 */ 950 public static final UnicodeBlock THAI = 951 new UnicodeBlock("THAI"); 952 953 /** 954 * Constant for the "Lao" Unicode character block. 955 * @since 1.2 956 */ 957 public static final UnicodeBlock LAO = 958 new UnicodeBlock("LAO"); 959 960 /** 961 * Constant for the "Tibetan" Unicode character block. 962 * @since 1.2 963 */ 964 public static final UnicodeBlock TIBETAN = 965 new UnicodeBlock("TIBETAN"); 966 967 /** 968 * Constant for the "Georgian" Unicode character block. 969 * @since 1.2 970 */ 971 public static final UnicodeBlock GEORGIAN = 972 new UnicodeBlock("GEORGIAN"); 973 974 /** 975 * Constant for the "Hangul Jamo" Unicode character block. 976 * @since 1.2 977 */ 978 public static final UnicodeBlock HANGUL_JAMO = 979 new UnicodeBlock("HANGUL_JAMO", 980 "HANGUL JAMO", 981 "HANGULJAMO"); 982 983 /** 984 * Constant for the "Latin Extended Additional" Unicode character block. 985 * @since 1.2 986 */ 987 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 988 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 989 "LATIN EXTENDED ADDITIONAL", 990 "LATINEXTENDEDADDITIONAL"); 991 992 /** 993 * Constant for the "Greek Extended" Unicode character block. 994 * @since 1.2 995 */ 996 public static final UnicodeBlock GREEK_EXTENDED = 997 new UnicodeBlock("GREEK_EXTENDED", 998 "GREEK EXTENDED", 999 "GREEKEXTENDED"); 1000 1001 /** 1002 * Constant for the "General Punctuation" Unicode character block. 
1003 * @since 1.2 1004 */ 1005 public static final UnicodeBlock GENERAL_PUNCTUATION = 1006 new UnicodeBlock("GENERAL_PUNCTUATION", 1007 "GENERAL PUNCTUATION", 1008 "GENERALPUNCTUATION"); 1009 1010 /** 1011 * Constant for the "Superscripts and Subscripts" Unicode character 1012 * block. 1013 * @since 1.2 1014 */ 1015 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1016 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1017 "SUPERSCRIPTS AND SUBSCRIPTS", 1018 "SUPERSCRIPTSANDSUBSCRIPTS"); 1019 1020 /** 1021 * Constant for the "Currency Symbols" Unicode character block. 1022 * @since 1.2 1023 */ 1024 public static final UnicodeBlock CURRENCY_SYMBOLS = 1025 new UnicodeBlock("CURRENCY_SYMBOLS", 1026 "CURRENCY SYMBOLS", 1027 "CURRENCYSYMBOLS"); 1028 1029 /** 1030 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1031 * character block. 1032 * <p> 1033 * This block was previously known as "Combining Marks for Symbols". 1034 * @since 1.2 1035 */ 1036 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1037 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1038 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1039 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1040 "COMBINING MARKS FOR SYMBOLS", 1041 "COMBININGMARKSFORSYMBOLS"); 1042 1043 /** 1044 * Constant for the "Letterlike Symbols" Unicode character block. 1045 * @since 1.2 1046 */ 1047 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1048 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1049 "LETTERLIKE SYMBOLS", 1050 "LETTERLIKESYMBOLS"); 1051 1052 /** 1053 * Constant for the "Number Forms" Unicode character block. 1054 * @since 1.2 1055 */ 1056 public static final UnicodeBlock NUMBER_FORMS = 1057 new UnicodeBlock("NUMBER_FORMS", 1058 "NUMBER FORMS", 1059 "NUMBERFORMS"); 1060 1061 /** 1062 * Constant for the "Arrows" Unicode character block. 
1063 * @since 1.2 1064 */ 1065 public static final UnicodeBlock ARROWS = 1066 new UnicodeBlock("ARROWS"); 1067 1068 /** 1069 * Constant for the "Mathematical Operators" Unicode character block. 1070 * @since 1.2 1071 */ 1072 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1073 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1074 "MATHEMATICAL OPERATORS", 1075 "MATHEMATICALOPERATORS"); 1076 1077 /** 1078 * Constant for the "Miscellaneous Technical" Unicode character block. 1079 * @since 1.2 1080 */ 1081 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1082 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1083 "MISCELLANEOUS TECHNICAL", 1084 "MISCELLANEOUSTECHNICAL"); 1085 1086 /** 1087 * Constant for the "Control Pictures" Unicode character block. 1088 * @since 1.2 1089 */ 1090 public static final UnicodeBlock CONTROL_PICTURES = 1091 new UnicodeBlock("CONTROL_PICTURES", 1092 "CONTROL PICTURES", 1093 "CONTROLPICTURES"); 1094 1095 /** 1096 * Constant for the "Optical Character Recognition" Unicode character block. 1097 * @since 1.2 1098 */ 1099 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1100 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1101 "OPTICAL CHARACTER RECOGNITION", 1102 "OPTICALCHARACTERRECOGNITION"); 1103 1104 /** 1105 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1106 * @since 1.2 1107 */ 1108 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1109 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1110 "ENCLOSED ALPHANUMERICS", 1111 "ENCLOSEDALPHANUMERICS"); 1112 1113 /** 1114 * Constant for the "Box Drawing" Unicode character block. 1115 * @since 1.2 1116 */ 1117 public static final UnicodeBlock BOX_DRAWING = 1118 new UnicodeBlock("BOX_DRAWING", 1119 "BOX DRAWING", 1120 "BOXDRAWING"); 1121 1122 /** 1123 * Constant for the "Block Elements" Unicode character block. 
1124 * @since 1.2 1125 */ 1126 public static final UnicodeBlock BLOCK_ELEMENTS = 1127 new UnicodeBlock("BLOCK_ELEMENTS", 1128 "BLOCK ELEMENTS", 1129 "BLOCKELEMENTS"); 1130 1131 /** 1132 * Constant for the "Geometric Shapes" Unicode character block. 1133 * @since 1.2 1134 */ 1135 public static final UnicodeBlock GEOMETRIC_SHAPES = 1136 new UnicodeBlock("GEOMETRIC_SHAPES", 1137 "GEOMETRIC SHAPES", 1138 "GEOMETRICSHAPES"); 1139 1140 /** 1141 * Constant for the "Miscellaneous Symbols" Unicode character block. 1142 * @since 1.2 1143 */ 1144 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1145 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1146 "MISCELLANEOUS SYMBOLS", 1147 "MISCELLANEOUSSYMBOLS"); 1148 1149 /** 1150 * Constant for the "Dingbats" Unicode character block. 1151 * @since 1.2 1152 */ 1153 public static final UnicodeBlock DINGBATS = 1154 new UnicodeBlock("DINGBATS"); 1155 1156 /** 1157 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1158 * @since 1.2 1159 */ 1160 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1161 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1162 "CJK SYMBOLS AND PUNCTUATION", 1163 "CJKSYMBOLSANDPUNCTUATION"); 1164 1165 /** 1166 * Constant for the "Hiragana" Unicode character block. 1167 * @since 1.2 1168 */ 1169 public static final UnicodeBlock HIRAGANA = 1170 new UnicodeBlock("HIRAGANA"); 1171 1172 /** 1173 * Constant for the "Katakana" Unicode character block. 1174 * @since 1.2 1175 */ 1176 public static final UnicodeBlock KATAKANA = 1177 new UnicodeBlock("KATAKANA"); 1178 1179 /** 1180 * Constant for the "Bopomofo" Unicode character block. 1181 * @since 1.2 1182 */ 1183 public static final UnicodeBlock BOPOMOFO = 1184 new UnicodeBlock("BOPOMOFO"); 1185 1186 /** 1187 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 
1188 * @since 1.2 1189 */ 1190 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1191 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1192 "HANGUL COMPATIBILITY JAMO", 1193 "HANGULCOMPATIBILITYJAMO"); 1194 1195 /** 1196 * Constant for the "Kanbun" Unicode character block. 1197 * @since 1.2 1198 */ 1199 public static final UnicodeBlock KANBUN = 1200 new UnicodeBlock("KANBUN"); 1201 1202 /** 1203 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1204 * @since 1.2 1205 */ 1206 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1207 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1208 "ENCLOSED CJK LETTERS AND MONTHS", 1209 "ENCLOSEDCJKLETTERSANDMONTHS"); 1210 1211 /** 1212 * Constant for the "CJK Compatibility" Unicode character block. 1213 * @since 1.2 1214 */ 1215 public static final UnicodeBlock CJK_COMPATIBILITY = 1216 new UnicodeBlock("CJK_COMPATIBILITY", 1217 "CJK COMPATIBILITY", 1218 "CJKCOMPATIBILITY"); 1219 1220 /** 1221 * Constant for the "CJK Unified Ideographs" Unicode character block. 1222 * @since 1.2 1223 */ 1224 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1225 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1226 "CJK UNIFIED IDEOGRAPHS", 1227 "CJKUNIFIEDIDEOGRAPHS"); 1228 1229 /** 1230 * Constant for the "Hangul Syllables" Unicode character block. 1231 * @since 1.2 1232 */ 1233 public static final UnicodeBlock HANGUL_SYLLABLES = 1234 new UnicodeBlock("HANGUL_SYLLABLES", 1235 "HANGUL SYLLABLES", 1236 "HANGULSYLLABLES"); 1237 1238 /** 1239 * Constant for the "Private Use Area" Unicode character block. 1240 * @since 1.2 1241 */ 1242 public static final UnicodeBlock PRIVATE_USE_AREA = 1243 new UnicodeBlock("PRIVATE_USE_AREA", 1244 "PRIVATE USE AREA", 1245 "PRIVATEUSEAREA"); 1246 1247 /** 1248 * Constant for the "CJK Compatibility Ideographs" Unicode character 1249 * block. 
1250 * @since 1.2 1251 */ 1252 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1253 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1254 "CJK COMPATIBILITY IDEOGRAPHS", 1255 "CJKCOMPATIBILITYIDEOGRAPHS"); 1256 1257 /** 1258 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1259 * @since 1.2 1260 */ 1261 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1262 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1263 "ALPHABETIC PRESENTATION FORMS", 1264 "ALPHABETICPRESENTATIONFORMS"); 1265 1266 /** 1267 * Constant for the "Arabic Presentation Forms-A" Unicode character 1268 * block. 1269 * @since 1.2 1270 */ 1271 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1272 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1273 "ARABIC PRESENTATION FORMS-A", 1274 "ARABICPRESENTATIONFORMS-A"); 1275 1276 /** 1277 * Constant for the "Combining Half Marks" Unicode character block. 1278 * @since 1.2 1279 */ 1280 public static final UnicodeBlock COMBINING_HALF_MARKS = 1281 new UnicodeBlock("COMBINING_HALF_MARKS", 1282 "COMBINING HALF MARKS", 1283 "COMBININGHALFMARKS"); 1284 1285 /** 1286 * Constant for the "CJK Compatibility Forms" Unicode character block. 1287 * @since 1.2 1288 */ 1289 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1290 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1291 "CJK COMPATIBILITY FORMS", 1292 "CJKCOMPATIBILITYFORMS"); 1293 1294 /** 1295 * Constant for the "Small Form Variants" Unicode character block. 1296 * @since 1.2 1297 */ 1298 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1299 new UnicodeBlock("SMALL_FORM_VARIANTS", 1300 "SMALL FORM VARIANTS", 1301 "SMALLFORMVARIANTS"); 1302 1303 /** 1304 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 
1305 * @since 1.2 1306 */ 1307 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1308 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1309 "ARABIC PRESENTATION FORMS-B", 1310 "ARABICPRESENTATIONFORMS-B"); 1311 1312 /** 1313 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1314 * block. 1315 * @since 1.2 1316 */ 1317 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1318 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1319 "HALFWIDTH AND FULLWIDTH FORMS", 1320 "HALFWIDTHANDFULLWIDTHFORMS"); 1321 1322 /** 1323 * Constant for the "Specials" Unicode character block. 1324 * @since 1.2 1325 */ 1326 public static final UnicodeBlock SPECIALS = 1327 new UnicodeBlock("SPECIALS"); 1328 1329 /** 1330 * @deprecated 1331 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1332 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1333 * These constants match the block definitions of the Unicode Standard. 1334 * The {@link #of(char)} and {@link #of(int)} methods return the 1335 * standard constants. 1336 */ 1337 @Deprecated(since="1.5") 1338 public static final UnicodeBlock SURROGATES_AREA = 1339 new UnicodeBlock("SURROGATES_AREA"); 1340 1341 /** 1342 * Constant for the "Syriac" Unicode character block. 1343 * @since 1.4 1344 */ 1345 public static final UnicodeBlock SYRIAC = 1346 new UnicodeBlock("SYRIAC"); 1347 1348 /** 1349 * Constant for the "Thaana" Unicode character block. 1350 * @since 1.4 1351 */ 1352 public static final UnicodeBlock THAANA = 1353 new UnicodeBlock("THAANA"); 1354 1355 /** 1356 * Constant for the "Sinhala" Unicode character block. 1357 * @since 1.4 1358 */ 1359 public static final UnicodeBlock SINHALA = 1360 new UnicodeBlock("SINHALA"); 1361 1362 /** 1363 * Constant for the "Myanmar" Unicode character block. 
1364 * @since 1.4 1365 */ 1366 public static final UnicodeBlock MYANMAR = 1367 new UnicodeBlock("MYANMAR"); 1368 1369 /** 1370 * Constant for the "Ethiopic" Unicode character block. 1371 * @since 1.4 1372 */ 1373 public static final UnicodeBlock ETHIOPIC = 1374 new UnicodeBlock("ETHIOPIC"); 1375 1376 /** 1377 * Constant for the "Cherokee" Unicode character block. 1378 * @since 1.4 1379 */ 1380 public static final UnicodeBlock CHEROKEE = 1381 new UnicodeBlock("CHEROKEE"); 1382 1383 /** 1384 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1385 * @since 1.4 1386 */ 1387 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1388 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1389 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1390 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1391 1392 /** 1393 * Constant for the "Ogham" Unicode character block. 1394 * @since 1.4 1395 */ 1396 public static final UnicodeBlock OGHAM = 1397 new UnicodeBlock("OGHAM"); 1398 1399 /** 1400 * Constant for the "Runic" Unicode character block. 1401 * @since 1.4 1402 */ 1403 public static final UnicodeBlock RUNIC = 1404 new UnicodeBlock("RUNIC"); 1405 1406 /** 1407 * Constant for the "Khmer" Unicode character block. 1408 * @since 1.4 1409 */ 1410 public static final UnicodeBlock KHMER = 1411 new UnicodeBlock("KHMER"); 1412 1413 /** 1414 * Constant for the "Mongolian" Unicode character block. 1415 * @since 1.4 1416 */ 1417 public static final UnicodeBlock MONGOLIAN = 1418 new UnicodeBlock("MONGOLIAN"); 1419 1420 /** 1421 * Constant for the "Braille Patterns" Unicode character block. 1422 * @since 1.4 1423 */ 1424 public static final UnicodeBlock BRAILLE_PATTERNS = 1425 new UnicodeBlock("BRAILLE_PATTERNS", 1426 "BRAILLE PATTERNS", 1427 "BRAILLEPATTERNS"); 1428 1429 /** 1430 * Constant for the "CJK Radicals Supplement" Unicode character block. 
1431 * @since 1.4 1432 */ 1433 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1434 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1435 "CJK RADICALS SUPPLEMENT", 1436 "CJKRADICALSSUPPLEMENT"); 1437 1438 /** 1439 * Constant for the "Kangxi Radicals" Unicode character block. 1440 * @since 1.4 1441 */ 1442 public static final UnicodeBlock KANGXI_RADICALS = 1443 new UnicodeBlock("KANGXI_RADICALS", 1444 "KANGXI RADICALS", 1445 "KANGXIRADICALS"); 1446 1447 /** 1448 * Constant for the "Ideographic Description Characters" Unicode character block. 1449 * @since 1.4 1450 */ 1451 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1452 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1453 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1454 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1455 1456 /** 1457 * Constant for the "Bopomofo Extended" Unicode character block. 1458 * @since 1.4 1459 */ 1460 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1461 new UnicodeBlock("BOPOMOFO_EXTENDED", 1462 "BOPOMOFO EXTENDED", 1463 "BOPOMOFOEXTENDED"); 1464 1465 /** 1466 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1467 * @since 1.4 1468 */ 1469 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1470 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1471 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1472 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1473 1474 /** 1475 * Constant for the "Yi Syllables" Unicode character block. 1476 * @since 1.4 1477 */ 1478 public static final UnicodeBlock YI_SYLLABLES = 1479 new UnicodeBlock("YI_SYLLABLES", 1480 "YI SYLLABLES", 1481 "YISYLLABLES"); 1482 1483 /** 1484 * Constant for the "Yi Radicals" Unicode character block. 1485 * @since 1.4 1486 */ 1487 public static final UnicodeBlock YI_RADICALS = 1488 new UnicodeBlock("YI_RADICALS", 1489 "YI RADICALS", 1490 "YIRADICALS"); 1491 1492 /** 1493 * Constant for the "Cyrillic Supplement" Unicode character block. 
1494 * This block was previously known as the "Cyrillic Supplementary" block. 1495 * @since 1.5 1496 */ 1497 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1498 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1499 "CYRILLIC SUPPLEMENTARY", 1500 "CYRILLICSUPPLEMENTARY", 1501 "CYRILLIC SUPPLEMENT", 1502 "CYRILLICSUPPLEMENT"); 1503 1504 /** 1505 * Constant for the "Tagalog" Unicode character block. 1506 * @since 1.5 1507 */ 1508 public static final UnicodeBlock TAGALOG = 1509 new UnicodeBlock("TAGALOG"); 1510 1511 /** 1512 * Constant for the "Hanunoo" Unicode character block. 1513 * @since 1.5 1514 */ 1515 public static final UnicodeBlock HANUNOO = 1516 new UnicodeBlock("HANUNOO"); 1517 1518 /** 1519 * Constant for the "Buhid" Unicode character block. 1520 * @since 1.5 1521 */ 1522 public static final UnicodeBlock BUHID = 1523 new UnicodeBlock("BUHID"); 1524 1525 /** 1526 * Constant for the "Tagbanwa" Unicode character block. 1527 * @since 1.5 1528 */ 1529 public static final UnicodeBlock TAGBANWA = 1530 new UnicodeBlock("TAGBANWA"); 1531 1532 /** 1533 * Constant for the "Limbu" Unicode character block. 1534 * @since 1.5 1535 */ 1536 public static final UnicodeBlock LIMBU = 1537 new UnicodeBlock("LIMBU"); 1538 1539 /** 1540 * Constant for the "Tai Le" Unicode character block. 1541 * @since 1.5 1542 */ 1543 public static final UnicodeBlock TAI_LE = 1544 new UnicodeBlock("TAI_LE", 1545 "TAI LE", 1546 "TAILE"); 1547 1548 /** 1549 * Constant for the "Khmer Symbols" Unicode character block. 1550 * @since 1.5 1551 */ 1552 public static final UnicodeBlock KHMER_SYMBOLS = 1553 new UnicodeBlock("KHMER_SYMBOLS", 1554 "KHMER SYMBOLS", 1555 "KHMERSYMBOLS"); 1556 1557 /** 1558 * Constant for the "Phonetic Extensions" Unicode character block. 
1559 * @since 1.5 1560 */ 1561 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1562 new UnicodeBlock("PHONETIC_EXTENSIONS", 1563 "PHONETIC EXTENSIONS", 1564 "PHONETICEXTENSIONS"); 1565 1566 /** 1567 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1568 * @since 1.5 1569 */ 1570 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1571 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1572 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1573 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1574 1575 /** 1576 * Constant for the "Supplemental Arrows-A" Unicode character block. 1577 * @since 1.5 1578 */ 1579 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1580 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1581 "SUPPLEMENTAL ARROWS-A", 1582 "SUPPLEMENTALARROWS-A"); 1583 1584 /** 1585 * Constant for the "Supplemental Arrows-B" Unicode character block. 1586 * @since 1.5 1587 */ 1588 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1589 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1590 "SUPPLEMENTAL ARROWS-B", 1591 "SUPPLEMENTALARROWS-B"); 1592 1593 /** 1594 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1595 * character block. 1596 * @since 1.5 1597 */ 1598 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1599 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1600 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1601 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1602 1603 /** 1604 * Constant for the "Supplemental Mathematical Operators" Unicode 1605 * character block. 1606 * @since 1.5 1607 */ 1608 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1609 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1610 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1611 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1612 1613 /** 1614 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1615 * block. 
1616 * @since 1.5 1617 */ 1618 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1619 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1620 "MISCELLANEOUS SYMBOLS AND ARROWS", 1621 "MISCELLANEOUSSYMBOLSANDARROWS"); 1622 1623 /** 1624 * Constant for the "Katakana Phonetic Extensions" Unicode character 1625 * block. 1626 * @since 1.5 1627 */ 1628 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1629 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1630 "KATAKANA PHONETIC EXTENSIONS", 1631 "KATAKANAPHONETICEXTENSIONS"); 1632 1633 /** 1634 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1635 * @since 1.5 1636 */ 1637 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1638 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1639 "YIJING HEXAGRAM SYMBOLS", 1640 "YIJINGHEXAGRAMSYMBOLS"); 1641 1642 /** 1643 * Constant for the "Variation Selectors" Unicode character block. 1644 * @since 1.5 1645 */ 1646 public static final UnicodeBlock VARIATION_SELECTORS = 1647 new UnicodeBlock("VARIATION_SELECTORS", 1648 "VARIATION SELECTORS", 1649 "VARIATIONSELECTORS"); 1650 1651 /** 1652 * Constant for the "Linear B Syllabary" Unicode character block. 1653 * @since 1.5 1654 */ 1655 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1656 new UnicodeBlock("LINEAR_B_SYLLABARY", 1657 "LINEAR B SYLLABARY", 1658 "LINEARBSYLLABARY"); 1659 1660 /** 1661 * Constant for the "Linear B Ideograms" Unicode character block. 1662 * @since 1.5 1663 */ 1664 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1665 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1666 "LINEAR B IDEOGRAMS", 1667 "LINEARBIDEOGRAMS"); 1668 1669 /** 1670 * Constant for the "Aegean Numbers" Unicode character block. 1671 * @since 1.5 1672 */ 1673 public static final UnicodeBlock AEGEAN_NUMBERS = 1674 new UnicodeBlock("AEGEAN_NUMBERS", 1675 "AEGEAN NUMBERS", 1676 "AEGEANNUMBERS"); 1677 1678 /** 1679 * Constant for the "Old Italic" Unicode character block. 
1680 * @since 1.5 1681 */ 1682 public static final UnicodeBlock OLD_ITALIC = 1683 new UnicodeBlock("OLD_ITALIC", 1684 "OLD ITALIC", 1685 "OLDITALIC"); 1686 1687 /** 1688 * Constant for the "Gothic" Unicode character block. 1689 * @since 1.5 1690 */ 1691 public static final UnicodeBlock GOTHIC = 1692 new UnicodeBlock("GOTHIC"); 1693 1694 /** 1695 * Constant for the "Ugaritic" Unicode character block. 1696 * @since 1.5 1697 */ 1698 public static final UnicodeBlock UGARITIC = 1699 new UnicodeBlock("UGARITIC"); 1700 1701 /** 1702 * Constant for the "Deseret" Unicode character block. 1703 * @since 1.5 1704 */ 1705 public static final UnicodeBlock DESERET = 1706 new UnicodeBlock("DESERET"); 1707 1708 /** 1709 * Constant for the "Shavian" Unicode character block. 1710 * @since 1.5 1711 */ 1712 public static final UnicodeBlock SHAVIAN = 1713 new UnicodeBlock("SHAVIAN"); 1714 1715 /** 1716 * Constant for the "Osmanya" Unicode character block. 1717 * @since 1.5 1718 */ 1719 public static final UnicodeBlock OSMANYA = 1720 new UnicodeBlock("OSMANYA"); 1721 1722 /** 1723 * Constant for the "Cypriot Syllabary" Unicode character block. 1724 * @since 1.5 1725 */ 1726 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1727 new UnicodeBlock("CYPRIOT_SYLLABARY", 1728 "CYPRIOT SYLLABARY", 1729 "CYPRIOTSYLLABARY"); 1730 1731 /** 1732 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1733 * @since 1.5 1734 */ 1735 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1736 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1737 "BYZANTINE MUSICAL SYMBOLS", 1738 "BYZANTINEMUSICALSYMBOLS"); 1739 1740 /** 1741 * Constant for the "Musical Symbols" Unicode character block. 1742 * @since 1.5 1743 */ 1744 public static final UnicodeBlock MUSICAL_SYMBOLS = 1745 new UnicodeBlock("MUSICAL_SYMBOLS", 1746 "MUSICAL SYMBOLS", 1747 "MUSICALSYMBOLS"); 1748 1749 /** 1750 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1751 * @since 1.5 1752 */ 1753 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1754 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1755 "TAI XUAN JING SYMBOLS", 1756 "TAIXUANJINGSYMBOLS"); 1757 1758 /** 1759 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1760 * character block. 1761 * @since 1.5 1762 */ 1763 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1764 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1765 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1766 "MATHEMATICALALPHANUMERICSYMBOLS"); 1767 1768 /** 1769 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1770 * character block. 1771 * @since 1.5 1772 */ 1773 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1774 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1775 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1776 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1777 1778 /** 1779 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1780 * @since 1.5 1781 */ 1782 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1783 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1784 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1785 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1786 1787 /** 1788 * Constant for the "Tags" Unicode character block. 1789 * @since 1.5 1790 */ 1791 public static final UnicodeBlock TAGS = 1792 new UnicodeBlock("TAGS"); 1793 1794 /** 1795 * Constant for the "Variation Selectors Supplement" Unicode character 1796 * block. 1797 * @since 1.5 1798 */ 1799 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1800 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1801 "VARIATION SELECTORS SUPPLEMENT", 1802 "VARIATIONSELECTORSSUPPLEMENT"); 1803 1804 /** 1805 * Constant for the "Supplementary Private Use Area-A" Unicode character 1806 * block. 
1807 * @since 1.5 1808 */ 1809 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1810 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1811 "SUPPLEMENTARY PRIVATE USE AREA-A", 1812 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1813 1814 /** 1815 * Constant for the "Supplementary Private Use Area-B" Unicode character 1816 * block. 1817 * @since 1.5 1818 */ 1819 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1820 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1821 "SUPPLEMENTARY PRIVATE USE AREA-B", 1822 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1823 1824 /** 1825 * Constant for the "High Surrogates" Unicode character block. 1826 * This block represents codepoint values in the high surrogate 1827 * range: U+D800 through U+DB7F 1828 * 1829 * @since 1.5 1830 */ 1831 public static final UnicodeBlock HIGH_SURROGATES = 1832 new UnicodeBlock("HIGH_SURROGATES", 1833 "HIGH SURROGATES", 1834 "HIGHSURROGATES"); 1835 1836 /** 1837 * Constant for the "High Private Use Surrogates" Unicode character 1838 * block. 1839 * This block represents codepoint values in the private use high 1840 * surrogate range: U+DB80 through U+DBFF 1841 * 1842 * @since 1.5 1843 */ 1844 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1845 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1846 "HIGH PRIVATE USE SURROGATES", 1847 "HIGHPRIVATEUSESURROGATES"); 1848 1849 /** 1850 * Constant for the "Low Surrogates" Unicode character block. 1851 * This block represents codepoint values in the low surrogate 1852 * range: U+DC00 through U+DFFF 1853 * 1854 * @since 1.5 1855 */ 1856 public static final UnicodeBlock LOW_SURROGATES = 1857 new UnicodeBlock("LOW_SURROGATES", 1858 "LOW SURROGATES", 1859 "LOWSURROGATES"); 1860 1861 /** 1862 * Constant for the "Arabic Supplement" Unicode character block. 
1863 * @since 1.7 1864 */ 1865 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1866 new UnicodeBlock("ARABIC_SUPPLEMENT", 1867 "ARABIC SUPPLEMENT", 1868 "ARABICSUPPLEMENT"); 1869 1870 /** 1871 * Constant for the "NKo" Unicode character block. 1872 * @since 1.7 1873 */ 1874 public static final UnicodeBlock NKO = 1875 new UnicodeBlock("NKO"); 1876 1877 /** 1878 * Constant for the "Samaritan" Unicode character block. 1879 * @since 1.7 1880 */ 1881 public static final UnicodeBlock SAMARITAN = 1882 new UnicodeBlock("SAMARITAN"); 1883 1884 /** 1885 * Constant for the "Mandaic" Unicode character block. 1886 * @since 1.7 1887 */ 1888 public static final UnicodeBlock MANDAIC = 1889 new UnicodeBlock("MANDAIC"); 1890 1891 /** 1892 * Constant for the "Ethiopic Supplement" Unicode character block. 1893 * @since 1.7 1894 */ 1895 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1896 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1897 "ETHIOPIC SUPPLEMENT", 1898 "ETHIOPICSUPPLEMENT"); 1899 1900 /** 1901 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1902 * Unicode character block. 1903 * @since 1.7 1904 */ 1905 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1906 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1907 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1908 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1909 1910 /** 1911 * Constant for the "New Tai Lue" Unicode character block. 1912 * @since 1.7 1913 */ 1914 public static final UnicodeBlock NEW_TAI_LUE = 1915 new UnicodeBlock("NEW_TAI_LUE", 1916 "NEW TAI LUE", 1917 "NEWTAILUE"); 1918 1919 /** 1920 * Constant for the "Buginese" Unicode character block. 1921 * @since 1.7 1922 */ 1923 public static final UnicodeBlock BUGINESE = 1924 new UnicodeBlock("BUGINESE"); 1925 1926 /** 1927 * Constant for the "Tai Tham" Unicode character block. 
1928 * @since 1.7 1929 */ 1930 public static final UnicodeBlock TAI_THAM = 1931 new UnicodeBlock("TAI_THAM", 1932 "TAI THAM", 1933 "TAITHAM"); 1934 1935 /** 1936 * Constant for the "Balinese" Unicode character block. 1937 * @since 1.7 1938 */ 1939 public static final UnicodeBlock BALINESE = 1940 new UnicodeBlock("BALINESE"); 1941 1942 /** 1943 * Constant for the "Sundanese" Unicode character block. 1944 * @since 1.7 1945 */ 1946 public static final UnicodeBlock SUNDANESE = 1947 new UnicodeBlock("SUNDANESE"); 1948 1949 /** 1950 * Constant for the "Batak" Unicode character block. 1951 * @since 1.7 1952 */ 1953 public static final UnicodeBlock BATAK = 1954 new UnicodeBlock("BATAK"); 1955 1956 /** 1957 * Constant for the "Lepcha" Unicode character block. 1958 * @since 1.7 1959 */ 1960 public static final UnicodeBlock LEPCHA = 1961 new UnicodeBlock("LEPCHA"); 1962 1963 /** 1964 * Constant for the "Ol Chiki" Unicode character block. 1965 * @since 1.7 1966 */ 1967 public static final UnicodeBlock OL_CHIKI = 1968 new UnicodeBlock("OL_CHIKI", 1969 "OL CHIKI", 1970 "OLCHIKI"); 1971 1972 /** 1973 * Constant for the "Vedic Extensions" Unicode character block. 1974 * @since 1.7 1975 */ 1976 public static final UnicodeBlock VEDIC_EXTENSIONS = 1977 new UnicodeBlock("VEDIC_EXTENSIONS", 1978 "VEDIC EXTENSIONS", 1979 "VEDICEXTENSIONS"); 1980 1981 /** 1982 * Constant for the "Phonetic Extensions Supplement" Unicode character 1983 * block. 1984 * @since 1.7 1985 */ 1986 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1987 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1988 "PHONETIC EXTENSIONS SUPPLEMENT", 1989 "PHONETICEXTENSIONSSUPPLEMENT"); 1990 1991 /** 1992 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1993 * character block. 
1994 * @since 1.7 1995 */ 1996 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1997 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1998 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1999 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2000 2001 /** 2002 * Constant for the "Glagolitic" Unicode character block. 2003 * @since 1.7 2004 */ 2005 public static final UnicodeBlock GLAGOLITIC = 2006 new UnicodeBlock("GLAGOLITIC"); 2007 2008 /** 2009 * Constant for the "Latin Extended-C" Unicode character block. 2010 * @since 1.7 2011 */ 2012 public static final UnicodeBlock LATIN_EXTENDED_C = 2013 new UnicodeBlock("LATIN_EXTENDED_C", 2014 "LATIN EXTENDED-C", 2015 "LATINEXTENDED-C"); 2016 2017 /** 2018 * Constant for the "Coptic" Unicode character block. 2019 * @since 1.7 2020 */ 2021 public static final UnicodeBlock COPTIC = 2022 new UnicodeBlock("COPTIC"); 2023 2024 /** 2025 * Constant for the "Georgian Supplement" Unicode character block. 2026 * @since 1.7 2027 */ 2028 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2029 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2030 "GEORGIAN SUPPLEMENT", 2031 "GEORGIANSUPPLEMENT"); 2032 2033 /** 2034 * Constant for the "Tifinagh" Unicode character block. 2035 * @since 1.7 2036 */ 2037 public static final UnicodeBlock TIFINAGH = 2038 new UnicodeBlock("TIFINAGH"); 2039 2040 /** 2041 * Constant for the "Ethiopic Extended" Unicode character block. 2042 * @since 1.7 2043 */ 2044 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2045 new UnicodeBlock("ETHIOPIC_EXTENDED", 2046 "ETHIOPIC EXTENDED", 2047 "ETHIOPICEXTENDED"); 2048 2049 /** 2050 * Constant for the "Cyrillic Extended-A" Unicode character block. 2051 * @since 1.7 2052 */ 2053 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2054 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2055 "CYRILLIC EXTENDED-A", 2056 "CYRILLICEXTENDED-A"); 2057 2058 /** 2059 * Constant for the "Supplemental Punctuation" Unicode character block. 
2060 * @since 1.7 2061 */ 2062 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2063 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2064 "SUPPLEMENTAL PUNCTUATION", 2065 "SUPPLEMENTALPUNCTUATION"); 2066 2067 /** 2068 * Constant for the "CJK Strokes" Unicode character block. 2069 * @since 1.7 2070 */ 2071 public static final UnicodeBlock CJK_STROKES = 2072 new UnicodeBlock("CJK_STROKES", 2073 "CJK STROKES", 2074 "CJKSTROKES"); 2075 2076 /** 2077 * Constant for the "Lisu" Unicode character block. 2078 * @since 1.7 2079 */ 2080 public static final UnicodeBlock LISU = 2081 new UnicodeBlock("LISU"); 2082 2083 /** 2084 * Constant for the "Vai" Unicode character block. 2085 * @since 1.7 2086 */ 2087 public static final UnicodeBlock VAI = 2088 new UnicodeBlock("VAI"); 2089 2090 /** 2091 * Constant for the "Cyrillic Extended-B" Unicode character block. 2092 * @since 1.7 2093 */ 2094 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2095 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2096 "CYRILLIC EXTENDED-B", 2097 "CYRILLICEXTENDED-B"); 2098 2099 /** 2100 * Constant for the "Bamum" Unicode character block. 2101 * @since 1.7 2102 */ 2103 public static final UnicodeBlock BAMUM = 2104 new UnicodeBlock("BAMUM"); 2105 2106 /** 2107 * Constant for the "Modifier Tone Letters" Unicode character block. 2108 * @since 1.7 2109 */ 2110 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2111 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2112 "MODIFIER TONE LETTERS", 2113 "MODIFIERTONELETTERS"); 2114 2115 /** 2116 * Constant for the "Latin Extended-D" Unicode character block. 2117 * @since 1.7 2118 */ 2119 public static final UnicodeBlock LATIN_EXTENDED_D = 2120 new UnicodeBlock("LATIN_EXTENDED_D", 2121 "LATIN EXTENDED-D", 2122 "LATINEXTENDED-D"); 2123 2124 /** 2125 * Constant for the "Syloti Nagri" Unicode character block. 
2126 * @since 1.7 2127 */ 2128 public static final UnicodeBlock SYLOTI_NAGRI = 2129 new UnicodeBlock("SYLOTI_NAGRI", 2130 "SYLOTI NAGRI", 2131 "SYLOTINAGRI"); 2132 2133 /** 2134 * Constant for the "Common Indic Number Forms" Unicode character block. 2135 * @since 1.7 2136 */ 2137 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2138 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2139 "COMMON INDIC NUMBER FORMS", 2140 "COMMONINDICNUMBERFORMS"); 2141 2142 /** 2143 * Constant for the "Phags-pa" Unicode character block. 2144 * @since 1.7 2145 */ 2146 public static final UnicodeBlock PHAGS_PA = 2147 new UnicodeBlock("PHAGS_PA", 2148 "PHAGS-PA"); 2149 2150 /** 2151 * Constant for the "Saurashtra" Unicode character block. 2152 * @since 1.7 2153 */ 2154 public static final UnicodeBlock SAURASHTRA = 2155 new UnicodeBlock("SAURASHTRA"); 2156 2157 /** 2158 * Constant for the "Devanagari Extended" Unicode character block. 2159 * @since 1.7 2160 */ 2161 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2162 new UnicodeBlock("DEVANAGARI_EXTENDED", 2163 "DEVANAGARI EXTENDED", 2164 "DEVANAGARIEXTENDED"); 2165 2166 /** 2167 * Constant for the "Kayah Li" Unicode character block. 2168 * @since 1.7 2169 */ 2170 public static final UnicodeBlock KAYAH_LI = 2171 new UnicodeBlock("KAYAH_LI", 2172 "KAYAH LI", 2173 "KAYAHLI"); 2174 2175 /** 2176 * Constant for the "Rejang" Unicode character block. 2177 * @since 1.7 2178 */ 2179 public static final UnicodeBlock REJANG = 2180 new UnicodeBlock("REJANG"); 2181 2182 /** 2183 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2184 * @since 1.7 2185 */ 2186 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2187 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2188 "HANGUL JAMO EXTENDED-A", 2189 "HANGULJAMOEXTENDED-A"); 2190 2191 /** 2192 * Constant for the "Javanese" Unicode character block. 
2193 * @since 1.7 2194 */ 2195 public static final UnicodeBlock JAVANESE = 2196 new UnicodeBlock("JAVANESE"); 2197 2198 /** 2199 * Constant for the "Cham" Unicode character block. 2200 * @since 1.7 2201 */ 2202 public static final UnicodeBlock CHAM = 2203 new UnicodeBlock("CHAM"); 2204 2205 /** 2206 * Constant for the "Myanmar Extended-A" Unicode character block. 2207 * @since 1.7 2208 */ 2209 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2210 new UnicodeBlock("MYANMAR_EXTENDED_A", 2211 "MYANMAR EXTENDED-A", 2212 "MYANMAREXTENDED-A"); 2213 2214 /** 2215 * Constant for the "Tai Viet" Unicode character block. 2216 * @since 1.7 2217 */ 2218 public static final UnicodeBlock TAI_VIET = 2219 new UnicodeBlock("TAI_VIET", 2220 "TAI VIET", 2221 "TAIVIET"); 2222 2223 /** 2224 * Constant for the "Ethiopic Extended-A" Unicode character block. 2225 * @since 1.7 2226 */ 2227 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2228 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2229 "ETHIOPIC EXTENDED-A", 2230 "ETHIOPICEXTENDED-A"); 2231 2232 /** 2233 * Constant for the "Meetei Mayek" Unicode character block. 2234 * @since 1.7 2235 */ 2236 public static final UnicodeBlock MEETEI_MAYEK = 2237 new UnicodeBlock("MEETEI_MAYEK", 2238 "MEETEI MAYEK", 2239 "MEETEIMAYEK"); 2240 2241 /** 2242 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2243 * @since 1.7 2244 */ 2245 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2246 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2247 "HANGUL JAMO EXTENDED-B", 2248 "HANGULJAMOEXTENDED-B"); 2249 2250 /** 2251 * Constant for the "Vertical Forms" Unicode character block. 2252 * @since 1.7 2253 */ 2254 public static final UnicodeBlock VERTICAL_FORMS = 2255 new UnicodeBlock("VERTICAL_FORMS", 2256 "VERTICAL FORMS", 2257 "VERTICALFORMS"); 2258 2259 /** 2260 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2261 * @since 1.7 2262 */ 2263 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2264 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2265 "ANCIENT GREEK NUMBERS", 2266 "ANCIENTGREEKNUMBERS"); 2267 2268 /** 2269 * Constant for the "Ancient Symbols" Unicode character block. 2270 * @since 1.7 2271 */ 2272 public static final UnicodeBlock ANCIENT_SYMBOLS = 2273 new UnicodeBlock("ANCIENT_SYMBOLS", 2274 "ANCIENT SYMBOLS", 2275 "ANCIENTSYMBOLS"); 2276 2277 /** 2278 * Constant for the "Phaistos Disc" Unicode character block. 2279 * @since 1.7 2280 */ 2281 public static final UnicodeBlock PHAISTOS_DISC = 2282 new UnicodeBlock("PHAISTOS_DISC", 2283 "PHAISTOS DISC", 2284 "PHAISTOSDISC"); 2285 2286 /** 2287 * Constant for the "Lycian" Unicode character block. 2288 * @since 1.7 2289 */ 2290 public static final UnicodeBlock LYCIAN = 2291 new UnicodeBlock("LYCIAN"); 2292 2293 /** 2294 * Constant for the "Carian" Unicode character block. 2295 * @since 1.7 2296 */ 2297 public static final UnicodeBlock CARIAN = 2298 new UnicodeBlock("CARIAN"); 2299 2300 /** 2301 * Constant for the "Old Persian" Unicode character block. 2302 * @since 1.7 2303 */ 2304 public static final UnicodeBlock OLD_PERSIAN = 2305 new UnicodeBlock("OLD_PERSIAN", 2306 "OLD PERSIAN", 2307 "OLDPERSIAN"); 2308 2309 /** 2310 * Constant for the "Imperial Aramaic" Unicode character block. 2311 * @since 1.7 2312 */ 2313 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2314 new UnicodeBlock("IMPERIAL_ARAMAIC", 2315 "IMPERIAL ARAMAIC", 2316 "IMPERIALARAMAIC"); 2317 2318 /** 2319 * Constant for the "Phoenician" Unicode character block. 2320 * @since 1.7 2321 */ 2322 public static final UnicodeBlock PHOENICIAN = 2323 new UnicodeBlock("PHOENICIAN"); 2324 2325 /** 2326 * Constant for the "Lydian" Unicode character block. 2327 * @since 1.7 2328 */ 2329 public static final UnicodeBlock LYDIAN = 2330 new UnicodeBlock("LYDIAN"); 2331 2332 /** 2333 * Constant for the "Kharoshthi" Unicode character block. 
2334 * @since 1.7 2335 */ 2336 public static final UnicodeBlock KHAROSHTHI = 2337 new UnicodeBlock("KHAROSHTHI"); 2338 2339 /** 2340 * Constant for the "Old South Arabian" Unicode character block. 2341 * @since 1.7 2342 */ 2343 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2344 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2345 "OLD SOUTH ARABIAN", 2346 "OLDSOUTHARABIAN"); 2347 2348 /** 2349 * Constant for the "Avestan" Unicode character block. 2350 * @since 1.7 2351 */ 2352 public static final UnicodeBlock AVESTAN = 2353 new UnicodeBlock("AVESTAN"); 2354 2355 /** 2356 * Constant for the "Inscriptional Parthian" Unicode character block. 2357 * @since 1.7 2358 */ 2359 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2360 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2361 "INSCRIPTIONAL PARTHIAN", 2362 "INSCRIPTIONALPARTHIAN"); 2363 2364 /** 2365 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2366 * @since 1.7 2367 */ 2368 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2369 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2370 "INSCRIPTIONAL PAHLAVI", 2371 "INSCRIPTIONALPAHLAVI"); 2372 2373 /** 2374 * Constant for the "Old Turkic" Unicode character block. 2375 * @since 1.7 2376 */ 2377 public static final UnicodeBlock OLD_TURKIC = 2378 new UnicodeBlock("OLD_TURKIC", 2379 "OLD TURKIC", 2380 "OLDTURKIC"); 2381 2382 /** 2383 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2384 * @since 1.7 2385 */ 2386 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2387 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2388 "RUMI NUMERAL SYMBOLS", 2389 "RUMINUMERALSYMBOLS"); 2390 2391 /** 2392 * Constant for the "Brahmi" Unicode character block. 2393 * @since 1.7 2394 */ 2395 public static final UnicodeBlock BRAHMI = 2396 new UnicodeBlock("BRAHMI"); 2397 2398 /** 2399 * Constant for the "Kaithi" Unicode character block. 
2400 * @since 1.7 2401 */ 2402 public static final UnicodeBlock KAITHI = 2403 new UnicodeBlock("KAITHI"); 2404 2405 /** 2406 * Constant for the "Cuneiform" Unicode character block. 2407 * @since 1.7 2408 */ 2409 public static final UnicodeBlock CUNEIFORM = 2410 new UnicodeBlock("CUNEIFORM"); 2411 2412 /** 2413 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2414 * character block. 2415 * @since 1.7 2416 */ 2417 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2418 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2419 "CUNEIFORM NUMBERS AND PUNCTUATION", 2420 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2421 2422 /** 2423 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2424 * @since 1.7 2425 */ 2426 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2427 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2428 "EGYPTIAN HIEROGLYPHS", 2429 "EGYPTIANHIEROGLYPHS"); 2430 2431 /** 2432 * Constant for the "Bamum Supplement" Unicode character block. 2433 * @since 1.7 2434 */ 2435 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2436 new UnicodeBlock("BAMUM_SUPPLEMENT", 2437 "BAMUM SUPPLEMENT", 2438 "BAMUMSUPPLEMENT"); 2439 2440 /** 2441 * Constant for the "Kana Supplement" Unicode character block. 2442 * @since 1.7 2443 */ 2444 public static final UnicodeBlock KANA_SUPPLEMENT = 2445 new UnicodeBlock("KANA_SUPPLEMENT", 2446 "KANA SUPPLEMENT", 2447 "KANASUPPLEMENT"); 2448 2449 /** 2450 * Constant for the "Ancient Greek Musical Notation" Unicode character 2451 * block. 2452 * @since 1.7 2453 */ 2454 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2455 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2456 "ANCIENT GREEK MUSICAL NOTATION", 2457 "ANCIENTGREEKMUSICALNOTATION"); 2458 2459 /** 2460 * Constant for the "Counting Rod Numerals" Unicode character block. 
2461 * @since 1.7 2462 */ 2463 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2464 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2465 "COUNTING ROD NUMERALS", 2466 "COUNTINGRODNUMERALS"); 2467 2468 /** 2469 * Constant for the "Mahjong Tiles" Unicode character block. 2470 * @since 1.7 2471 */ 2472 public static final UnicodeBlock MAHJONG_TILES = 2473 new UnicodeBlock("MAHJONG_TILES", 2474 "MAHJONG TILES", 2475 "MAHJONGTILES"); 2476 2477 /** 2478 * Constant for the "Domino Tiles" Unicode character block. 2479 * @since 1.7 2480 */ 2481 public static final UnicodeBlock DOMINO_TILES = 2482 new UnicodeBlock("DOMINO_TILES", 2483 "DOMINO TILES", 2484 "DOMINOTILES"); 2485 2486 /** 2487 * Constant for the "Playing Cards" Unicode character block. 2488 * @since 1.7 2489 */ 2490 public static final UnicodeBlock PLAYING_CARDS = 2491 new UnicodeBlock("PLAYING_CARDS", 2492 "PLAYING CARDS", 2493 "PLAYINGCARDS"); 2494 2495 /** 2496 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2497 * block. 2498 * @since 1.7 2499 */ 2500 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2501 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2502 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2503 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2504 2505 /** 2506 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2507 * block. 2508 * @since 1.7 2509 */ 2510 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2511 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2512 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2513 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2514 2515 /** 2516 * Constant for the "Miscellaneous Symbols and Pictographs" Unicode 2517 * character block.
2518 * @since 1.7 2519 */ 2520 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2521 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2522 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2523 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2524 2525 /** 2526 * Constant for the "Emoticons" Unicode character block. 2527 * @since 1.7 2528 */ 2529 public static final UnicodeBlock EMOTICONS = 2530 new UnicodeBlock("EMOTICONS"); 2531 2532 /** 2533 * Constant for the "Transport and Map Symbols" Unicode character block. 2534 * @since 1.7 2535 */ 2536 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2537 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2538 "TRANSPORT AND MAP SYMBOLS", 2539 "TRANSPORTANDMAPSYMBOLS"); 2540 2541 /** 2542 * Constant for the "Alchemical Symbols" Unicode character block. 2543 * @since 1.7 2544 */ 2545 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2546 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2547 "ALCHEMICAL SYMBOLS", 2548 "ALCHEMICALSYMBOLS"); 2549 2550 /** 2551 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2552 * character block. 2553 * @since 1.7 2554 */ 2555 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2556 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2557 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2558 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2559 2560 /** 2561 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2562 * character block. 2563 * @since 1.7 2564 */ 2565 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2566 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2567 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2568 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2569 2570 /** 2571 * Constant for the "Arabic Extended-A" Unicode character block.
2572 * @since 1.8 2573 */ 2574 public static final UnicodeBlock ARABIC_EXTENDED_A = 2575 new UnicodeBlock("ARABIC_EXTENDED_A", 2576 "ARABIC EXTENDED-A", 2577 "ARABICEXTENDED-A"); 2578 2579 /** 2580 * Constant for the "Sundanese Supplement" Unicode character block. 2581 * @since 1.8 2582 */ 2583 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2584 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2585 "SUNDANESE SUPPLEMENT", 2586 "SUNDANESESUPPLEMENT"); 2587 2588 /** 2589 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2590 * @since 1.8 2591 */ 2592 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2593 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2594 "MEETEI MAYEK EXTENSIONS", 2595 "MEETEIMAYEKEXTENSIONS"); 2596 2597 /** 2598 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2599 * @since 1.8 2600 */ 2601 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2602 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2603 "MEROITIC HIEROGLYPHS", 2604 "MEROITICHIEROGLYPHS"); 2605 2606 /** 2607 * Constant for the "Meroitic Cursive" Unicode character block. 2608 * @since 1.8 2609 */ 2610 public static final UnicodeBlock MEROITIC_CURSIVE = 2611 new UnicodeBlock("MEROITIC_CURSIVE", 2612 "MEROITIC CURSIVE", 2613 "MEROITICCURSIVE"); 2614 2615 /** 2616 * Constant for the "Sora Sompeng" Unicode character block. 2617 * @since 1.8 2618 */ 2619 public static final UnicodeBlock SORA_SOMPENG = 2620 new UnicodeBlock("SORA_SOMPENG", 2621 "SORA SOMPENG", 2622 "SORASOMPENG"); 2623 2624 /** 2625 * Constant for the "Chakma" Unicode character block. 2626 * @since 1.8 2627 */ 2628 public static final UnicodeBlock CHAKMA = 2629 new UnicodeBlock("CHAKMA"); 2630 2631 /** 2632 * Constant for the "Sharada" Unicode character block. 2633 * @since 1.8 2634 */ 2635 public static final UnicodeBlock SHARADA = 2636 new UnicodeBlock("SHARADA"); 2637 2638 /** 2639 * Constant for the "Takri" Unicode character block. 
2640 * @since 1.8 2641 */ 2642 public static final UnicodeBlock TAKRI = 2643 new UnicodeBlock("TAKRI"); 2644 2645 /** 2646 * Constant for the "Miao" Unicode character block. 2647 * @since 1.8 2648 */ 2649 public static final UnicodeBlock MIAO = 2650 new UnicodeBlock("MIAO"); 2651 2652 /** 2653 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2654 * character block. 2655 * @since 1.8 2656 */ 2657 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2658 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2659 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2660 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2661 2662 /** 2663 * Constant for the "Combining Diacritical Marks Extended" Unicode 2664 * character block. 2665 * @since 9 2666 */ 2667 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2668 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2669 "COMBINING DIACRITICAL MARKS EXTENDED", 2670 "COMBININGDIACRITICALMARKSEXTENDED"); 2671 2672 /** 2673 * Constant for the "Myanmar Extended-B" Unicode character block. 2674 * @since 9 2675 */ 2676 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2677 new UnicodeBlock("MYANMAR_EXTENDED_B", 2678 "MYANMAR EXTENDED-B", 2679 "MYANMAREXTENDED-B"); 2680 2681 /** 2682 * Constant for the "Latin Extended-E" Unicode character block. 2683 * @since 9 2684 */ 2685 public static final UnicodeBlock LATIN_EXTENDED_E = 2686 new UnicodeBlock("LATIN_EXTENDED_E", 2687 "LATIN EXTENDED-E", 2688 "LATINEXTENDED-E"); 2689 2690 /** 2691 * Constant for the "Coptic Epact Numbers" Unicode character block. 2692 * @since 9 2693 */ 2694 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2695 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2696 "COPTIC EPACT NUMBERS", 2697 "COPTICEPACTNUMBERS"); 2698 2699 /** 2700 * Constant for the "Old Permic" Unicode character block. 
2701 * @since 9 2702 */ 2703 public static final UnicodeBlock OLD_PERMIC = 2704 new UnicodeBlock("OLD_PERMIC", 2705 "OLD PERMIC", 2706 "OLDPERMIC"); 2707 2708 /** 2709 * Constant for the "Elbasan" Unicode character block. 2710 * @since 9 2711 */ 2712 public static final UnicodeBlock ELBASAN = 2713 new UnicodeBlock("ELBASAN"); 2714 2715 /** 2716 * Constant for the "Caucasian Albanian" Unicode character block. 2717 * @since 9 2718 */ 2719 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2720 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2721 "CAUCASIAN ALBANIAN", 2722 "CAUCASIANALBANIAN"); 2723 2724 /** 2725 * Constant for the "Linear A" Unicode character block. 2726 * @since 9 2727 */ 2728 public static final UnicodeBlock LINEAR_A = 2729 new UnicodeBlock("LINEAR_A", 2730 "LINEAR A", 2731 "LINEARA"); 2732 2733 /** 2734 * Constant for the "Palmyrene" Unicode character block. 2735 * @since 9 2736 */ 2737 public static final UnicodeBlock PALMYRENE = 2738 new UnicodeBlock("PALMYRENE"); 2739 2740 /** 2741 * Constant for the "Nabataean" Unicode character block. 2742 * @since 9 2743 */ 2744 public static final UnicodeBlock NABATAEAN = 2745 new UnicodeBlock("NABATAEAN"); 2746 2747 /** 2748 * Constant for the "Old North Arabian" Unicode character block. 2749 * @since 9 2750 */ 2751 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2752 new UnicodeBlock("OLD_NORTH_ARABIAN", 2753 "OLD NORTH ARABIAN", 2754 "OLDNORTHARABIAN"); 2755 2756 /** 2757 * Constant for the "Manichaean" Unicode character block. 2758 * @since 9 2759 */ 2760 public static final UnicodeBlock MANICHAEAN = 2761 new UnicodeBlock("MANICHAEAN"); 2762 2763 /** 2764 * Constant for the "Psalter Pahlavi" Unicode character block. 2765 * @since 9 2766 */ 2767 public static final UnicodeBlock PSALTER_PAHLAVI = 2768 new UnicodeBlock("PSALTER_PAHLAVI", 2769 "PSALTER PAHLAVI", 2770 "PSALTERPAHLAVI"); 2771 2772 /** 2773 * Constant for the "Mahajani" Unicode character block. 
2774 * @since 9 2775 */ 2776 public static final UnicodeBlock MAHAJANI = 2777 new UnicodeBlock("MAHAJANI"); 2778 2779 /** 2780 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2781 * @since 9 2782 */ 2783 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2784 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2785 "SINHALA ARCHAIC NUMBERS", 2786 "SINHALAARCHAICNUMBERS"); 2787 2788 /** 2789 * Constant for the "Khojki" Unicode character block. 2790 * @since 9 2791 */ 2792 public static final UnicodeBlock KHOJKI = 2793 new UnicodeBlock("KHOJKI"); 2794 2795 /** 2796 * Constant for the "Khudawadi" Unicode character block. 2797 * @since 9 2798 */ 2799 public static final UnicodeBlock KHUDAWADI = 2800 new UnicodeBlock("KHUDAWADI"); 2801 2802 /** 2803 * Constant for the "Grantha" Unicode character block. 2804 * @since 9 2805 */ 2806 public static final UnicodeBlock GRANTHA = 2807 new UnicodeBlock("GRANTHA"); 2808 2809 /** 2810 * Constant for the "Tirhuta" Unicode character block. 2811 * @since 9 2812 */ 2813 public static final UnicodeBlock TIRHUTA = 2814 new UnicodeBlock("TIRHUTA"); 2815 2816 /** 2817 * Constant for the "Siddham" Unicode character block. 2818 * @since 9 2819 */ 2820 public static final UnicodeBlock SIDDHAM = 2821 new UnicodeBlock("SIDDHAM"); 2822 2823 /** 2824 * Constant for the "Modi" Unicode character block. 2825 * @since 9 2826 */ 2827 public static final UnicodeBlock MODI = 2828 new UnicodeBlock("MODI"); 2829 2830 /** 2831 * Constant for the "Warang Citi" Unicode character block. 2832 * @since 9 2833 */ 2834 public static final UnicodeBlock WARANG_CITI = 2835 new UnicodeBlock("WARANG_CITI", 2836 "WARANG CITI", 2837 "WARANGCITI"); 2838 2839 /** 2840 * Constant for the "Pau Cin Hau" Unicode character block. 2841 * @since 9 2842 */ 2843 public static final UnicodeBlock PAU_CIN_HAU = 2844 new UnicodeBlock("PAU_CIN_HAU", 2845 "PAU CIN HAU", 2846 "PAUCINHAU"); 2847 2848 /** 2849 * Constant for the "Mro" Unicode character block. 
2850 * @since 9 2851 */ 2852 public static final UnicodeBlock MRO = 2853 new UnicodeBlock("MRO"); 2854 2855 /** 2856 * Constant for the "Bassa Vah" Unicode character block. 2857 * @since 9 2858 */ 2859 public static final UnicodeBlock BASSA_VAH = 2860 new UnicodeBlock("BASSA_VAH", 2861 "BASSA VAH", 2862 "BASSAVAH"); 2863 2864 /** 2865 * Constant for the "Pahawh Hmong" Unicode character block. 2866 * @since 9 2867 */ 2868 public static final UnicodeBlock PAHAWH_HMONG = 2869 new UnicodeBlock("PAHAWH_HMONG", 2870 "PAHAWH HMONG", 2871 "PAHAWHHMONG"); 2872 2873 /** 2874 * Constant for the "Duployan" Unicode character block. 2875 * @since 9 2876 */ 2877 public static final UnicodeBlock DUPLOYAN = 2878 new UnicodeBlock("DUPLOYAN"); 2879 2880 /** 2881 * Constant for the "Shorthand Format Controls" Unicode character block. 2882 * @since 9 2883 */ 2884 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2885 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2886 "SHORTHAND FORMAT CONTROLS", 2887 "SHORTHANDFORMATCONTROLS"); 2888 2889 /** 2890 * Constant for the "Mende Kikakui" Unicode character block. 2891 * @since 9 2892 */ 2893 public static final UnicodeBlock MENDE_KIKAKUI = 2894 new UnicodeBlock("MENDE_KIKAKUI", 2895 "MENDE KIKAKUI", 2896 "MENDEKIKAKUI"); 2897 2898 /** 2899 * Constant for the "Ornamental Dingbats" Unicode character block. 2900 * @since 9 2901 */ 2902 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2903 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2904 "ORNAMENTAL DINGBATS", 2905 "ORNAMENTALDINGBATS"); 2906 2907 /** 2908 * Constant for the "Geometric Shapes Extended" Unicode character block. 2909 * @since 9 2910 */ 2911 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2912 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2913 "GEOMETRIC SHAPES EXTENDED", 2914 "GEOMETRICSHAPESEXTENDED"); 2915 2916 /** 2917 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2918 * @since 9 2919 */ 2920 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2921 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2922 "SUPPLEMENTAL ARROWS-C", 2923 "SUPPLEMENTALARROWS-C"); 2924 2925 /** 2926 * Constant for the "Cherokee Supplement" Unicode character block. 2927 * @since 9 2928 */ 2929 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2930 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2931 "CHEROKEE SUPPLEMENT", 2932 "CHEROKEESUPPLEMENT"); 2933 2934 /** 2935 * Constant for the "Hatran" Unicode character block. 2936 * @since 9 2937 */ 2938 public static final UnicodeBlock HATRAN = 2939 new UnicodeBlock("HATRAN"); 2940 2941 /** 2942 * Constant for the "Old Hungarian" Unicode character block. 2943 * @since 9 2944 */ 2945 public static final UnicodeBlock OLD_HUNGARIAN = 2946 new UnicodeBlock("OLD_HUNGARIAN", 2947 "OLD HUNGARIAN", 2948 "OLDHUNGARIAN"); 2949 2950 /** 2951 * Constant for the "Multani" Unicode character block. 2952 * @since 9 2953 */ 2954 public static final UnicodeBlock MULTANI = 2955 new UnicodeBlock("MULTANI"); 2956 2957 /** 2958 * Constant for the "Ahom" Unicode character block. 2959 * @since 9 2960 */ 2961 public static final UnicodeBlock AHOM = 2962 new UnicodeBlock("AHOM"); 2963 2964 /** 2965 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2966 * @since 9 2967 */ 2968 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2969 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2970 "EARLY DYNASTIC CUNEIFORM", 2971 "EARLYDYNASTICCUNEIFORM"); 2972 2973 /** 2974 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2975 * @since 9 2976 */ 2977 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2978 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2979 "ANATOLIAN HIEROGLYPHS", 2980 "ANATOLIANHIEROGLYPHS"); 2981 2982 /** 2983 * Constant for the "Sutton SignWriting" Unicode character block. 
2984 * @since 9 2985 */ 2986 public static final UnicodeBlock SUTTON_SIGNWRITING = 2987 new UnicodeBlock("SUTTON_SIGNWRITING", 2988 "SUTTON SIGNWRITING", 2989 "SUTTONSIGNWRITING"); 2990 2991 /** 2992 * Constant for the "Supplemental Symbols and Pictographs" Unicode 2993 * character block. 2994 * @since 9 2995 */ 2996 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2997 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2998 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 2999 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3000 3001 /** 3002 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3003 * character block. 3004 * @since 9 3005 */ 3006 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3007 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3008 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3009 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3010 3011 /** 3012 * Constant for the "Syriac Supplement" Unicode 3013 * character block. 3014 * @since 11 3015 */ 3016 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3017 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3018 "SYRIAC SUPPLEMENT", 3019 "SYRIACSUPPLEMENT"); 3020 3021 /** 3022 * Constant for the "Cyrillic Extended-C" Unicode 3023 * character block. 3024 * @since 11 3025 */ 3026 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3027 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3028 "CYRILLIC EXTENDED-C", 3029 "CYRILLICEXTENDED-C"); 3030 3031 /** 3032 * Constant for the "Osage" Unicode 3033 * character block. 3034 * @since 11 3035 */ 3036 public static final UnicodeBlock OSAGE = 3037 new UnicodeBlock("OSAGE"); 3038 3039 /** 3040 * Constant for the "Newa" Unicode 3041 * character block. 3042 * @since 11 3043 */ 3044 public static final UnicodeBlock NEWA = 3045 new UnicodeBlock("NEWA"); 3046 3047 /** 3048 * Constant for the "Mongolian Supplement" Unicode 3049 * character block. 
3050 * @since 11 3051 */ 3052 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3053 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3054 "MONGOLIAN SUPPLEMENT", 3055 "MONGOLIANSUPPLEMENT"); 3056 3057 /** 3058 * Constant for the "Marchen" Unicode 3059 * character block. 3060 * @since 11 3061 */ 3062 public static final UnicodeBlock MARCHEN = 3063 new UnicodeBlock("MARCHEN"); 3064 3065 /** 3066 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3067 * character block. 3068 * @since 11 3069 */ 3070 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3071 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3072 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3073 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3074 3075 /** 3076 * Constant for the "Tangut" Unicode 3077 * character block. 3078 * @since 11 3079 */ 3080 public static final UnicodeBlock TANGUT = 3081 new UnicodeBlock("TANGUT"); 3082 3083 /** 3084 * Constant for the "Tangut Components" Unicode 3085 * character block. 3086 * @since 11 3087 */ 3088 public static final UnicodeBlock TANGUT_COMPONENTS = 3089 new UnicodeBlock("TANGUT_COMPONENTS", 3090 "TANGUT COMPONENTS", 3091 "TANGUTCOMPONENTS"); 3092 3093 /** 3094 * Constant for the "Kana Extended-A" Unicode 3095 * character block. 3096 * @since 11 3097 */ 3098 public static final UnicodeBlock KANA_EXTENDED_A = 3099 new UnicodeBlock("KANA_EXTENDED_A", 3100 "KANA EXTENDED-A", 3101 "KANAEXTENDED-A"); 3102 /** 3103 * Constant for the "Glagolitic Supplement" Unicode 3104 * character block. 3105 * @since 11 3106 */ 3107 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3108 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3109 "GLAGOLITIC SUPPLEMENT", 3110 "GLAGOLITICSUPPLEMENT"); 3111 /** 3112 * Constant for the "Adlam" Unicode 3113 * character block. 3114 * @since 11 3115 */ 3116 public static final UnicodeBlock ADLAM = 3117 new UnicodeBlock("ADLAM"); 3118 3119 /** 3120 * Constant for the "Masaram Gondi" Unicode 3121 * character block. 
3122 * @since 11 3123 */ 3124 public static final UnicodeBlock MASARAM_GONDI = 3125 new UnicodeBlock("MASARAM_GONDI", 3126 "MASARAM GONDI", 3127 "MASARAMGONDI"); 3128 3129 /** 3130 * Constant for the "Zanabazar Square" Unicode 3131 * character block. 3132 * @since 11 3133 */ 3134 public static final UnicodeBlock ZANABAZAR_SQUARE = 3135 new UnicodeBlock("ZANABAZAR_SQUARE", 3136 "ZANABAZAR SQUARE", 3137 "ZANABAZARSQUARE"); 3138 3139 /** 3140 * Constant for the "Nushu" Unicode 3141 * character block. 3142 * @since 11 3143 */ 3144 public static final UnicodeBlock NUSHU = 3145 new UnicodeBlock("NUSHU"); 3146 3147 /** 3148 * Constant for the "Soyombo" Unicode 3149 * character block. 3150 * @since 11 3151 */ 3152 public static final UnicodeBlock SOYOMBO = 3153 new UnicodeBlock("SOYOMBO"); 3154 3155 /** 3156 * Constant for the "Bhaiksuki" Unicode 3157 * character block. 3158 * @since 11 3159 */ 3160 public static final UnicodeBlock BHAIKSUKI = 3161 new UnicodeBlock("BHAIKSUKI"); 3162 3163 /** 3164 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3165 * character block. 3166 * @since 11 3167 */ 3168 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3169 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3170 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3171 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3172 /** 3173 * Constant for the "Georgian Extended" Unicode 3174 * character block. 3175 * @since 12 3176 */ 3177 public static final UnicodeBlock GEORGIAN_EXTENDED = 3178 new UnicodeBlock("GEORGIAN_EXTENDED", 3179 "GEORGIAN EXTENDED", 3180 "GEORGIANEXTENDED"); 3181 3182 /** 3183 * Constant for the "Hanifi Rohingya" Unicode 3184 * character block. 3185 * @since 12 3186 */ 3187 public static final UnicodeBlock HANIFI_ROHINGYA = 3188 new UnicodeBlock("HANIFI_ROHINGYA", 3189 "HANIFI ROHINGYA", 3190 "HANIFIROHINGYA"); 3191 3192 /** 3193 * Constant for the "Old Sogdian" Unicode 3194 * character block. 
3195 * @since 12 3196 */ 3197 public static final UnicodeBlock OLD_SOGDIAN = 3198 new UnicodeBlock("OLD_SOGDIAN", 3199 "OLD SOGDIAN", 3200 "OLDSOGDIAN"); 3201 3202 /** 3203 * Constant for the "Sogdian" Unicode 3204 * character block. 3205 * @since 12 3206 */ 3207 public static final UnicodeBlock SOGDIAN = 3208 new UnicodeBlock("SOGDIAN"); 3209 3210 /** 3211 * Constant for the "Dogra" Unicode 3212 * character block. 3213 * @since 12 3214 */ 3215 public static final UnicodeBlock DOGRA = 3216 new UnicodeBlock("DOGRA"); 3217 3218 /** 3219 * Constant for the "Gunjala Gondi" Unicode 3220 * character block. 3221 * @since 12 3222 */ 3223 public static final UnicodeBlock GUNJALA_GONDI = 3224 new UnicodeBlock("GUNJALA_GONDI", 3225 "GUNJALA GONDI", 3226 "GUNJALAGONDI"); 3227 3228 /** 3229 * Constant for the "Makasar" Unicode 3230 * character block. 3231 * @since 12 3232 */ 3233 public static final UnicodeBlock MAKASAR = 3234 new UnicodeBlock("MAKASAR"); 3235 3236 /** 3237 * Constant for the "Medefaidrin" Unicode 3238 * character block. 3239 * @since 12 3240 */ 3241 public static final UnicodeBlock MEDEFAIDRIN = 3242 new UnicodeBlock("MEDEFAIDRIN"); 3243 3244 /** 3245 * Constant for the "Mayan Numerals" Unicode 3246 * character block. 3247 * @since 12 3248 */ 3249 public static final UnicodeBlock MAYAN_NUMERALS = 3250 new UnicodeBlock("MAYAN_NUMERALS", 3251 "MAYAN NUMERALS", 3252 "MAYANNUMERALS"); 3253 3254 /** 3255 * Constant for the "Indic Siyaq Numbers" Unicode 3256 * character block. 3257 * @since 12 3258 */ 3259 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3260 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3261 "INDIC SIYAQ NUMBERS", 3262 "INDICSIYAQNUMBERS"); 3263 3264 /** 3265 * Constant for the "Chess Symbols" Unicode 3266 * character block. 
3267 * @since 12 3268 */ 3269 public static final UnicodeBlock CHESS_SYMBOLS = 3270 new UnicodeBlock("CHESS_SYMBOLS", 3271 "CHESS SYMBOLS", 3272 "CHESSSYMBOLS"); 3273 3274 /** 3275 * Constant for the "Elymaic" Unicode 3276 * character block. 3277 * @since 13 3278 */ 3279 public static final UnicodeBlock ELYMAIC = 3280 new UnicodeBlock("ELYMAIC"); 3281 3282 /** 3283 * Constant for the "Nandinagari" Unicode 3284 * character block. 3285 * @since 13 3286 */ 3287 public static final UnicodeBlock NANDINAGARI = 3288 new UnicodeBlock("NANDINAGARI"); 3289 3290 /** 3291 * Constant for the "Tamil Supplement" Unicode 3292 * character block. 3293 * @since 13 3294 */ 3295 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3296 new UnicodeBlock("TAMIL_SUPPLEMENT", 3297 "TAMIL SUPPLEMENT", 3298 "TAMILSUPPLEMENT"); 3299 3300 /** 3301 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3302 * character block. 3303 * @since 13 3304 */ 3305 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3306 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3307 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3308 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3309 3310 /** 3311 * Constant for the "Small Kana Extension" Unicode 3312 * character block. 3313 * @since 13 3314 */ 3315 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3316 new UnicodeBlock("SMALL_KANA_EXTENSION", 3317 "SMALL KANA EXTENSION", 3318 "SMALLKANAEXTENSION"); 3319 3320 /** 3321 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3322 * character block. 3323 * @since 13 3324 */ 3325 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3326 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3327 "NYIAKENG PUACHUE HMONG", 3328 "NYIAKENGPUACHUEHMONG"); 3329 3330 /** 3331 * Constant for the "Wancho" Unicode 3332 * character block. 
3333 * @since 13 3334 */ 3335 public static final UnicodeBlock WANCHO = 3336 new UnicodeBlock("WANCHO"); 3337 3338 /** 3339 * Constant for the "Ottoman Siyaq Numbers" Unicode 3340 * character block. 3341 * @since 13 3342 */ 3343 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 3344 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", 3345 "OTTOMAN SIYAQ NUMBERS", 3346 "OTTOMANSIYAQNUMBERS"); 3347 3348 /** 3349 * Constant for the "Symbols and Pictographs Extended-A" Unicode 3350 * character block. 3351 * @since 13 3352 */ 3353 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 3354 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", 3355 "SYMBOLS AND PICTOGRAPHS EXTENDED-A", 3356 "SYMBOLSANDPICTOGRAPHSEXTENDED-A"); 3357 3358 /** 3359 * Constant for the "Yezidi" Unicode 3360 * character block. 3361 * @since 15 3362 */ 3363 public static final UnicodeBlock YEZIDI = 3364 new UnicodeBlock("YEZIDI"); 3365 3366 /** 3367 * Constant for the "Chorasmian" Unicode 3368 * character block. 3369 * @since 15 3370 */ 3371 public static final UnicodeBlock CHORASMIAN = 3372 new UnicodeBlock("CHORASMIAN"); 3373 3374 /** 3375 * Constant for the "Dives Akuru" Unicode 3376 * character block. 3377 * @since 15 3378 */ 3379 public static final UnicodeBlock DIVES_AKURU = 3380 new UnicodeBlock("DIVES_AKURU", 3381 "DIVES AKURU", 3382 "DIVESAKURU"); 3383 3384 /** 3385 * Constant for the "Lisu Supplement" Unicode 3386 * character block. 3387 * @since 15 3388 */ 3389 public static final UnicodeBlock LISU_SUPPLEMENT = 3390 new UnicodeBlock("LISU_SUPPLEMENT", 3391 "LISU SUPPLEMENT", 3392 "LISUSUPPLEMENT"); 3393 3394 /** 3395 * Constant for the "Khitan Small Script" Unicode 3396 * character block. 3397 * @since 15 3398 */ 3399 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 3400 new UnicodeBlock("KHITAN_SMALL_SCRIPT", 3401 "KHITAN SMALL SCRIPT", 3402 "KHITANSMALLSCRIPT"); 3403 3404 /** 3405 * Constant for the "Tangut Supplement" Unicode 3406 * character block. 
3407 * @since 15 3408 */ 3409 public static final UnicodeBlock TANGUT_SUPPLEMENT = 3410 new UnicodeBlock("TANGUT_SUPPLEMENT", 3411 "TANGUT SUPPLEMENT", 3412 "TANGUTSUPPLEMENT"); 3413 3414 /** 3415 * Constant for the "Symbols for Legacy Computing" Unicode 3416 * character block. 3417 * @since 15 3418 */ 3419 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 3420 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", 3421 "SYMBOLS FOR LEGACY COMPUTING", 3422 "SYMBOLSFORLEGACYCOMPUTING"); 3423 3424 /** 3425 * Constant for the "CJK Unified Ideographs Extension G" Unicode 3426 * character block. 3427 * @since 15 3428 */ 3429 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 3430 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 3431 "CJK UNIFIED IDEOGRAPHS EXTENSION G", 3432 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG"); 3433 3434 /** 3435 * Constant for the "Arabic Extended-B" Unicode 3436 * character block. 3437 * @since 19 3438 */ 3439 public static final UnicodeBlock ARABIC_EXTENDED_B = 3440 new UnicodeBlock("ARABIC_EXTENDED_B", 3441 "ARABIC EXTENDED-B", 3442 "ARABICEXTENDED-B"); 3443 3444 /** 3445 * Constant for the "Vithkuqi" Unicode 3446 * character block. 3447 * @since 19 3448 */ 3449 public static final UnicodeBlock VITHKUQI = 3450 new UnicodeBlock("VITHKUQI"); 3451 3452 /** 3453 * Constant for the "Latin Extended-F" Unicode 3454 * character block. 3455 * @since 19 3456 */ 3457 public static final UnicodeBlock LATIN_EXTENDED_F = 3458 new UnicodeBlock("LATIN_EXTENDED_F", 3459 "LATIN EXTENDED-F", 3460 "LATINEXTENDED-F"); 3461 3462 /** 3463 * Constant for the "Old Uyghur" Unicode 3464 * character block. 3465 * @since 19 3466 */ 3467 public static final UnicodeBlock OLD_UYGHUR = 3468 new UnicodeBlock("OLD_UYGHUR", 3469 "OLD UYGHUR", 3470 "OLDUYGHUR"); 3471 3472 /** 3473 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode 3474 * character block. 
3475 * @since 19 3476 */ 3477 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 3478 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A", 3479 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A", 3480 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A"); 3481 3482 /** 3483 * Constant for the "Cypro-Minoan" Unicode 3484 * character block. 3485 * @since 19 3486 */ 3487 public static final UnicodeBlock CYPRO_MINOAN = 3488 new UnicodeBlock("CYPRO_MINOAN", 3489 "CYPRO-MINOAN", 3490 "CYPRO-MINOAN"); 3491 3492 /** 3493 * Constant for the "Tangsa" Unicode 3494 * character block. 3495 * @since 19 3496 */ 3497 public static final UnicodeBlock TANGSA = 3498 new UnicodeBlock("TANGSA"); 3499 3500 /** 3501 * Constant for the "Kana Extended-B" Unicode 3502 * character block. 3503 * @since 19 3504 */ 3505 public static final UnicodeBlock KANA_EXTENDED_B = 3506 new UnicodeBlock("KANA_EXTENDED_B", 3507 "KANA EXTENDED-B", 3508 "KANAEXTENDED-B"); 3509 3510 /** 3511 * Constant for the "Znamenny Musical Notation" Unicode 3512 * character block. 3513 * @since 19 3514 */ 3515 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION = 3516 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION", 3517 "ZNAMENNY MUSICAL NOTATION", 3518 "ZNAMENNYMUSICALNOTATION"); 3519 3520 /** 3521 * Constant for the "Latin Extended-G" Unicode 3522 * character block. 3523 * @since 19 3524 */ 3525 public static final UnicodeBlock LATIN_EXTENDED_G = 3526 new UnicodeBlock("LATIN_EXTENDED_G", 3527 "LATIN EXTENDED-G", 3528 "LATINEXTENDED-G"); 3529 3530 /** 3531 * Constant for the "Toto" Unicode 3532 * character block. 3533 * @since 19 3534 */ 3535 public static final UnicodeBlock TOTO = 3536 new UnicodeBlock("TOTO"); 3537 3538 /** 3539 * Constant for the "Ethiopic Extended-B" Unicode 3540 * character block. 
3541 * @since 19 3542 */ 3543 public static final UnicodeBlock ETHIOPIC_EXTENDED_B = 3544 new UnicodeBlock("ETHIOPIC_EXTENDED_B", 3545 "ETHIOPIC EXTENDED-B", 3546 "ETHIOPICEXTENDED-B"); 3547 3548 /** 3549 * Constant for the "Arabic Extended-C" Unicode 3550 * character block. 3551 * @since 20 3552 */ 3553 public static final UnicodeBlock ARABIC_EXTENDED_C = 3554 new UnicodeBlock("ARABIC_EXTENDED_C", 3555 "ARABIC EXTENDED-C", 3556 "ARABICEXTENDED-C"); 3557 3558 /** 3559 * Constant for the "Devanagari Extended-A" Unicode 3560 * character block. 3561 * @since 20 3562 */ 3563 public static final UnicodeBlock DEVANAGARI_EXTENDED_A = 3564 new UnicodeBlock("DEVANAGARI_EXTENDED_A", 3565 "DEVANAGARI EXTENDED-A", 3566 "DEVANAGARIEXTENDED-A"); 3567 3568 /** 3569 * Constant for the "Kawi" Unicode 3570 * character block. 3571 * @since 20 3572 */ 3573 public static final UnicodeBlock KAWI = 3574 new UnicodeBlock("KAWI"); 3575 3576 /** 3577 * Constant for the "Kaktovik Numerals" Unicode 3578 * character block. 3579 * @since 20 3580 */ 3581 public static final UnicodeBlock KAKTOVIK_NUMERALS = 3582 new UnicodeBlock("KAKTOVIK_NUMERALS", 3583 "KAKTOVIK NUMERALS", 3584 "KAKTOVIKNUMERALS"); 3585 3586 /** 3587 * Constant for the "Cyrillic Extended-D" Unicode 3588 * character block. 3589 * @since 20 3590 */ 3591 public static final UnicodeBlock CYRILLIC_EXTENDED_D = 3592 new UnicodeBlock("CYRILLIC_EXTENDED_D", 3593 "CYRILLIC EXTENDED-D", 3594 "CYRILLICEXTENDED-D"); 3595 3596 /** 3597 * Constant for the "Nag Mundari" Unicode 3598 * character block. 3599 * @since 20 3600 */ 3601 public static final UnicodeBlock NAG_MUNDARI = 3602 new UnicodeBlock("NAG_MUNDARI", 3603 "NAG MUNDARI", 3604 "NAGMUNDARI"); 3605 3606 /** 3607 * Constant for the "CJK Unified Ideographs Extension H" Unicode 3608 * character block. 
3609 * @since 20 3610 */ 3611 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 3612 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H", 3613 "CJK UNIFIED IDEOGRAPHS EXTENSION H", 3614 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH"); 3615 3616 /** 3617 * Constant for the "CJK Unified Ideographs Extension I" Unicode 3618 * character block. 3619 * @since 22 3620 */ 3621 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 3622 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I", 3623 "CJK UNIFIED IDEOGRAPHS EXTENSION I", 3624 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI"); 3625 3626 private static final int[] blockStarts = { 3627 0x0000, // 0000..007F; Basic Latin 3628 0x0080, // 0080..00FF; Latin-1 Supplement 3629 0x0100, // 0100..017F; Latin Extended-A 3630 0x0180, // 0180..024F; Latin Extended-B 3631 0x0250, // 0250..02AF; IPA Extensions 3632 0x02B0, // 02B0..02FF; Spacing Modifier Letters 3633 0x0300, // 0300..036F; Combining Diacritical Marks 3634 0x0370, // 0370..03FF; Greek and Coptic 3635 0x0400, // 0400..04FF; Cyrillic 3636 0x0500, // 0500..052F; Cyrillic Supplement 3637 0x0530, // 0530..058F; Armenian 3638 0x0590, // 0590..05FF; Hebrew 3639 0x0600, // 0600..06FF; Arabic 3640 0x0700, // 0700..074F; Syriac 3641 0x0750, // 0750..077F; Arabic Supplement 3642 0x0780, // 0780..07BF; Thaana 3643 0x07C0, // 07C0..07FF; NKo 3644 0x0800, // 0800..083F; Samaritan 3645 0x0840, // 0840..085F; Mandaic 3646 0x0860, // 0860..086F; Syriac Supplement 3647 0x0870, // 0870..089F; Arabic Extended-B 3648 0x08A0, // 08A0..08FF; Arabic Extended-A 3649 0x0900, // 0900..097F; Devanagari 3650 0x0980, // 0980..09FF; Bengali 3651 0x0A00, // 0A00..0A7F; Gurmukhi 3652 0x0A80, // 0A80..0AFF; Gujarati 3653 0x0B00, // 0B00..0B7F; Oriya 3654 0x0B80, // 0B80..0BFF; Tamil 3655 0x0C00, // 0C00..0C7F; Telugu 3656 0x0C80, // 0C80..0CFF; Kannada 3657 0x0D00, // 0D00..0D7F; Malayalam 3658 0x0D80, // 0D80..0DFF; Sinhala 3659 0x0E00, // 0E00..0E7F; Thai 3660 0x0E80, // 0E80..0EFF; Lao 
3661 0x0F00, // 0F00..0FFF; Tibetan 3662 0x1000, // 1000..109F; Myanmar 3663 0x10A0, // 10A0..10FF; Georgian 3664 0x1100, // 1100..11FF; Hangul Jamo 3665 0x1200, // 1200..137F; Ethiopic 3666 0x1380, // 1380..139F; Ethiopic Supplement 3667 0x13A0, // 13A0..13FF; Cherokee 3668 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 3669 0x1680, // 1680..169F; Ogham 3670 0x16A0, // 16A0..16FF; Runic 3671 0x1700, // 1700..171F; Tagalog 3672 0x1720, // 1720..173F; Hanunoo 3673 0x1740, // 1740..175F; Buhid 3674 0x1760, // 1760..177F; Tagbanwa 3675 0x1780, // 1780..17FF; Khmer 3676 0x1800, // 1800..18AF; Mongolian 3677 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3678 0x1900, // 1900..194F; Limbu 3679 0x1950, // 1950..197F; Tai Le 3680 0x1980, // 1980..19DF; New Tai Lue 3681 0x19E0, // 19E0..19FF; Khmer Symbols 3682 0x1A00, // 1A00..1A1F; Buginese 3683 0x1A20, // 1A20..1AAF; Tai Tham 3684 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3685 0x1B00, // 1B00..1B7F; Balinese 3686 0x1B80, // 1B80..1BBF; Sundanese 3687 0x1BC0, // 1BC0..1BFF; Batak 3688 0x1C00, // 1C00..1C4F; Lepcha 3689 0x1C50, // 1C50..1C7F; Ol Chiki 3690 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3691 0x1C90, // 1C90..1CBF; Georgian Extended 3692 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3693 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3694 0x1D00, // 1D00..1D7F; Phonetic Extensions 3695 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3696 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3697 0x1E00, // 1E00..1EFF; Latin Extended Additional 3698 0x1F00, // 1F00..1FFF; Greek Extended 3699 0x2000, // 2000..206F; General Punctuation 3700 0x2070, // 2070..209F; Superscripts and Subscripts 3701 0x20A0, // 20A0..20CF; Currency Symbols 3702 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3703 0x2100, // 2100..214F; Letterlike Symbols 3704 0x2150, // 2150..218F; Number Forms 3705 0x2190, // 2190..21FF; Arrows 3706 0x2200, // 2200..22FF; 
Mathematical Operators 3707 0x2300, // 2300..23FF; Miscellaneous Technical 3708 0x2400, // 2400..243F; Control Pictures 3709 0x2440, // 2440..245F; Optical Character Recognition 3710 0x2460, // 2460..24FF; Enclosed Alphanumerics 3711 0x2500, // 2500..257F; Box Drawing 3712 0x2580, // 2580..259F; Block Elements 3713 0x25A0, // 25A0..25FF; Geometric Shapes 3714 0x2600, // 2600..26FF; Miscellaneous Symbols 3715 0x2700, // 2700..27BF; Dingbats 3716 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3717 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3718 0x2800, // 2800..28FF; Braille Patterns 3719 0x2900, // 2900..297F; Supplemental Arrows-B 3720 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3721 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3722 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3723 0x2C00, // 2C00..2C5F; Glagolitic 3724 0x2C60, // 2C60..2C7F; Latin Extended-C 3725 0x2C80, // 2C80..2CFF; Coptic 3726 0x2D00, // 2D00..2D2F; Georgian Supplement 3727 0x2D30, // 2D30..2D7F; Tifinagh 3728 0x2D80, // 2D80..2DDF; Ethiopic Extended 3729 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3730 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3731 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3732 0x2F00, // 2F00..2FDF; Kangxi Radicals 3733 0x2FE0, // unassigned 3734 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3735 0x3000, // 3000..303F; CJK Symbols and Punctuation 3736 0x3040, // 3040..309F; Hiragana 3737 0x30A0, // 30A0..30FF; Katakana 3738 0x3100, // 3100..312F; Bopomofo 3739 0x3130, // 3130..318F; Hangul Compatibility Jamo 3740 0x3190, // 3190..319F; Kanbun 3741 0x31A0, // 31A0..31BF; Bopomofo Extended 3742 0x31C0, // 31C0..31EF; CJK Strokes 3743 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 3744 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3745 0x3300, // 3300..33FF; CJK Compatibility 3746 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3747 0x4DC0, // 4DC0..4DFF; Yijing Hexagram 
Symbols 3748 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3749 0xA000, // A000..A48F; Yi Syllables 3750 0xA490, // A490..A4CF; Yi Radicals 3751 0xA4D0, // A4D0..A4FF; Lisu 3752 0xA500, // A500..A63F; Vai 3753 0xA640, // A640..A69F; Cyrillic Extended-B 3754 0xA6A0, // A6A0..A6FF; Bamum 3755 0xA700, // A700..A71F; Modifier Tone Letters 3756 0xA720, // A720..A7FF; Latin Extended-D 3757 0xA800, // A800..A82F; Syloti Nagri 3758 0xA830, // A830..A83F; Common Indic Number Forms 3759 0xA840, // A840..A87F; Phags-pa 3760 0xA880, // A880..A8DF; Saurashtra 3761 0xA8E0, // A8E0..A8FF; Devanagari Extended 3762 0xA900, // A900..A92F; Kayah Li 3763 0xA930, // A930..A95F; Rejang 3764 0xA960, // A960..A97F; Hangul Jamo Extended-A 3765 0xA980, // A980..A9DF; Javanese 3766 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3767 0xAA00, // AA00..AA5F; Cham 3768 0xAA60, // AA60..AA7F; Myanmar Extended-A 3769 0xAA80, // AA80..AADF; Tai Viet 3770 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3771 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3772 0xAB30, // AB30..AB6F; Latin Extended-E 3773 0xAB70, // AB70..ABBF; Cherokee Supplement 3774 0xABC0, // ABC0..ABFF; Meetei Mayek 3775 0xAC00, // AC00..D7AF; Hangul Syllables 3776 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3777 0xD800, // D800..DB7F; High Surrogates 3778 0xDB80, // DB80..DBFF; High Private Use Surrogates 3779 0xDC00, // DC00..DFFF; Low Surrogates 3780 0xE000, // E000..F8FF; Private Use Area 3781 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3782 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3783 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3784 0xFE00, // FE00..FE0F; Variation Selectors 3785 0xFE10, // FE10..FE1F; Vertical Forms 3786 0xFE20, // FE20..FE2F; Combining Half Marks 3787 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3788 0xFE50, // FE50..FE6F; Small Form Variants 3789 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3790 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3791 0xFFF0, // FFF0..FFFF; 
Specials 3792 0x10000, // 10000..1007F; Linear B Syllabary 3793 0x10080, // 10080..100FF; Linear B Ideograms 3794 0x10100, // 10100..1013F; Aegean Numbers 3795 0x10140, // 10140..1018F; Ancient Greek Numbers 3796 0x10190, // 10190..101CF; Ancient Symbols 3797 0x101D0, // 101D0..101FF; Phaistos Disc 3798 0x10200, // unassigned 3799 0x10280, // 10280..1029F; Lycian 3800 0x102A0, // 102A0..102DF; Carian 3801 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3802 0x10300, // 10300..1032F; Old Italic 3803 0x10330, // 10330..1034F; Gothic 3804 0x10350, // 10350..1037F; Old Permic 3805 0x10380, // 10380..1039F; Ugaritic 3806 0x103A0, // 103A0..103DF; Old Persian 3807 0x103E0, // unassigned 3808 0x10400, // 10400..1044F; Deseret 3809 0x10450, // 10450..1047F; Shavian 3810 0x10480, // 10480..104AF; Osmanya 3811 0x104B0, // 104B0..104FF; Osage 3812 0x10500, // 10500..1052F; Elbasan 3813 0x10530, // 10530..1056F; Caucasian Albanian 3814 0x10570, // 10570..105BF; Vithkuqi 3815 0x105C0, // unassigned 3816 0x10600, // 10600..1077F; Linear A 3817 0x10780, // 10780..107BF; Latin Extended-F 3818 0x107C0, // unassigned 3819 0x10800, // 10800..1083F; Cypriot Syllabary 3820 0x10840, // 10840..1085F; Imperial Aramaic 3821 0x10860, // 10860..1087F; Palmyrene 3822 0x10880, // 10880..108AF; Nabataean 3823 0x108B0, // unassigned 3824 0x108E0, // 108E0..108FF; Hatran 3825 0x10900, // 10900..1091F; Phoenician 3826 0x10920, // 10920..1093F; Lydian 3827 0x10940, // unassigned 3828 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3829 0x109A0, // 109A0..109FF; Meroitic Cursive 3830 0x10A00, // 10A00..10A5F; Kharoshthi 3831 0x10A60, // 10A60..10A7F; Old South Arabian 3832 0x10A80, // 10A80..10A9F; Old North Arabian 3833 0x10AA0, // unassigned 3834 0x10AC0, // 10AC0..10AFF; Manichaean 3835 0x10B00, // 10B00..10B3F; Avestan 3836 0x10B40, // 10B40..10B5F; Inscriptional Parthian 3837 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3838 0x10B80, // 10B80..10BAF; Psalter Pahlavi 3839 0x10BB0, // unassigned 
3840 0x10C00, // 10C00..10C4F; Old Turkic 3841 0x10C50, // unassigned 3842 0x10C80, // 10C80..10CFF; Old Hungarian 3843 0x10D00, // 10D00..10D3F; Hanifi Rohingya 3844 0x10D40, // unassigned 3845 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3846 0x10E80, // 10E80..10EBF; Yezidi 3847 0x10EC0, // 10EC0..10EFF; Arabic Extended-C 3848 0x10F00, // 10F00..10F2F; Old Sogdian 3849 0x10F30, // 10F30..10F6F; Sogdian 3850 0x10F70, // 10F70..10FAF; Old Uyghur 3851 0x10FB0, // 10FB0..10FDF; Chorasmian 3852 0x10FE0, // 10FE0..10FFF; Elymaic 3853 0x11000, // 11000..1107F; Brahmi 3854 0x11080, // 11080..110CF; Kaithi 3855 0x110D0, // 110D0..110FF; Sora Sompeng 3856 0x11100, // 11100..1114F; Chakma 3857 0x11150, // 11150..1117F; Mahajani 3858 0x11180, // 11180..111DF; Sharada 3859 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3860 0x11200, // 11200..1124F; Khojki 3861 0x11250, // unassigned 3862 0x11280, // 11280..112AF; Multani 3863 0x112B0, // 112B0..112FF; Khudawadi 3864 0x11300, // 11300..1137F; Grantha 3865 0x11380, // unassigned 3866 0x11400, // 11400..1147F; Newa 3867 0x11480, // 11480..114DF; Tirhuta 3868 0x114E0, // unassigned 3869 0x11580, // 11580..115FF; Siddham 3870 0x11600, // 11600..1165F; Modi 3871 0x11660, // 11660..1167F; Mongolian Supplement 3872 0x11680, // 11680..116CF; Takri 3873 0x116D0, // unassigned 3874 0x11700, // 11700..1174F; Ahom 3875 0x11750, // unassigned 3876 0x11800, // 11800..1184F; Dogra 3877 0x11850, // unassigned 3878 0x118A0, // 118A0..118FF; Warang Citi 3879 0x11900, // 11900..1195F; Dives Akuru 3880 0x11960, // unassigned 3881 0x119A0, // 119A0..119FF; Nandinagari 3882 0x11A00, // 11A00..11A4F; Zanabazar Square 3883 0x11A50, // 11A50..11AAF; Soyombo 3884 0x11AB0, // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 3885 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3886 0x11B00, // 11B00..11B5F; Devanagari Extended-A 3887 0x11B60, // unassigned 3888 0x11C00, // 11C00..11C6F; Bhaiksuki 3889 0x11C70, // 11C70..11CBF; Marchen 3890 
0x11CC0, // unassigned 3891 0x11D00, // 11D00..11D5F; Masaram Gondi 3892 0x11D60, // 11D60..11DAF; Gunjala Gondi 3893 0x11DB0, // unassigned 3894 0x11EE0, // 11EE0..11EFF; Makasar 3895 0x11F00, // 11F00..11F5F; Kawi 3896 0x11F60, // unassigned 3897 0x11FB0, // 11FB0..11FBF; Lisu Supplement 3898 0x11FC0, // 11FC0..11FFF; Tamil Supplement 3899 0x12000, // 12000..123FF; Cuneiform 3900 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3901 0x12480, // 12480..1254F; Early Dynastic Cuneiform 3902 0x12550, // unassigned 3903 0x12F90, // 12F90..12FFF; Cypro-Minoan 3904 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3905 0x13430, // 13430..1345F; Egyptian Hieroglyph Format Controls 3906 0x13460, // unassigned 3907 0x14400, // 14400..1467F; Anatolian Hieroglyphs 3908 0x14680, // unassigned 3909 0x16800, // 16800..16A3F; Bamum Supplement 3910 0x16A40, // 16A40..16A6F; Mro 3911 0x16A70, // 16A70..16ACF; Tangsa 3912 0x16AD0, // 16AD0..16AFF; Bassa Vah 3913 0x16B00, // 16B00..16B8F; Pahawh Hmong 3914 0x16B90, // unassigned 3915 0x16E40, // 16E40..16E9F; Medefaidrin 3916 0x16EA0, // unassigned 3917 0x16F00, // 16F00..16F9F; Miao 3918 0x16FA0, // unassigned 3919 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 3920 0x17000, // 17000..187FF; Tangut 3921 0x18800, // 18800..18AFF; Tangut Components 3922 0x18B00, // 18B00..18CFF; Khitan Small Script 3923 0x18D00, // 18D00..18D7F; Tangut Supplement 3924 0x18D80, // unassigned 3925 0x1AFF0, // 1AFF0..1AFFF; Kana Extended-B 3926 0x1B000, // 1B000..1B0FF; Kana Supplement 3927 0x1B100, // 1B100..1B12F; Kana Extended-A 3928 0x1B130, // 1B130..1B16F; Small Kana Extension 3929 0x1B170, // 1B170..1B2FF; Nushu 3930 0x1B300, // unassigned 3931 0x1BC00, // 1BC00..1BC9F; Duployan 3932 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3933 0x1BCB0, // unassigned 3934 0x1CF00, // 1CF00..1CFCF; Znamenny Musical Notation 3935 0x1CFD0, // unassigned 3936 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3937 0x1D100, // 
1D100..1D1FF; Musical Symbols 3938 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3939 0x1D250, // unassigned 3940 0x1D2C0, // 1D2C0..1D2DF; Kaktovik Numerals 3941 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 3942 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 3943 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3944 0x1D380, // unassigned 3945 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3946 0x1D800, // 1D800..1DAAF; Sutton SignWriting 3947 0x1DAB0, // unassigned 3948 0x1DF00, // 1DF00..1DFFF; Latin Extended-G 3949 0x1E000, // 1E000..1E02F; Glagolitic Supplement 3950 0x1E030, // 1E030..1E08F; Cyrillic Extended-D 3951 0x1E090, // unassigned 3952 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong 3953 0x1E150, // unassigned 3954 0x1E290, // 1E290..1E2BF; Toto 3955 0x1E2C0, // 1E2C0..1E2FF; Wancho 3956 0x1E300, // unassigned 3957 0x1E4D0, // 1E4D0..1E4FF; Nag Mundari 3958 0x1E500, // unassigned 3959 0x1E7E0, // 1E7E0..1E7FF; Ethiopic Extended-B 3960 0x1E800, // 1E800..1E8DF; Mende Kikakui 3961 0x1E8E0, // unassigned 3962 0x1E900, // 1E900..1E95F; Adlam 3963 0x1E960, // unassigned 3964 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 3965 0x1ECC0, // unassigned 3966 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers 3967 0x1ED50, // unassigned 3968 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3969 0x1EF00, // unassigned 3970 0x1F000, // 1F000..1F02F; Mahjong Tiles 3971 0x1F030, // 1F030..1F09F; Domino Tiles 3972 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 3973 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 3974 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 3975 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 3976 0x1F600, // 1F600..1F64F; Emoticons 3977 0x1F650, // 1F650..1F67F; Ornamental Dingbats 3978 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 3979 0x1F700, // 1F700..1F77F; Alchemical Symbols 3980 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended 3981 0x1F800, // 1F800..1F8FF; 
Supplemental Arrows-C 3982 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 3983 0x1FA00, // 1FA00..1FA6F; Chess Symbols 3984 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A 3985 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing 3986 0x1FC00, // unassigned 3987 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 3988 0x2A6E0, // unassigned 3989 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 3990 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 3991 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E 3992 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F 3993 0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I 3994 0x2EE60, // unassigned 3995 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 3996 0x2FA20, // unassigned 3997 0x30000, // 30000..3134F; CJK Unified Ideographs Extension G 3998 0x31350, // 31350..323AF; CJK Unified Ideographs Extension H 3999 0x323B0, // unassigned 4000 0xE0000, // E0000..E007F; Tags 4001 0xE0080, // unassigned 4002 0xE0100, // E0100..E01EF; Variation Selectors Supplement 4003 0xE01F0, // unassigned 4004 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 4005 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B 4006 }; 4007 4008 private static final UnicodeBlock[] blocks = { 4009 BASIC_LATIN, 4010 LATIN_1_SUPPLEMENT, 4011 LATIN_EXTENDED_A, 4012 LATIN_EXTENDED_B, 4013 IPA_EXTENSIONS, 4014 SPACING_MODIFIER_LETTERS, 4015 COMBINING_DIACRITICAL_MARKS, 4016 GREEK, 4017 CYRILLIC, 4018 CYRILLIC_SUPPLEMENTARY, 4019 ARMENIAN, 4020 HEBREW, 4021 ARABIC, 4022 SYRIAC, 4023 ARABIC_SUPPLEMENT, 4024 THAANA, 4025 NKO, 4026 SAMARITAN, 4027 MANDAIC, 4028 SYRIAC_SUPPLEMENT, 4029 ARABIC_EXTENDED_B, 4030 ARABIC_EXTENDED_A, 4031 DEVANAGARI, 4032 BENGALI, 4033 GURMUKHI, 4034 GUJARATI, 4035 ORIYA, 4036 TAMIL, 4037 TELUGU, 4038 KANNADA, 4039 MALAYALAM, 4040 SINHALA, 4041 THAI, 4042 LAO, 4043 TIBETAN, 4044 MYANMAR, 4045 GEORGIAN, 4046 
HANGUL_JAMO, 4047 ETHIOPIC, 4048 ETHIOPIC_SUPPLEMENT, 4049 CHEROKEE, 4050 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 4051 OGHAM, 4052 RUNIC, 4053 TAGALOG, 4054 HANUNOO, 4055 BUHID, 4056 TAGBANWA, 4057 KHMER, 4058 MONGOLIAN, 4059 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 4060 LIMBU, 4061 TAI_LE, 4062 NEW_TAI_LUE, 4063 KHMER_SYMBOLS, 4064 BUGINESE, 4065 TAI_THAM, 4066 COMBINING_DIACRITICAL_MARKS_EXTENDED, 4067 BALINESE, 4068 SUNDANESE, 4069 BATAK, 4070 LEPCHA, 4071 OL_CHIKI, 4072 CYRILLIC_EXTENDED_C, 4073 GEORGIAN_EXTENDED, 4074 SUNDANESE_SUPPLEMENT, 4075 VEDIC_EXTENSIONS, 4076 PHONETIC_EXTENSIONS, 4077 PHONETIC_EXTENSIONS_SUPPLEMENT, 4078 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 4079 LATIN_EXTENDED_ADDITIONAL, 4080 GREEK_EXTENDED, 4081 GENERAL_PUNCTUATION, 4082 SUPERSCRIPTS_AND_SUBSCRIPTS, 4083 CURRENCY_SYMBOLS, 4084 COMBINING_MARKS_FOR_SYMBOLS, 4085 LETTERLIKE_SYMBOLS, 4086 NUMBER_FORMS, 4087 ARROWS, 4088 MATHEMATICAL_OPERATORS, 4089 MISCELLANEOUS_TECHNICAL, 4090 CONTROL_PICTURES, 4091 OPTICAL_CHARACTER_RECOGNITION, 4092 ENCLOSED_ALPHANUMERICS, 4093 BOX_DRAWING, 4094 BLOCK_ELEMENTS, 4095 GEOMETRIC_SHAPES, 4096 MISCELLANEOUS_SYMBOLS, 4097 DINGBATS, 4098 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 4099 SUPPLEMENTAL_ARROWS_A, 4100 BRAILLE_PATTERNS, 4101 SUPPLEMENTAL_ARROWS_B, 4102 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 4103 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 4104 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 4105 GLAGOLITIC, 4106 LATIN_EXTENDED_C, 4107 COPTIC, 4108 GEORGIAN_SUPPLEMENT, 4109 TIFINAGH, 4110 ETHIOPIC_EXTENDED, 4111 CYRILLIC_EXTENDED_A, 4112 SUPPLEMENTAL_PUNCTUATION, 4113 CJK_RADICALS_SUPPLEMENT, 4114 KANGXI_RADICALS, 4115 null, 4116 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 4117 CJK_SYMBOLS_AND_PUNCTUATION, 4118 HIRAGANA, 4119 KATAKANA, 4120 BOPOMOFO, 4121 HANGUL_COMPATIBILITY_JAMO, 4122 KANBUN, 4123 BOPOMOFO_EXTENDED, 4124 CJK_STROKES, 4125 KATAKANA_PHONETIC_EXTENSIONS, 4126 ENCLOSED_CJK_LETTERS_AND_MONTHS, 4127 CJK_COMPATIBILITY, 4128 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 
4129 YIJING_HEXAGRAM_SYMBOLS, 4130 CJK_UNIFIED_IDEOGRAPHS, 4131 YI_SYLLABLES, 4132 YI_RADICALS, 4133 LISU, 4134 VAI, 4135 CYRILLIC_EXTENDED_B, 4136 BAMUM, 4137 MODIFIER_TONE_LETTERS, 4138 LATIN_EXTENDED_D, 4139 SYLOTI_NAGRI, 4140 COMMON_INDIC_NUMBER_FORMS, 4141 PHAGS_PA, 4142 SAURASHTRA, 4143 DEVANAGARI_EXTENDED, 4144 KAYAH_LI, 4145 REJANG, 4146 HANGUL_JAMO_EXTENDED_A, 4147 JAVANESE, 4148 MYANMAR_EXTENDED_B, 4149 CHAM, 4150 MYANMAR_EXTENDED_A, 4151 TAI_VIET, 4152 MEETEI_MAYEK_EXTENSIONS, 4153 ETHIOPIC_EXTENDED_A, 4154 LATIN_EXTENDED_E, 4155 CHEROKEE_SUPPLEMENT, 4156 MEETEI_MAYEK, 4157 HANGUL_SYLLABLES, 4158 HANGUL_JAMO_EXTENDED_B, 4159 HIGH_SURROGATES, 4160 HIGH_PRIVATE_USE_SURROGATES, 4161 LOW_SURROGATES, 4162 PRIVATE_USE_AREA, 4163 CJK_COMPATIBILITY_IDEOGRAPHS, 4164 ALPHABETIC_PRESENTATION_FORMS, 4165 ARABIC_PRESENTATION_FORMS_A, 4166 VARIATION_SELECTORS, 4167 VERTICAL_FORMS, 4168 COMBINING_HALF_MARKS, 4169 CJK_COMPATIBILITY_FORMS, 4170 SMALL_FORM_VARIANTS, 4171 ARABIC_PRESENTATION_FORMS_B, 4172 HALFWIDTH_AND_FULLWIDTH_FORMS, 4173 SPECIALS, 4174 LINEAR_B_SYLLABARY, 4175 LINEAR_B_IDEOGRAMS, 4176 AEGEAN_NUMBERS, 4177 ANCIENT_GREEK_NUMBERS, 4178 ANCIENT_SYMBOLS, 4179 PHAISTOS_DISC, 4180 null, 4181 LYCIAN, 4182 CARIAN, 4183 COPTIC_EPACT_NUMBERS, 4184 OLD_ITALIC, 4185 GOTHIC, 4186 OLD_PERMIC, 4187 UGARITIC, 4188 OLD_PERSIAN, 4189 null, 4190 DESERET, 4191 SHAVIAN, 4192 OSMANYA, 4193 OSAGE, 4194 ELBASAN, 4195 CAUCASIAN_ALBANIAN, 4196 VITHKUQI, 4197 null, 4198 LINEAR_A, 4199 LATIN_EXTENDED_F, 4200 null, 4201 CYPRIOT_SYLLABARY, 4202 IMPERIAL_ARAMAIC, 4203 PALMYRENE, 4204 NABATAEAN, 4205 null, 4206 HATRAN, 4207 PHOENICIAN, 4208 LYDIAN, 4209 null, 4210 MEROITIC_HIEROGLYPHS, 4211 MEROITIC_CURSIVE, 4212 KHAROSHTHI, 4213 OLD_SOUTH_ARABIAN, 4214 OLD_NORTH_ARABIAN, 4215 null, 4216 MANICHAEAN, 4217 AVESTAN, 4218 INSCRIPTIONAL_PARTHIAN, 4219 INSCRIPTIONAL_PAHLAVI, 4220 PSALTER_PAHLAVI, 4221 null, 4222 OLD_TURKIC, 4223 null, 4224 OLD_HUNGARIAN, 4225 HANIFI_ROHINGYA, 4226 null, 4227 
RUMI_NUMERAL_SYMBOLS, 4228 YEZIDI, 4229 ARABIC_EXTENDED_C, 4230 OLD_SOGDIAN, 4231 SOGDIAN, 4232 OLD_UYGHUR, 4233 CHORASMIAN, 4234 ELYMAIC, 4235 BRAHMI, 4236 KAITHI, 4237 SORA_SOMPENG, 4238 CHAKMA, 4239 MAHAJANI, 4240 SHARADA, 4241 SINHALA_ARCHAIC_NUMBERS, 4242 KHOJKI, 4243 null, 4244 MULTANI, 4245 KHUDAWADI, 4246 GRANTHA, 4247 null, 4248 NEWA, 4249 TIRHUTA, 4250 null, 4251 SIDDHAM, 4252 MODI, 4253 MONGOLIAN_SUPPLEMENT, 4254 TAKRI, 4255 null, 4256 AHOM, 4257 null, 4258 DOGRA, 4259 null, 4260 WARANG_CITI, 4261 DIVES_AKURU, 4262 null, 4263 NANDINAGARI, 4264 ZANABAZAR_SQUARE, 4265 SOYOMBO, 4266 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A, 4267 PAU_CIN_HAU, 4268 DEVANAGARI_EXTENDED_A, 4269 null, 4270 BHAIKSUKI, 4271 MARCHEN, 4272 null, 4273 MASARAM_GONDI, 4274 GUNJALA_GONDI, 4275 null, 4276 MAKASAR, 4277 KAWI, 4278 null, 4279 LISU_SUPPLEMENT, 4280 TAMIL_SUPPLEMENT, 4281 CUNEIFORM, 4282 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 4283 EARLY_DYNASTIC_CUNEIFORM, 4284 null, 4285 CYPRO_MINOAN, 4286 EGYPTIAN_HIEROGLYPHS, 4287 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, 4288 null, 4289 ANATOLIAN_HIEROGLYPHS, 4290 null, 4291 BAMUM_SUPPLEMENT, 4292 MRO, 4293 TANGSA, 4294 BASSA_VAH, 4295 PAHAWH_HMONG, 4296 null, 4297 MEDEFAIDRIN, 4298 null, 4299 MIAO, 4300 null, 4301 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, 4302 TANGUT, 4303 TANGUT_COMPONENTS, 4304 KHITAN_SMALL_SCRIPT, 4305 TANGUT_SUPPLEMENT, 4306 null, 4307 KANA_EXTENDED_B, 4308 KANA_SUPPLEMENT, 4309 KANA_EXTENDED_A, 4310 SMALL_KANA_EXTENSION, 4311 NUSHU, 4312 null, 4313 DUPLOYAN, 4314 SHORTHAND_FORMAT_CONTROLS, 4315 null, 4316 ZNAMENNY_MUSICAL_NOTATION, 4317 null, 4318 BYZANTINE_MUSICAL_SYMBOLS, 4319 MUSICAL_SYMBOLS, 4320 ANCIENT_GREEK_MUSICAL_NOTATION, 4321 null, 4322 KAKTOVIK_NUMERALS, 4323 MAYAN_NUMERALS, 4324 TAI_XUAN_JING_SYMBOLS, 4325 COUNTING_ROD_NUMERALS, 4326 null, 4327 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 4328 SUTTON_SIGNWRITING, 4329 null, 4330 LATIN_EXTENDED_G, 4331 GLAGOLITIC_SUPPLEMENT, 4332 CYRILLIC_EXTENDED_D, 4333 null, 4334 
NYIAKENG_PUACHUE_HMONG, 4335 null, 4336 TOTO, 4337 WANCHO, 4338 null, 4339 NAG_MUNDARI, 4340 null, 4341 ETHIOPIC_EXTENDED_B, 4342 MENDE_KIKAKUI, 4343 null, 4344 ADLAM, 4345 null, 4346 INDIC_SIYAQ_NUMBERS, 4347 null, 4348 OTTOMAN_SIYAQ_NUMBERS, 4349 null, 4350 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 4351 null, 4352 MAHJONG_TILES, 4353 DOMINO_TILES, 4354 PLAYING_CARDS, 4355 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 4356 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 4357 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 4358 EMOTICONS, 4359 ORNAMENTAL_DINGBATS, 4360 TRANSPORT_AND_MAP_SYMBOLS, 4361 ALCHEMICAL_SYMBOLS, 4362 GEOMETRIC_SHAPES_EXTENDED, 4363 SUPPLEMENTAL_ARROWS_C, 4364 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 4365 CHESS_SYMBOLS, 4366 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, 4367 SYMBOLS_FOR_LEGACY_COMPUTING, 4368 null, 4369 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 4370 null, 4371 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 4372 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 4373 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 4374 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 4375 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I, 4376 null, 4377 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 4378 null, 4379 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, 4380 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H, 4381 null, 4382 TAGS, 4383 null, 4384 VARIATION_SELECTORS_SUPPLEMENT, 4385 null, 4386 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 4387 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 4388 }; 4389 4390 4391 /** 4392 * Returns the object representing the Unicode block containing the 4393 * given character, or {@code null} if the character is not a 4394 * member of a defined block. 4395 * 4396 * <p><b>Note:</b> This method cannot handle 4397 * <a href="Character.html#supplementary"> supplementary 4398 * characters</a>. To support all Unicode characters, including 4399 * supplementary characters, use the {@link #of(int)} method. 
4400 * 4401 * @param c The character in question 4402 * @return The {@code UnicodeBlock} instance representing the 4403 * Unicode block of which this character is a member, or 4404 * {@code null} if the character is not a member of any 4405 * Unicode block 4406 */ 4407 public static UnicodeBlock of(char c) { 4408 return of((int)c); 4409 } 4410 4411 /** 4412 * Returns the object representing the Unicode block 4413 * containing the given character (Unicode code point), or 4414 * {@code null} if the character is not a member of a 4415 * defined block. 4416 * 4417 * @param codePoint the character (Unicode code point) in question. 4418 * @return The {@code UnicodeBlock} instance representing the 4419 * Unicode block of which this character is a member, or 4420 * {@code null} if the character is not a member of any 4421 * Unicode block 4422 * @throws IllegalArgumentException if the specified 4423 * {@code codePoint} is an invalid Unicode code point. 4424 * @see Character#isValidCodePoint(int) 4425 * @since 1.5 4426 */ 4427 public static UnicodeBlock of(int codePoint) { 4428 if (!isValidCodePoint(codePoint)) { 4429 throw new IllegalArgumentException( 4430 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4431 } 4432 4433 int top, bottom, current; 4434 bottom = 0; 4435 top = blockStarts.length; 4436 current = top/2; 4437 4438 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4439 while (top - bottom > 1) { 4440 if (codePoint >= blockStarts[current]) { 4441 bottom = current; 4442 } else { 4443 top = current; 4444 } 4445 current = (top + bottom) / 2; 4446 } 4447 return blocks[current]; 4448 } 4449 4450 /** 4451 * Returns the UnicodeBlock with the given name. Block 4452 * names are determined by The Unicode Standard. The file 4453 * {@code Blocks.txt} defines blocks for a particular 4454 * version of the standard. The {@link Character} class specifies 4455 * the version of the standard that it supports. 
4456 * <p> 4457 * This method accepts block names in the following forms: 4458 * <ol> 4459 * <li> Canonical block names as defined by the Unicode Standard. 4460 * For example, the standard defines a "Basic Latin" block. Therefore, this 4461 * method accepts "Basic Latin" as a valid block name. The documentation of 4462 * each UnicodeBlock provides the canonical name. 4463 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4464 * is a valid block name for the "Basic Latin" block. 4465 * <li>The text representation of each constant UnicodeBlock identifier. 4466 * For example, this method will return the {@link #BASIC_LATIN} block if 4467 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4468 * hyphens in the canonical name with underscores. 4469 * </ol> 4470 * Finally, character case is ignored for all of the valid block name forms. 4471 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4472 * The en_US locale's case mapping rules are used to provide case-insensitive 4473 * string comparisons for block name validation. 4474 * <p> 4475 * If the Unicode Standard changes block names, both the previous and 4476 * current names will be accepted. 4477 * 4478 * @param blockName A {@code UnicodeBlock} name. 
4479 * @return The {@code UnicodeBlock} instance identified 4480 * by {@code blockName} 4481 * @throws IllegalArgumentException if {@code blockName} is an 4482 * invalid name 4483 * @throws NullPointerException if {@code blockName} is null 4484 * @since 1.5 4485 */ 4486 public static final UnicodeBlock forName(String blockName) { 4487 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4488 if (block == null) { 4489 throw new IllegalArgumentException("Not a valid block name: " 4490 + blockName); 4491 } 4492 return block; 4493 } 4494 } 4495 4496 4497 /** 4498 * A family of character subsets representing the character scripts 4499 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4500 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4501 * character is assigned to a single Unicode script, either a specific 4502 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4503 * one of the following three special values, 4504 * {@link Character.UnicodeScript#INHERITED Inherited}, 4505 * {@link Character.UnicodeScript#COMMON Common} or 4506 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4507 * 4508 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4509 * @since 1.7 4510 */ 4511 public static enum UnicodeScript { 4512 /** 4513 * Unicode script "Common". 4514 */ 4515 COMMON, 4516 4517 /** 4518 * Unicode script "Latin". 4519 */ 4520 LATIN, 4521 4522 /** 4523 * Unicode script "Greek". 4524 */ 4525 GREEK, 4526 4527 /** 4528 * Unicode script "Cyrillic". 4529 */ 4530 CYRILLIC, 4531 4532 /** 4533 * Unicode script "Armenian". 4534 */ 4535 ARMENIAN, 4536 4537 /** 4538 * Unicode script "Hebrew". 4539 */ 4540 HEBREW, 4541 4542 /** 4543 * Unicode script "Arabic". 4544 */ 4545 ARABIC, 4546 4547 /** 4548 * Unicode script "Syriac". 4549 */ 4550 SYRIAC, 4551 4552 /** 4553 * Unicode script "Thaana". 4554 */ 4555 THAANA, 4556 4557 /** 4558 * Unicode script "Devanagari". 
4559 */ 4560 DEVANAGARI, 4561 4562 /** 4563 * Unicode script "Bengali". 4564 */ 4565 BENGALI, 4566 4567 /** 4568 * Unicode script "Gurmukhi". 4569 */ 4570 GURMUKHI, 4571 4572 /** 4573 * Unicode script "Gujarati". 4574 */ 4575 GUJARATI, 4576 4577 /** 4578 * Unicode script "Oriya". 4579 */ 4580 ORIYA, 4581 4582 /** 4583 * Unicode script "Tamil". 4584 */ 4585 TAMIL, 4586 4587 /** 4588 * Unicode script "Telugu". 4589 */ 4590 TELUGU, 4591 4592 /** 4593 * Unicode script "Kannada". 4594 */ 4595 KANNADA, 4596 4597 /** 4598 * Unicode script "Malayalam". 4599 */ 4600 MALAYALAM, 4601 4602 /** 4603 * Unicode script "Sinhala". 4604 */ 4605 SINHALA, 4606 4607 /** 4608 * Unicode script "Thai". 4609 */ 4610 THAI, 4611 4612 /** 4613 * Unicode script "Lao". 4614 */ 4615 LAO, 4616 4617 /** 4618 * Unicode script "Tibetan". 4619 */ 4620 TIBETAN, 4621 4622 /** 4623 * Unicode script "Myanmar". 4624 */ 4625 MYANMAR, 4626 4627 /** 4628 * Unicode script "Georgian". 4629 */ 4630 GEORGIAN, 4631 4632 /** 4633 * Unicode script "Hangul". 4634 */ 4635 HANGUL, 4636 4637 /** 4638 * Unicode script "Ethiopic". 4639 */ 4640 ETHIOPIC, 4641 4642 /** 4643 * Unicode script "Cherokee". 4644 */ 4645 CHEROKEE, 4646 4647 /** 4648 * Unicode script "Canadian_Aboriginal". 4649 */ 4650 CANADIAN_ABORIGINAL, 4651 4652 /** 4653 * Unicode script "Ogham". 4654 */ 4655 OGHAM, 4656 4657 /** 4658 * Unicode script "Runic". 4659 */ 4660 RUNIC, 4661 4662 /** 4663 * Unicode script "Khmer". 4664 */ 4665 KHMER, 4666 4667 /** 4668 * Unicode script "Mongolian". 4669 */ 4670 MONGOLIAN, 4671 4672 /** 4673 * Unicode script "Hiragana". 4674 */ 4675 HIRAGANA, 4676 4677 /** 4678 * Unicode script "Katakana". 4679 */ 4680 KATAKANA, 4681 4682 /** 4683 * Unicode script "Bopomofo". 4684 */ 4685 BOPOMOFO, 4686 4687 /** 4688 * Unicode script "Han". 4689 */ 4690 HAN, 4691 4692 /** 4693 * Unicode script "Yi". 4694 */ 4695 YI, 4696 4697 /** 4698 * Unicode script "Old_Italic". 
4699 */ 4700 OLD_ITALIC, 4701 4702 /** 4703 * Unicode script "Gothic". 4704 */ 4705 GOTHIC, 4706 4707 /** 4708 * Unicode script "Deseret". 4709 */ 4710 DESERET, 4711 4712 /** 4713 * Unicode script "Inherited". 4714 */ 4715 INHERITED, 4716 4717 /** 4718 * Unicode script "Tagalog". 4719 */ 4720 TAGALOG, 4721 4722 /** 4723 * Unicode script "Hanunoo". 4724 */ 4725 HANUNOO, 4726 4727 /** 4728 * Unicode script "Buhid". 4729 */ 4730 BUHID, 4731 4732 /** 4733 * Unicode script "Tagbanwa". 4734 */ 4735 TAGBANWA, 4736 4737 /** 4738 * Unicode script "Limbu". 4739 */ 4740 LIMBU, 4741 4742 /** 4743 * Unicode script "Tai_Le". 4744 */ 4745 TAI_LE, 4746 4747 /** 4748 * Unicode script "Linear_B". 4749 */ 4750 LINEAR_B, 4751 4752 /** 4753 * Unicode script "Ugaritic". 4754 */ 4755 UGARITIC, 4756 4757 /** 4758 * Unicode script "Shavian". 4759 */ 4760 SHAVIAN, 4761 4762 /** 4763 * Unicode script "Osmanya". 4764 */ 4765 OSMANYA, 4766 4767 /** 4768 * Unicode script "Cypriot". 4769 */ 4770 CYPRIOT, 4771 4772 /** 4773 * Unicode script "Braille". 4774 */ 4775 BRAILLE, 4776 4777 /** 4778 * Unicode script "Buginese". 4779 */ 4780 BUGINESE, 4781 4782 /** 4783 * Unicode script "Coptic". 4784 */ 4785 COPTIC, 4786 4787 /** 4788 * Unicode script "New_Tai_Lue". 4789 */ 4790 NEW_TAI_LUE, 4791 4792 /** 4793 * Unicode script "Glagolitic". 4794 */ 4795 GLAGOLITIC, 4796 4797 /** 4798 * Unicode script "Tifinagh". 4799 */ 4800 TIFINAGH, 4801 4802 /** 4803 * Unicode script "Syloti_Nagri". 4804 */ 4805 SYLOTI_NAGRI, 4806 4807 /** 4808 * Unicode script "Old_Persian". 4809 */ 4810 OLD_PERSIAN, 4811 4812 /** 4813 * Unicode script "Kharoshthi". 4814 */ 4815 KHAROSHTHI, 4816 4817 /** 4818 * Unicode script "Balinese". 4819 */ 4820 BALINESE, 4821 4822 /** 4823 * Unicode script "Cuneiform". 4824 */ 4825 CUNEIFORM, 4826 4827 /** 4828 * Unicode script "Phoenician". 4829 */ 4830 PHOENICIAN, 4831 4832 /** 4833 * Unicode script "Phags_Pa". 4834 */ 4835 PHAGS_PA, 4836 4837 /** 4838 * Unicode script "Nko". 
4839 */ 4840 NKO, 4841 4842 /** 4843 * Unicode script "Sundanese". 4844 */ 4845 SUNDANESE, 4846 4847 /** 4848 * Unicode script "Batak". 4849 */ 4850 BATAK, 4851 4852 /** 4853 * Unicode script "Lepcha". 4854 */ 4855 LEPCHA, 4856 4857 /** 4858 * Unicode script "Ol_Chiki". 4859 */ 4860 OL_CHIKI, 4861 4862 /** 4863 * Unicode script "Vai". 4864 */ 4865 VAI, 4866 4867 /** 4868 * Unicode script "Saurashtra". 4869 */ 4870 SAURASHTRA, 4871 4872 /** 4873 * Unicode script "Kayah_Li". 4874 */ 4875 KAYAH_LI, 4876 4877 /** 4878 * Unicode script "Rejang". 4879 */ 4880 REJANG, 4881 4882 /** 4883 * Unicode script "Lycian". 4884 */ 4885 LYCIAN, 4886 4887 /** 4888 * Unicode script "Carian". 4889 */ 4890 CARIAN, 4891 4892 /** 4893 * Unicode script "Lydian". 4894 */ 4895 LYDIAN, 4896 4897 /** 4898 * Unicode script "Cham". 4899 */ 4900 CHAM, 4901 4902 /** 4903 * Unicode script "Tai_Tham". 4904 */ 4905 TAI_THAM, 4906 4907 /** 4908 * Unicode script "Tai_Viet". 4909 */ 4910 TAI_VIET, 4911 4912 /** 4913 * Unicode script "Avestan". 4914 */ 4915 AVESTAN, 4916 4917 /** 4918 * Unicode script "Egyptian_Hieroglyphs". 4919 */ 4920 EGYPTIAN_HIEROGLYPHS, 4921 4922 /** 4923 * Unicode script "Samaritan". 4924 */ 4925 SAMARITAN, 4926 4927 /** 4928 * Unicode script "Mandaic". 4929 */ 4930 MANDAIC, 4931 4932 /** 4933 * Unicode script "Lisu". 4934 */ 4935 LISU, 4936 4937 /** 4938 * Unicode script "Bamum". 4939 */ 4940 BAMUM, 4941 4942 /** 4943 * Unicode script "Javanese". 4944 */ 4945 JAVANESE, 4946 4947 /** 4948 * Unicode script "Meetei_Mayek". 4949 */ 4950 MEETEI_MAYEK, 4951 4952 /** 4953 * Unicode script "Imperial_Aramaic". 4954 */ 4955 IMPERIAL_ARAMAIC, 4956 4957 /** 4958 * Unicode script "Old_South_Arabian". 4959 */ 4960 OLD_SOUTH_ARABIAN, 4961 4962 /** 4963 * Unicode script "Inscriptional_Parthian". 4964 */ 4965 INSCRIPTIONAL_PARTHIAN, 4966 4967 /** 4968 * Unicode script "Inscriptional_Pahlavi". 4969 */ 4970 INSCRIPTIONAL_PAHLAVI, 4971 4972 /** 4973 * Unicode script "Old_Turkic". 
4974 */ 4975 OLD_TURKIC, 4976 4977 /** 4978 * Unicode script "Brahmi". 4979 */ 4980 BRAHMI, 4981 4982 /** 4983 * Unicode script "Kaithi". 4984 */ 4985 KAITHI, 4986 4987 /** 4988 * Unicode script "Meroitic Hieroglyphs". 4989 * @since 1.8 4990 */ 4991 MEROITIC_HIEROGLYPHS, 4992 4993 /** 4994 * Unicode script "Meroitic Cursive". 4995 * @since 1.8 4996 */ 4997 MEROITIC_CURSIVE, 4998 4999 /** 5000 * Unicode script "Sora Sompeng". 5001 * @since 1.8 5002 */ 5003 SORA_SOMPENG, 5004 5005 /** 5006 * Unicode script "Chakma". 5007 * @since 1.8 5008 */ 5009 CHAKMA, 5010 5011 /** 5012 * Unicode script "Sharada". 5013 * @since 1.8 5014 */ 5015 SHARADA, 5016 5017 /** 5018 * Unicode script "Takri". 5019 * @since 1.8 5020 */ 5021 TAKRI, 5022 5023 /** 5024 * Unicode script "Miao". 5025 * @since 1.8 5026 */ 5027 MIAO, 5028 5029 /** 5030 * Unicode script "Caucasian Albanian". 5031 * @since 9 5032 */ 5033 CAUCASIAN_ALBANIAN, 5034 5035 /** 5036 * Unicode script "Bassa Vah". 5037 * @since 9 5038 */ 5039 BASSA_VAH, 5040 5041 /** 5042 * Unicode script "Duployan". 5043 * @since 9 5044 */ 5045 DUPLOYAN, 5046 5047 /** 5048 * Unicode script "Elbasan". 5049 * @since 9 5050 */ 5051 ELBASAN, 5052 5053 /** 5054 * Unicode script "Grantha". 5055 * @since 9 5056 */ 5057 GRANTHA, 5058 5059 /** 5060 * Unicode script "Pahawh Hmong". 5061 * @since 9 5062 */ 5063 PAHAWH_HMONG, 5064 5065 /** 5066 * Unicode script "Khojki". 5067 * @since 9 5068 */ 5069 KHOJKI, 5070 5071 /** 5072 * Unicode script "Linear A". 5073 * @since 9 5074 */ 5075 LINEAR_A, 5076 5077 /** 5078 * Unicode script "Mahajani". 5079 * @since 9 5080 */ 5081 MAHAJANI, 5082 5083 /** 5084 * Unicode script "Manichaean". 5085 * @since 9 5086 */ 5087 MANICHAEAN, 5088 5089 /** 5090 * Unicode script "Mende Kikakui". 5091 * @since 9 5092 */ 5093 MENDE_KIKAKUI, 5094 5095 /** 5096 * Unicode script "Modi". 5097 * @since 9 5098 */ 5099 MODI, 5100 5101 /** 5102 * Unicode script "Mro". 
5103 * @since 9 5104 */ 5105 MRO, 5106 5107 /** 5108 * Unicode script "Old North Arabian". 5109 * @since 9 5110 */ 5111 OLD_NORTH_ARABIAN, 5112 5113 /** 5114 * Unicode script "Nabataean". 5115 * @since 9 5116 */ 5117 NABATAEAN, 5118 5119 /** 5120 * Unicode script "Palmyrene". 5121 * @since 9 5122 */ 5123 PALMYRENE, 5124 5125 /** 5126 * Unicode script "Pau Cin Hau". 5127 * @since 9 5128 */ 5129 PAU_CIN_HAU, 5130 5131 /** 5132 * Unicode script "Old Permic". 5133 * @since 9 5134 */ 5135 OLD_PERMIC, 5136 5137 /** 5138 * Unicode script "Psalter Pahlavi". 5139 * @since 9 5140 */ 5141 PSALTER_PAHLAVI, 5142 5143 /** 5144 * Unicode script "Siddham". 5145 * @since 9 5146 */ 5147 SIDDHAM, 5148 5149 /** 5150 * Unicode script "Khudawadi". 5151 * @since 9 5152 */ 5153 KHUDAWADI, 5154 5155 /** 5156 * Unicode script "Tirhuta". 5157 * @since 9 5158 */ 5159 TIRHUTA, 5160 5161 /** 5162 * Unicode script "Warang Citi". 5163 * @since 9 5164 */ 5165 WARANG_CITI, 5166 5167 /** 5168 * Unicode script "Ahom". 5169 * @since 9 5170 */ 5171 AHOM, 5172 5173 /** 5174 * Unicode script "Anatolian Hieroglyphs". 5175 * @since 9 5176 */ 5177 ANATOLIAN_HIEROGLYPHS, 5178 5179 /** 5180 * Unicode script "Hatran". 5181 * @since 9 5182 */ 5183 HATRAN, 5184 5185 /** 5186 * Unicode script "Multani". 5187 * @since 9 5188 */ 5189 MULTANI, 5190 5191 /** 5192 * Unicode script "Old Hungarian". 5193 * @since 9 5194 */ 5195 OLD_HUNGARIAN, 5196 5197 /** 5198 * Unicode script "SignWriting". 5199 * @since 9 5200 */ 5201 SIGNWRITING, 5202 5203 /** 5204 * Unicode script "Adlam". 5205 * @since 11 5206 */ 5207 ADLAM, 5208 5209 /** 5210 * Unicode script "Bhaiksuki". 5211 * @since 11 5212 */ 5213 BHAIKSUKI, 5214 5215 /** 5216 * Unicode script "Marchen". 5217 * @since 11 5218 */ 5219 MARCHEN, 5220 5221 /** 5222 * Unicode script "Newa". 5223 * @since 11 5224 */ 5225 NEWA, 5226 5227 /** 5228 * Unicode script "Osage". 5229 * @since 11 5230 */ 5231 OSAGE, 5232 5233 /** 5234 * Unicode script "Tangut". 
5235 * @since 11 5236 */ 5237 TANGUT, 5238 5239 /** 5240 * Unicode script "Masaram Gondi". 5241 * @since 11 5242 */ 5243 MASARAM_GONDI, 5244 5245 /** 5246 * Unicode script "Nushu". 5247 * @since 11 5248 */ 5249 NUSHU, 5250 5251 /** 5252 * Unicode script "Soyombo". 5253 * @since 11 5254 */ 5255 SOYOMBO, 5256 5257 /** 5258 * Unicode script "Zanabazar Square". 5259 * @since 11 5260 */ 5261 ZANABAZAR_SQUARE, 5262 5263 /** 5264 * Unicode script "Hanifi Rohingya". 5265 * @since 12 5266 */ 5267 HANIFI_ROHINGYA, 5268 5269 /** 5270 * Unicode script "Old Sogdian". 5271 * @since 12 5272 */ 5273 OLD_SOGDIAN, 5274 5275 /** 5276 * Unicode script "Sogdian". 5277 * @since 12 5278 */ 5279 SOGDIAN, 5280 5281 /** 5282 * Unicode script "Dogra". 5283 * @since 12 5284 */ 5285 DOGRA, 5286 5287 /** 5288 * Unicode script "Gunjala Gondi". 5289 * @since 12 5290 */ 5291 GUNJALA_GONDI, 5292 5293 /** 5294 * Unicode script "Makasar". 5295 * @since 12 5296 */ 5297 MAKASAR, 5298 5299 /** 5300 * Unicode script "Medefaidrin". 5301 * @since 12 5302 */ 5303 MEDEFAIDRIN, 5304 5305 /** 5306 * Unicode script "Elymaic". 5307 * @since 13 5308 */ 5309 ELYMAIC, 5310 5311 /** 5312 * Unicode script "Nandinagari". 5313 * @since 13 5314 */ 5315 NANDINAGARI, 5316 5317 /** 5318 * Unicode script "Nyiakeng Puachue Hmong". 5319 * @since 13 5320 */ 5321 NYIAKENG_PUACHUE_HMONG, 5322 5323 /** 5324 * Unicode script "Wancho". 5325 * @since 13 5326 */ 5327 WANCHO, 5328 5329 /** 5330 * Unicode script "Yezidi". 5331 * @since 15 5332 */ 5333 YEZIDI, 5334 5335 /** 5336 * Unicode script "Chorasmian". 5337 * @since 15 5338 */ 5339 CHORASMIAN, 5340 5341 /** 5342 * Unicode script "Dives Akuru". 5343 * @since 15 5344 */ 5345 DIVES_AKURU, 5346 5347 /** 5348 * Unicode script "Khitan Small Script". 5349 * @since 15 5350 */ 5351 KHITAN_SMALL_SCRIPT, 5352 5353 /** 5354 * Unicode script "Vithkuqi". 5355 * @since 19 5356 */ 5357 VITHKUQI, 5358 5359 /** 5360 * Unicode script "Old Uyghur". 
5361 * @since 19 5362 */ 5363 OLD_UYGHUR, 5364 5365 /** 5366 * Unicode script "Cypro Minoan". 5367 * @since 19 5368 */ 5369 CYPRO_MINOAN, 5370 5371 /** 5372 * Unicode script "Tangsa". 5373 * @since 19 5374 */ 5375 TANGSA, 5376 5377 /** 5378 * Unicode script "Toto". 5379 * @since 19 5380 */ 5381 TOTO, 5382 5383 /** 5384 * Unicode script "Kawi". 5385 * @since 20 5386 */ 5387 KAWI, 5388 5389 /** 5390 * Unicode script "Nag Mundari". 5391 * @since 20 5392 */ 5393 NAG_MUNDARI, 5394 5395 /** 5396 * Unicode script "Unknown". 5397 */ 5398 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 5399 5400 private static final int[] scriptStarts = { 5401 0x0000, // 0000..0040; COMMON 5402 0x0041, // 0041..005A; LATIN 5403 0x005B, // 005B..0060; COMMON 5404 0x0061, // 0061..007A; LATIN 5405 0x007B, // 007B..00A9; COMMON 5406 0x00AA, // 00AA ; LATIN 5407 0x00AB, // 00AB..00B9; COMMON 5408 0x00BA, // 00BA ; LATIN 5409 0x00BB, // 00BB..00BF; COMMON 5410 0x00C0, // 00C0..00D6; LATIN 5411 0x00D7, // 00D7 ; COMMON 5412 0x00D8, // 00D8..00F6; LATIN 5413 0x00F7, // 00F7 ; COMMON 5414 0x00F8, // 00F8..02B8; LATIN 5415 0x02B9, // 02B9..02DF; COMMON 5416 0x02E0, // 02E0..02E4; LATIN 5417 0x02E5, // 02E5..02E9; COMMON 5418 0x02EA, // 02EA..02EB; BOPOMOFO 5419 0x02EC, // 02EC..02FF; COMMON 5420 0x0300, // 0300..036F; INHERITED 5421 0x0370, // 0370..0373; GREEK 5422 0x0374, // 0374 ; COMMON 5423 0x0375, // 0375..0377; GREEK 5424 0x0378, // 0378..0379; UNKNOWN 5425 0x037A, // 037A..037D; GREEK 5426 0x037E, // 037E ; COMMON 5427 0x037F, // 037F ; GREEK 5428 0x0380, // 0380..0383; UNKNOWN 5429 0x0384, // 0384 ; GREEK 5430 0x0385, // 0385 ; COMMON 5431 0x0386, // 0386 ; GREEK 5432 0x0387, // 0387 ; COMMON 5433 0x0388, // 0388..038A; GREEK 5434 0x038B, // 038B ; UNKNOWN 5435 0x038C, // 038C ; GREEK 5436 0x038D, // 038D ; UNKNOWN 5437 0x038E, // 038E..03A1; GREEK 5438 0x03A2, // 03A2 ; UNKNOWN 5439 0x03A3, // 03A3..03E1; GREEK 5440 0x03E2, // 03E2..03EF; COPTIC 
5441 0x03F0, // 03F0..03FF; GREEK 5442 0x0400, // 0400..0484; CYRILLIC 5443 0x0485, // 0485..0486; INHERITED 5444 0x0487, // 0487..052F; CYRILLIC 5445 0x0530, // 0530 ; UNKNOWN 5446 0x0531, // 0531..0556; ARMENIAN 5447 0x0557, // 0557..0558; UNKNOWN 5448 0x0559, // 0559..058A; ARMENIAN 5449 0x058B, // 058B..058C; UNKNOWN 5450 0x058D, // 058D..058F; ARMENIAN 5451 0x0590, // 0590 ; UNKNOWN 5452 0x0591, // 0591..05C7; HEBREW 5453 0x05C8, // 05C8..05CF; UNKNOWN 5454 0x05D0, // 05D0..05EA; HEBREW 5455 0x05EB, // 05EB..05EE; UNKNOWN 5456 0x05EF, // 05EF..05F4; HEBREW 5457 0x05F5, // 05F5..05FF; UNKNOWN 5458 0x0600, // 0600..0604; ARABIC 5459 0x0605, // 0605 ; COMMON 5460 0x0606, // 0606..060B; ARABIC 5461 0x060C, // 060C ; COMMON 5462 0x060D, // 060D..061A; ARABIC 5463 0x061B, // 061B ; COMMON 5464 0x061C, // 061C..061E; ARABIC 5465 0x061F, // 061F ; COMMON 5466 0x0620, // 0620..063F; ARABIC 5467 0x0640, // 0640 ; COMMON 5468 0x0641, // 0641..064A; ARABIC 5469 0x064B, // 064B..0655; INHERITED 5470 0x0656, // 0656..066F; ARABIC 5471 0x0670, // 0670 ; INHERITED 5472 0x0671, // 0671..06DC; ARABIC 5473 0x06DD, // 06DD ; COMMON 5474 0x06DE, // 06DE..06FF; ARABIC 5475 0x0700, // 0700..070D; SYRIAC 5476 0x070E, // 070E ; UNKNOWN 5477 0x070F, // 070F..074A; SYRIAC 5478 0x074B, // 074B..074C; UNKNOWN 5479 0x074D, // 074D..074F; SYRIAC 5480 0x0750, // 0750..077F; ARABIC 5481 0x0780, // 0780..07B1; THAANA 5482 0x07B2, // 07B2..07BF; UNKNOWN 5483 0x07C0, // 07C0..07FA; NKO 5484 0x07FB, // 07FB..07FC; UNKNOWN 5485 0x07FD, // 07FD..07FF; NKO 5486 0x0800, // 0800..082D; SAMARITAN 5487 0x082E, // 082E..082F; UNKNOWN 5488 0x0830, // 0830..083E; SAMARITAN 5489 0x083F, // 083F ; UNKNOWN 5490 0x0840, // 0840..085B; MANDAIC 5491 0x085C, // 085C..085D; UNKNOWN 5492 0x085E, // 085E ; MANDAIC 5493 0x085F, // 085F ; UNKNOWN 5494 0x0860, // 0860..086A; SYRIAC 5495 0x086B, // 086B..086F; UNKNOWN 5496 0x0870, // 0870..088E; ARABIC 5497 0x088F, // 088F ; UNKNOWN 5498 0x0890, // 0890..0891; ARABIC 
5499 0x0892, // 0892..0897; UNKNOWN 5500 0x0898, // 0898..08E1; ARABIC 5501 0x08E2, // 08E2 ; COMMON 5502 0x08E3, // 08E3..08FF; ARABIC 5503 0x0900, // 0900..0950; DEVANAGARI 5504 0x0951, // 0951..0954; INHERITED 5505 0x0955, // 0955..0963; DEVANAGARI 5506 0x0964, // 0964..0965; COMMON 5507 0x0966, // 0966..097F; DEVANAGARI 5508 0x0980, // 0980..0983; BENGALI 5509 0x0984, // 0984 ; UNKNOWN 5510 0x0985, // 0985..098C; BENGALI 5511 0x098D, // 098D..098E; UNKNOWN 5512 0x098F, // 098F..0990; BENGALI 5513 0x0991, // 0991..0992; UNKNOWN 5514 0x0993, // 0993..09A8; BENGALI 5515 0x09A9, // 09A9 ; UNKNOWN 5516 0x09AA, // 09AA..09B0; BENGALI 5517 0x09B1, // 09B1 ; UNKNOWN 5518 0x09B2, // 09B2 ; BENGALI 5519 0x09B3, // 09B3..09B5; UNKNOWN 5520 0x09B6, // 09B6..09B9; BENGALI 5521 0x09BA, // 09BA..09BB; UNKNOWN 5522 0x09BC, // 09BC..09C4; BENGALI 5523 0x09C5, // 09C5..09C6; UNKNOWN 5524 0x09C7, // 09C7..09C8; BENGALI 5525 0x09C9, // 09C9..09CA; UNKNOWN 5526 0x09CB, // 09CB..09CE; BENGALI 5527 0x09CF, // 09CF..09D6; UNKNOWN 5528 0x09D7, // 09D7 ; BENGALI 5529 0x09D8, // 09D8..09DB; UNKNOWN 5530 0x09DC, // 09DC..09DD; BENGALI 5531 0x09DE, // 09DE ; UNKNOWN 5532 0x09DF, // 09DF..09E3; BENGALI 5533 0x09E4, // 09E4..09E5; UNKNOWN 5534 0x09E6, // 09E6..09FE; BENGALI 5535 0x09FF, // 09FF..0A00; UNKNOWN 5536 0x0A01, // 0A01..0A03; GURMUKHI 5537 0x0A04, // 0A04 ; UNKNOWN 5538 0x0A05, // 0A05..0A0A; GURMUKHI 5539 0x0A0B, // 0A0B..0A0E; UNKNOWN 5540 0x0A0F, // 0A0F..0A10; GURMUKHI 5541 0x0A11, // 0A11..0A12; UNKNOWN 5542 0x0A13, // 0A13..0A28; GURMUKHI 5543 0x0A29, // 0A29 ; UNKNOWN 5544 0x0A2A, // 0A2A..0A30; GURMUKHI 5545 0x0A31, // 0A31 ; UNKNOWN 5546 0x0A32, // 0A32..0A33; GURMUKHI 5547 0x0A34, // 0A34 ; UNKNOWN 5548 0x0A35, // 0A35..0A36; GURMUKHI 5549 0x0A37, // 0A37 ; UNKNOWN 5550 0x0A38, // 0A38..0A39; GURMUKHI 5551 0x0A3A, // 0A3A..0A3B; UNKNOWN 5552 0x0A3C, // 0A3C ; GURMUKHI 5553 0x0A3D, // 0A3D ; UNKNOWN 5554 0x0A3E, // 0A3E..0A42; GURMUKHI 5555 0x0A43, // 0A43..0A46; UNKNOWN 
5556 0x0A47, // 0A47..0A48; GURMUKHI 5557 0x0A49, // 0A49..0A4A; UNKNOWN 5558 0x0A4B, // 0A4B..0A4D; GURMUKHI 5559 0x0A4E, // 0A4E..0A50; UNKNOWN 5560 0x0A51, // 0A51 ; GURMUKHI 5561 0x0A52, // 0A52..0A58; UNKNOWN 5562 0x0A59, // 0A59..0A5C; GURMUKHI 5563 0x0A5D, // 0A5D ; UNKNOWN 5564 0x0A5E, // 0A5E ; GURMUKHI 5565 0x0A5F, // 0A5F..0A65; UNKNOWN 5566 0x0A66, // 0A66..0A76; GURMUKHI 5567 0x0A77, // 0A77..0A80; UNKNOWN 5568 0x0A81, // 0A81..0A83; GUJARATI 5569 0x0A84, // 0A84 ; UNKNOWN 5570 0x0A85, // 0A85..0A8D; GUJARATI 5571 0x0A8E, // 0A8E ; UNKNOWN 5572 0x0A8F, // 0A8F..0A91; GUJARATI 5573 0x0A92, // 0A92 ; UNKNOWN 5574 0x0A93, // 0A93..0AA8; GUJARATI 5575 0x0AA9, // 0AA9 ; UNKNOWN 5576 0x0AAA, // 0AAA..0AB0; GUJARATI 5577 0x0AB1, // 0AB1 ; UNKNOWN 5578 0x0AB2, // 0AB2..0AB3; GUJARATI 5579 0x0AB4, // 0AB4 ; UNKNOWN 5580 0x0AB5, // 0AB5..0AB9; GUJARATI 5581 0x0ABA, // 0ABA..0ABB; UNKNOWN 5582 0x0ABC, // 0ABC..0AC5; GUJARATI 5583 0x0AC6, // 0AC6 ; UNKNOWN 5584 0x0AC7, // 0AC7..0AC9; GUJARATI 5585 0x0ACA, // 0ACA ; UNKNOWN 5586 0x0ACB, // 0ACB..0ACD; GUJARATI 5587 0x0ACE, // 0ACE..0ACF; UNKNOWN 5588 0x0AD0, // 0AD0 ; GUJARATI 5589 0x0AD1, // 0AD1..0ADF; UNKNOWN 5590 0x0AE0, // 0AE0..0AE3; GUJARATI 5591 0x0AE4, // 0AE4..0AE5; UNKNOWN 5592 0x0AE6, // 0AE6..0AF1; GUJARATI 5593 0x0AF2, // 0AF2..0AF8; UNKNOWN 5594 0x0AF9, // 0AF9..0AFF; GUJARATI 5595 0x0B00, // 0B00 ; UNKNOWN 5596 0x0B01, // 0B01..0B03; ORIYA 5597 0x0B04, // 0B04 ; UNKNOWN 5598 0x0B05, // 0B05..0B0C; ORIYA 5599 0x0B0D, // 0B0D..0B0E; UNKNOWN 5600 0x0B0F, // 0B0F..0B10; ORIYA 5601 0x0B11, // 0B11..0B12; UNKNOWN 5602 0x0B13, // 0B13..0B28; ORIYA 5603 0x0B29, // 0B29 ; UNKNOWN 5604 0x0B2A, // 0B2A..0B30; ORIYA 5605 0x0B31, // 0B31 ; UNKNOWN 5606 0x0B32, // 0B32..0B33; ORIYA 5607 0x0B34, // 0B34 ; UNKNOWN 5608 0x0B35, // 0B35..0B39; ORIYA 5609 0x0B3A, // 0B3A..0B3B; UNKNOWN 5610 0x0B3C, // 0B3C..0B44; ORIYA 5611 0x0B45, // 0B45..0B46; UNKNOWN 5612 0x0B47, // 0B47..0B48; ORIYA 5613 0x0B49, // 0B49..0B4A; 
UNKNOWN 5614 0x0B4B, // 0B4B..0B4D; ORIYA 5615 0x0B4E, // 0B4E..0B54; UNKNOWN 5616 0x0B55, // 0B55..0B57; ORIYA 5617 0x0B58, // 0B58..0B5B; UNKNOWN 5618 0x0B5C, // 0B5C..0B5D; ORIYA 5619 0x0B5E, // 0B5E ; UNKNOWN 5620 0x0B5F, // 0B5F..0B63; ORIYA 5621 0x0B64, // 0B64..0B65; UNKNOWN 5622 0x0B66, // 0B66..0B77; ORIYA 5623 0x0B78, // 0B78..0B81; UNKNOWN 5624 0x0B82, // 0B82..0B83; TAMIL 5625 0x0B84, // 0B84 ; UNKNOWN 5626 0x0B85, // 0B85..0B8A; TAMIL 5627 0x0B8B, // 0B8B..0B8D; UNKNOWN 5628 0x0B8E, // 0B8E..0B90; TAMIL 5629 0x0B91, // 0B91 ; UNKNOWN 5630 0x0B92, // 0B92..0B95; TAMIL 5631 0x0B96, // 0B96..0B98; UNKNOWN 5632 0x0B99, // 0B99..0B9A; TAMIL 5633 0x0B9B, // 0B9B ; UNKNOWN 5634 0x0B9C, // 0B9C ; TAMIL 5635 0x0B9D, // 0B9D ; UNKNOWN 5636 0x0B9E, // 0B9E..0B9F; TAMIL 5637 0x0BA0, // 0BA0..0BA2; UNKNOWN 5638 0x0BA3, // 0BA3..0BA4; TAMIL 5639 0x0BA5, // 0BA5..0BA7; UNKNOWN 5640 0x0BA8, // 0BA8..0BAA; TAMIL 5641 0x0BAB, // 0BAB..0BAD; UNKNOWN 5642 0x0BAE, // 0BAE..0BB9; TAMIL 5643 0x0BBA, // 0BBA..0BBD; UNKNOWN 5644 0x0BBE, // 0BBE..0BC2; TAMIL 5645 0x0BC3, // 0BC3..0BC5; UNKNOWN 5646 0x0BC6, // 0BC6..0BC8; TAMIL 5647 0x0BC9, // 0BC9 ; UNKNOWN 5648 0x0BCA, // 0BCA..0BCD; TAMIL 5649 0x0BCE, // 0BCE..0BCF; UNKNOWN 5650 0x0BD0, // 0BD0 ; TAMIL 5651 0x0BD1, // 0BD1..0BD6; UNKNOWN 5652 0x0BD7, // 0BD7 ; TAMIL 5653 0x0BD8, // 0BD8..0BE5; UNKNOWN 5654 0x0BE6, // 0BE6..0BFA; TAMIL 5655 0x0BFB, // 0BFB..0BFF; UNKNOWN 5656 0x0C00, // 0C00..0C0C; TELUGU 5657 0x0C0D, // 0C0D ; UNKNOWN 5658 0x0C0E, // 0C0E..0C10; TELUGU 5659 0x0C11, // 0C11 ; UNKNOWN 5660 0x0C12, // 0C12..0C28; TELUGU 5661 0x0C29, // 0C29 ; UNKNOWN 5662 0x0C2A, // 0C2A..0C39; TELUGU 5663 0x0C3A, // 0C3A..0C3B; UNKNOWN 5664 0x0C3C, // 0C3C..0C44; TELUGU 5665 0x0C45, // 0C45 ; UNKNOWN 5666 0x0C46, // 0C46..0C48; TELUGU 5667 0x0C49, // 0C49 ; UNKNOWN 5668 0x0C4A, // 0C4A..0C4D; TELUGU 5669 0x0C4E, // 0C4E..0C54; UNKNOWN 5670 0x0C55, // 0C55..0C56; TELUGU 5671 0x0C57, // 0C57 ; UNKNOWN 5672 0x0C58, // 0C58..0C5A; 
TELUGU 5673 0x0C5B, // 0C5B..0C5C; UNKNOWN 5674 0x0C5D, // 0C5D ; TELUGU 5675 0x0C5E, // 0C5E..0C5F; UNKNOWN 5676 0x0C60, // 0C60..0C63; TELUGU 5677 0x0C64, // 0C64..0C65; UNKNOWN 5678 0x0C66, // 0C66..0C6F; TELUGU 5679 0x0C70, // 0C70..0C76; UNKNOWN 5680 0x0C77, // 0C77..0C7F; TELUGU 5681 0x0C80, // 0C80..0C8C; KANNADA 5682 0x0C8D, // 0C8D ; UNKNOWN 5683 0x0C8E, // 0C8E..0C90; KANNADA 5684 0x0C91, // 0C91 ; UNKNOWN 5685 0x0C92, // 0C92..0CA8; KANNADA 5686 0x0CA9, // 0CA9 ; UNKNOWN 5687 0x0CAA, // 0CAA..0CB3; KANNADA 5688 0x0CB4, // 0CB4 ; UNKNOWN 5689 0x0CB5, // 0CB5..0CB9; KANNADA 5690 0x0CBA, // 0CBA..0CBB; UNKNOWN 5691 0x0CBC, // 0CBC..0CC4; KANNADA 5692 0x0CC5, // 0CC5 ; UNKNOWN 5693 0x0CC6, // 0CC6..0CC8; KANNADA 5694 0x0CC9, // 0CC9 ; UNKNOWN 5695 0x0CCA, // 0CCA..0CCD; KANNADA 5696 0x0CCE, // 0CCE..0CD4; UNKNOWN 5697 0x0CD5, // 0CD5..0CD6; KANNADA 5698 0x0CD7, // 0CD7..0CDC; UNKNOWN 5699 0x0CDD, // 0CDD..0CDE; KANNADA 5700 0x0CDF, // 0CDF ; UNKNOWN 5701 0x0CE0, // 0CE0..0CE3; KANNADA 5702 0x0CE4, // 0CE4..0CE5; UNKNOWN 5703 0x0CE6, // 0CE6..0CEF; KANNADA 5704 0x0CF0, // 0CF0 ; UNKNOWN 5705 0x0CF1, // 0CF1..0CF3; KANNADA 5706 0x0CF4, // 0CF4..0CFF; UNKNOWN 5707 0x0D00, // 0D00..0D0C; MALAYALAM 5708 0x0D0D, // 0D0D ; UNKNOWN 5709 0x0D0E, // 0D0E..0D10; MALAYALAM 5710 0x0D11, // 0D11 ; UNKNOWN 5711 0x0D12, // 0D12..0D44; MALAYALAM 5712 0x0D45, // 0D45 ; UNKNOWN 5713 0x0D46, // 0D46..0D48; MALAYALAM 5714 0x0D49, // 0D49 ; UNKNOWN 5715 0x0D4A, // 0D4A..0D4F; MALAYALAM 5716 0x0D50, // 0D50..0D53; UNKNOWN 5717 0x0D54, // 0D54..0D63; MALAYALAM 5718 0x0D64, // 0D64..0D65; UNKNOWN 5719 0x0D66, // 0D66..0D7F; MALAYALAM 5720 0x0D80, // 0D80 ; UNKNOWN 5721 0x0D81, // 0D81..0D83; SINHALA 5722 0x0D84, // 0D84 ; UNKNOWN 5723 0x0D85, // 0D85..0D96; SINHALA 5724 0x0D97, // 0D97..0D99; UNKNOWN 5725 0x0D9A, // 0D9A..0DB1; SINHALA 5726 0x0DB2, // 0DB2 ; UNKNOWN 5727 0x0DB3, // 0DB3..0DBB; SINHALA 5728 0x0DBC, // 0DBC ; UNKNOWN 5729 0x0DBD, // 0DBD ; SINHALA 5730 0x0DBE, // 
0DBE..0DBF; UNKNOWN 5731 0x0DC0, // 0DC0..0DC6; SINHALA 5732 0x0DC7, // 0DC7..0DC9; UNKNOWN 5733 0x0DCA, // 0DCA ; SINHALA 5734 0x0DCB, // 0DCB..0DCE; UNKNOWN 5735 0x0DCF, // 0DCF..0DD4; SINHALA 5736 0x0DD5, // 0DD5 ; UNKNOWN 5737 0x0DD6, // 0DD6 ; SINHALA 5738 0x0DD7, // 0DD7 ; UNKNOWN 5739 0x0DD8, // 0DD8..0DDF; SINHALA 5740 0x0DE0, // 0DE0..0DE5; UNKNOWN 5741 0x0DE6, // 0DE6..0DEF; SINHALA 5742 0x0DF0, // 0DF0..0DF1; UNKNOWN 5743 0x0DF2, // 0DF2..0DF4; SINHALA 5744 0x0DF5, // 0DF5..0E00; UNKNOWN 5745 0x0E01, // 0E01..0E3A; THAI 5746 0x0E3B, // 0E3B..0E3E; UNKNOWN 5747 0x0E3F, // 0E3F ; COMMON 5748 0x0E40, // 0E40..0E5B; THAI 5749 0x0E5C, // 0E5C..0E80; UNKNOWN 5750 0x0E81, // 0E81..0E82; LAO 5751 0x0E83, // 0E83 ; UNKNOWN 5752 0x0E84, // 0E84 ; LAO 5753 0x0E85, // 0E85 ; UNKNOWN 5754 0x0E86, // 0E86..0E8A; LAO 5755 0x0E8B, // 0E8B ; UNKNOWN 5756 0x0E8C, // 0E8C..0EA3; LAO 5757 0x0EA4, // 0EA4 ; UNKNOWN 5758 0x0EA5, // 0EA5 ; LAO 5759 0x0EA6, // 0EA6 ; UNKNOWN 5760 0x0EA7, // 0EA7..0EBD; LAO 5761 0x0EBE, // 0EBE..0EBF; UNKNOWN 5762 0x0EC0, // 0EC0..0EC4; LAO 5763 0x0EC5, // 0EC5 ; UNKNOWN 5764 0x0EC6, // 0EC6 ; LAO 5765 0x0EC7, // 0EC7 ; UNKNOWN 5766 0x0EC8, // 0EC8..0ECE; LAO 5767 0x0ECF, // 0ECF ; UNKNOWN 5768 0x0ED0, // 0ED0..0ED9; LAO 5769 0x0EDA, // 0EDA..0EDB; UNKNOWN 5770 0x0EDC, // 0EDC..0EDF; LAO 5771 0x0EE0, // 0EE0..0EFF; UNKNOWN 5772 0x0F00, // 0F00..0F47; TIBETAN 5773 0x0F48, // 0F48 ; UNKNOWN 5774 0x0F49, // 0F49..0F6C; TIBETAN 5775 0x0F6D, // 0F6D..0F70; UNKNOWN 5776 0x0F71, // 0F71..0F97; TIBETAN 5777 0x0F98, // 0F98 ; UNKNOWN 5778 0x0F99, // 0F99..0FBC; TIBETAN 5779 0x0FBD, // 0FBD ; UNKNOWN 5780 0x0FBE, // 0FBE..0FCC; TIBETAN 5781 0x0FCD, // 0FCD ; UNKNOWN 5782 0x0FCE, // 0FCE..0FD4; TIBETAN 5783 0x0FD5, // 0FD5..0FD8; COMMON 5784 0x0FD9, // 0FD9..0FDA; TIBETAN 5785 0x0FDB, // 0FDB..0FFF; UNKNOWN 5786 0x1000, // 1000..109F; MYANMAR 5787 0x10A0, // 10A0..10C5; GEORGIAN 5788 0x10C6, // 10C6 ; UNKNOWN 5789 0x10C7, // 10C7 ; GEORGIAN 5790 0x10C8, // 
10C8..10CC; UNKNOWN 5791 0x10CD, // 10CD ; GEORGIAN 5792 0x10CE, // 10CE..10CF; UNKNOWN 5793 0x10D0, // 10D0..10FA; GEORGIAN 5794 0x10FB, // 10FB ; COMMON 5795 0x10FC, // 10FC..10FF; GEORGIAN 5796 0x1100, // 1100..11FF; HANGUL 5797 0x1200, // 1200..1248; ETHIOPIC 5798 0x1249, // 1249 ; UNKNOWN 5799 0x124A, // 124A..124D; ETHIOPIC 5800 0x124E, // 124E..124F; UNKNOWN 5801 0x1250, // 1250..1256; ETHIOPIC 5802 0x1257, // 1257 ; UNKNOWN 5803 0x1258, // 1258 ; ETHIOPIC 5804 0x1259, // 1259 ; UNKNOWN 5805 0x125A, // 125A..125D; ETHIOPIC 5806 0x125E, // 125E..125F; UNKNOWN 5807 0x1260, // 1260..1288; ETHIOPIC 5808 0x1289, // 1289 ; UNKNOWN 5809 0x128A, // 128A..128D; ETHIOPIC 5810 0x128E, // 128E..128F; UNKNOWN 5811 0x1290, // 1290..12B0; ETHIOPIC 5812 0x12B1, // 12B1 ; UNKNOWN 5813 0x12B2, // 12B2..12B5; ETHIOPIC 5814 0x12B6, // 12B6..12B7; UNKNOWN 5815 0x12B8, // 12B8..12BE; ETHIOPIC 5816 0x12BF, // 12BF ; UNKNOWN 5817 0x12C0, // 12C0 ; ETHIOPIC 5818 0x12C1, // 12C1 ; UNKNOWN 5819 0x12C2, // 12C2..12C5; ETHIOPIC 5820 0x12C6, // 12C6..12C7; UNKNOWN 5821 0x12C8, // 12C8..12D6; ETHIOPIC 5822 0x12D7, // 12D7 ; UNKNOWN 5823 0x12D8, // 12D8..1310; ETHIOPIC 5824 0x1311, // 1311 ; UNKNOWN 5825 0x1312, // 1312..1315; ETHIOPIC 5826 0x1316, // 1316..1317; UNKNOWN 5827 0x1318, // 1318..135A; ETHIOPIC 5828 0x135B, // 135B..135C; UNKNOWN 5829 0x135D, // 135D..137C; ETHIOPIC 5830 0x137D, // 137D..137F; UNKNOWN 5831 0x1380, // 1380..1399; ETHIOPIC 5832 0x139A, // 139A..139F; UNKNOWN 5833 0x13A0, // 13A0..13F5; CHEROKEE 5834 0x13F6, // 13F6..13F7; UNKNOWN 5835 0x13F8, // 13F8..13FD; CHEROKEE 5836 0x13FE, // 13FE..13FF; UNKNOWN 5837 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5838 0x1680, // 1680..169C; OGHAM 5839 0x169D, // 169D..169F; UNKNOWN 5840 0x16A0, // 16A0..16EA; RUNIC 5841 0x16EB, // 16EB..16ED; COMMON 5842 0x16EE, // 16EE..16F8; RUNIC 5843 0x16F9, // 16F9..16FF; UNKNOWN 5844 0x1700, // 1700..1715; TAGALOG 5845 0x1716, // 1716..171E; UNKNOWN 5846 0x171F, // 171F ; TAGALOG 5847 
0x1720, // 1720..1734; HANUNOO 5848 0x1735, // 1735..1736; COMMON 5849 0x1737, // 1737..173F; UNKNOWN 5850 0x1740, // 1740..1753; BUHID 5851 0x1754, // 1754..175F; UNKNOWN 5852 0x1760, // 1760..176C; TAGBANWA 5853 0x176D, // 176D ; UNKNOWN 5854 0x176E, // 176E..1770; TAGBANWA 5855 0x1771, // 1771 ; UNKNOWN 5856 0x1772, // 1772..1773; TAGBANWA 5857 0x1774, // 1774..177F; UNKNOWN 5858 0x1780, // 1780..17DD; KHMER 5859 0x17DE, // 17DE..17DF; UNKNOWN 5860 0x17E0, // 17E0..17E9; KHMER 5861 0x17EA, // 17EA..17EF; UNKNOWN 5862 0x17F0, // 17F0..17F9; KHMER 5863 0x17FA, // 17FA..17FF; UNKNOWN 5864 0x1800, // 1800..1801; MONGOLIAN 5865 0x1802, // 1802..1803; COMMON 5866 0x1804, // 1804 ; MONGOLIAN 5867 0x1805, // 1805 ; COMMON 5868 0x1806, // 1806..1819; MONGOLIAN 5869 0x181A, // 181A..181F; UNKNOWN 5870 0x1820, // 1820..1878; MONGOLIAN 5871 0x1879, // 1879..187F; UNKNOWN 5872 0x1880, // 1880..18AA; MONGOLIAN 5873 0x18AB, // 18AB..18AF; UNKNOWN 5874 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5875 0x18F6, // 18F6..18FF; UNKNOWN 5876 0x1900, // 1900..191E; LIMBU 5877 0x191F, // 191F ; UNKNOWN 5878 0x1920, // 1920..192B; LIMBU 5879 0x192C, // 192C..192F; UNKNOWN 5880 0x1930, // 1930..193B; LIMBU 5881 0x193C, // 193C..193F; UNKNOWN 5882 0x1940, // 1940 ; LIMBU 5883 0x1941, // 1941..1943; UNKNOWN 5884 0x1944, // 1944..194F; LIMBU 5885 0x1950, // 1950..196D; TAI_LE 5886 0x196E, // 196E..196F; UNKNOWN 5887 0x1970, // 1970..1974; TAI_LE 5888 0x1975, // 1975..197F; UNKNOWN 5889 0x1980, // 1980..19AB; NEW_TAI_LUE 5890 0x19AC, // 19AC..19AF; UNKNOWN 5891 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5892 0x19CA, // 19CA..19CF; UNKNOWN 5893 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5894 0x19DB, // 19DB..19DD; UNKNOWN 5895 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5896 0x19E0, // 19E0..19FF; KHMER 5897 0x1A00, // 1A00..1A1B; BUGINESE 5898 0x1A1C, // 1A1C..1A1D; UNKNOWN 5899 0x1A1E, // 1A1E..1A1F; BUGINESE 5900 0x1A20, // 1A20..1A5E; TAI_THAM 5901 0x1A5F, // 1A5F ; UNKNOWN 5902 0x1A60, // 1A60..1A7C; TAI_THAM 
5903 0x1A7D, // 1A7D..1A7E; UNKNOWN 5904 0x1A7F, // 1A7F..1A89; TAI_THAM 5905 0x1A8A, // 1A8A..1A8F; UNKNOWN 5906 0x1A90, // 1A90..1A99; TAI_THAM 5907 0x1A9A, // 1A9A..1A9F; UNKNOWN 5908 0x1AA0, // 1AA0..1AAD; TAI_THAM 5909 0x1AAE, // 1AAE..1AAF; UNKNOWN 5910 0x1AB0, // 1AB0..1ACE; INHERITED 5911 0x1ACF, // 1ACF..1AFF; UNKNOWN 5912 0x1B00, // 1B00..1B4C; BALINESE 5913 0x1B4D, // 1B4D..1B4F; UNKNOWN 5914 0x1B50, // 1B50..1B7E; BALINESE 5915 0x1B7F, // 1B7F ; UNKNOWN 5916 0x1B80, // 1B80..1BBF; SUNDANESE 5917 0x1BC0, // 1BC0..1BF3; BATAK 5918 0x1BF4, // 1BF4..1BFB; UNKNOWN 5919 0x1BFC, // 1BFC..1BFF; BATAK 5920 0x1C00, // 1C00..1C37; LEPCHA 5921 0x1C38, // 1C38..1C3A; UNKNOWN 5922 0x1C3B, // 1C3B..1C49; LEPCHA 5923 0x1C4A, // 1C4A..1C4C; UNKNOWN 5924 0x1C4D, // 1C4D..1C4F; LEPCHA 5925 0x1C50, // 1C50..1C7F; OL_CHIKI 5926 0x1C80, // 1C80..1C88; CYRILLIC 5927 0x1C89, // 1C89..1C8F; UNKNOWN 5928 0x1C90, // 1C90..1CBA; GEORGIAN 5929 0x1CBB, // 1CBB..1CBC; UNKNOWN 5930 0x1CBD, // 1CBD..1CBF; GEORGIAN 5931 0x1CC0, // 1CC0..1CC7; SUNDANESE 5932 0x1CC8, // 1CC8..1CCF; UNKNOWN 5933 0x1CD0, // 1CD0..1CD2; INHERITED 5934 0x1CD3, // 1CD3 ; COMMON 5935 0x1CD4, // 1CD4..1CE0; INHERITED 5936 0x1CE1, // 1CE1 ; COMMON 5937 0x1CE2, // 1CE2..1CE8; INHERITED 5938 0x1CE9, // 1CE9..1CEC; COMMON 5939 0x1CED, // 1CED ; INHERITED 5940 0x1CEE, // 1CEE..1CF3; COMMON 5941 0x1CF4, // 1CF4 ; INHERITED 5942 0x1CF5, // 1CF5..1CF7; COMMON 5943 0x1CF8, // 1CF8..1CF9; INHERITED 5944 0x1CFA, // 1CFA ; COMMON 5945 0x1CFB, // 1CFB..1CFF; UNKNOWN 5946 0x1D00, // 1D00..1D25; LATIN 5947 0x1D26, // 1D26..1D2A; GREEK 5948 0x1D2B, // 1D2B ; CYRILLIC 5949 0x1D2C, // 1D2C..1D5C; LATIN 5950 0x1D5D, // 1D5D..1D61; GREEK 5951 0x1D62, // 1D62..1D65; LATIN 5952 0x1D66, // 1D66..1D6A; GREEK 5953 0x1D6B, // 1D6B..1D77; LATIN 5954 0x1D78, // 1D78 ; CYRILLIC 5955 0x1D79, // 1D79..1DBE; LATIN 5956 0x1DBF, // 1DBF ; GREEK 5957 0x1DC0, // 1DC0..1DFF; INHERITED 5958 0x1E00, // 1E00..1EFF; LATIN 5959 0x1F00, // 1F00..1F15; 
GREEK 5960 0x1F16, // 1F16..1F17; UNKNOWN 5961 0x1F18, // 1F18..1F1D; GREEK 5962 0x1F1E, // 1F1E..1F1F; UNKNOWN 5963 0x1F20, // 1F20..1F45; GREEK 5964 0x1F46, // 1F46..1F47; UNKNOWN 5965 0x1F48, // 1F48..1F4D; GREEK 5966 0x1F4E, // 1F4E..1F4F; UNKNOWN 5967 0x1F50, // 1F50..1F57; GREEK 5968 0x1F58, // 1F58 ; UNKNOWN 5969 0x1F59, // 1F59 ; GREEK 5970 0x1F5A, // 1F5A ; UNKNOWN 5971 0x1F5B, // 1F5B ; GREEK 5972 0x1F5C, // 1F5C ; UNKNOWN 5973 0x1F5D, // 1F5D ; GREEK 5974 0x1F5E, // 1F5E ; UNKNOWN 5975 0x1F5F, // 1F5F..1F7D; GREEK 5976 0x1F7E, // 1F7E..1F7F; UNKNOWN 5977 0x1F80, // 1F80..1FB4; GREEK 5978 0x1FB5, // 1FB5 ; UNKNOWN 5979 0x1FB6, // 1FB6..1FC4; GREEK 5980 0x1FC5, // 1FC5 ; UNKNOWN 5981 0x1FC6, // 1FC6..1FD3; GREEK 5982 0x1FD4, // 1FD4..1FD5; UNKNOWN 5983 0x1FD6, // 1FD6..1FDB; GREEK 5984 0x1FDC, // 1FDC ; UNKNOWN 5985 0x1FDD, // 1FDD..1FEF; GREEK 5986 0x1FF0, // 1FF0..1FF1; UNKNOWN 5987 0x1FF2, // 1FF2..1FF4; GREEK 5988 0x1FF5, // 1FF5 ; UNKNOWN 5989 0x1FF6, // 1FF6..1FFE; GREEK 5990 0x1FFF, // 1FFF ; UNKNOWN 5991 0x2000, // 2000..200B; COMMON 5992 0x200C, // 200C..200D; INHERITED 5993 0x200E, // 200E..2064; COMMON 5994 0x2065, // 2065 ; UNKNOWN 5995 0x2066, // 2066..2070; COMMON 5996 0x2071, // 2071 ; LATIN 5997 0x2072, // 2072..2073; UNKNOWN 5998 0x2074, // 2074..207E; COMMON 5999 0x207F, // 207F ; LATIN 6000 0x2080, // 2080..208E; COMMON 6001 0x208F, // 208F ; UNKNOWN 6002 0x2090, // 2090..209C; LATIN 6003 0x209D, // 209D..209F; UNKNOWN 6004 0x20A0, // 20A0..20C0; COMMON 6005 0x20C1, // 20C1..20CF; UNKNOWN 6006 0x20D0, // 20D0..20F0; INHERITED 6007 0x20F1, // 20F1..20FF; UNKNOWN 6008 0x2100, // 2100..2125; COMMON 6009 0x2126, // 2126 ; GREEK 6010 0x2127, // 2127..2129; COMMON 6011 0x212A, // 212A..212B; LATIN 6012 0x212C, // 212C..2131; COMMON 6013 0x2132, // 2132 ; LATIN 6014 0x2133, // 2133..214D; COMMON 6015 0x214E, // 214E ; LATIN 6016 0x214F, // 214F..215F; COMMON 6017 0x2160, // 2160..2188; LATIN 6018 0x2189, // 2189..218B; COMMON 6019 0x218C, // 
218C..218F; UNKNOWN 6020 0x2190, // 2190..2426; COMMON 6021 0x2427, // 2427..243F; UNKNOWN 6022 0x2440, // 2440..244A; COMMON 6023 0x244B, // 244B..245F; UNKNOWN 6024 0x2460, // 2460..27FF; COMMON 6025 0x2800, // 2800..28FF; BRAILLE 6026 0x2900, // 2900..2B73; COMMON 6027 0x2B74, // 2B74..2B75; UNKNOWN 6028 0x2B76, // 2B76..2B95; COMMON 6029 0x2B96, // 2B96 ; UNKNOWN 6030 0x2B97, // 2B97..2BFF; COMMON 6031 0x2C00, // 2C00..2C5F; GLAGOLITIC 6032 0x2C60, // 2C60..2C7F; LATIN 6033 0x2C80, // 2C80..2CF3; COPTIC 6034 0x2CF4, // 2CF4..2CF8; UNKNOWN 6035 0x2CF9, // 2CF9..2CFF; COPTIC 6036 0x2D00, // 2D00..2D25; GEORGIAN 6037 0x2D26, // 2D26 ; UNKNOWN 6038 0x2D27, // 2D27 ; GEORGIAN 6039 0x2D28, // 2D28..2D2C; UNKNOWN 6040 0x2D2D, // 2D2D ; GEORGIAN 6041 0x2D2E, // 2D2E..2D2F; UNKNOWN 6042 0x2D30, // 2D30..2D67; TIFINAGH 6043 0x2D68, // 2D68..2D6E; UNKNOWN 6044 0x2D6F, // 2D6F..2D70; TIFINAGH 6045 0x2D71, // 2D71..2D7E; UNKNOWN 6046 0x2D7F, // 2D7F ; TIFINAGH 6047 0x2D80, // 2D80..2D96; ETHIOPIC 6048 0x2D97, // 2D97..2D9F; UNKNOWN 6049 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6050 0x2DA7, // 2DA7 ; UNKNOWN 6051 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6052 0x2DAF, // 2DAF ; UNKNOWN 6053 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6054 0x2DB7, // 2DB7 ; UNKNOWN 6055 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6056 0x2DBF, // 2DBF ; UNKNOWN 6057 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6058 0x2DC7, // 2DC7 ; UNKNOWN 6059 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6060 0x2DCF, // 2DCF ; UNKNOWN 6061 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6062 0x2DD7, // 2DD7 ; UNKNOWN 6063 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6064 0x2DDF, // 2DDF ; UNKNOWN 6065 0x2DE0, // 2DE0..2DFF; CYRILLIC 6066 0x2E00, // 2E00..2E5D; COMMON 6067 0x2E5E, // 2E5E..2E7F; UNKNOWN 6068 0x2E80, // 2E80..2E99; HAN 6069 0x2E9A, // 2E9A ; UNKNOWN 6070 0x2E9B, // 2E9B..2EF3; HAN 6071 0x2EF4, // 2EF4..2EFF; UNKNOWN 6072 0x2F00, // 2F00..2FD5; HAN 6073 0x2FD6, // 2FD6..2FEF; UNKNOWN 6074 0x2FF0, // 2FF0..3004; COMMON 6075 0x3005, // 3005 ; HAN 6076 0x3006, // 3006 ; COMMON 6077 0x3007, // 
3007 ; HAN 6078 0x3008, // 3008..3020; COMMON 6079 0x3021, // 3021..3029; HAN 6080 0x302A, // 302A..302D; INHERITED 6081 0x302E, // 302E..302F; HANGUL 6082 0x3030, // 3030..3037; COMMON 6083 0x3038, // 3038..303B; HAN 6084 0x303C, // 303C..303F; COMMON 6085 0x3040, // 3040 ; UNKNOWN 6086 0x3041, // 3041..3096; HIRAGANA 6087 0x3097, // 3097..3098; UNKNOWN 6088 0x3099, // 3099..309A; INHERITED 6089 0x309B, // 309B..309C; COMMON 6090 0x309D, // 309D..309F; HIRAGANA 6091 0x30A0, // 30A0 ; COMMON 6092 0x30A1, // 30A1..30FA; KATAKANA 6093 0x30FB, // 30FB..30FC; COMMON 6094 0x30FD, // 30FD..30FF; KATAKANA 6095 0x3100, // 3100..3104; UNKNOWN 6096 0x3105, // 3105..312F; BOPOMOFO 6097 0x3130, // 3130 ; UNKNOWN 6098 0x3131, // 3131..318E; HANGUL 6099 0x318F, // 318F ; UNKNOWN 6100 0x3190, // 3190..319F; COMMON 6101 0x31A0, // 31A0..31BF; BOPOMOFO 6102 0x31C0, // 31C0..31E3; COMMON 6103 0x31E4, // 31E4..31EE; UNKNOWN 6104 0x31EF, // 31EF ; COMMON 6105 0x31F0, // 31F0..31FF; KATAKANA 6106 0x3200, // 3200..321E; HANGUL 6107 0x321F, // 321F ; UNKNOWN 6108 0x3220, // 3220..325F; COMMON 6109 0x3260, // 3260..327E; HANGUL 6110 0x327F, // 327F..32CF; COMMON 6111 0x32D0, // 32D0..32FE; KATAKANA 6112 0x32FF, // 32FF ; COMMON 6113 0x3300, // 3300..3357; KATAKANA 6114 0x3358, // 3358..33FF; COMMON 6115 0x3400, // 3400..4DBF; HAN 6116 0x4DC0, // 4DC0..4DFF; COMMON 6117 0x4E00, // 4E00..9FFF; HAN 6118 0xA000, // A000..A48C; YI 6119 0xA48D, // A48D..A48F; UNKNOWN 6120 0xA490, // A490..A4C6; YI 6121 0xA4C7, // A4C7..A4CF; UNKNOWN 6122 0xA4D0, // A4D0..A4FF; LISU 6123 0xA500, // A500..A62B; VAI 6124 0xA62C, // A62C..A63F; UNKNOWN 6125 0xA640, // A640..A69F; CYRILLIC 6126 0xA6A0, // A6A0..A6F7; BAMUM 6127 0xA6F8, // A6F8..A6FF; UNKNOWN 6128 0xA700, // A700..A721; COMMON 6129 0xA722, // A722..A787; LATIN 6130 0xA788, // A788..A78A; COMMON 6131 0xA78B, // A78B..A7CA; LATIN 6132 0xA7CB, // A7CB..A7CF; UNKNOWN 6133 0xA7D0, // A7D0..A7D1; LATIN 6134 0xA7D2, // A7D2 ; UNKNOWN 6135 0xA7D3, // A7D3 ; 
LATIN 6136 0xA7D4, // A7D4 ; UNKNOWN 6137 0xA7D5, // A7D5..A7D9; LATIN 6138 0xA7DA, // A7DA..A7F1; UNKNOWN 6139 0xA7F2, // A7F2..A7FF; LATIN 6140 0xA800, // A800..A82C; SYLOTI_NAGRI 6141 0xA82D, // A82D..A82F; UNKNOWN 6142 0xA830, // A830..A839; COMMON 6143 0xA83A, // A83A..A83F; UNKNOWN 6144 0xA840, // A840..A877; PHAGS_PA 6145 0xA878, // A878..A87F; UNKNOWN 6146 0xA880, // A880..A8C5; SAURASHTRA 6147 0xA8C6, // A8C6..A8CD; UNKNOWN 6148 0xA8CE, // A8CE..A8D9; SAURASHTRA 6149 0xA8DA, // A8DA..A8DF; UNKNOWN 6150 0xA8E0, // A8E0..A8FF; DEVANAGARI 6151 0xA900, // A900..A92D; KAYAH_LI 6152 0xA92E, // A92E ; COMMON 6153 0xA92F, // A92F ; KAYAH_LI 6154 0xA930, // A930..A953; REJANG 6155 0xA954, // A954..A95E; UNKNOWN 6156 0xA95F, // A95F ; REJANG 6157 0xA960, // A960..A97C; HANGUL 6158 0xA97D, // A97D..A97F; UNKNOWN 6159 0xA980, // A980..A9CD; JAVANESE 6160 0xA9CE, // A9CE ; UNKNOWN 6161 0xA9CF, // A9CF ; COMMON 6162 0xA9D0, // A9D0..A9D9; JAVANESE 6163 0xA9DA, // A9DA..A9DD; UNKNOWN 6164 0xA9DE, // A9DE..A9DF; JAVANESE 6165 0xA9E0, // A9E0..A9FE; MYANMAR 6166 0xA9FF, // A9FF ; UNKNOWN 6167 0xAA00, // AA00..AA36; CHAM 6168 0xAA37, // AA37..AA3F; UNKNOWN 6169 0xAA40, // AA40..AA4D; CHAM 6170 0xAA4E, // AA4E..AA4F; UNKNOWN 6171 0xAA50, // AA50..AA59; CHAM 6172 0xAA5A, // AA5A..AA5B; UNKNOWN 6173 0xAA5C, // AA5C..AA5F; CHAM 6174 0xAA60, // AA60..AA7F; MYANMAR 6175 0xAA80, // AA80..AAC2; TAI_VIET 6176 0xAAC3, // AAC3..AADA; UNKNOWN 6177 0xAADB, // AADB..AADF; TAI_VIET 6178 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6179 0xAAF7, // AAF7..AB00; UNKNOWN 6180 0xAB01, // AB01..AB06; ETHIOPIC 6181 0xAB07, // AB07..AB08; UNKNOWN 6182 0xAB09, // AB09..AB0E; ETHIOPIC 6183 0xAB0F, // AB0F..AB10; UNKNOWN 6184 0xAB11, // AB11..AB16; ETHIOPIC 6185 0xAB17, // AB17..AB1F; UNKNOWN 6186 0xAB20, // AB20..AB26; ETHIOPIC 6187 0xAB27, // AB27 ; UNKNOWN 6188 0xAB28, // AB28..AB2E; ETHIOPIC 6189 0xAB2F, // AB2F ; UNKNOWN 6190 0xAB30, // AB30..AB5A; LATIN 6191 0xAB5B, // AB5B ; COMMON 6192 0xAB5C, // 
AB5C..AB64; LATIN 6193 0xAB65, // AB65 ; GREEK 6194 0xAB66, // AB66..AB69; LATIN 6195 0xAB6A, // AB6A..AB6B; COMMON 6196 0xAB6C, // AB6C..AB6F; UNKNOWN 6197 0xAB70, // AB70..ABBF; CHEROKEE 6198 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6199 0xABEE, // ABEE..ABEF; UNKNOWN 6200 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6201 0xABFA, // ABFA..ABFF; UNKNOWN 6202 0xAC00, // AC00..D7A3; HANGUL 6203 0xD7A4, // D7A4..D7AF; UNKNOWN 6204 0xD7B0, // D7B0..D7C6; HANGUL 6205 0xD7C7, // D7C7..D7CA; UNKNOWN 6206 0xD7CB, // D7CB..D7FB; HANGUL 6207 0xD7FC, // D7FC..F8FF; UNKNOWN 6208 0xF900, // F900..FA6D; HAN 6209 0xFA6E, // FA6E..FA6F; UNKNOWN 6210 0xFA70, // FA70..FAD9; HAN 6211 0xFADA, // FADA..FAFF; UNKNOWN 6212 0xFB00, // FB00..FB06; LATIN 6213 0xFB07, // FB07..FB12; UNKNOWN 6214 0xFB13, // FB13..FB17; ARMENIAN 6215 0xFB18, // FB18..FB1C; UNKNOWN 6216 0xFB1D, // FB1D..FB36; HEBREW 6217 0xFB37, // FB37 ; UNKNOWN 6218 0xFB38, // FB38..FB3C; HEBREW 6219 0xFB3D, // FB3D ; UNKNOWN 6220 0xFB3E, // FB3E ; HEBREW 6221 0xFB3F, // FB3F ; UNKNOWN 6222 0xFB40, // FB40..FB41; HEBREW 6223 0xFB42, // FB42 ; UNKNOWN 6224 0xFB43, // FB43..FB44; HEBREW 6225 0xFB45, // FB45 ; UNKNOWN 6226 0xFB46, // FB46..FB4F; HEBREW 6227 0xFB50, // FB50..FBC2; ARABIC 6228 0xFBC3, // FBC3..FBD2; UNKNOWN 6229 0xFBD3, // FBD3..FD3D; ARABIC 6230 0xFD3E, // FD3E..FD3F; COMMON 6231 0xFD40, // FD40..FD8F; ARABIC 6232 0xFD90, // FD90..FD91; UNKNOWN 6233 0xFD92, // FD92..FDC7; ARABIC 6234 0xFDC8, // FDC8..FDCE; UNKNOWN 6235 0xFDCF, // FDCF ; ARABIC 6236 0xFDD0, // FDD0..FDEF; UNKNOWN 6237 0xFDF0, // FDF0..FDFF; ARABIC 6238 0xFE00, // FE00..FE0F; INHERITED 6239 0xFE10, // FE10..FE19; COMMON 6240 0xFE1A, // FE1A..FE1F; UNKNOWN 6241 0xFE20, // FE20..FE2D; INHERITED 6242 0xFE2E, // FE2E..FE2F; CYRILLIC 6243 0xFE30, // FE30..FE52; COMMON 6244 0xFE53, // FE53 ; UNKNOWN 6245 0xFE54, // FE54..FE66; COMMON 6246 0xFE67, // FE67 ; UNKNOWN 6247 0xFE68, // FE68..FE6B; COMMON 6248 0xFE6C, // FE6C..FE6F; UNKNOWN 6249 0xFE70, // FE70..FE74; 
ARABIC 6250 0xFE75, // FE75 ; UNKNOWN 6251 0xFE76, // FE76..FEFC; ARABIC 6252 0xFEFD, // FEFD..FEFE; UNKNOWN 6253 0xFEFF, // FEFF ; COMMON 6254 0xFF00, // FF00 ; UNKNOWN 6255 0xFF01, // FF01..FF20; COMMON 6256 0xFF21, // FF21..FF3A; LATIN 6257 0xFF3B, // FF3B..FF40; COMMON 6258 0xFF41, // FF41..FF5A; LATIN 6259 0xFF5B, // FF5B..FF65; COMMON 6260 0xFF66, // FF66..FF6F; KATAKANA 6261 0xFF70, // FF70 ; COMMON 6262 0xFF71, // FF71..FF9D; KATAKANA 6263 0xFF9E, // FF9E..FF9F; COMMON 6264 0xFFA0, // FFA0..FFBE; HANGUL 6265 0xFFBF, // FFBF..FFC1; UNKNOWN 6266 0xFFC2, // FFC2..FFC7; HANGUL 6267 0xFFC8, // FFC8..FFC9; UNKNOWN 6268 0xFFCA, // FFCA..FFCF; HANGUL 6269 0xFFD0, // FFD0..FFD1; UNKNOWN 6270 0xFFD2, // FFD2..FFD7; HANGUL 6271 0xFFD8, // FFD8..FFD9; UNKNOWN 6272 0xFFDA, // FFDA..FFDC; HANGUL 6273 0xFFDD, // FFDD..FFDF; UNKNOWN 6274 0xFFE0, // FFE0..FFE6; COMMON 6275 0xFFE7, // FFE7 ; UNKNOWN 6276 0xFFE8, // FFE8..FFEE; COMMON 6277 0xFFEF, // FFEF..FFF8; UNKNOWN 6278 0xFFF9, // FFF9..FFFD; COMMON 6279 0xFFFE, // FFFE..FFFF; UNKNOWN 6280 0x10000, // 10000..1000B; LINEAR_B 6281 0x1000C, // 1000C ; UNKNOWN 6282 0x1000D, // 1000D..10026; LINEAR_B 6283 0x10027, // 10027 ; UNKNOWN 6284 0x10028, // 10028..1003A; LINEAR_B 6285 0x1003B, // 1003B ; UNKNOWN 6286 0x1003C, // 1003C..1003D; LINEAR_B 6287 0x1003E, // 1003E ; UNKNOWN 6288 0x1003F, // 1003F..1004D; LINEAR_B 6289 0x1004E, // 1004E..1004F; UNKNOWN 6290 0x10050, // 10050..1005D; LINEAR_B 6291 0x1005E, // 1005E..1007F; UNKNOWN 6292 0x10080, // 10080..100FA; LINEAR_B 6293 0x100FB, // 100FB..100FF; UNKNOWN 6294 0x10100, // 10100..10102; COMMON 6295 0x10103, // 10103..10106; UNKNOWN 6296 0x10107, // 10107..10133; COMMON 6297 0x10134, // 10134..10136; UNKNOWN 6298 0x10137, // 10137..1013F; COMMON 6299 0x10140, // 10140..1018E; GREEK 6300 0x1018F, // 1018F ; UNKNOWN 6301 0x10190, // 10190..1019C; COMMON 6302 0x1019D, // 1019D..1019F; UNKNOWN 6303 0x101A0, // 101A0 ; GREEK 6304 0x101A1, // 101A1..101CF; UNKNOWN 6305 0x101D0, // 
101D0..101FC; COMMON 6306 0x101FD, // 101FD ; INHERITED 6307 0x101FE, // 101FE..1027F; UNKNOWN 6308 0x10280, // 10280..1029C; LYCIAN 6309 0x1029D, // 1029D..1029F; UNKNOWN 6310 0x102A0, // 102A0..102D0; CARIAN 6311 0x102D1, // 102D1..102DF; UNKNOWN 6312 0x102E0, // 102E0 ; INHERITED 6313 0x102E1, // 102E1..102FB; COMMON 6314 0x102FC, // 102FC..102FF; UNKNOWN 6315 0x10300, // 10300..10323; OLD_ITALIC 6316 0x10324, // 10324..1032C; UNKNOWN 6317 0x1032D, // 1032D..1032F; OLD_ITALIC 6318 0x10330, // 10330..1034A; GOTHIC 6319 0x1034B, // 1034B..1034F; UNKNOWN 6320 0x10350, // 10350..1037A; OLD_PERMIC 6321 0x1037B, // 1037B..1037F; UNKNOWN 6322 0x10380, // 10380..1039D; UGARITIC 6323 0x1039E, // 1039E ; UNKNOWN 6324 0x1039F, // 1039F ; UGARITIC 6325 0x103A0, // 103A0..103C3; OLD_PERSIAN 6326 0x103C4, // 103C4..103C7; UNKNOWN 6327 0x103C8, // 103C8..103D5; OLD_PERSIAN 6328 0x103D6, // 103D6..103FF; UNKNOWN 6329 0x10400, // 10400..1044F; DESERET 6330 0x10450, // 10450..1047F; SHAVIAN 6331 0x10480, // 10480..1049D; OSMANYA 6332 0x1049E, // 1049E..1049F; UNKNOWN 6333 0x104A0, // 104A0..104A9; OSMANYA 6334 0x104AA, // 104AA..104AF; UNKNOWN 6335 0x104B0, // 104B0..104D3; OSAGE 6336 0x104D4, // 104D4..104D7; UNKNOWN 6337 0x104D8, // 104D8..104FB; OSAGE 6338 0x104FC, // 104FC..104FF; UNKNOWN 6339 0x10500, // 10500..10527; ELBASAN 6340 0x10528, // 10528..1052F; UNKNOWN 6341 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6342 0x10564, // 10564..1056E; UNKNOWN 6343 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6344 0x10570, // 10570..1057A; VITHKUQI 6345 0x1057B, // 1057B ; UNKNOWN 6346 0x1057C, // 1057C..1058A; VITHKUQI 6347 0x1058B, // 1058B ; UNKNOWN 6348 0x1058C, // 1058C..10592; VITHKUQI 6349 0x10593, // 10593 ; UNKNOWN 6350 0x10594, // 10594..10595; VITHKUQI 6351 0x10596, // 10596 ; UNKNOWN 6352 0x10597, // 10597..105A1; VITHKUQI 6353 0x105A2, // 105A2 ; UNKNOWN 6354 0x105A3, // 105A3..105B1; VITHKUQI 6355 0x105B2, // 105B2 ; UNKNOWN 6356 0x105B3, // 105B3..105B9; VITHKUQI 6357 
0x105BA, // 105BA ; UNKNOWN 6358 0x105BB, // 105BB..105BC; VITHKUQI 6359 0x105BD, // 105BD..105FF; UNKNOWN 6360 0x10600, // 10600..10736; LINEAR_A 6361 0x10737, // 10737..1073F; UNKNOWN 6362 0x10740, // 10740..10755; LINEAR_A 6363 0x10756, // 10756..1075F; UNKNOWN 6364 0x10760, // 10760..10767; LINEAR_A 6365 0x10768, // 10768..1077F; UNKNOWN 6366 0x10780, // 10780..10785; LATIN 6367 0x10786, // 10786 ; UNKNOWN 6368 0x10787, // 10787..107B0; LATIN 6369 0x107B1, // 107B1 ; UNKNOWN 6370 0x107B2, // 107B2..107BA; LATIN 6371 0x107BB, // 107BB..107FF; UNKNOWN 6372 0x10800, // 10800..10805; CYPRIOT 6373 0x10806, // 10806..10807; UNKNOWN 6374 0x10808, // 10808 ; CYPRIOT 6375 0x10809, // 10809 ; UNKNOWN 6376 0x1080A, // 1080A..10835; CYPRIOT 6377 0x10836, // 10836 ; UNKNOWN 6378 0x10837, // 10837..10838; CYPRIOT 6379 0x10839, // 10839..1083B; UNKNOWN 6380 0x1083C, // 1083C ; CYPRIOT 6381 0x1083D, // 1083D..1083E; UNKNOWN 6382 0x1083F, // 1083F ; CYPRIOT 6383 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6384 0x10856, // 10856 ; UNKNOWN 6385 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6386 0x10860, // 10860..1087F; PALMYRENE 6387 0x10880, // 10880..1089E; NABATAEAN 6388 0x1089F, // 1089F..108A6; UNKNOWN 6389 0x108A7, // 108A7..108AF; NABATAEAN 6390 0x108B0, // 108B0..108DF; UNKNOWN 6391 0x108E0, // 108E0..108F2; HATRAN 6392 0x108F3, // 108F3 ; UNKNOWN 6393 0x108F4, // 108F4..108F5; HATRAN 6394 0x108F6, // 108F6..108FA; UNKNOWN 6395 0x108FB, // 108FB..108FF; HATRAN 6396 0x10900, // 10900..1091B; PHOENICIAN 6397 0x1091C, // 1091C..1091E; UNKNOWN 6398 0x1091F, // 1091F ; PHOENICIAN 6399 0x10920, // 10920..10939; LYDIAN 6400 0x1093A, // 1093A..1093E; UNKNOWN 6401 0x1093F, // 1093F ; LYDIAN 6402 0x10940, // 10940..1097F; UNKNOWN 6403 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6404 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6405 0x109B8, // 109B8..109BB; UNKNOWN 6406 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6407 0x109D0, // 109D0..109D1; UNKNOWN 6408 0x109D2, // 109D2..109FF; 
MEROITIC_CURSIVE 6409 0x10A00, // 10A00..10A03; KHAROSHTHI 6410 0x10A04, // 10A04 ; UNKNOWN 6411 0x10A05, // 10A05..10A06; KHAROSHTHI 6412 0x10A07, // 10A07..10A0B; UNKNOWN 6413 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6414 0x10A14, // 10A14 ; UNKNOWN 6415 0x10A15, // 10A15..10A17; KHAROSHTHI 6416 0x10A18, // 10A18 ; UNKNOWN 6417 0x10A19, // 10A19..10A35; KHAROSHTHI 6418 0x10A36, // 10A36..10A37; UNKNOWN 6419 0x10A38, // 10A38..10A3A; KHAROSHTHI 6420 0x10A3B, // 10A3B..10A3E; UNKNOWN 6421 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6422 0x10A49, // 10A49..10A4F; UNKNOWN 6423 0x10A50, // 10A50..10A58; KHAROSHTHI 6424 0x10A59, // 10A59..10A5F; UNKNOWN 6425 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6426 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6427 0x10AA0, // 10AA0..10ABF; UNKNOWN 6428 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6429 0x10AE7, // 10AE7..10AEA; UNKNOWN 6430 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6431 0x10AF7, // 10AF7..10AFF; UNKNOWN 6432 0x10B00, // 10B00..10B35; AVESTAN 6433 0x10B36, // 10B36..10B38; UNKNOWN 6434 0x10B39, // 10B39..10B3F; AVESTAN 6435 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6436 0x10B56, // 10B56..10B57; UNKNOWN 6437 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6438 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6439 0x10B73, // 10B73..10B77; UNKNOWN 6440 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6441 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6442 0x10B92, // 10B92..10B98; UNKNOWN 6443 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6444 0x10B9D, // 10B9D..10BA8; UNKNOWN 6445 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6446 0x10BB0, // 10BB0..10BFF; UNKNOWN 6447 0x10C00, // 10C00..10C48; OLD_TURKIC 6448 0x10C49, // 10C49..10C7F; UNKNOWN 6449 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6450 0x10CB3, // 10CB3..10CBF; UNKNOWN 6451 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6452 0x10CF3, // 10CF3..10CF9; UNKNOWN 6453 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6454 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6455 0x10D28, // 10D28..10D2F; UNKNOWN 6456 
0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6457 0x10D3A, // 10D3A..10E5F; UNKNOWN 6458 0x10E60, // 10E60..10E7E; ARABIC 6459 0x10E7F, // 10E7F ; UNKNOWN 6460 0x10E80, // 10E80..10EA9; YEZIDI 6461 0x10EAA, // 10EAA ; UNKNOWN 6462 0x10EAB, // 10EAB..10EAD; YEZIDI 6463 0x10EAE, // 10EAE..10EAF; UNKNOWN 6464 0x10EB0, // 10EB0..10EB1; YEZIDI 6465 0x10EB2, // 10EB2..10EFC; UNKNOWN 6466 0x10EFD, // 10EFD..10EFF; ARABIC 6467 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6468 0x10F28, // 10F28..10F2F; UNKNOWN 6469 0x10F30, // 10F30..10F59; SOGDIAN 6470 0x10F5A, // 10F5A..10F6F; UNKNOWN 6471 0x10F70, // 10F70..10F89; OLD_UYGHUR 6472 0x10F8A, // 10F8A..10FAF; UNKNOWN 6473 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6474 0x10FCC, // 10FCC..10FDF; UNKNOWN 6475 0x10FE0, // 10FE0..10FF6; ELYMAIC 6476 0x10FF7, // 10FF7..10FFF; UNKNOWN 6477 0x11000, // 11000..1104D; BRAHMI 6478 0x1104E, // 1104E..11051; UNKNOWN 6479 0x11052, // 11052..11075; BRAHMI 6480 0x11076, // 11076..1107E; UNKNOWN 6481 0x1107F, // 1107F ; BRAHMI 6482 0x11080, // 11080..110C2; KAITHI 6483 0x110C3, // 110C3..110CC; UNKNOWN 6484 0x110CD, // 110CD ; KAITHI 6485 0x110CE, // 110CE..110CF; UNKNOWN 6486 0x110D0, // 110D0..110E8; SORA_SOMPENG 6487 0x110E9, // 110E9..110EF; UNKNOWN 6488 0x110F0, // 110F0..110F9; SORA_SOMPENG 6489 0x110FA, // 110FA..110FF; UNKNOWN 6490 0x11100, // 11100..11134; CHAKMA 6491 0x11135, // 11135 ; UNKNOWN 6492 0x11136, // 11136..11147; CHAKMA 6493 0x11148, // 11148..1114F; UNKNOWN 6494 0x11150, // 11150..11176; MAHAJANI 6495 0x11177, // 11177..1117F; UNKNOWN 6496 0x11180, // 11180..111DF; SHARADA 6497 0x111E0, // 111E0 ; UNKNOWN 6498 0x111E1, // 111E1..111F4; SINHALA 6499 0x111F5, // 111F5..111FF; UNKNOWN 6500 0x11200, // 11200..11211; KHOJKI 6501 0x11212, // 11212 ; UNKNOWN 6502 0x11213, // 11213..11241; KHOJKI 6503 0x11242, // 11242..1127F; UNKNOWN 6504 0x11280, // 11280..11286; MULTANI 6505 0x11287, // 11287 ; UNKNOWN 6506 0x11288, // 11288 ; MULTANI 6507 0x11289, // 11289 ; UNKNOWN 6508 0x1128A, // 
1128A..1128D; MULTANI 6509 0x1128E, // 1128E ; UNKNOWN 6510 0x1128F, // 1128F..1129D; MULTANI 6511 0x1129E, // 1129E ; UNKNOWN 6512 0x1129F, // 1129F..112A9; MULTANI 6513 0x112AA, // 112AA..112AF; UNKNOWN 6514 0x112B0, // 112B0..112EA; KHUDAWADI 6515 0x112EB, // 112EB..112EF; UNKNOWN 6516 0x112F0, // 112F0..112F9; KHUDAWADI 6517 0x112FA, // 112FA..112FF; UNKNOWN 6518 0x11300, // 11300..11303; GRANTHA 6519 0x11304, // 11304 ; UNKNOWN 6520 0x11305, // 11305..1130C; GRANTHA 6521 0x1130D, // 1130D..1130E; UNKNOWN 6522 0x1130F, // 1130F..11310; GRANTHA 6523 0x11311, // 11311..11312; UNKNOWN 6524 0x11313, // 11313..11328; GRANTHA 6525 0x11329, // 11329 ; UNKNOWN 6526 0x1132A, // 1132A..11330; GRANTHA 6527 0x11331, // 11331 ; UNKNOWN 6528 0x11332, // 11332..11333; GRANTHA 6529 0x11334, // 11334 ; UNKNOWN 6530 0x11335, // 11335..11339; GRANTHA 6531 0x1133A, // 1133A ; UNKNOWN 6532 0x1133B, // 1133B ; INHERITED 6533 0x1133C, // 1133C..11344; GRANTHA 6534 0x11345, // 11345..11346; UNKNOWN 6535 0x11347, // 11347..11348; GRANTHA 6536 0x11349, // 11349..1134A; UNKNOWN 6537 0x1134B, // 1134B..1134D; GRANTHA 6538 0x1134E, // 1134E..1134F; UNKNOWN 6539 0x11350, // 11350 ; GRANTHA 6540 0x11351, // 11351..11356; UNKNOWN 6541 0x11357, // 11357 ; GRANTHA 6542 0x11358, // 11358..1135C; UNKNOWN 6543 0x1135D, // 1135D..11363; GRANTHA 6544 0x11364, // 11364..11365; UNKNOWN 6545 0x11366, // 11366..1136C; GRANTHA 6546 0x1136D, // 1136D..1136F; UNKNOWN 6547 0x11370, // 11370..11374; GRANTHA 6548 0x11375, // 11375..113FF; UNKNOWN 6549 0x11400, // 11400..1145B; NEWA 6550 0x1145C, // 1145C ; UNKNOWN 6551 0x1145D, // 1145D..11461; NEWA 6552 0x11462, // 11462..1147F; UNKNOWN 6553 0x11480, // 11480..114C7; TIRHUTA 6554 0x114C8, // 114C8..114CF; UNKNOWN 6555 0x114D0, // 114D0..114D9; TIRHUTA 6556 0x114DA, // 114DA..1157F; UNKNOWN 6557 0x11580, // 11580..115B5; SIDDHAM 6558 0x115B6, // 115B6..115B7; UNKNOWN 6559 0x115B8, // 115B8..115DD; SIDDHAM 6560 0x115DE, // 115DE..115FF; UNKNOWN 6561 0x11600, 
// 11600..11644; MODI 6562 0x11645, // 11645..1164F; UNKNOWN 6563 0x11650, // 11650..11659; MODI 6564 0x1165A, // 1165A..1165F; UNKNOWN 6565 0x11660, // 11660..1166C; MONGOLIAN 6566 0x1166D, // 1166D..1167F; UNKNOWN 6567 0x11680, // 11680..116B9; TAKRI 6568 0x116BA, // 116BA..116BF; UNKNOWN 6569 0x116C0, // 116C0..116C9; TAKRI 6570 0x116CA, // 116CA..116FF; UNKNOWN 6571 0x11700, // 11700..1171A; AHOM 6572 0x1171B, // 1171B..1171C; UNKNOWN 6573 0x1171D, // 1171D..1172B; AHOM 6574 0x1172C, // 1172C..1172F; UNKNOWN 6575 0x11730, // 11730..11746; AHOM 6576 0x11747, // 11747..117FF; UNKNOWN 6577 0x11800, // 11800..1183B; DOGRA 6578 0x1183C, // 1183C..1189F; UNKNOWN 6579 0x118A0, // 118A0..118F2; WARANG_CITI 6580 0x118F3, // 118F3..118FE; UNKNOWN 6581 0x118FF, // 118FF ; WARANG_CITI 6582 0x11900, // 11900..11906; DIVES_AKURU 6583 0x11907, // 11907..11908; UNKNOWN 6584 0x11909, // 11909 ; DIVES_AKURU 6585 0x1190A, // 1190A..1190B; UNKNOWN 6586 0x1190C, // 1190C..11913; DIVES_AKURU 6587 0x11914, // 11914 ; UNKNOWN 6588 0x11915, // 11915..11916; DIVES_AKURU 6589 0x11917, // 11917 ; UNKNOWN 6590 0x11918, // 11918..11935; DIVES_AKURU 6591 0x11936, // 11936 ; UNKNOWN 6592 0x11937, // 11937..11938; DIVES_AKURU 6593 0x11939, // 11939..1193A; UNKNOWN 6594 0x1193B, // 1193B..11946; DIVES_AKURU 6595 0x11947, // 11947..1194F; UNKNOWN 6596 0x11950, // 11950..11959; DIVES_AKURU 6597 0x1195A, // 1195A..1199F; UNKNOWN 6598 0x119A0, // 119A0..119A7; NANDINAGARI 6599 0x119A8, // 119A8..119A9; UNKNOWN 6600 0x119AA, // 119AA..119D7; NANDINAGARI 6601 0x119D8, // 119D8..119D9; UNKNOWN 6602 0x119DA, // 119DA..119E4; NANDINAGARI 6603 0x119E5, // 119E5..119FF; UNKNOWN 6604 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6605 0x11A48, // 11A48..11A4F; UNKNOWN 6606 0x11A50, // 11A50..11AA2; SOYOMBO 6607 0x11AA3, // 11AA3..11AAF; UNKNOWN 6608 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6609 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6610 0x11AF9, // 11AF9..11AFF; UNKNOWN 6611 0x11B00, // 11B00..11B09; 
DEVANAGARI 6612 0x11B0A, // 11B0A..11BFF; UNKNOWN 6613 0x11C00, // 11C00..11C08; BHAIKSUKI 6614 0x11C09, // 11C09 ; UNKNOWN 6615 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6616 0x11C37, // 11C37 ; UNKNOWN 6617 0x11C38, // 11C38..11C45; BHAIKSUKI 6618 0x11C46, // 11C46..11C4F; UNKNOWN 6619 0x11C50, // 11C50..11C6C; BHAIKSUKI 6620 0x11C6D, // 11C6D..11C6F; UNKNOWN 6621 0x11C70, // 11C70..11C8F; MARCHEN 6622 0x11C90, // 11C90..11C91; UNKNOWN 6623 0x11C92, // 11C92..11CA7; MARCHEN 6624 0x11CA8, // 11CA8 ; UNKNOWN 6625 0x11CA9, // 11CA9..11CB6; MARCHEN 6626 0x11CB7, // 11CB7..11CFF; UNKNOWN 6627 0x11D00, // 11D00..11D06; MASARAM_GONDI 6628 0x11D07, // 11D07 ; UNKNOWN 6629 0x11D08, // 11D08..11D09; MASARAM_GONDI 6630 0x11D0A, // 11D0A ; UNKNOWN 6631 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6632 0x11D37, // 11D37..11D39; UNKNOWN 6633 0x11D3A, // 11D3A ; MASARAM_GONDI 6634 0x11D3B, // 11D3B ; UNKNOWN 6635 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6636 0x11D3E, // 11D3E ; UNKNOWN 6637 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6638 0x11D48, // 11D48..11D4F; UNKNOWN 6639 0x11D50, // 11D50..11D59; MASARAM_GONDI 6640 0x11D5A, // 11D5A..11D5F; UNKNOWN 6641 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6642 0x11D66, // 11D66 ; UNKNOWN 6643 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6644 0x11D69, // 11D69 ; UNKNOWN 6645 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6646 0x11D8F, // 11D8F ; UNKNOWN 6647 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6648 0x11D92, // 11D92 ; UNKNOWN 6649 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6650 0x11D99, // 11D99..11D9F; UNKNOWN 6651 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6652 0x11DAA, // 11DAA..11EDF; UNKNOWN 6653 0x11EE0, // 11EE0..11EF8; MAKASAR 6654 0x11EF9, // 11EF9..11EFF; UNKNOWN 6655 0x11F00, // 11F00..11F10; KAWI 6656 0x11F11, // 11F11 ; UNKNOWN 6657 0x11F12, // 11F12..11F3A; KAWI 6658 0x11F3B, // 11F3B..11F3D; UNKNOWN 6659 0x11F3E, // 11F3E..11F59; KAWI 6660 0x11F5A, // 11F5A..11FAF; UNKNOWN 6661 0x11FB0, // 11FB0 ; LISU 6662 0x11FB1, // 11FB1..11FBF; UNKNOWN 6663 
0x11FC0, // 11FC0..11FF1; TAMIL 6664 0x11FF2, // 11FF2..11FFE; UNKNOWN 6665 0x11FFF, // 11FFF ; TAMIL 6666 0x12000, // 12000..12399; CUNEIFORM 6667 0x1239A, // 1239A..123FF; UNKNOWN 6668 0x12400, // 12400..1246E; CUNEIFORM 6669 0x1246F, // 1246F ; UNKNOWN 6670 0x12470, // 12470..12474; CUNEIFORM 6671 0x12475, // 12475..1247F; UNKNOWN 6672 0x12480, // 12480..12543; CUNEIFORM 6673 0x12544, // 12544..12F8F; UNKNOWN 6674 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6675 0x12FF3, // 12FF3..12FFF; UNKNOWN 6676 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6677 0x13456, // 13456..143FF; UNKNOWN 6678 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6679 0x14647, // 14647..167FF; UNKNOWN 6680 0x16800, // 16800..16A38; BAMUM 6681 0x16A39, // 16A39..16A3F; UNKNOWN 6682 0x16A40, // 16A40..16A5E; MRO 6683 0x16A5F, // 16A5F ; UNKNOWN 6684 0x16A60, // 16A60..16A69; MRO 6685 0x16A6A, // 16A6A..16A6D; UNKNOWN 6686 0x16A6E, // 16A6E..16A6F; MRO 6687 0x16A70, // 16A70..16ABE; TANGSA 6688 0x16ABF, // 16ABF ; UNKNOWN 6689 0x16AC0, // 16AC0..16AC9; TANGSA 6690 0x16ACA, // 16ACA..16ACF; UNKNOWN 6691 0x16AD0, // 16AD0..16AED; BASSA_VAH 6692 0x16AEE, // 16AEE..16AEF; UNKNOWN 6693 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6694 0x16AF6, // 16AF6..16AFF; UNKNOWN 6695 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6696 0x16B46, // 16B46..16B4F; UNKNOWN 6697 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6698 0x16B5A, // 16B5A ; UNKNOWN 6699 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6700 0x16B62, // 16B62 ; UNKNOWN 6701 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6702 0x16B78, // 16B78..16B7C; UNKNOWN 6703 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6704 0x16B90, // 16B90..16E3F; UNKNOWN 6705 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6706 0x16E9B, // 16E9B..16EFF; UNKNOWN 6707 0x16F00, // 16F00..16F4A; MIAO 6708 0x16F4B, // 16F4B..16F4E; UNKNOWN 6709 0x16F4F, // 16F4F..16F87; MIAO 6710 0x16F88, // 16F88..16F8E; UNKNOWN 6711 0x16F8F, // 16F8F..16F9F; MIAO 6712 0x16FA0, // 16FA0..16FDF; UNKNOWN 6713 0x16FE0, // 16FE0 ; TANGUT 6714 0x16FE1, 
// 16FE1 ; NUSHU 6715 0x16FE2, // 16FE2..16FE3; HAN 6716 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6717 0x16FE5, // 16FE5..16FEF; UNKNOWN 6718 0x16FF0, // 16FF0..16FF1; HAN 6719 0x16FF2, // 16FF2..16FFF; UNKNOWN 6720 0x17000, // 17000..187F7; TANGUT 6721 0x187F8, // 187F8..187FF; UNKNOWN 6722 0x18800, // 18800..18AFF; TANGUT 6723 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6724 0x18CD6, // 18CD6..18CFF; UNKNOWN 6725 0x18D00, // 18D00..18D08; TANGUT 6726 0x18D09, // 18D09..1AFEF; UNKNOWN 6727 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 6728 0x1AFF4, // 1AFF4 ; UNKNOWN 6729 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 6730 0x1AFFC, // 1AFFC ; UNKNOWN 6731 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 6732 0x1AFFF, // 1AFFF ; UNKNOWN 6733 0x1B000, // 1B000 ; KATAKANA 6734 0x1B001, // 1B001..1B11F; HIRAGANA 6735 0x1B120, // 1B120..1B122; KATAKANA 6736 0x1B123, // 1B123..1B131; UNKNOWN 6737 0x1B132, // 1B132 ; HIRAGANA 6738 0x1B133, // 1B133..1B14F; UNKNOWN 6739 0x1B150, // 1B150..1B152; HIRAGANA 6740 0x1B153, // 1B153..1B154; UNKNOWN 6741 0x1B155, // 1B155 ; KATAKANA 6742 0x1B156, // 1B156..1B163; UNKNOWN 6743 0x1B164, // 1B164..1B167; KATAKANA 6744 0x1B168, // 1B168..1B16F; UNKNOWN 6745 0x1B170, // 1B170..1B2FB; NUSHU 6746 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6747 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6748 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6749 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6750 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6751 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6752 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6753 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6754 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6755 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6756 0x1BCA0, // 1BCA0..1BCA3; COMMON 6757 0x1BCA4, // 1BCA4..1CEFF; UNKNOWN 6758 0x1CF00, // 1CF00..1CF2D; INHERITED 6759 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 6760 0x1CF30, // 1CF30..1CF46; INHERITED 6761 0x1CF47, // 1CF47..1CF4F; UNKNOWN 6762 0x1CF50, // 1CF50..1CFC3; COMMON 6763 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 6764 0x1D000, // 1D000..1D0F5; COMMON 6765 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6766 
0x1D100, // 1D100..1D126; COMMON 6767 0x1D127, // 1D127..1D128; UNKNOWN 6768 0x1D129, // 1D129..1D166; COMMON 6769 0x1D167, // 1D167..1D169; INHERITED 6770 0x1D16A, // 1D16A..1D17A; COMMON 6771 0x1D17B, // 1D17B..1D182; INHERITED 6772 0x1D183, // 1D183..1D184; COMMON 6773 0x1D185, // 1D185..1D18B; INHERITED 6774 0x1D18C, // 1D18C..1D1A9; COMMON 6775 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6776 0x1D1AE, // 1D1AE..1D1EA; COMMON 6777 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 6778 0x1D200, // 1D200..1D245; GREEK 6779 0x1D246, // 1D246..1D2BF; UNKNOWN 6780 0x1D2C0, // 1D2C0..1D2D3; COMMON 6781 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 6782 0x1D2E0, // 1D2E0..1D2F3; COMMON 6783 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6784 0x1D300, // 1D300..1D356; COMMON 6785 0x1D357, // 1D357..1D35F; UNKNOWN 6786 0x1D360, // 1D360..1D378; COMMON 6787 0x1D379, // 1D379..1D3FF; UNKNOWN 6788 0x1D400, // 1D400..1D454; COMMON 6789 0x1D455, // 1D455 ; UNKNOWN 6790 0x1D456, // 1D456..1D49C; COMMON 6791 0x1D49D, // 1D49D ; UNKNOWN 6792 0x1D49E, // 1D49E..1D49F; COMMON 6793 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6794 0x1D4A2, // 1D4A2 ; COMMON 6795 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6796 0x1D4A5, // 1D4A5..1D4A6; COMMON 6797 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6798 0x1D4A9, // 1D4A9..1D4AC; COMMON 6799 0x1D4AD, // 1D4AD ; UNKNOWN 6800 0x1D4AE, // 1D4AE..1D4B9; COMMON 6801 0x1D4BA, // 1D4BA ; UNKNOWN 6802 0x1D4BB, // 1D4BB ; COMMON 6803 0x1D4BC, // 1D4BC ; UNKNOWN 6804 0x1D4BD, // 1D4BD..1D4C3; COMMON 6805 0x1D4C4, // 1D4C4 ; UNKNOWN 6806 0x1D4C5, // 1D4C5..1D505; COMMON 6807 0x1D506, // 1D506 ; UNKNOWN 6808 0x1D507, // 1D507..1D50A; COMMON 6809 0x1D50B, // 1D50B..1D50C; UNKNOWN 6810 0x1D50D, // 1D50D..1D514; COMMON 6811 0x1D515, // 1D515 ; UNKNOWN 6812 0x1D516, // 1D516..1D51C; COMMON 6813 0x1D51D, // 1D51D ; UNKNOWN 6814 0x1D51E, // 1D51E..1D539; COMMON 6815 0x1D53A, // 1D53A ; UNKNOWN 6816 0x1D53B, // 1D53B..1D53E; COMMON 6817 0x1D53F, // 1D53F ; UNKNOWN 6818 0x1D540, // 1D540..1D544; COMMON 6819 0x1D545, // 1D545 ; UNKNOWN 
6820 0x1D546, // 1D546 ; COMMON 6821 0x1D547, // 1D547..1D549; UNKNOWN 6822 0x1D54A, // 1D54A..1D550; COMMON 6823 0x1D551, // 1D551 ; UNKNOWN 6824 0x1D552, // 1D552..1D6A5; COMMON 6825 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6826 0x1D6A8, // 1D6A8..1D7CB; COMMON 6827 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6828 0x1D7CE, // 1D7CE..1D7FF; COMMON 6829 0x1D800, // 1D800..1DA8B; SIGNWRITING 6830 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6831 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6832 0x1DAA0, // 1DAA0 ; UNKNOWN 6833 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6834 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN 6835 0x1DF00, // 1DF00..1DF1E; LATIN 6836 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 6837 0x1DF25, // 1DF25..1DF2A; LATIN 6838 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 6839 0x1E000, // 1E000..1E006; GLAGOLITIC 6840 0x1E007, // 1E007 ; UNKNOWN 6841 0x1E008, // 1E008..1E018; GLAGOLITIC 6842 0x1E019, // 1E019..1E01A; UNKNOWN 6843 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6844 0x1E022, // 1E022 ; UNKNOWN 6845 0x1E023, // 1E023..1E024; GLAGOLITIC 6846 0x1E025, // 1E025 ; UNKNOWN 6847 0x1E026, // 1E026..1E02A; GLAGOLITIC 6848 0x1E02B, // 1E02B..1E02F; UNKNOWN 6849 0x1E030, // 1E030..1E06D; CYRILLIC 6850 0x1E06E, // 1E06E..1E08E; UNKNOWN 6851 0x1E08F, // 1E08F ; CYRILLIC 6852 0x1E090, // 1E090..1E0FF; UNKNOWN 6853 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 6854 0x1E12D, // 1E12D..1E12F; UNKNOWN 6855 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 6856 0x1E13E, // 1E13E..1E13F; UNKNOWN 6857 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 6858 0x1E14A, // 1E14A..1E14D; UNKNOWN 6859 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 6860 0x1E150, // 1E150..1E28F; UNKNOWN 6861 0x1E290, // 1E290..1E2AE; TOTO 6862 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 6863 0x1E2C0, // 1E2C0..1E2F9; WANCHO 6864 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 6865 0x1E2FF, // 1E2FF ; WANCHO 6866 0x1E300, // 1E300..1E4CF; UNKNOWN 6867 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 6868 0x1E4FA, // 1E4FA..1E7DF; UNKNOWN 6869 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 6870 0x1E7E7, // 
1E7E7 ; UNKNOWN 6871 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 6872 0x1E7EC, // 1E7EC ; UNKNOWN 6873 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 6874 0x1E7EF, // 1E7EF ; UNKNOWN 6875 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 6876 0x1E7FF, // 1E7FF ; UNKNOWN 6877 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6878 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6879 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6880 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6881 0x1E900, // 1E900..1E94B; ADLAM 6882 0x1E94C, // 1E94C..1E94F; UNKNOWN 6883 0x1E950, // 1E950..1E959; ADLAM 6884 0x1E95A, // 1E95A..1E95D; UNKNOWN 6885 0x1E95E, // 1E95E..1E95F; ADLAM 6886 0x1E960, // 1E960..1EC70; UNKNOWN 6887 0x1EC71, // 1EC71..1ECB4; COMMON 6888 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 6889 0x1ED01, // 1ED01..1ED3D; COMMON 6890 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 6891 0x1EE00, // 1EE00..1EE03; ARABIC 6892 0x1EE04, // 1EE04 ; UNKNOWN 6893 0x1EE05, // 1EE05..1EE1F; ARABIC 6894 0x1EE20, // 1EE20 ; UNKNOWN 6895 0x1EE21, // 1EE21..1EE22; ARABIC 6896 0x1EE23, // 1EE23 ; UNKNOWN 6897 0x1EE24, // 1EE24 ; ARABIC 6898 0x1EE25, // 1EE25..1EE26; UNKNOWN 6899 0x1EE27, // 1EE27 ; ARABIC 6900 0x1EE28, // 1EE28 ; UNKNOWN 6901 0x1EE29, // 1EE29..1EE32; ARABIC 6902 0x1EE33, // 1EE33 ; UNKNOWN 6903 0x1EE34, // 1EE34..1EE37; ARABIC 6904 0x1EE38, // 1EE38 ; UNKNOWN 6905 0x1EE39, // 1EE39 ; ARABIC 6906 0x1EE3A, // 1EE3A ; UNKNOWN 6907 0x1EE3B, // 1EE3B ; ARABIC 6908 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6909 0x1EE42, // 1EE42 ; ARABIC 6910 0x1EE43, // 1EE43..1EE46; UNKNOWN 6911 0x1EE47, // 1EE47 ; ARABIC 6912 0x1EE48, // 1EE48 ; UNKNOWN 6913 0x1EE49, // 1EE49 ; ARABIC 6914 0x1EE4A, // 1EE4A ; UNKNOWN 6915 0x1EE4B, // 1EE4B ; ARABIC 6916 0x1EE4C, // 1EE4C ; UNKNOWN 6917 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6918 0x1EE50, // 1EE50 ; UNKNOWN 6919 0x1EE51, // 1EE51..1EE52; ARABIC 6920 0x1EE53, // 1EE53 ; UNKNOWN 6921 0x1EE54, // 1EE54 ; ARABIC 6922 0x1EE55, // 1EE55..1EE56; UNKNOWN 6923 0x1EE57, // 1EE57 ; ARABIC 6924 0x1EE58, // 1EE58 ; UNKNOWN 6925 0x1EE59, // 1EE59 ; ARABIC 6926 
0x1EE5A, // 1EE5A ; UNKNOWN 6927 0x1EE5B, // 1EE5B ; ARABIC 6928 0x1EE5C, // 1EE5C ; UNKNOWN 6929 0x1EE5D, // 1EE5D ; ARABIC 6930 0x1EE5E, // 1EE5E ; UNKNOWN 6931 0x1EE5F, // 1EE5F ; ARABIC 6932 0x1EE60, // 1EE60 ; UNKNOWN 6933 0x1EE61, // 1EE61..1EE62; ARABIC 6934 0x1EE63, // 1EE63 ; UNKNOWN 6935 0x1EE64, // 1EE64 ; ARABIC 6936 0x1EE65, // 1EE65..1EE66; UNKNOWN 6937 0x1EE67, // 1EE67..1EE6A; ARABIC 6938 0x1EE6B, // 1EE6B ; UNKNOWN 6939 0x1EE6C, // 1EE6C..1EE72; ARABIC 6940 0x1EE73, // 1EE73 ; UNKNOWN 6941 0x1EE74, // 1EE74..1EE77; ARABIC 6942 0x1EE78, // 1EE78 ; UNKNOWN 6943 0x1EE79, // 1EE79..1EE7C; ARABIC 6944 0x1EE7D, // 1EE7D ; UNKNOWN 6945 0x1EE7E, // 1EE7E ; ARABIC 6946 0x1EE7F, // 1EE7F ; UNKNOWN 6947 0x1EE80, // 1EE80..1EE89; ARABIC 6948 0x1EE8A, // 1EE8A ; UNKNOWN 6949 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6950 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6951 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6952 0x1EEA4, // 1EEA4 ; UNKNOWN 6953 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6954 0x1EEAA, // 1EEAA ; UNKNOWN 6955 0x1EEAB, // 1EEAB..1EEBB; ARABIC 6956 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 6957 0x1EEF0, // 1EEF0..1EEF1; ARABIC 6958 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 6959 0x1F000, // 1F000..1F02B; COMMON 6960 0x1F02C, // 1F02C..1F02F; UNKNOWN 6961 0x1F030, // 1F030..1F093; COMMON 6962 0x1F094, // 1F094..1F09F; UNKNOWN 6963 0x1F0A0, // 1F0A0..1F0AE; COMMON 6964 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 6965 0x1F0B1, // 1F0B1..1F0BF; COMMON 6966 0x1F0C0, // 1F0C0 ; UNKNOWN 6967 0x1F0C1, // 1F0C1..1F0CF; COMMON 6968 0x1F0D0, // 1F0D0 ; UNKNOWN 6969 0x1F0D1, // 1F0D1..1F0F5; COMMON 6970 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 6971 0x1F100, // 1F100..1F1AD; COMMON 6972 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 6973 0x1F1E6, // 1F1E6..1F1FF; COMMON 6974 0x1F200, // 1F200 ; HIRAGANA 6975 0x1F201, // 1F201..1F202; COMMON 6976 0x1F203, // 1F203..1F20F; UNKNOWN 6977 0x1F210, // 1F210..1F23B; COMMON 6978 0x1F23C, // 1F23C..1F23F; UNKNOWN 6979 0x1F240, // 1F240..1F248; COMMON 6980 0x1F249, // 1F249..1F24F; UNKNOWN 6981 
0x1F250, // 1F250..1F251; COMMON 6982 0x1F252, // 1F252..1F25F; UNKNOWN 6983 0x1F260, // 1F260..1F265; COMMON 6984 0x1F266, // 1F266..1F2FF; UNKNOWN 6985 0x1F300, // 1F300..1F6D7; COMMON 6986 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN 6987 0x1F6DC, // 1F6DC..1F6EC; COMMON 6988 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 6989 0x1F6F0, // 1F6F0..1F6FC; COMMON 6990 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 6991 0x1F700, // 1F700..1F776; COMMON 6992 0x1F777, // 1F777..1F77A; UNKNOWN 6993 0x1F77B, // 1F77B..1F7D9; COMMON 6994 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 6995 0x1F7E0, // 1F7E0..1F7EB; COMMON 6996 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 6997 0x1F7F0, // 1F7F0 ; COMMON 6998 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 6999 0x1F800, // 1F800..1F80B; COMMON 7000 0x1F80C, // 1F80C..1F80F; UNKNOWN 7001 0x1F810, // 1F810..1F847; COMMON 7002 0x1F848, // 1F848..1F84F; UNKNOWN 7003 0x1F850, // 1F850..1F859; COMMON 7004 0x1F85A, // 1F85A..1F85F; UNKNOWN 7005 0x1F860, // 1F860..1F887; COMMON 7006 0x1F888, // 1F888..1F88F; UNKNOWN 7007 0x1F890, // 1F890..1F8AD; COMMON 7008 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7009 0x1F8B0, // 1F8B0..1F8B1; COMMON 7010 0x1F8B2, // 1F8B2..1F8FF; UNKNOWN 7011 0x1F900, // 1F900..1FA53; COMMON 7012 0x1FA54, // 1FA54..1FA5F; UNKNOWN 7013 0x1FA60, // 1FA60..1FA6D; COMMON 7014 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7015 0x1FA70, // 1FA70..1FA7C; COMMON 7016 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7017 0x1FA80, // 1FA80..1FA88; COMMON 7018 0x1FA89, // 1FA89..1FA8F; UNKNOWN 7019 0x1FA90, // 1FA90..1FABD; COMMON 7020 0x1FABE, // 1FABE ; UNKNOWN 7021 0x1FABF, // 1FABF..1FAC5; COMMON 7022 0x1FAC6, // 1FAC6..1FACD; UNKNOWN 7023 0x1FACE, // 1FACE..1FADB; COMMON 7024 0x1FADC, // 1FADC..1FADF; UNKNOWN 7025 0x1FAE0, // 1FAE0..1FAE8; COMMON 7026 0x1FAE9, // 1FAE9..1FAEF; UNKNOWN 7027 0x1FAF0, // 1FAF0..1FAF8; COMMON 7028 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7029 0x1FB00, // 1FB00..1FB92; COMMON 7030 0x1FB93, // 1FB93 ; UNKNOWN 7031 0x1FB94, // 1FB94..1FBCA; COMMON 7032 0x1FBCB, // 1FBCB..1FBEF; UNKNOWN 7033 0x1FBF0, // 
1FBF0..1FBF9; COMMON 7034 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 7035 0x20000, // 20000..2A6DF; HAN 7036 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7037 0x2A700, // 2A700..2B739; HAN 7038 0x2B73A, // 2B73A..2B73F; UNKNOWN 7039 0x2B740, // 2B740..2B81D; HAN 7040 0x2B81E, // 2B81E..2B81F; UNKNOWN 7041 0x2B820, // 2B820..2CEA1; HAN 7042 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 7043 0x2CEB0, // 2CEB0..2EBE0; HAN 7044 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN 7045 0x2EBF0, // 2EBF0..2EE5D; HAN 7046 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN 7047 0x2F800, // 2F800..2FA1D; HAN 7048 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7049 0x30000, // 30000..3134A; HAN 7050 0x3134B, // 3134B..3134F; UNKNOWN 7051 0x31350, // 31350..323AF; HAN 7052 0x323B0, // 323B0..E0000; UNKNOWN 7053 0xE0001, // E0001 ; COMMON 7054 0xE0002, // E0002..E001F; UNKNOWN 7055 0xE0020, // E0020..E007F; COMMON 7056 0xE0080, // E0080..E00FF; UNKNOWN 7057 0xE0100, // E0100..E01EF; INHERITED 7058 0xE01F0, // E01F0..10FFFF; UNKNOWN 7059 }; 7060 7061 private static final UnicodeScript[] scripts = { 7062 COMMON, // 0000..0040 7063 LATIN, // 0041..005A 7064 COMMON, // 005B..0060 7065 LATIN, // 0061..007A 7066 COMMON, // 007B..00A9 7067 LATIN, // 00AA 7068 COMMON, // 00AB..00B9 7069 LATIN, // 00BA 7070 COMMON, // 00BB..00BF 7071 LATIN, // 00C0..00D6 7072 COMMON, // 00D7 7073 LATIN, // 00D8..00F6 7074 COMMON, // 00F7 7075 LATIN, // 00F8..02B8 7076 COMMON, // 02B9..02DF 7077 LATIN, // 02E0..02E4 7078 COMMON, // 02E5..02E9 7079 BOPOMOFO, // 02EA..02EB 7080 COMMON, // 02EC..02FF 7081 INHERITED, // 0300..036F 7082 GREEK, // 0370..0373 7083 COMMON, // 0374 7084 GREEK, // 0375..0377 7085 UNKNOWN, // 0378..0379 7086 GREEK, // 037A..037D 7087 COMMON, // 037E 7088 GREEK, // 037F 7089 UNKNOWN, // 0380..0383 7090 GREEK, // 0384 7091 COMMON, // 0385 7092 GREEK, // 0386 7093 COMMON, // 0387 7094 GREEK, // 0388..038A 7095 UNKNOWN, // 038B 7096 GREEK, // 038C 7097 UNKNOWN, // 038D 7098 GREEK, // 038E..03A1 7099 UNKNOWN, // 03A2 7100 GREEK, // 03A3..03E1 7101 COPTIC, // 
03E2..03EF 7102 GREEK, // 03F0..03FF 7103 CYRILLIC, // 0400..0484 7104 INHERITED, // 0485..0486 7105 CYRILLIC, // 0487..052F 7106 UNKNOWN, // 0530 7107 ARMENIAN, // 0531..0556 7108 UNKNOWN, // 0557..0558 7109 ARMENIAN, // 0559..058A 7110 UNKNOWN, // 058B..058C 7111 ARMENIAN, // 058D..058F 7112 UNKNOWN, // 0590 7113 HEBREW, // 0591..05C7 7114 UNKNOWN, // 05C8..05CF 7115 HEBREW, // 05D0..05EA 7116 UNKNOWN, // 05EB..05EE 7117 HEBREW, // 05EF..05F4 7118 UNKNOWN, // 05F5..05FF 7119 ARABIC, // 0600..0604 7120 COMMON, // 0605 7121 ARABIC, // 0606..060B 7122 COMMON, // 060C 7123 ARABIC, // 060D..061A 7124 COMMON, // 061B 7125 ARABIC, // 061C..061E 7126 COMMON, // 061F 7127 ARABIC, // 0620..063F 7128 COMMON, // 0640 7129 ARABIC, // 0641..064A 7130 INHERITED, // 064B..0655 7131 ARABIC, // 0656..066F 7132 INHERITED, // 0670 7133 ARABIC, // 0671..06DC 7134 COMMON, // 06DD 7135 ARABIC, // 06DE..06FF 7136 SYRIAC, // 0700..070D 7137 UNKNOWN, // 070E 7138 SYRIAC, // 070F..074A 7139 UNKNOWN, // 074B..074C 7140 SYRIAC, // 074D..074F 7141 ARABIC, // 0750..077F 7142 THAANA, // 0780..07B1 7143 UNKNOWN, // 07B2..07BF 7144 NKO, // 07C0..07FA 7145 UNKNOWN, // 07FB..07FC 7146 NKO, // 07FD..07FF 7147 SAMARITAN, // 0800..082D 7148 UNKNOWN, // 082E..082F 7149 SAMARITAN, // 0830..083E 7150 UNKNOWN, // 083F 7151 MANDAIC, // 0840..085B 7152 UNKNOWN, // 085C..085D 7153 MANDAIC, // 085E 7154 UNKNOWN, // 085F 7155 SYRIAC, // 0860..086A 7156 UNKNOWN, // 086B..086F 7157 ARABIC, // 0870..088E 7158 UNKNOWN, // 088F 7159 ARABIC, // 0890..0891 7160 UNKNOWN, // 0892..0897 7161 ARABIC, // 0898..08E1 7162 COMMON, // 08E2 7163 ARABIC, // 08E3..08FF 7164 DEVANAGARI, // 0900..0950 7165 INHERITED, // 0951..0954 7166 DEVANAGARI, // 0955..0963 7167 COMMON, // 0964..0965 7168 DEVANAGARI, // 0966..097F 7169 BENGALI, // 0980..0983 7170 UNKNOWN, // 0984 7171 BENGALI, // 0985..098C 7172 UNKNOWN, // 098D..098E 7173 BENGALI, // 098F..0990 7174 UNKNOWN, // 0991..0992 7175 BENGALI, // 0993..09A8 7176 UNKNOWN, // 09A9 7177 
BENGALI, // 09AA..09B0 7178 UNKNOWN, // 09B1 7179 BENGALI, // 09B2 7180 UNKNOWN, // 09B3..09B5 7181 BENGALI, // 09B6..09B9 7182 UNKNOWN, // 09BA..09BB 7183 BENGALI, // 09BC..09C4 7184 UNKNOWN, // 09C5..09C6 7185 BENGALI, // 09C7..09C8 7186 UNKNOWN, // 09C9..09CA 7187 BENGALI, // 09CB..09CE 7188 UNKNOWN, // 09CF..09D6 7189 BENGALI, // 09D7 7190 UNKNOWN, // 09D8..09DB 7191 BENGALI, // 09DC..09DD 7192 UNKNOWN, // 09DE 7193 BENGALI, // 09DF..09E3 7194 UNKNOWN, // 09E4..09E5 7195 BENGALI, // 09E6..09FE 7196 UNKNOWN, // 09FF..0A00 7197 GURMUKHI, // 0A01..0A03 7198 UNKNOWN, // 0A04 7199 GURMUKHI, // 0A05..0A0A 7200 UNKNOWN, // 0A0B..0A0E 7201 GURMUKHI, // 0A0F..0A10 7202 UNKNOWN, // 0A11..0A12 7203 GURMUKHI, // 0A13..0A28 7204 UNKNOWN, // 0A29 7205 GURMUKHI, // 0A2A..0A30 7206 UNKNOWN, // 0A31 7207 GURMUKHI, // 0A32..0A33 7208 UNKNOWN, // 0A34 7209 GURMUKHI, // 0A35..0A36 7210 UNKNOWN, // 0A37 7211 GURMUKHI, // 0A38..0A39 7212 UNKNOWN, // 0A3A..0A3B 7213 GURMUKHI, // 0A3C 7214 UNKNOWN, // 0A3D 7215 GURMUKHI, // 0A3E..0A42 7216 UNKNOWN, // 0A43..0A46 7217 GURMUKHI, // 0A47..0A48 7218 UNKNOWN, // 0A49..0A4A 7219 GURMUKHI, // 0A4B..0A4D 7220 UNKNOWN, // 0A4E..0A50 7221 GURMUKHI, // 0A51 7222 UNKNOWN, // 0A52..0A58 7223 GURMUKHI, // 0A59..0A5C 7224 UNKNOWN, // 0A5D 7225 GURMUKHI, // 0A5E 7226 UNKNOWN, // 0A5F..0A65 7227 GURMUKHI, // 0A66..0A76 7228 UNKNOWN, // 0A77..0A80 7229 GUJARATI, // 0A81..0A83 7230 UNKNOWN, // 0A84 7231 GUJARATI, // 0A85..0A8D 7232 UNKNOWN, // 0A8E 7233 GUJARATI, // 0A8F..0A91 7234 UNKNOWN, // 0A92 7235 GUJARATI, // 0A93..0AA8 7236 UNKNOWN, // 0AA9 7237 GUJARATI, // 0AAA..0AB0 7238 UNKNOWN, // 0AB1 7239 GUJARATI, // 0AB2..0AB3 7240 UNKNOWN, // 0AB4 7241 GUJARATI, // 0AB5..0AB9 7242 UNKNOWN, // 0ABA..0ABB 7243 GUJARATI, // 0ABC..0AC5 7244 UNKNOWN, // 0AC6 7245 GUJARATI, // 0AC7..0AC9 7246 UNKNOWN, // 0ACA 7247 GUJARATI, // 0ACB..0ACD 7248 UNKNOWN, // 0ACE..0ACF 7249 GUJARATI, // 0AD0 7250 UNKNOWN, // 0AD1..0ADF 7251 GUJARATI, // 0AE0..0AE3 7252 UNKNOWN, 
// 0AE4..0AE5 7253 GUJARATI, // 0AE6..0AF1 7254 UNKNOWN, // 0AF2..0AF8 7255 GUJARATI, // 0AF9..0AFF 7256 UNKNOWN, // 0B00 7257 ORIYA, // 0B01..0B03 7258 UNKNOWN, // 0B04 7259 ORIYA, // 0B05..0B0C 7260 UNKNOWN, // 0B0D..0B0E 7261 ORIYA, // 0B0F..0B10 7262 UNKNOWN, // 0B11..0B12 7263 ORIYA, // 0B13..0B28 7264 UNKNOWN, // 0B29 7265 ORIYA, // 0B2A..0B30 7266 UNKNOWN, // 0B31 7267 ORIYA, // 0B32..0B33 7268 UNKNOWN, // 0B34 7269 ORIYA, // 0B35..0B39 7270 UNKNOWN, // 0B3A..0B3B 7271 ORIYA, // 0B3C..0B44 7272 UNKNOWN, // 0B45..0B46 7273 ORIYA, // 0B47..0B48 7274 UNKNOWN, // 0B49..0B4A 7275 ORIYA, // 0B4B..0B4D 7276 UNKNOWN, // 0B4E..0B54 7277 ORIYA, // 0B55..0B57 7278 UNKNOWN, // 0B58..0B5B 7279 ORIYA, // 0B5C..0B5D 7280 UNKNOWN, // 0B5E 7281 ORIYA, // 0B5F..0B63 7282 UNKNOWN, // 0B64..0B65 7283 ORIYA, // 0B66..0B77 7284 UNKNOWN, // 0B78..0B81 7285 TAMIL, // 0B82..0B83 7286 UNKNOWN, // 0B84 7287 TAMIL, // 0B85..0B8A 7288 UNKNOWN, // 0B8B..0B8D 7289 TAMIL, // 0B8E..0B90 7290 UNKNOWN, // 0B91 7291 TAMIL, // 0B92..0B95 7292 UNKNOWN, // 0B96..0B98 7293 TAMIL, // 0B99..0B9A 7294 UNKNOWN, // 0B9B 7295 TAMIL, // 0B9C 7296 UNKNOWN, // 0B9D 7297 TAMIL, // 0B9E..0B9F 7298 UNKNOWN, // 0BA0..0BA2 7299 TAMIL, // 0BA3..0BA4 7300 UNKNOWN, // 0BA5..0BA7 7301 TAMIL, // 0BA8..0BAA 7302 UNKNOWN, // 0BAB..0BAD 7303 TAMIL, // 0BAE..0BB9 7304 UNKNOWN, // 0BBA..0BBD 7305 TAMIL, // 0BBE..0BC2 7306 UNKNOWN, // 0BC3..0BC5 7307 TAMIL, // 0BC6..0BC8 7308 UNKNOWN, // 0BC9 7309 TAMIL, // 0BCA..0BCD 7310 UNKNOWN, // 0BCE..0BCF 7311 TAMIL, // 0BD0 7312 UNKNOWN, // 0BD1..0BD6 7313 TAMIL, // 0BD7 7314 UNKNOWN, // 0BD8..0BE5 7315 TAMIL, // 0BE6..0BFA 7316 UNKNOWN, // 0BFB..0BFF 7317 TELUGU, // 0C00..0C0C 7318 UNKNOWN, // 0C0D 7319 TELUGU, // 0C0E..0C10 7320 UNKNOWN, // 0C11 7321 TELUGU, // 0C12..0C28 7322 UNKNOWN, // 0C29 7323 TELUGU, // 0C2A..0C39 7324 UNKNOWN, // 0C3A..0C3B 7325 TELUGU, // 0C3C..0C44 7326 UNKNOWN, // 0C45 7327 TELUGU, // 0C46..0C48 7328 UNKNOWN, // 0C49 7329 TELUGU, // 0C4A..0C4D 7330 
UNKNOWN, // 0C4E..0C54 7331 TELUGU, // 0C55..0C56 7332 UNKNOWN, // 0C57 7333 TELUGU, // 0C58..0C5A 7334 UNKNOWN, // 0C5B..0C5C 7335 TELUGU, // 0C5D 7336 UNKNOWN, // 0C5E..0C5F 7337 TELUGU, // 0C60..0C63 7338 UNKNOWN, // 0C64..0C65 7339 TELUGU, // 0C66..0C6F 7340 UNKNOWN, // 0C70..0C76 7341 TELUGU, // 0C77..0C7F 7342 KANNADA, // 0C80..0C8C 7343 UNKNOWN, // 0C8D 7344 KANNADA, // 0C8E..0C90 7345 UNKNOWN, // 0C91 7346 KANNADA, // 0C92..0CA8 7347 UNKNOWN, // 0CA9 7348 KANNADA, // 0CAA..0CB3 7349 UNKNOWN, // 0CB4 7350 KANNADA, // 0CB5..0CB9 7351 UNKNOWN, // 0CBA..0CBB 7352 KANNADA, // 0CBC..0CC4 7353 UNKNOWN, // 0CC5 7354 KANNADA, // 0CC6..0CC8 7355 UNKNOWN, // 0CC9 7356 KANNADA, // 0CCA..0CCD 7357 UNKNOWN, // 0CCE..0CD4 7358 KANNADA, // 0CD5..0CD6 7359 UNKNOWN, // 0CD7..0CDC 7360 KANNADA, // 0CDD..0CDE 7361 UNKNOWN, // 0CDF 7362 KANNADA, // 0CE0..0CE3 7363 UNKNOWN, // 0CE4..0CE5 7364 KANNADA, // 0CE6..0CEF 7365 UNKNOWN, // 0CF0 7366 KANNADA, // 0CF1..0CF3 7367 UNKNOWN, // 0CF4..0CFF 7368 MALAYALAM, // 0D00..0D0C 7369 UNKNOWN, // 0D0D 7370 MALAYALAM, // 0D0E..0D10 7371 UNKNOWN, // 0D11 7372 MALAYALAM, // 0D12..0D44 7373 UNKNOWN, // 0D45 7374 MALAYALAM, // 0D46..0D48 7375 UNKNOWN, // 0D49 7376 MALAYALAM, // 0D4A..0D4F 7377 UNKNOWN, // 0D50..0D53 7378 MALAYALAM, // 0D54..0D63 7379 UNKNOWN, // 0D64..0D65 7380 MALAYALAM, // 0D66..0D7F 7381 UNKNOWN, // 0D80 7382 SINHALA, // 0D81..0D83 7383 UNKNOWN, // 0D84 7384 SINHALA, // 0D85..0D96 7385 UNKNOWN, // 0D97..0D99 7386 SINHALA, // 0D9A..0DB1 7387 UNKNOWN, // 0DB2 7388 SINHALA, // 0DB3..0DBB 7389 UNKNOWN, // 0DBC 7390 SINHALA, // 0DBD 7391 UNKNOWN, // 0DBE..0DBF 7392 SINHALA, // 0DC0..0DC6 7393 UNKNOWN, // 0DC7..0DC9 7394 SINHALA, // 0DCA 7395 UNKNOWN, // 0DCB..0DCE 7396 SINHALA, // 0DCF..0DD4 7397 UNKNOWN, // 0DD5 7398 SINHALA, // 0DD6 7399 UNKNOWN, // 0DD7 7400 SINHALA, // 0DD8..0DDF 7401 UNKNOWN, // 0DE0..0DE5 7402 SINHALA, // 0DE6..0DEF 7403 UNKNOWN, // 0DF0..0DF1 7404 SINHALA, // 0DF2..0DF4 7405 UNKNOWN, // 0DF5..0E00 7406 
THAI, // 0E01..0E3A 7407 UNKNOWN, // 0E3B..0E3E 7408 COMMON, // 0E3F 7409 THAI, // 0E40..0E5B 7410 UNKNOWN, // 0E5C..0E80 7411 LAO, // 0E81..0E82 7412 UNKNOWN, // 0E83 7413 LAO, // 0E84 7414 UNKNOWN, // 0E85 7415 LAO, // 0E86..0E8A 7416 UNKNOWN, // 0E8B 7417 LAO, // 0E8C..0EA3 7418 UNKNOWN, // 0EA4 7419 LAO, // 0EA5 7420 UNKNOWN, // 0EA6 7421 LAO, // 0EA7..0EBD 7422 UNKNOWN, // 0EBE..0EBF 7423 LAO, // 0EC0..0EC4 7424 UNKNOWN, // 0EC5 7425 LAO, // 0EC6 7426 UNKNOWN, // 0EC7 7427 LAO, // 0EC8..0ECE 7428 UNKNOWN, // 0ECF 7429 LAO, // 0ED0..0ED9 7430 UNKNOWN, // 0EDA..0EDB 7431 LAO, // 0EDC..0EDF 7432 UNKNOWN, // 0EE0..0EFF 7433 TIBETAN, // 0F00..0F47 7434 UNKNOWN, // 0F48 7435 TIBETAN, // 0F49..0F6C 7436 UNKNOWN, // 0F6D..0F70 7437 TIBETAN, // 0F71..0F97 7438 UNKNOWN, // 0F98 7439 TIBETAN, // 0F99..0FBC 7440 UNKNOWN, // 0FBD 7441 TIBETAN, // 0FBE..0FCC 7442 UNKNOWN, // 0FCD 7443 TIBETAN, // 0FCE..0FD4 7444 COMMON, // 0FD5..0FD8 7445 TIBETAN, // 0FD9..0FDA 7446 UNKNOWN, // 0FDB..0FFF 7447 MYANMAR, // 1000..109F 7448 GEORGIAN, // 10A0..10C5 7449 UNKNOWN, // 10C6 7450 GEORGIAN, // 10C7 7451 UNKNOWN, // 10C8..10CC 7452 GEORGIAN, // 10CD 7453 UNKNOWN, // 10CE..10CF 7454 GEORGIAN, // 10D0..10FA 7455 COMMON, // 10FB 7456 GEORGIAN, // 10FC..10FF 7457 HANGUL, // 1100..11FF 7458 ETHIOPIC, // 1200..1248 7459 UNKNOWN, // 1249 7460 ETHIOPIC, // 124A..124D 7461 UNKNOWN, // 124E..124F 7462 ETHIOPIC, // 1250..1256 7463 UNKNOWN, // 1257 7464 ETHIOPIC, // 1258 7465 UNKNOWN, // 1259 7466 ETHIOPIC, // 125A..125D 7467 UNKNOWN, // 125E..125F 7468 ETHIOPIC, // 1260..1288 7469 UNKNOWN, // 1289 7470 ETHIOPIC, // 128A..128D 7471 UNKNOWN, // 128E..128F 7472 ETHIOPIC, // 1290..12B0 7473 UNKNOWN, // 12B1 7474 ETHIOPIC, // 12B2..12B5 7475 UNKNOWN, // 12B6..12B7 7476 ETHIOPIC, // 12B8..12BE 7477 UNKNOWN, // 12BF 7478 ETHIOPIC, // 12C0 7479 UNKNOWN, // 12C1 7480 ETHIOPIC, // 12C2..12C5 7481 UNKNOWN, // 12C6..12C7 7482 ETHIOPIC, // 12C8..12D6 7483 UNKNOWN, // 12D7 7484 ETHIOPIC, // 12D8..1310 7485 
UNKNOWN, // 1311 7486 ETHIOPIC, // 1312..1315 7487 UNKNOWN, // 1316..1317 7488 ETHIOPIC, // 1318..135A 7489 UNKNOWN, // 135B..135C 7490 ETHIOPIC, // 135D..137C 7491 UNKNOWN, // 137D..137F 7492 ETHIOPIC, // 1380..1399 7493 UNKNOWN, // 139A..139F 7494 CHEROKEE, // 13A0..13F5 7495 UNKNOWN, // 13F6..13F7 7496 CHEROKEE, // 13F8..13FD 7497 UNKNOWN, // 13FE..13FF 7498 CANADIAN_ABORIGINAL, // 1400..167F 7499 OGHAM, // 1680..169C 7500 UNKNOWN, // 169D..169F 7501 RUNIC, // 16A0..16EA 7502 COMMON, // 16EB..16ED 7503 RUNIC, // 16EE..16F8 7504 UNKNOWN, // 16F9..16FF 7505 TAGALOG, // 1700..1715 7506 UNKNOWN, // 1716..171E 7507 TAGALOG, // 171F 7508 HANUNOO, // 1720..1734 7509 COMMON, // 1735..1736 7510 UNKNOWN, // 1737..173F 7511 BUHID, // 1740..1753 7512 UNKNOWN, // 1754..175F 7513 TAGBANWA, // 1760..176C 7514 UNKNOWN, // 176D 7515 TAGBANWA, // 176E..1770 7516 UNKNOWN, // 1771 7517 TAGBANWA, // 1772..1773 7518 UNKNOWN, // 1774..177F 7519 KHMER, // 1780..17DD 7520 UNKNOWN, // 17DE..17DF 7521 KHMER, // 17E0..17E9 7522 UNKNOWN, // 17EA..17EF 7523 KHMER, // 17F0..17F9 7524 UNKNOWN, // 17FA..17FF 7525 MONGOLIAN, // 1800..1801 7526 COMMON, // 1802..1803 7527 MONGOLIAN, // 1804 7528 COMMON, // 1805 7529 MONGOLIAN, // 1806..1819 7530 UNKNOWN, // 181A..181F 7531 MONGOLIAN, // 1820..1878 7532 UNKNOWN, // 1879..187F 7533 MONGOLIAN, // 1880..18AA 7534 UNKNOWN, // 18AB..18AF 7535 CANADIAN_ABORIGINAL, // 18B0..18F5 7536 UNKNOWN, // 18F6..18FF 7537 LIMBU, // 1900..191E 7538 UNKNOWN, // 191F 7539 LIMBU, // 1920..192B 7540 UNKNOWN, // 192C..192F 7541 LIMBU, // 1930..193B 7542 UNKNOWN, // 193C..193F 7543 LIMBU, // 1940 7544 UNKNOWN, // 1941..1943 7545 LIMBU, // 1944..194F 7546 TAI_LE, // 1950..196D 7547 UNKNOWN, // 196E..196F 7548 TAI_LE, // 1970..1974 7549 UNKNOWN, // 1975..197F 7550 NEW_TAI_LUE, // 1980..19AB 7551 UNKNOWN, // 19AC..19AF 7552 NEW_TAI_LUE, // 19B0..19C9 7553 UNKNOWN, // 19CA..19CF 7554 NEW_TAI_LUE, // 19D0..19DA 7555 UNKNOWN, // 19DB..19DD 7556 NEW_TAI_LUE, // 19DE..19DF 7557 
KHMER, // 19E0..19FF 7558 BUGINESE, // 1A00..1A1B 7559 UNKNOWN, // 1A1C..1A1D 7560 BUGINESE, // 1A1E..1A1F 7561 TAI_THAM, // 1A20..1A5E 7562 UNKNOWN, // 1A5F 7563 TAI_THAM, // 1A60..1A7C 7564 UNKNOWN, // 1A7D..1A7E 7565 TAI_THAM, // 1A7F..1A89 7566 UNKNOWN, // 1A8A..1A8F 7567 TAI_THAM, // 1A90..1A99 7568 UNKNOWN, // 1A9A..1A9F 7569 TAI_THAM, // 1AA0..1AAD 7570 UNKNOWN, // 1AAE..1AAF 7571 INHERITED, // 1AB0..1ACE 7572 UNKNOWN, // 1ACF..1AFF 7573 BALINESE, // 1B00..1B4C 7574 UNKNOWN, // 1B4D..1B4F 7575 BALINESE, // 1B50..1B7E 7576 UNKNOWN, // 1B7F 7577 SUNDANESE, // 1B80..1BBF 7578 BATAK, // 1BC0..1BF3 7579 UNKNOWN, // 1BF4..1BFB 7580 BATAK, // 1BFC..1BFF 7581 LEPCHA, // 1C00..1C37 7582 UNKNOWN, // 1C38..1C3A 7583 LEPCHA, // 1C3B..1C49 7584 UNKNOWN, // 1C4A..1C4C 7585 LEPCHA, // 1C4D..1C4F 7586 OL_CHIKI, // 1C50..1C7F 7587 CYRILLIC, // 1C80..1C88 7588 UNKNOWN, // 1C89..1C8F 7589 GEORGIAN, // 1C90..1CBA 7590 UNKNOWN, // 1CBB..1CBC 7591 GEORGIAN, // 1CBD..1CBF 7592 SUNDANESE, // 1CC0..1CC7 7593 UNKNOWN, // 1CC8..1CCF 7594 INHERITED, // 1CD0..1CD2 7595 COMMON, // 1CD3 7596 INHERITED, // 1CD4..1CE0 7597 COMMON, // 1CE1 7598 INHERITED, // 1CE2..1CE8 7599 COMMON, // 1CE9..1CEC 7600 INHERITED, // 1CED 7601 COMMON, // 1CEE..1CF3 7602 INHERITED, // 1CF4 7603 COMMON, // 1CF5..1CF7 7604 INHERITED, // 1CF8..1CF9 7605 COMMON, // 1CFA 7606 UNKNOWN, // 1CFB..1CFF 7607 LATIN, // 1D00..1D25 7608 GREEK, // 1D26..1D2A 7609 CYRILLIC, // 1D2B 7610 LATIN, // 1D2C..1D5C 7611 GREEK, // 1D5D..1D61 7612 LATIN, // 1D62..1D65 7613 GREEK, // 1D66..1D6A 7614 LATIN, // 1D6B..1D77 7615 CYRILLIC, // 1D78 7616 LATIN, // 1D79..1DBE 7617 GREEK, // 1DBF 7618 INHERITED, // 1DC0..1DFF 7619 LATIN, // 1E00..1EFF 7620 GREEK, // 1F00..1F15 7621 UNKNOWN, // 1F16..1F17 7622 GREEK, // 1F18..1F1D 7623 UNKNOWN, // 1F1E..1F1F 7624 GREEK, // 1F20..1F45 7625 UNKNOWN, // 1F46..1F47 7626 GREEK, // 1F48..1F4D 7627 UNKNOWN, // 1F4E..1F4F 7628 GREEK, // 1F50..1F57 7629 UNKNOWN, // 1F58 7630 GREEK, // 1F59 7631 UNKNOWN, // 
1F5A 7632 GREEK, // 1F5B 7633 UNKNOWN, // 1F5C 7634 GREEK, // 1F5D 7635 UNKNOWN, // 1F5E 7636 GREEK, // 1F5F..1F7D 7637 UNKNOWN, // 1F7E..1F7F 7638 GREEK, // 1F80..1FB4 7639 UNKNOWN, // 1FB5 7640 GREEK, // 1FB6..1FC4 7641 UNKNOWN, // 1FC5 7642 GREEK, // 1FC6..1FD3 7643 UNKNOWN, // 1FD4..1FD5 7644 GREEK, // 1FD6..1FDB 7645 UNKNOWN, // 1FDC 7646 GREEK, // 1FDD..1FEF 7647 UNKNOWN, // 1FF0..1FF1 7648 GREEK, // 1FF2..1FF4 7649 UNKNOWN, // 1FF5 7650 GREEK, // 1FF6..1FFE 7651 UNKNOWN, // 1FFF 7652 COMMON, // 2000..200B 7653 INHERITED, // 200C..200D 7654 COMMON, // 200E..2064 7655 UNKNOWN, // 2065 7656 COMMON, // 2066..2070 7657 LATIN, // 2071 7658 UNKNOWN, // 2072..2073 7659 COMMON, // 2074..207E 7660 LATIN, // 207F 7661 COMMON, // 2080..208E 7662 UNKNOWN, // 208F 7663 LATIN, // 2090..209C 7664 UNKNOWN, // 209D..209F 7665 COMMON, // 20A0..20C0 7666 UNKNOWN, // 20C1..20CF 7667 INHERITED, // 20D0..20F0 7668 UNKNOWN, // 20F1..20FF 7669 COMMON, // 2100..2125 7670 GREEK, // 2126 7671 COMMON, // 2127..2129 7672 LATIN, // 212A..212B 7673 COMMON, // 212C..2131 7674 LATIN, // 2132 7675 COMMON, // 2133..214D 7676 LATIN, // 214E 7677 COMMON, // 214F..215F 7678 LATIN, // 2160..2188 7679 COMMON, // 2189..218B 7680 UNKNOWN, // 218C..218F 7681 COMMON, // 2190..2426 7682 UNKNOWN, // 2427..243F 7683 COMMON, // 2440..244A 7684 UNKNOWN, // 244B..245F 7685 COMMON, // 2460..27FF 7686 BRAILLE, // 2800..28FF 7687 COMMON, // 2900..2B73 7688 UNKNOWN, // 2B74..2B75 7689 COMMON, // 2B76..2B95 7690 UNKNOWN, // 2B96 7691 COMMON, // 2B97..2BFF 7692 GLAGOLITIC, // 2C00..2C5F 7693 LATIN, // 2C60..2C7F 7694 COPTIC, // 2C80..2CF3 7695 UNKNOWN, // 2CF4..2CF8 7696 COPTIC, // 2CF9..2CFF 7697 GEORGIAN, // 2D00..2D25 7698 UNKNOWN, // 2D26 7699 GEORGIAN, // 2D27 7700 UNKNOWN, // 2D28..2D2C 7701 GEORGIAN, // 2D2D 7702 UNKNOWN, // 2D2E..2D2F 7703 TIFINAGH, // 2D30..2D67 7704 UNKNOWN, // 2D68..2D6E 7705 TIFINAGH, // 2D6F..2D70 7706 UNKNOWN, // 2D71..2D7E 7707 TIFINAGH, // 2D7F 7708 ETHIOPIC, // 2D80..2D96 7709 
UNKNOWN, // 2D97..2D9F 7710 ETHIOPIC, // 2DA0..2DA6 7711 UNKNOWN, // 2DA7 7712 ETHIOPIC, // 2DA8..2DAE 7713 UNKNOWN, // 2DAF 7714 ETHIOPIC, // 2DB0..2DB6 7715 UNKNOWN, // 2DB7 7716 ETHIOPIC, // 2DB8..2DBE 7717 UNKNOWN, // 2DBF 7718 ETHIOPIC, // 2DC0..2DC6 7719 UNKNOWN, // 2DC7 7720 ETHIOPIC, // 2DC8..2DCE 7721 UNKNOWN, // 2DCF 7722 ETHIOPIC, // 2DD0..2DD6 7723 UNKNOWN, // 2DD7 7724 ETHIOPIC, // 2DD8..2DDE 7725 UNKNOWN, // 2DDF 7726 CYRILLIC, // 2DE0..2DFF 7727 COMMON, // 2E00..2E5D 7728 UNKNOWN, // 2E5E..2E7F 7729 HAN, // 2E80..2E99 7730 UNKNOWN, // 2E9A 7731 HAN, // 2E9B..2EF3 7732 UNKNOWN, // 2EF4..2EFF 7733 HAN, // 2F00..2FD5 7734 UNKNOWN, // 2FD6..2FEF 7735 COMMON, // 2FF0..3004 7736 HAN, // 3005 7737 COMMON, // 3006 7738 HAN, // 3007 7739 COMMON, // 3008..3020 7740 HAN, // 3021..3029 7741 INHERITED, // 302A..302D 7742 HANGUL, // 302E..302F 7743 COMMON, // 3030..3037 7744 HAN, // 3038..303B 7745 COMMON, // 303C..303F 7746 UNKNOWN, // 3040 7747 HIRAGANA, // 3041..3096 7748 UNKNOWN, // 3097..3098 7749 INHERITED, // 3099..309A 7750 COMMON, // 309B..309C 7751 HIRAGANA, // 309D..309F 7752 COMMON, // 30A0 7753 KATAKANA, // 30A1..30FA 7754 COMMON, // 30FB..30FC 7755 KATAKANA, // 30FD..30FF 7756 UNKNOWN, // 3100..3104 7757 BOPOMOFO, // 3105..312F 7758 UNKNOWN, // 3130 7759 HANGUL, // 3131..318E 7760 UNKNOWN, // 318F 7761 COMMON, // 3190..319F 7762 BOPOMOFO, // 31A0..31BF 7763 COMMON, // 31C0..31E3 7764 UNKNOWN, // 31E4..31EE 7765 COMMON, // 31EF 7766 KATAKANA, // 31F0..31FF 7767 HANGUL, // 3200..321E 7768 UNKNOWN, // 321F 7769 COMMON, // 3220..325F 7770 HANGUL, // 3260..327E 7771 COMMON, // 327F..32CF 7772 KATAKANA, // 32D0..32FE 7773 COMMON, // 32FF 7774 KATAKANA, // 3300..3357 7775 COMMON, // 3358..33FF 7776 HAN, // 3400..4DBF 7777 COMMON, // 4DC0..4DFF 7778 HAN, // 4E00..9FFF 7779 YI, // A000..A48C 7780 UNKNOWN, // A48D..A48F 7781 YI, // A490..A4C6 7782 UNKNOWN, // A4C7..A4CF 7783 LISU, // A4D0..A4FF 7784 VAI, // A500..A62B 7785 UNKNOWN, // A62C..A63F 7786 CYRILLIC, 
// A640..A69F 7787 BAMUM, // A6A0..A6F7 7788 UNKNOWN, // A6F8..A6FF 7789 COMMON, // A700..A721 7790 LATIN, // A722..A787 7791 COMMON, // A788..A78A 7792 LATIN, // A78B..A7CA 7793 UNKNOWN, // A7CB..A7CF 7794 LATIN, // A7D0..A7D1 7795 UNKNOWN, // A7D2 7796 LATIN, // A7D3 7797 UNKNOWN, // A7D4 7798 LATIN, // A7D5..A7D9 7799 UNKNOWN, // A7DA..A7F1 7800 LATIN, // A7F2..A7FF 7801 SYLOTI_NAGRI, // A800..A82C 7802 UNKNOWN, // A82D..A82F 7803 COMMON, // A830..A839 7804 UNKNOWN, // A83A..A83F 7805 PHAGS_PA, // A840..A877 7806 UNKNOWN, // A878..A87F 7807 SAURASHTRA, // A880..A8C5 7808 UNKNOWN, // A8C6..A8CD 7809 SAURASHTRA, // A8CE..A8D9 7810 UNKNOWN, // A8DA..A8DF 7811 DEVANAGARI, // A8E0..A8FF 7812 KAYAH_LI, // A900..A92D 7813 COMMON, // A92E 7814 KAYAH_LI, // A92F 7815 REJANG, // A930..A953 7816 UNKNOWN, // A954..A95E 7817 REJANG, // A95F 7818 HANGUL, // A960..A97C 7819 UNKNOWN, // A97D..A97F 7820 JAVANESE, // A980..A9CD 7821 UNKNOWN, // A9CE 7822 COMMON, // A9CF 7823 JAVANESE, // A9D0..A9D9 7824 UNKNOWN, // A9DA..A9DD 7825 JAVANESE, // A9DE..A9DF 7826 MYANMAR, // A9E0..A9FE 7827 UNKNOWN, // A9FF 7828 CHAM, // AA00..AA36 7829 UNKNOWN, // AA37..AA3F 7830 CHAM, // AA40..AA4D 7831 UNKNOWN, // AA4E..AA4F 7832 CHAM, // AA50..AA59 7833 UNKNOWN, // AA5A..AA5B 7834 CHAM, // AA5C..AA5F 7835 MYANMAR, // AA60..AA7F 7836 TAI_VIET, // AA80..AAC2 7837 UNKNOWN, // AAC3..AADA 7838 TAI_VIET, // AADB..AADF 7839 MEETEI_MAYEK, // AAE0..AAF6 7840 UNKNOWN, // AAF7..AB00 7841 ETHIOPIC, // AB01..AB06 7842 UNKNOWN, // AB07..AB08 7843 ETHIOPIC, // AB09..AB0E 7844 UNKNOWN, // AB0F..AB10 7845 ETHIOPIC, // AB11..AB16 7846 UNKNOWN, // AB17..AB1F 7847 ETHIOPIC, // AB20..AB26 7848 UNKNOWN, // AB27 7849 ETHIOPIC, // AB28..AB2E 7850 UNKNOWN, // AB2F 7851 LATIN, // AB30..AB5A 7852 COMMON, // AB5B 7853 LATIN, // AB5C..AB64 7854 GREEK, // AB65 7855 LATIN, // AB66..AB69 7856 COMMON, // AB6A..AB6B 7857 UNKNOWN, // AB6C..AB6F 7858 CHEROKEE, // AB70..ABBF 7859 MEETEI_MAYEK, // ABC0..ABED 7860 UNKNOWN, // 
ABEE..ABEF 7861 MEETEI_MAYEK, // ABF0..ABF9 7862 UNKNOWN, // ABFA..ABFF 7863 HANGUL, // AC00..D7A3 7864 UNKNOWN, // D7A4..D7AF 7865 HANGUL, // D7B0..D7C6 7866 UNKNOWN, // D7C7..D7CA 7867 HANGUL, // D7CB..D7FB 7868 UNKNOWN, // D7FC..F8FF 7869 HAN, // F900..FA6D 7870 UNKNOWN, // FA6E..FA6F 7871 HAN, // FA70..FAD9 7872 UNKNOWN, // FADA..FAFF 7873 LATIN, // FB00..FB06 7874 UNKNOWN, // FB07..FB12 7875 ARMENIAN, // FB13..FB17 7876 UNKNOWN, // FB18..FB1C 7877 HEBREW, // FB1D..FB36 7878 UNKNOWN, // FB37 7879 HEBREW, // FB38..FB3C 7880 UNKNOWN, // FB3D 7881 HEBREW, // FB3E 7882 UNKNOWN, // FB3F 7883 HEBREW, // FB40..FB41 7884 UNKNOWN, // FB42 7885 HEBREW, // FB43..FB44 7886 UNKNOWN, // FB45 7887 HEBREW, // FB46..FB4F 7888 ARABIC, // FB50..FBC2 7889 UNKNOWN, // FBC3..FBD2 7890 ARABIC, // FBD3..FD3D 7891 COMMON, // FD3E..FD3F 7892 ARABIC, // FD40..FD8F 7893 UNKNOWN, // FD90..FD91 7894 ARABIC, // FD92..FDC7 7895 UNKNOWN, // FDC8..FDCE 7896 ARABIC, // FDCF 7897 UNKNOWN, // FDD0..FDEF 7898 ARABIC, // FDF0..FDFF 7899 INHERITED, // FE00..FE0F 7900 COMMON, // FE10..FE19 7901 UNKNOWN, // FE1A..FE1F 7902 INHERITED, // FE20..FE2D 7903 CYRILLIC, // FE2E..FE2F 7904 COMMON, // FE30..FE52 7905 UNKNOWN, // FE53 7906 COMMON, // FE54..FE66 7907 UNKNOWN, // FE67 7908 COMMON, // FE68..FE6B 7909 UNKNOWN, // FE6C..FE6F 7910 ARABIC, // FE70..FE74 7911 UNKNOWN, // FE75 7912 ARABIC, // FE76..FEFC 7913 UNKNOWN, // FEFD..FEFE 7914 COMMON, // FEFF 7915 UNKNOWN, // FF00 7916 COMMON, // FF01..FF20 7917 LATIN, // FF21..FF3A 7918 COMMON, // FF3B..FF40 7919 LATIN, // FF41..FF5A 7920 COMMON, // FF5B..FF65 7921 KATAKANA, // FF66..FF6F 7922 COMMON, // FF70 7923 KATAKANA, // FF71..FF9D 7924 COMMON, // FF9E..FF9F 7925 HANGUL, // FFA0..FFBE 7926 UNKNOWN, // FFBF..FFC1 7927 HANGUL, // FFC2..FFC7 7928 UNKNOWN, // FFC8..FFC9 7929 HANGUL, // FFCA..FFCF 7930 UNKNOWN, // FFD0..FFD1 7931 HANGUL, // FFD2..FFD7 7932 UNKNOWN, // FFD8..FFD9 7933 HANGUL, // FFDA..FFDC 7934 UNKNOWN, // FFDD..FFDF 7935 COMMON, // FFE0..FFE6 
7936 UNKNOWN, // FFE7 7937 COMMON, // FFE8..FFEE 7938 UNKNOWN, // FFEF..FFF8 7939 COMMON, // FFF9..FFFD 7940 UNKNOWN, // FFFE..FFFF 7941 LINEAR_B, // 10000..1000B 7942 UNKNOWN, // 1000C 7943 LINEAR_B, // 1000D..10026 7944 UNKNOWN, // 10027 7945 LINEAR_B, // 10028..1003A 7946 UNKNOWN, // 1003B 7947 LINEAR_B, // 1003C..1003D 7948 UNKNOWN, // 1003E 7949 LINEAR_B, // 1003F..1004D 7950 UNKNOWN, // 1004E..1004F 7951 LINEAR_B, // 10050..1005D 7952 UNKNOWN, // 1005E..1007F 7953 LINEAR_B, // 10080..100FA 7954 UNKNOWN, // 100FB..100FF 7955 COMMON, // 10100..10102 7956 UNKNOWN, // 10103..10106 7957 COMMON, // 10107..10133 7958 UNKNOWN, // 10134..10136 7959 COMMON, // 10137..1013F 7960 GREEK, // 10140..1018E 7961 UNKNOWN, // 1018F 7962 COMMON, // 10190..1019C 7963 UNKNOWN, // 1019D..1019F 7964 GREEK, // 101A0 7965 UNKNOWN, // 101A1..101CF 7966 COMMON, // 101D0..101FC 7967 INHERITED, // 101FD 7968 UNKNOWN, // 101FE..1027F 7969 LYCIAN, // 10280..1029C 7970 UNKNOWN, // 1029D..1029F 7971 CARIAN, // 102A0..102D0 7972 UNKNOWN, // 102D1..102DF 7973 INHERITED, // 102E0 7974 COMMON, // 102E1..102FB 7975 UNKNOWN, // 102FC..102FF 7976 OLD_ITALIC, // 10300..10323 7977 UNKNOWN, // 10324..1032C 7978 OLD_ITALIC, // 1032D..1032F 7979 GOTHIC, // 10330..1034A 7980 UNKNOWN, // 1034B..1034F 7981 OLD_PERMIC, // 10350..1037A 7982 UNKNOWN, // 1037B..1037F 7983 UGARITIC, // 10380..1039D 7984 UNKNOWN, // 1039E 7985 UGARITIC, // 1039F 7986 OLD_PERSIAN, // 103A0..103C3 7987 UNKNOWN, // 103C4..103C7 7988 OLD_PERSIAN, // 103C8..103D5 7989 UNKNOWN, // 103D6..103FF 7990 DESERET, // 10400..1044F 7991 SHAVIAN, // 10450..1047F 7992 OSMANYA, // 10480..1049D 7993 UNKNOWN, // 1049E..1049F 7994 OSMANYA, // 104A0..104A9 7995 UNKNOWN, // 104AA..104AF 7996 OSAGE, // 104B0..104D3 7997 UNKNOWN, // 104D4..104D7 7998 OSAGE, // 104D8..104FB 7999 UNKNOWN, // 104FC..104FF 8000 ELBASAN, // 10500..10527 8001 UNKNOWN, // 10528..1052F 8002 CAUCASIAN_ALBANIAN, // 10530..10563 8003 UNKNOWN, // 10564..1056E 8004 
CAUCASIAN_ALBANIAN, // 1056F 8005 VITHKUQI, // 10570..1057A 8006 UNKNOWN, // 1057B 8007 VITHKUQI, // 1057C..1058A 8008 UNKNOWN, // 1058B 8009 VITHKUQI, // 1058C..10592 8010 UNKNOWN, // 10593 8011 VITHKUQI, // 10594..10595 8012 UNKNOWN, // 10596 8013 VITHKUQI, // 10597..105A1 8014 UNKNOWN, // 105A2 8015 VITHKUQI, // 105A3..105B1 8016 UNKNOWN, // 105B2 8017 VITHKUQI, // 105B3..105B9 8018 UNKNOWN, // 105BA 8019 VITHKUQI, // 105BB..105BC 8020 UNKNOWN, // 105BD..105FF 8021 LINEAR_A, // 10600..10736 8022 UNKNOWN, // 10737..1073F 8023 LINEAR_A, // 10740..10755 8024 UNKNOWN, // 10756..1075F 8025 LINEAR_A, // 10760..10767 8026 UNKNOWN, // 10768..1077F 8027 LATIN, // 10780..10785 8028 UNKNOWN, // 10786 8029 LATIN, // 10787..107B0 8030 UNKNOWN, // 107B1 8031 LATIN, // 107B2..107BA 8032 UNKNOWN, // 107BB..107FF 8033 CYPRIOT, // 10800..10805 8034 UNKNOWN, // 10806..10807 8035 CYPRIOT, // 10808 8036 UNKNOWN, // 10809 8037 CYPRIOT, // 1080A..10835 8038 UNKNOWN, // 10836 8039 CYPRIOT, // 10837..10838 8040 UNKNOWN, // 10839..1083B 8041 CYPRIOT, // 1083C 8042 UNKNOWN, // 1083D..1083E 8043 CYPRIOT, // 1083F 8044 IMPERIAL_ARAMAIC, // 10840..10855 8045 UNKNOWN, // 10856 8046 IMPERIAL_ARAMAIC, // 10857..1085F 8047 PALMYRENE, // 10860..1087F 8048 NABATAEAN, // 10880..1089E 8049 UNKNOWN, // 1089F..108A6 8050 NABATAEAN, // 108A7..108AF 8051 UNKNOWN, // 108B0..108DF 8052 HATRAN, // 108E0..108F2 8053 UNKNOWN, // 108F3 8054 HATRAN, // 108F4..108F5 8055 UNKNOWN, // 108F6..108FA 8056 HATRAN, // 108FB..108FF 8057 PHOENICIAN, // 10900..1091B 8058 UNKNOWN, // 1091C..1091E 8059 PHOENICIAN, // 1091F 8060 LYDIAN, // 10920..10939 8061 UNKNOWN, // 1093A..1093E 8062 LYDIAN, // 1093F 8063 UNKNOWN, // 10940..1097F 8064 MEROITIC_HIEROGLYPHS, // 10980..1099F 8065 MEROITIC_CURSIVE, // 109A0..109B7 8066 UNKNOWN, // 109B8..109BB 8067 MEROITIC_CURSIVE, // 109BC..109CF 8068 UNKNOWN, // 109D0..109D1 8069 MEROITIC_CURSIVE, // 109D2..109FF 8070 KHAROSHTHI, // 10A00..10A03 8071 UNKNOWN, // 10A04 8072 KHAROSHTHI, // 
10A05..10A06 8073 UNKNOWN, // 10A07..10A0B 8074 KHAROSHTHI, // 10A0C..10A13 8075 UNKNOWN, // 10A14 8076 KHAROSHTHI, // 10A15..10A17 8077 UNKNOWN, // 10A18 8078 KHAROSHTHI, // 10A19..10A35 8079 UNKNOWN, // 10A36..10A37 8080 KHAROSHTHI, // 10A38..10A3A 8081 UNKNOWN, // 10A3B..10A3E 8082 KHAROSHTHI, // 10A3F..10A48 8083 UNKNOWN, // 10A49..10A4F 8084 KHAROSHTHI, // 10A50..10A58 8085 UNKNOWN, // 10A59..10A5F 8086 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8087 OLD_NORTH_ARABIAN, // 10A80..10A9F 8088 UNKNOWN, // 10AA0..10ABF 8089 MANICHAEAN, // 10AC0..10AE6 8090 UNKNOWN, // 10AE7..10AEA 8091 MANICHAEAN, // 10AEB..10AF6 8092 UNKNOWN, // 10AF7..10AFF 8093 AVESTAN, // 10B00..10B35 8094 UNKNOWN, // 10B36..10B38 8095 AVESTAN, // 10B39..10B3F 8096 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8097 UNKNOWN, // 10B56..10B57 8098 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8099 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8100 UNKNOWN, // 10B73..10B77 8101 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8102 PSALTER_PAHLAVI, // 10B80..10B91 8103 UNKNOWN, // 10B92..10B98 8104 PSALTER_PAHLAVI, // 10B99..10B9C 8105 UNKNOWN, // 10B9D..10BA8 8106 PSALTER_PAHLAVI, // 10BA9..10BAF 8107 UNKNOWN, // 10BB0..10BFF 8108 OLD_TURKIC, // 10C00..10C48 8109 UNKNOWN, // 10C49..10C7F 8110 OLD_HUNGARIAN, // 10C80..10CB2 8111 UNKNOWN, // 10CB3..10CBF 8112 OLD_HUNGARIAN, // 10CC0..10CF2 8113 UNKNOWN, // 10CF3..10CF9 8114 OLD_HUNGARIAN, // 10CFA..10CFF 8115 HANIFI_ROHINGYA, // 10D00..10D27 8116 UNKNOWN, // 10D28..10D2F 8117 HANIFI_ROHINGYA, // 10D30..10D39 8118 UNKNOWN, // 10D3A..10E5F 8119 ARABIC, // 10E60..10E7E 8120 UNKNOWN, // 10E7F 8121 YEZIDI, // 10E80..10EA9 8122 UNKNOWN, // 10EAA 8123 YEZIDI, // 10EAB..10EAD 8124 UNKNOWN, // 10EAE..10EAF 8125 YEZIDI, // 10EB0..10EB1 8126 UNKNOWN, // 10EB2..10EFC 8127 ARABIC, // 10EFD..10EFF 8128 OLD_SOGDIAN, // 10F00..10F27 8129 UNKNOWN, // 10F28..10F2F 8130 SOGDIAN, // 10F30..10F59 8131 UNKNOWN, // 10F5A..10F6F 8132 OLD_UYGHUR, // 10F70..10F89 8133 UNKNOWN, // 10F8A..10FAF 8134 CHORASMIAN, // 
10FB0..10FCB 8135 UNKNOWN, // 10FCC..10FDF 8136 ELYMAIC, // 10FE0..10FF6 8137 UNKNOWN, // 10FF7..10FFF 8138 BRAHMI, // 11000..1104D 8139 UNKNOWN, // 1104E..11051 8140 BRAHMI, // 11052..11075 8141 UNKNOWN, // 11076..1107E 8142 BRAHMI, // 1107F 8143 KAITHI, // 11080..110C2 8144 UNKNOWN, // 110C3..110CC 8145 KAITHI, // 110CD 8146 UNKNOWN, // 110CE..110CF 8147 SORA_SOMPENG, // 110D0..110E8 8148 UNKNOWN, // 110E9..110EF 8149 SORA_SOMPENG, // 110F0..110F9 8150 UNKNOWN, // 110FA..110FF 8151 CHAKMA, // 11100..11134 8152 UNKNOWN, // 11135 8153 CHAKMA, // 11136..11147 8154 UNKNOWN, // 11148..1114F 8155 MAHAJANI, // 11150..11176 8156 UNKNOWN, // 11177..1117F 8157 SHARADA, // 11180..111DF 8158 UNKNOWN, // 111E0 8159 SINHALA, // 111E1..111F4 8160 UNKNOWN, // 111F5..111FF 8161 KHOJKI, // 11200..11211 8162 UNKNOWN, // 11212 8163 KHOJKI, // 11213..11241 8164 UNKNOWN, // 11242..1127F 8165 MULTANI, // 11280..11286 8166 UNKNOWN, // 11287 8167 MULTANI, // 11288 8168 UNKNOWN, // 11289 8169 MULTANI, // 1128A..1128D 8170 UNKNOWN, // 1128E 8171 MULTANI, // 1128F..1129D 8172 UNKNOWN, // 1129E 8173 MULTANI, // 1129F..112A9 8174 UNKNOWN, // 112AA..112AF 8175 KHUDAWADI, // 112B0..112EA 8176 UNKNOWN, // 112EB..112EF 8177 KHUDAWADI, // 112F0..112F9 8178 UNKNOWN, // 112FA..112FF 8179 GRANTHA, // 11300..11303 8180 UNKNOWN, // 11304 8181 GRANTHA, // 11305..1130C 8182 UNKNOWN, // 1130D..1130E 8183 GRANTHA, // 1130F..11310 8184 UNKNOWN, // 11311..11312 8185 GRANTHA, // 11313..11328 8186 UNKNOWN, // 11329 8187 GRANTHA, // 1132A..11330 8188 UNKNOWN, // 11331 8189 GRANTHA, // 11332..11333 8190 UNKNOWN, // 11334 8191 GRANTHA, // 11335..11339 8192 UNKNOWN, // 1133A 8193 INHERITED, // 1133B 8194 GRANTHA, // 1133C..11344 8195 UNKNOWN, // 11345..11346 8196 GRANTHA, // 11347..11348 8197 UNKNOWN, // 11349..1134A 8198 GRANTHA, // 1134B..1134D 8199 UNKNOWN, // 1134E..1134F 8200 GRANTHA, // 11350 8201 UNKNOWN, // 11351..11356 8202 GRANTHA, // 11357 8203 UNKNOWN, // 11358..1135C 8204 GRANTHA, // 1135D..11363 8205 
UNKNOWN, // 11364..11365 8206 GRANTHA, // 11366..1136C 8207 UNKNOWN, // 1136D..1136F 8208 GRANTHA, // 11370..11374 8209 UNKNOWN, // 11375..113FF 8210 NEWA, // 11400..1145B 8211 UNKNOWN, // 1145C 8212 NEWA, // 1145D..11461 8213 UNKNOWN, // 11462..1147F 8214 TIRHUTA, // 11480..114C7 8215 UNKNOWN, // 114C8..114CF 8216 TIRHUTA, // 114D0..114D9 8217 UNKNOWN, // 114DA..1157F 8218 SIDDHAM, // 11580..115B5 8219 UNKNOWN, // 115B6..115B7 8220 SIDDHAM, // 115B8..115DD 8221 UNKNOWN, // 115DE..115FF 8222 MODI, // 11600..11644 8223 UNKNOWN, // 11645..1164F 8224 MODI, // 11650..11659 8225 UNKNOWN, // 1165A..1165F 8226 MONGOLIAN, // 11660..1166C 8227 UNKNOWN, // 1166D..1167F 8228 TAKRI, // 11680..116B9 8229 UNKNOWN, // 116BA..116BF 8230 TAKRI, // 116C0..116C9 8231 UNKNOWN, // 116CA..116FF 8232 AHOM, // 11700..1171A 8233 UNKNOWN, // 1171B..1171C 8234 AHOM, // 1171D..1172B 8235 UNKNOWN, // 1172C..1172F 8236 AHOM, // 11730..11746 8237 UNKNOWN, // 11747..117FF 8238 DOGRA, // 11800..1183B 8239 UNKNOWN, // 1183C..1189F 8240 WARANG_CITI, // 118A0..118F2 8241 UNKNOWN, // 118F3..118FE 8242 WARANG_CITI, // 118FF 8243 DIVES_AKURU, // 11900..11906 8244 UNKNOWN, // 11907..11908 8245 DIVES_AKURU, // 11909 8246 UNKNOWN, // 1190A..1190B 8247 DIVES_AKURU, // 1190C..11913 8248 UNKNOWN, // 11914 8249 DIVES_AKURU, // 11915..11916 8250 UNKNOWN, // 11917 8251 DIVES_AKURU, // 11918..11935 8252 UNKNOWN, // 11936 8253 DIVES_AKURU, // 11937..11938 8254 UNKNOWN, // 11939..1193A 8255 DIVES_AKURU, // 1193B..11946 8256 UNKNOWN, // 11947..1194F 8257 DIVES_AKURU, // 11950..11959 8258 UNKNOWN, // 1195A..1199F 8259 NANDINAGARI, // 119A0..119A7 8260 UNKNOWN, // 119A8..119A9 8261 NANDINAGARI, // 119AA..119D7 8262 UNKNOWN, // 119D8..119D9 8263 NANDINAGARI, // 119DA..119E4 8264 UNKNOWN, // 119E5..119FF 8265 ZANABAZAR_SQUARE, // 11A00..11A47 8266 UNKNOWN, // 11A48..11A4F 8267 SOYOMBO, // 11A50..11AA2 8268 UNKNOWN, // 11AA3..11AAF 8269 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8270 PAU_CIN_HAU, // 11AC0..11AF8 8271 UNKNOWN, 
// 11AF9..11AFF 8272 DEVANAGARI, // 11B00..11B09 8273 UNKNOWN, // 11B0A..11BFF 8274 BHAIKSUKI, // 11C00..11C08 8275 UNKNOWN, // 11C09 8276 BHAIKSUKI, // 11C0A..11C36 8277 UNKNOWN, // 11C37 8278 BHAIKSUKI, // 11C38..11C45 8279 UNKNOWN, // 11C46..11C4F 8280 BHAIKSUKI, // 11C50..11C6C 8281 UNKNOWN, // 11C6D..11C6F 8282 MARCHEN, // 11C70..11C8F 8283 UNKNOWN, // 11C90..11C91 8284 MARCHEN, // 11C92..11CA7 8285 UNKNOWN, // 11CA8 8286 MARCHEN, // 11CA9..11CB6 8287 UNKNOWN, // 11CB7..11CFF 8288 MASARAM_GONDI, // 11D00..11D06 8289 UNKNOWN, // 11D07 8290 MASARAM_GONDI, // 11D08..11D09 8291 UNKNOWN, // 11D0A 8292 MASARAM_GONDI, // 11D0B..11D36 8293 UNKNOWN, // 11D37..11D39 8294 MASARAM_GONDI, // 11D3A 8295 UNKNOWN, // 11D3B 8296 MASARAM_GONDI, // 11D3C..11D3D 8297 UNKNOWN, // 11D3E 8298 MASARAM_GONDI, // 11D3F..11D47 8299 UNKNOWN, // 11D48..11D4F 8300 MASARAM_GONDI, // 11D50..11D59 8301 UNKNOWN, // 11D5A..11D5F 8302 GUNJALA_GONDI, // 11D60..11D65 8303 UNKNOWN, // 11D66 8304 GUNJALA_GONDI, // 11D67..11D68 8305 UNKNOWN, // 11D69 8306 GUNJALA_GONDI, // 11D6A..11D8E 8307 UNKNOWN, // 11D8F 8308 GUNJALA_GONDI, // 11D90..11D91 8309 UNKNOWN, // 11D92 8310 GUNJALA_GONDI, // 11D93..11D98 8311 UNKNOWN, // 11D99..11D9F 8312 GUNJALA_GONDI, // 11DA0..11DA9 8313 UNKNOWN, // 11DAA..11EDF 8314 MAKASAR, // 11EE0..11EF8 8315 UNKNOWN, // 11EF9..11EFF 8316 KAWI, // 11F00..11F10 8317 UNKNOWN, // 11F11 8318 KAWI, // 11F12..11F3A 8319 UNKNOWN, // 11F3B..11F3D 8320 KAWI, // 11F3E..11F59 8321 UNKNOWN, // 11F5A..11FAF 8322 LISU, // 11FB0 8323 UNKNOWN, // 11FB1..11FBF 8324 TAMIL, // 11FC0..11FF1 8325 UNKNOWN, // 11FF2..11FFE 8326 TAMIL, // 11FFF 8327 CUNEIFORM, // 12000..12399 8328 UNKNOWN, // 1239A..123FF 8329 CUNEIFORM, // 12400..1246E 8330 UNKNOWN, // 1246F 8331 CUNEIFORM, // 12470..12474 8332 UNKNOWN, // 12475..1247F 8333 CUNEIFORM, // 12480..12543 8334 UNKNOWN, // 12544..12F8F 8335 CYPRO_MINOAN, // 12F90..12FF2 8336 UNKNOWN, // 12FF3..12FFF 8337 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8338 UNKNOWN, // 
13456..143FF 8339 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8340 UNKNOWN, // 14647..167FF 8341 BAMUM, // 16800..16A38 8342 UNKNOWN, // 16A39..16A3F 8343 MRO, // 16A40..16A5E 8344 UNKNOWN, // 16A5F 8345 MRO, // 16A60..16A69 8346 UNKNOWN, // 16A6A..16A6D 8347 MRO, // 16A6E..16A6F 8348 TANGSA, // 16A70..16ABE 8349 UNKNOWN, // 16ABF 8350 TANGSA, // 16AC0..16AC9 8351 UNKNOWN, // 16ACA..16ACF 8352 BASSA_VAH, // 16AD0..16AED 8353 UNKNOWN, // 16AEE..16AEF 8354 BASSA_VAH, // 16AF0..16AF5 8355 UNKNOWN, // 16AF6..16AFF 8356 PAHAWH_HMONG, // 16B00..16B45 8357 UNKNOWN, // 16B46..16B4F 8358 PAHAWH_HMONG, // 16B50..16B59 8359 UNKNOWN, // 16B5A 8360 PAHAWH_HMONG, // 16B5B..16B61 8361 UNKNOWN, // 16B62 8362 PAHAWH_HMONG, // 16B63..16B77 8363 UNKNOWN, // 16B78..16B7C 8364 PAHAWH_HMONG, // 16B7D..16B8F 8365 UNKNOWN, // 16B90..16E3F 8366 MEDEFAIDRIN, // 16E40..16E9A 8367 UNKNOWN, // 16E9B..16EFF 8368 MIAO, // 16F00..16F4A 8369 UNKNOWN, // 16F4B..16F4E 8370 MIAO, // 16F4F..16F87 8371 UNKNOWN, // 16F88..16F8E 8372 MIAO, // 16F8F..16F9F 8373 UNKNOWN, // 16FA0..16FDF 8374 TANGUT, // 16FE0 8375 NUSHU, // 16FE1 8376 HAN, // 16FE2..16FE3 8377 KHITAN_SMALL_SCRIPT, // 16FE4 8378 UNKNOWN, // 16FE5..16FEF 8379 HAN, // 16FF0..16FF1 8380 UNKNOWN, // 16FF2..16FFF 8381 TANGUT, // 17000..187F7 8382 UNKNOWN, // 187F8..187FF 8383 TANGUT, // 18800..18AFF 8384 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8385 UNKNOWN, // 18CD6..18CFF 8386 TANGUT, // 18D00..18D08 8387 UNKNOWN, // 18D09..1AFEF 8388 KATAKANA, // 1AFF0..1AFF3 8389 UNKNOWN, // 1AFF4 8390 KATAKANA, // 1AFF5..1AFFB 8391 UNKNOWN, // 1AFFC 8392 KATAKANA, // 1AFFD..1AFFE 8393 UNKNOWN, // 1AFFF 8394 KATAKANA, // 1B000 8395 HIRAGANA, // 1B001..1B11F 8396 KATAKANA, // 1B120..1B122 8397 UNKNOWN, // 1B123..1B131 8398 HIRAGANA, // 1B132 8399 UNKNOWN, // 1B133..1B14F 8400 HIRAGANA, // 1B150..1B152 8401 UNKNOWN, // 1B153..1B154 8402 KATAKANA, // 1B155 8403 UNKNOWN, // 1B156..1B163 8404 KATAKANA, // 1B164..1B167 8405 UNKNOWN, // 1B168..1B16F 8406 NUSHU, // 
1B170..1B2FB 8407 UNKNOWN, // 1B2FC..1BBFF 8408 DUPLOYAN, // 1BC00..1BC6A 8409 UNKNOWN, // 1BC6B..1BC6F 8410 DUPLOYAN, // 1BC70..1BC7C 8411 UNKNOWN, // 1BC7D..1BC7F 8412 DUPLOYAN, // 1BC80..1BC88 8413 UNKNOWN, // 1BC89..1BC8F 8414 DUPLOYAN, // 1BC90..1BC99 8415 UNKNOWN, // 1BC9A..1BC9B 8416 DUPLOYAN, // 1BC9C..1BC9F 8417 COMMON, // 1BCA0..1BCA3 8418 UNKNOWN, // 1BCA4..1CEFF 8419 INHERITED, // 1CF00..1CF2D 8420 UNKNOWN, // 1CF2E..1CF2F 8421 INHERITED, // 1CF30..1CF46 8422 UNKNOWN, // 1CF47..1CF4F 8423 COMMON, // 1CF50..1CFC3 8424 UNKNOWN, // 1CFC4..1CFFF 8425 COMMON, // 1D000..1D0F5 8426 UNKNOWN, // 1D0F6..1D0FF 8427 COMMON, // 1D100..1D126 8428 UNKNOWN, // 1D127..1D128 8429 COMMON, // 1D129..1D166 8430 INHERITED, // 1D167..1D169 8431 COMMON, // 1D16A..1D17A 8432 INHERITED, // 1D17B..1D182 8433 COMMON, // 1D183..1D184 8434 INHERITED, // 1D185..1D18B 8435 COMMON, // 1D18C..1D1A9 8436 INHERITED, // 1D1AA..1D1AD 8437 COMMON, // 1D1AE..1D1EA 8438 UNKNOWN, // 1D1EB..1D1FF 8439 GREEK, // 1D200..1D245 8440 UNKNOWN, // 1D246..1D2BF 8441 COMMON, // 1D2C0..1D2D3 8442 UNKNOWN, // 1D2D4..1D2DF 8443 COMMON, // 1D2E0..1D2F3 8444 UNKNOWN, // 1D2F4..1D2FF 8445 COMMON, // 1D300..1D356 8446 UNKNOWN, // 1D357..1D35F 8447 COMMON, // 1D360..1D378 8448 UNKNOWN, // 1D379..1D3FF 8449 COMMON, // 1D400..1D454 8450 UNKNOWN, // 1D455 8451 COMMON, // 1D456..1D49C 8452 UNKNOWN, // 1D49D 8453 COMMON, // 1D49E..1D49F 8454 UNKNOWN, // 1D4A0..1D4A1 8455 COMMON, // 1D4A2 8456 UNKNOWN, // 1D4A3..1D4A4 8457 COMMON, // 1D4A5..1D4A6 8458 UNKNOWN, // 1D4A7..1D4A8 8459 COMMON, // 1D4A9..1D4AC 8460 UNKNOWN, // 1D4AD 8461 COMMON, // 1D4AE..1D4B9 8462 UNKNOWN, // 1D4BA 8463 COMMON, // 1D4BB 8464 UNKNOWN, // 1D4BC 8465 COMMON, // 1D4BD..1D4C3 8466 UNKNOWN, // 1D4C4 8467 COMMON, // 1D4C5..1D505 8468 UNKNOWN, // 1D506 8469 COMMON, // 1D507..1D50A 8470 UNKNOWN, // 1D50B..1D50C 8471 COMMON, // 1D50D..1D514 8472 UNKNOWN, // 1D515 8473 COMMON, // 1D516..1D51C 8474 UNKNOWN, // 1D51D 8475 COMMON, // 1D51E..1D539 8476 
UNKNOWN, // 1D53A 8477 COMMON, // 1D53B..1D53E 8478 UNKNOWN, // 1D53F 8479 COMMON, // 1D540..1D544 8480 UNKNOWN, // 1D545 8481 COMMON, // 1D546 8482 UNKNOWN, // 1D547..1D549 8483 COMMON, // 1D54A..1D550 8484 UNKNOWN, // 1D551 8485 COMMON, // 1D552..1D6A5 8486 UNKNOWN, // 1D6A6..1D6A7 8487 COMMON, // 1D6A8..1D7CB 8488 UNKNOWN, // 1D7CC..1D7CD 8489 COMMON, // 1D7CE..1D7FF 8490 SIGNWRITING, // 1D800..1DA8B 8491 UNKNOWN, // 1DA8C..1DA9A 8492 SIGNWRITING, // 1DA9B..1DA9F 8493 UNKNOWN, // 1DAA0 8494 SIGNWRITING, // 1DAA1..1DAAF 8495 UNKNOWN, // 1DAB0..1DEFF 8496 LATIN, // 1DF00..1DF1E 8497 UNKNOWN, // 1DF1F..1DF24 8498 LATIN, // 1DF25..1DF2A 8499 UNKNOWN, // 1DF2B..1DFFF 8500 GLAGOLITIC, // 1E000..1E006 8501 UNKNOWN, // 1E007 8502 GLAGOLITIC, // 1E008..1E018 8503 UNKNOWN, // 1E019..1E01A 8504 GLAGOLITIC, // 1E01B..1E021 8505 UNKNOWN, // 1E022 8506 GLAGOLITIC, // 1E023..1E024 8507 UNKNOWN, // 1E025 8508 GLAGOLITIC, // 1E026..1E02A 8509 UNKNOWN, // 1E02B..1E02F 8510 CYRILLIC, // 1E030..1E06D 8511 UNKNOWN, // 1E06E..1E08E 8512 CYRILLIC, // 1E08F 8513 UNKNOWN, // 1E090..1E0FF 8514 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8515 UNKNOWN, // 1E12D..1E12F 8516 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8517 UNKNOWN, // 1E13E..1E13F 8518 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8519 UNKNOWN, // 1E14A..1E14D 8520 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8521 UNKNOWN, // 1E150..1E28F 8522 TOTO, // 1E290..1E2AE 8523 UNKNOWN, // 1E2AF..1E2BF 8524 WANCHO, // 1E2C0..1E2F9 8525 UNKNOWN, // 1E2FA..1E2FE 8526 WANCHO, // 1E2FF 8527 UNKNOWN, // 1E300..1E4CF 8528 NAG_MUNDARI, // 1E4D0..1E4F9 8529 UNKNOWN, // 1E4FA..1E7DF 8530 ETHIOPIC, // 1E7E0..1E7E6 8531 UNKNOWN, // 1E7E7 8532 ETHIOPIC, // 1E7E8..1E7EB 8533 UNKNOWN, // 1E7EC 8534 ETHIOPIC, // 1E7ED..1E7EE 8535 UNKNOWN, // 1E7EF 8536 ETHIOPIC, // 1E7F0..1E7FE 8537 UNKNOWN, // 1E7FF 8538 MENDE_KIKAKUI, // 1E800..1E8C4 8539 UNKNOWN, // 1E8C5..1E8C6 8540 MENDE_KIKAKUI, // 1E8C7..1E8D6 8541 UNKNOWN, // 1E8D7..1E8FF 8542 ADLAM, // 1E900..1E94B 8543 
UNKNOWN, // 1E94C..1E94F 8544 ADLAM, // 1E950..1E959 8545 UNKNOWN, // 1E95A..1E95D 8546 ADLAM, // 1E95E..1E95F 8547 UNKNOWN, // 1E960..1EC70 8548 COMMON, // 1EC71..1ECB4 8549 UNKNOWN, // 1ECB5..1ED00 8550 COMMON, // 1ED01..1ED3D 8551 UNKNOWN, // 1ED3E..1EDFF 8552 ARABIC, // 1EE00..1EE03 8553 UNKNOWN, // 1EE04 8554 ARABIC, // 1EE05..1EE1F 8555 UNKNOWN, // 1EE20 8556 ARABIC, // 1EE21..1EE22 8557 UNKNOWN, // 1EE23 8558 ARABIC, // 1EE24 8559 UNKNOWN, // 1EE25..1EE26 8560 ARABIC, // 1EE27 8561 UNKNOWN, // 1EE28 8562 ARABIC, // 1EE29..1EE32 8563 UNKNOWN, // 1EE33 8564 ARABIC, // 1EE34..1EE37 8565 UNKNOWN, // 1EE38 8566 ARABIC, // 1EE39 8567 UNKNOWN, // 1EE3A 8568 ARABIC, // 1EE3B 8569 UNKNOWN, // 1EE3C..1EE41 8570 ARABIC, // 1EE42 8571 UNKNOWN, // 1EE43..1EE46 8572 ARABIC, // 1EE47 8573 UNKNOWN, // 1EE48 8574 ARABIC, // 1EE49 8575 UNKNOWN, // 1EE4A 8576 ARABIC, // 1EE4B 8577 UNKNOWN, // 1EE4C 8578 ARABIC, // 1EE4D..1EE4F 8579 UNKNOWN, // 1EE50 8580 ARABIC, // 1EE51..1EE52 8581 UNKNOWN, // 1EE53 8582 ARABIC, // 1EE54 8583 UNKNOWN, // 1EE55..1EE56 8584 ARABIC, // 1EE57 8585 UNKNOWN, // 1EE58 8586 ARABIC, // 1EE59 8587 UNKNOWN, // 1EE5A 8588 ARABIC, // 1EE5B 8589 UNKNOWN, // 1EE5C 8590 ARABIC, // 1EE5D 8591 UNKNOWN, // 1EE5E 8592 ARABIC, // 1EE5F 8593 UNKNOWN, // 1EE60 8594 ARABIC, // 1EE61..1EE62 8595 UNKNOWN, // 1EE63 8596 ARABIC, // 1EE64 8597 UNKNOWN, // 1EE65..1EE66 8598 ARABIC, // 1EE67..1EE6A 8599 UNKNOWN, // 1EE6B 8600 ARABIC, // 1EE6C..1EE72 8601 UNKNOWN, // 1EE73 8602 ARABIC, // 1EE74..1EE77 8603 UNKNOWN, // 1EE78 8604 ARABIC, // 1EE79..1EE7C 8605 UNKNOWN, // 1EE7D 8606 ARABIC, // 1EE7E 8607 UNKNOWN, // 1EE7F 8608 ARABIC, // 1EE80..1EE89 8609 UNKNOWN, // 1EE8A 8610 ARABIC, // 1EE8B..1EE9B 8611 UNKNOWN, // 1EE9C..1EEA0 8612 ARABIC, // 1EEA1..1EEA3 8613 UNKNOWN, // 1EEA4 8614 ARABIC, // 1EEA5..1EEA9 8615 UNKNOWN, // 1EEAA 8616 ARABIC, // 1EEAB..1EEBB 8617 UNKNOWN, // 1EEBC..1EEEF 8618 ARABIC, // 1EEF0..1EEF1 8619 UNKNOWN, // 1EEF2..1EFFF 8620 COMMON, // 1F000..1F02B 
8621 UNKNOWN, // 1F02C..1F02F 8622 COMMON, // 1F030..1F093 8623 UNKNOWN, // 1F094..1F09F 8624 COMMON, // 1F0A0..1F0AE 8625 UNKNOWN, // 1F0AF..1F0B0 8626 COMMON, // 1F0B1..1F0BF 8627 UNKNOWN, // 1F0C0 8628 COMMON, // 1F0C1..1F0CF 8629 UNKNOWN, // 1F0D0 8630 COMMON, // 1F0D1..1F0F5 8631 UNKNOWN, // 1F0F6..1F0FF 8632 COMMON, // 1F100..1F1AD 8633 UNKNOWN, // 1F1AE..1F1E5 8634 COMMON, // 1F1E6..1F1FF 8635 HIRAGANA, // 1F200 8636 COMMON, // 1F201..1F202 8637 UNKNOWN, // 1F203..1F20F 8638 COMMON, // 1F210..1F23B 8639 UNKNOWN, // 1F23C..1F23F 8640 COMMON, // 1F240..1F248 8641 UNKNOWN, // 1F249..1F24F 8642 COMMON, // 1F250..1F251 8643 UNKNOWN, // 1F252..1F25F 8644 COMMON, // 1F260..1F265 8645 UNKNOWN, // 1F266..1F2FF 8646 COMMON, // 1F300..1F6D7 8647 UNKNOWN, // 1F6D8..1F6DB 8648 COMMON, // 1F6DC..1F6EC 8649 UNKNOWN, // 1F6ED..1F6EF 8650 COMMON, // 1F6F0..1F6FC 8651 UNKNOWN, // 1F6FD..1F6FF 8652 COMMON, // 1F700..1F776 8653 UNKNOWN, // 1F777..1F77A 8654 COMMON, // 1F77B..1F7D9 8655 UNKNOWN, // 1F7DA..1F7DF 8656 COMMON, // 1F7E0..1F7EB 8657 UNKNOWN, // 1F7EC..1F7EF 8658 COMMON, // 1F7F0 8659 UNKNOWN, // 1F7F1..1F7FF 8660 COMMON, // 1F800..1F80B 8661 UNKNOWN, // 1F80C..1F80F 8662 COMMON, // 1F810..1F847 8663 UNKNOWN, // 1F848..1F84F 8664 COMMON, // 1F850..1F859 8665 UNKNOWN, // 1F85A..1F85F 8666 COMMON, // 1F860..1F887 8667 UNKNOWN, // 1F888..1F88F 8668 COMMON, // 1F890..1F8AD 8669 UNKNOWN, // 1F8AE..1F8AF 8670 COMMON, // 1F8B0..1F8B1 8671 UNKNOWN, // 1F8B2..1F8FF 8672 COMMON, // 1F900..1FA53 8673 UNKNOWN, // 1FA54..1FA5F 8674 COMMON, // 1FA60..1FA6D 8675 UNKNOWN, // 1FA6E..1FA6F 8676 COMMON, // 1FA70..1FA7C 8677 UNKNOWN, // 1FA7D..1FA7F 8678 COMMON, // 1FA80..1FA88 8679 UNKNOWN, // 1FA89..1FA8F 8680 COMMON, // 1FA90..1FABD 8681 UNKNOWN, // 1FABE 8682 COMMON, // 1FABF..1FAC5 8683 UNKNOWN, // 1FAC6..1FACD 8684 COMMON, // 1FACE..1FADB 8685 UNKNOWN, // 1FADC..1FADF 8686 COMMON, // 1FAE0..1FAE8 8687 UNKNOWN, // 1FAE9..1FAEF 8688 COMMON, // 1FAF0..1FAF8 8689 UNKNOWN, // 
1FAF9..1FAFF 8690 COMMON, // 1FB00..1FB92 8691 UNKNOWN, // 1FB93 8692 COMMON, // 1FB94..1FBCA 8693 UNKNOWN, // 1FBCB..1FBEF 8694 COMMON, // 1FBF0..1FBF9 8695 UNKNOWN, // 1FBFA..1FFFF 8696 HAN, // 20000..2A6DF 8697 UNKNOWN, // 2A6E0..2A6FF 8698 HAN, // 2A700..2B739 8699 UNKNOWN, // 2B73A..2B73F 8700 HAN, // 2B740..2B81D 8701 UNKNOWN, // 2B81E..2B81F 8702 HAN, // 2B820..2CEA1 8703 UNKNOWN, // 2CEA2..2CEAF 8704 HAN, // 2CEB0..2EBE0 8705 UNKNOWN, // 2EBE1..2EBEF 8706 HAN, // 2EBF0..2EE5D 8707 UNKNOWN, // 2EE5E..2F7FF 8708 HAN, // 2F800..2FA1D 8709 UNKNOWN, // 2FA1E..2FFFF 8710 HAN, // 30000..3134A 8711 UNKNOWN, // 3134B..3134F 8712 HAN, // 31350..323AF 8713 UNKNOWN, // 323B0..E0000 8714 COMMON, // E0001 8715 UNKNOWN, // E0002..E001F 8716 COMMON, // E0020..E007F 8717 UNKNOWN, // E0080..E00FF 8718 INHERITED, // E0100..E01EF 8719 UNKNOWN, // E01F0..10FFFF 8720 }; 8721 8722 private static final HashMap<String, Character.UnicodeScript> aliases; 8723 static { 8724 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 8725 aliases.put("ADLM", ADLAM); 8726 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 8727 aliases.put("AHOM", AHOM); 8728 aliases.put("ARAB", ARABIC); 8729 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8730 aliases.put("ARMN", ARMENIAN); 8731 aliases.put("AVST", AVESTAN); 8732 aliases.put("BALI", BALINESE); 8733 aliases.put("BAMU", BAMUM); 8734 aliases.put("BASS", BASSA_VAH); 8735 aliases.put("BATK", BATAK); 8736 aliases.put("BENG", BENGALI); 8737 aliases.put("BHKS", BHAIKSUKI); 8738 aliases.put("BOPO", BOPOMOFO); 8739 aliases.put("BRAH", BRAHMI); 8740 aliases.put("BRAI", BRAILLE); 8741 aliases.put("BUGI", BUGINESE); 8742 aliases.put("BUHD", BUHID); 8743 aliases.put("CAKM", CHAKMA); 8744 aliases.put("CANS", CANADIAN_ABORIGINAL); 8745 aliases.put("CARI", CARIAN); 8746 aliases.put("CHAM", CHAM); 8747 aliases.put("CHER", CHEROKEE); 8748 aliases.put("CHRS", CHORASMIAN); 8749 aliases.put("COPT", COPTIC); 8750 aliases.put("CPMN", CYPRO_MINOAN); 8751 aliases.put("CPRT", CYPRIOT); 
8752 aliases.put("CYRL", CYRILLIC); 8753 aliases.put("DEVA", DEVANAGARI); 8754 aliases.put("DIAK", DIVES_AKURU); 8755 aliases.put("DOGR", DOGRA); 8756 aliases.put("DSRT", DESERET); 8757 aliases.put("DUPL", DUPLOYAN); 8758 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 8759 aliases.put("ELBA", ELBASAN); 8760 aliases.put("ELYM", ELYMAIC); 8761 aliases.put("ETHI", ETHIOPIC); 8762 aliases.put("GEOR", GEORGIAN); 8763 aliases.put("GLAG", GLAGOLITIC); 8764 aliases.put("GONG", GUNJALA_GONDI); 8765 aliases.put("GONM", MASARAM_GONDI); 8766 aliases.put("GOTH", GOTHIC); 8767 aliases.put("GRAN", GRANTHA); 8768 aliases.put("GREK", GREEK); 8769 aliases.put("GUJR", GUJARATI); 8770 aliases.put("GURU", GURMUKHI); 8771 aliases.put("HANG", HANGUL); 8772 aliases.put("HANI", HAN); 8773 aliases.put("HANO", HANUNOO); 8774 aliases.put("HATR", HATRAN); 8775 aliases.put("HEBR", HEBREW); 8776 aliases.put("HIRA", HIRAGANA); 8777 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 8778 aliases.put("HMNG", PAHAWH_HMONG); 8779 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG); 8780 aliases.put("HUNG", OLD_HUNGARIAN); 8781 aliases.put("ITAL", OLD_ITALIC); 8782 aliases.put("JAVA", JAVANESE); 8783 aliases.put("KALI", KAYAH_LI); 8784 aliases.put("KANA", KATAKANA); 8785 aliases.put("KAWI", KAWI); 8786 aliases.put("KHAR", KHAROSHTHI); 8787 aliases.put("KHMR", KHMER); 8788 aliases.put("KHOJ", KHOJKI); 8789 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 8790 aliases.put("KNDA", KANNADA); 8791 aliases.put("KTHI", KAITHI); 8792 aliases.put("LANA", TAI_THAM); 8793 aliases.put("LAOO", LAO); 8794 aliases.put("LATN", LATIN); 8795 aliases.put("LEPC", LEPCHA); 8796 aliases.put("LIMB", LIMBU); 8797 aliases.put("LINA", LINEAR_A); 8798 aliases.put("LINB", LINEAR_B); 8799 aliases.put("LISU", LISU); 8800 aliases.put("LYCI", LYCIAN); 8801 aliases.put("LYDI", LYDIAN); 8802 aliases.put("MAHJ", MAHAJANI); 8803 aliases.put("MAKA", MAKASAR); 8804 aliases.put("MAND", MANDAIC); 8805 aliases.put("MANI", MANICHAEAN); 8806 aliases.put("MARC", 
MARCHEN); 8807 aliases.put("MEDF", MEDEFAIDRIN); 8808 aliases.put("MEND", MENDE_KIKAKUI); 8809 aliases.put("MERC", MEROITIC_CURSIVE); 8810 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 8811 aliases.put("MLYM", MALAYALAM); 8812 aliases.put("MODI", MODI); 8813 aliases.put("MONG", MONGOLIAN); 8814 aliases.put("MROO", MRO); 8815 aliases.put("MTEI", MEETEI_MAYEK); 8816 aliases.put("MULT", MULTANI); 8817 aliases.put("MYMR", MYANMAR); 8818 aliases.put("NAGM", NAG_MUNDARI); 8819 aliases.put("NAND", NANDINAGARI); 8820 aliases.put("NARB", OLD_NORTH_ARABIAN); 8821 aliases.put("NBAT", NABATAEAN); 8822 aliases.put("NEWA", NEWA); 8823 aliases.put("NKOO", NKO); 8824 aliases.put("NSHU", NUSHU); 8825 aliases.put("OGAM", OGHAM); 8826 aliases.put("OLCK", OL_CHIKI); 8827 aliases.put("ORKH", OLD_TURKIC); 8828 aliases.put("ORYA", ORIYA); 8829 aliases.put("OSGE", OSAGE); 8830 aliases.put("OSMA", OSMANYA); 8831 aliases.put("OUGR", OLD_UYGHUR); 8832 aliases.put("PALM", PALMYRENE); 8833 aliases.put("PAUC", PAU_CIN_HAU); 8834 aliases.put("PERM", OLD_PERMIC); 8835 aliases.put("PHAG", PHAGS_PA); 8836 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 8837 aliases.put("PHLP", PSALTER_PAHLAVI); 8838 aliases.put("PHNX", PHOENICIAN); 8839 aliases.put("PLRD", MIAO); 8840 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 8841 aliases.put("RJNG", REJANG); 8842 aliases.put("ROHG", HANIFI_ROHINGYA); 8843 aliases.put("RUNR", RUNIC); 8844 aliases.put("SAMR", SAMARITAN); 8845 aliases.put("SARB", OLD_SOUTH_ARABIAN); 8846 aliases.put("SAUR", SAURASHTRA); 8847 aliases.put("SGNW", SIGNWRITING); 8848 aliases.put("SHAW", SHAVIAN); 8849 aliases.put("SHRD", SHARADA); 8850 aliases.put("SIDD", SIDDHAM); 8851 aliases.put("SIND", KHUDAWADI); 8852 aliases.put("SINH", SINHALA); 8853 aliases.put("SOGD", SOGDIAN); 8854 aliases.put("SOGO", OLD_SOGDIAN); 8855 aliases.put("SORA", SORA_SOMPENG); 8856 aliases.put("SOYO", SOYOMBO); 8857 aliases.put("SUND", SUNDANESE); 8858 aliases.put("SYLO", SYLOTI_NAGRI); 8859 aliases.put("SYRC", SYRIAC); 
8860 aliases.put("TAGB", TAGBANWA); 8861 aliases.put("TAKR", TAKRI); 8862 aliases.put("TALE", TAI_LE); 8863 aliases.put("TALU", NEW_TAI_LUE); 8864 aliases.put("TAML", TAMIL); 8865 aliases.put("TANG", TANGUT); 8866 aliases.put("TAVT", TAI_VIET); 8867 aliases.put("TELU", TELUGU); 8868 aliases.put("TFNG", TIFINAGH); 8869 aliases.put("TGLG", TAGALOG); 8870 aliases.put("THAA", THAANA); 8871 aliases.put("THAI", THAI); 8872 aliases.put("TIBT", TIBETAN); 8873 aliases.put("TIRH", TIRHUTA); 8874 aliases.put("TNSA", TANGSA); 8875 aliases.put("TOTO", TOTO); 8876 aliases.put("UGAR", UGARITIC); 8877 aliases.put("VAII", VAI); 8878 aliases.put("VITH", VITHKUQI); 8879 aliases.put("WARA", WARANG_CITI); 8880 aliases.put("WCHO", WANCHO); 8881 aliases.put("XPEO", OLD_PERSIAN); 8882 aliases.put("XSUX", CUNEIFORM); 8883 aliases.put("YEZI", YEZIDI); 8884 aliases.put("YIII", YI); 8885 aliases.put("ZANB", ZANABAZAR_SQUARE); 8886 aliases.put("ZINH", INHERITED); 8887 aliases.put("ZYYY", COMMON); 8888 aliases.put("ZZZZ", UNKNOWN); 8889 } 8890 8891 /** 8892 * Returns the enum constant representing the Unicode script of which 8893 * the given character (Unicode code point) is assigned to. 8894 * 8895 * @param codePoint the character (Unicode code point) in question. 8896 * @return The {@code UnicodeScript} constant representing the 8897 * Unicode script of which this character is assigned to. 8898 * 8899 * @throws IllegalArgumentException if the specified 8900 * {@code codePoint} is an invalid Unicode code point. 
8901 * @see Character#isValidCodePoint(int) 8902 * 8903 */ 8904 public static UnicodeScript of(int codePoint) { 8905 if (!isValidCodePoint(codePoint)) 8906 throw new IllegalArgumentException( 8907 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8908 int type = getType(codePoint); 8909 // leave SURROGATE and PRIVATE_USE for table lookup 8910 if (type == UNASSIGNED) 8911 return UNKNOWN; 8912 int index = Arrays.binarySearch(scriptStarts, codePoint); 8913 if (index < 0) 8914 index = -index - 2; 8915 return scripts[index]; 8916 } 8917 8918 /** 8919 * Returns the UnicodeScript constant with the given Unicode script 8920 * name or the script name alias. Script names and their aliases are 8921 * determined by The Unicode Standard. The files {@code Scripts.txt} 8922 * and {@code PropertyValueAliases.txt} define script names 8923 * and the script name aliases for a particular version of the 8924 * standard. The {@link Character} class specifies the version of 8925 * the standard that it supports. 8926 * <p> 8927 * Character case is ignored for all of the valid script names. 8928 * The en_US locale's case mapping rules are used to provide 8929 * case-insensitive string comparisons for script name validation. 8930 * 8931 * @param scriptName A {@code UnicodeScript} name. 8932 * @return The {@code UnicodeScript} constant identified 8933 * by {@code scriptName} 8934 * @throws IllegalArgumentException if {@code scriptName} is an 8935 * invalid name 8936 * @throws NullPointerException if {@code scriptName} is null 8937 */ 8938 public static final UnicodeScript forName(String scriptName) { 8939 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8940 //.replace(' ', '_')); 8941 UnicodeScript sc = aliases.get(scriptName); 8942 if (sc != null) 8943 return sc; 8944 return valueOf(scriptName); 8945 } 8946 } 8947 8948 /** 8949 * The value of the {@code Character}. 
8950 * 8951 * @serial 8952 */ 8953 private final char value; 8954 8955 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8956 @java.io.Serial 8957 private static final long serialVersionUID = 3786198910865385080L; 8958 8959 /** 8960 * Constructs a newly allocated {@code Character} object that 8961 * represents the specified {@code char} value. 8962 * 8963 * @param value the value to be represented by the 8964 * {@code Character} object. 8965 * 8966 * @deprecated 8967 * It is rarely appropriate to use this constructor. The static factory 8968 * {@link #valueOf(char)} is generally a better choice, as it is 8969 * likely to yield significantly better space and time performance. 8970 */ 8971 @Deprecated(since="9", forRemoval = true) 8972 public Character(char value) { 8973 this.value = value; 8974 } 8975 8976 private static final class CharacterCache { 8977 private CharacterCache(){} 8978 8979 @Stable 8980 static final Character[] cache; 8981 static Character[] archivedCache; 8982 8983 static { 8984 int size = 127 + 1; 8985 8986 // Load and use the archived cache if it exists 8987 CDS.initializeFromArchive(CharacterCache.class); 8988 if (archivedCache == null) { 8989 Character[] c = new Character[size]; 8990 for (int i = 0; i < size; i++) { 8991 c[i] = new Character((char) i); 8992 } 8993 archivedCache = c; 8994 } 8995 cache = archivedCache; 8996 assert cache.length == size; 8997 } 8998 } 8999 9000 /** 9001 * Returns a {@code Character} instance representing the specified 9002 * {@code char} value. 9003 * If a new {@code Character} instance is not required, this method 9004 * should generally be used in preference to the constructor 9005 * {@link #Character(char)}, as this method is likely to yield 9006 * significantly better space and time performance by caching 9007 * frequently requested values. 
9008 * 9009 * This method will always cache values in the range {@code 9010 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9011 * cache other values outside of this range. 9012 * 9013 * @param c a char value. 9014 * @return a {@code Character} instance representing {@code c}. 9015 * @since 1.5 9016 */ 9017 @IntrinsicCandidate 9018 @DeserializeConstructor 9019 public static Character valueOf(char c) { 9020 if (c <= 127) { // must cache 9021 return CharacterCache.cache[(int)c]; 9022 } 9023 return new Character(c); 9024 } 9025 9026 /** 9027 * Returns the value of this {@code Character} object. 9028 * @return the primitive {@code char} value represented by 9029 * this object. 9030 */ 9031 @IntrinsicCandidate 9032 public char charValue() { 9033 return value; 9034 } 9035 9036 /** 9037 * Returns a hash code for this {@code Character}; equal to the result 9038 * of invoking {@code charValue()}. 9039 * 9040 * @return a hash code value for this {@code Character} 9041 */ 9042 @Override 9043 public int hashCode() { 9044 return Character.hashCode(value); 9045 } 9046 9047 /** 9048 * Returns a hash code for a {@code char} value; compatible with 9049 * {@code Character.hashCode()}. 9050 * 9051 * @since 1.8 9052 * 9053 * @param value The {@code char} for which to return a hash code. 9054 * @return a hash code value for a {@code char} value. 9055 */ 9056 public static int hashCode(char value) { 9057 return (int)value; 9058 } 9059 9060 /** 9061 * Compares this object against the specified object. 9062 * The result is {@code true} if and only if the argument is not 9063 * {@code null} and is a {@code Character} object that 9064 * represents the same {@code char} value as this object. 9065 * 9066 * @param obj the object to compare with. 9067 * @return {@code true} if the objects are the same; 9068 * {@code false} otherwise. 
9069 */ 9070 public boolean equals(Object obj) { 9071 if (obj instanceof Character c) { 9072 return value == c.charValue(); 9073 } 9074 return false; 9075 } 9076 9077 /** 9078 * Returns a {@code String} object representing this 9079 * {@code Character}'s value. The result is a string of 9080 * length 1 whose sole component is the primitive 9081 * {@code char} value represented by this 9082 * {@code Character} object. 9083 * 9084 * @return a string representation of this object. 9085 */ 9086 @Override 9087 public String toString() { 9088 return String.valueOf(value); 9089 } 9090 9091 /** 9092 * Returns a {@code String} object representing the 9093 * specified {@code char}. The result is a string of length 9094 * 1 consisting solely of the specified {@code char}. 9095 * 9096 * @apiNote This method cannot handle <a 9097 * href="#supplementary"> supplementary characters</a>. To support 9098 * all Unicode characters, including supplementary characters, use 9099 * the {@link #toString(int)} method. 9100 * 9101 * @param c the {@code char} to be converted 9102 * @return the string representation of the specified {@code char} 9103 * @since 1.4 9104 */ 9105 public static String toString(char c) { 9106 return String.valueOf(c); 9107 } 9108 9109 /** 9110 * Returns a {@code String} object representing the 9111 * specified character (Unicode code point). The result is a string of 9112 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9113 * 9114 * @param codePoint the {@code codePoint} to be converted 9115 * @return the string representation of the specified {@code codePoint} 9116 * @throws IllegalArgumentException if the specified 9117 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9118 * valid Unicode code point}. 
9119 * @since 11 9120 */ 9121 public static String toString(int codePoint) { 9122 return String.valueOfCodePoint(codePoint); 9123 } 9124 9125 /** 9126 * Determines whether the specified code point is a valid 9127 * <a href="http://www.unicode.org/glossary/#code_point"> 9128 * Unicode code point value</a>. 9129 * 9130 * @param codePoint the Unicode code point to be tested 9131 * @return {@code true} if the specified code point value is between 9132 * {@link #MIN_CODE_POINT} and 9133 * {@link #MAX_CODE_POINT} inclusive; 9134 * {@code false} otherwise. 9135 * @since 1.5 9136 */ 9137 public static boolean isValidCodePoint(int codePoint) { 9138 // Optimized form of: 9139 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9140 int plane = codePoint >>> 16; 9141 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9142 } 9143 9144 /** 9145 * Determines whether the specified character (Unicode code point) 9146 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 9147 * Such code points can be represented using a single {@code char}. 9148 * 9149 * @param codePoint the character (Unicode code point) to be tested 9150 * @return {@code true} if the specified code point is between 9151 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9152 * {@code false} otherwise. 9153 * @since 1.7 9154 */ 9155 public static boolean isBmpCodePoint(int codePoint) { 9156 return codePoint >>> 16 == 0; 9157 // Optimized form of: 9158 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9159 // We consistently use logical shift (>>>) to facilitate 9160 // additional runtime optimizations. 9161 } 9162 9163 /** 9164 * Determines whether the specified character (Unicode code point) 9165 * is in the <a href="#supplementary">supplementary character</a> range. 
9166 * 9167 * @param codePoint the character (Unicode code point) to be tested 9168 * @return {@code true} if the specified code point is between 9169 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9170 * {@link #MAX_CODE_POINT} inclusive; 9171 * {@code false} otherwise. 9172 * @since 1.5 9173 */ 9174 public static boolean isSupplementaryCodePoint(int codePoint) { 9175 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9176 && codePoint < MAX_CODE_POINT + 1; 9177 } 9178 9179 /** 9180 * Determines if the given {@code char} value is a 9181 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9182 * Unicode high-surrogate code unit</a> 9183 * (also known as <i>leading-surrogate code unit</i>). 9184 * 9185 * <p>Such values do not represent characters by themselves, 9186 * but are used in the representation of 9187 * <a href="#supplementary">supplementary characters</a> 9188 * in the UTF-16 encoding. 9189 * 9190 * @param ch the {@code char} value to be tested. 9191 * @return {@code true} if the {@code char} value is between 9192 * {@link #MIN_HIGH_SURROGATE} and 9193 * {@link #MAX_HIGH_SURROGATE} inclusive; 9194 * {@code false} otherwise. 9195 * @see Character#isLowSurrogate(char) 9196 * @see Character.UnicodeBlock#of(int) 9197 * @since 1.5 9198 */ 9199 public static boolean isHighSurrogate(char ch) { 9200 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9201 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9202 } 9203 9204 /** 9205 * Determines if the given {@code char} value is a 9206 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9207 * Unicode low-surrogate code unit</a> 9208 * (also known as <i>trailing-surrogate code unit</i>). 9209 * 9210 * <p>Such values do not represent characters by themselves, 9211 * but are used in the representation of 9212 * <a href="#supplementary">supplementary characters</a> 9213 * in the UTF-16 encoding. 
9214 * 9215 * @param ch the {@code char} value to be tested. 9216 * @return {@code true} if the {@code char} value is between 9217 * {@link #MIN_LOW_SURROGATE} and 9218 * {@link #MAX_LOW_SURROGATE} inclusive; 9219 * {@code false} otherwise. 9220 * @see Character#isHighSurrogate(char) 9221 * @since 1.5 9222 */ 9223 public static boolean isLowSurrogate(char ch) { 9224 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9225 } 9226 9227 /** 9228 * Determines if the given {@code char} value is a Unicode 9229 * <i>surrogate code unit</i>. 9230 * 9231 * <p>Such values do not represent characters by themselves, 9232 * but are used in the representation of 9233 * <a href="#supplementary">supplementary characters</a> 9234 * in the UTF-16 encoding. 9235 * 9236 * <p>A char value is a surrogate code unit if and only if it is either 9237 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9238 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 9239 * 9240 * @param ch the {@code char} value to be tested. 9241 * @return {@code true} if the {@code char} value is between 9242 * {@link #MIN_SURROGATE} and 9243 * {@link #MAX_SURROGATE} inclusive; 9244 * {@code false} otherwise. 9245 * @since 1.7 9246 */ 9247 public static boolean isSurrogate(char ch) { 9248 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9249 } 9250 9251 /** 9252 * Determines whether the specified pair of {@code char} 9253 * values is a valid 9254 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9255 * Unicode surrogate pair</a>. 
9256 * 9257 * <p>This method is equivalent to the expression: 9258 * <blockquote><pre>{@code 9259 * isHighSurrogate(high) && isLowSurrogate(low) 9260 * }</pre></blockquote> 9261 * 9262 * @param high the high-surrogate code value to be tested 9263 * @param low the low-surrogate code value to be tested 9264 * @return {@code true} if the specified high and 9265 * low-surrogate code values represent a valid surrogate pair; 9266 * {@code false} otherwise. 9267 * @since 1.5 9268 */ 9269 public static boolean isSurrogatePair(char high, char low) { 9270 return isHighSurrogate(high) && isLowSurrogate(low); 9271 } 9272 9273 /** 9274 * Determines the number of {@code char} values needed to 9275 * represent the specified character (Unicode code point). If the 9276 * specified character is equal to or greater than 0x10000, then 9277 * the method returns 2. Otherwise, the method returns 1. 9278 * 9279 * <p>This method doesn't validate the specified character to be a 9280 * valid Unicode code point. The caller must validate the 9281 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9282 * if necessary. 9283 * 9284 * @param codePoint the character (Unicode code point) to be tested. 9285 * @return 2 if the character is a valid supplementary character; 1 otherwise. 9286 * @see Character#isSupplementaryCodePoint(int) 9287 * @since 1.5 9288 */ 9289 public static int charCount(int codePoint) { 9290 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9291 } 9292 9293 /** 9294 * Converts the specified surrogate pair to its supplementary code 9295 * point value. This method does not validate the specified 9296 * surrogate pair. The caller must validate it using {@link 9297 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9298 * 9299 * @param high the high-surrogate code unit 9300 * @param low the low-surrogate code unit 9301 * @return the supplementary code point composed from the 9302 * specified surrogate pair. 
9303 * @since 1.5 9304 */ 9305 public static int toCodePoint(char high, char low) { 9306 // Optimized form of: 9307 // return ((high - MIN_HIGH_SURROGATE) << 10) 9308 // + (low - MIN_LOW_SURROGATE) 9309 // + MIN_SUPPLEMENTARY_CODE_POINT; 9310 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9311 - (MIN_HIGH_SURROGATE << 10) 9312 - MIN_LOW_SURROGATE); 9313 } 9314 9315 /** 9316 * Returns the code point at the given index of the 9317 * {@code CharSequence}. If the {@code char} value at 9318 * the given index in the {@code CharSequence} is in the 9319 * high-surrogate range, the following index is less than the 9320 * length of the {@code CharSequence}, and the 9321 * {@code char} value at the following index is in the 9322 * low-surrogate range, then the supplementary code point 9323 * corresponding to this surrogate pair is returned. Otherwise, 9324 * the {@code char} value at the given index is returned. 9325 * 9326 * @param seq a sequence of {@code char} values (Unicode code 9327 * units) 9328 * @param index the index to the {@code char} values (Unicode 9329 * code units) in {@code seq} to be converted 9330 * @return the Unicode code point at the given index 9331 * @throws NullPointerException if {@code seq} is null. 9332 * @throws IndexOutOfBoundsException if the value 9333 * {@code index} is negative or not less than 9334 * {@link CharSequence#length() seq.length()}. 9335 * @since 1.5 9336 */ 9337 public static int codePointAt(CharSequence seq, int index) { 9338 char c1 = seq.charAt(index); 9339 if (isHighSurrogate(c1) && ++index < seq.length()) { 9340 char c2 = seq.charAt(index); 9341 if (isLowSurrogate(c2)) { 9342 return toCodePoint(c1, c2); 9343 } 9344 } 9345 return c1; 9346 } 9347 9348 /** 9349 * Returns the code point at the given index of the 9350 * {@code char} array. 
If the {@code char} value at 9351 * the given index in the {@code char} array is in the 9352 * high-surrogate range, the following index is less than the 9353 * length of the {@code char} array, and the 9354 * {@code char} value at the following index is in the 9355 * low-surrogate range, then the supplementary code point 9356 * corresponding to this surrogate pair is returned. Otherwise, 9357 * the {@code char} value at the given index is returned. 9358 * 9359 * @param a the {@code char} array 9360 * @param index the index to the {@code char} values (Unicode 9361 * code units) in the {@code char} array to be converted 9362 * @return the Unicode code point at the given index 9363 * @throws NullPointerException if {@code a} is null. 9364 * @throws IndexOutOfBoundsException if the value 9365 * {@code index} is negative or not less than 9366 * the length of the {@code char} array. 9367 * @since 1.5 9368 */ 9369 public static int codePointAt(char[] a, int index) { 9370 return codePointAtImpl(a, index, a.length); 9371 } 9372 9373 /** 9374 * Returns the code point at the given index of the 9375 * {@code char} array, where only array elements with 9376 * {@code index} less than {@code limit} can be used. If 9377 * the {@code char} value at the given index in the 9378 * {@code char} array is in the high-surrogate range, the 9379 * following index is less than the {@code limit}, and the 9380 * {@code char} value at the following index is in the 9381 * low-surrogate range, then the supplementary code point 9382 * corresponding to this surrogate pair is returned. Otherwise, 9383 * the {@code char} value at the given index is returned. 
9384 * 9385 * @param a the {@code char} array 9386 * @param index the index to the {@code char} values (Unicode 9387 * code units) in the {@code char} array to be converted 9388 * @param limit the index after the last array element that 9389 * can be used in the {@code char} array 9390 * @return the Unicode code point at the given index 9391 * @throws NullPointerException if {@code a} is null. 9392 * @throws IndexOutOfBoundsException if the {@code index} 9393 * argument is negative or not less than the {@code limit} 9394 * argument, or if the {@code limit} argument is negative or 9395 * greater than the length of the {@code char} array. 9396 * @since 1.5 9397 */ 9398 public static int codePointAt(char[] a, int index, int limit) { 9399 if (index >= limit || index < 0 || limit > a.length) { 9400 throw new IndexOutOfBoundsException(); 9401 } 9402 return codePointAtImpl(a, index, limit); 9403 } 9404 9405 // throws ArrayIndexOutOfBoundsException if index out of bounds 9406 static int codePointAtImpl(char[] a, int index, int limit) { 9407 char c1 = a[index]; 9408 if (isHighSurrogate(c1) && ++index < limit) { 9409 char c2 = a[index]; 9410 if (isLowSurrogate(c2)) { 9411 return toCodePoint(c1, c2); 9412 } 9413 } 9414 return c1; 9415 } 9416 9417 /** 9418 * Returns the code point preceding the given index of the 9419 * {@code CharSequence}. If the {@code char} value at 9420 * {@code (index - 1)} in the {@code CharSequence} is in 9421 * the low-surrogate range, {@code (index - 2)} is not 9422 * negative, and the {@code char} value at {@code (index - 2)} 9423 * in the {@code CharSequence} is in the 9424 * high-surrogate range, then the supplementary code point 9425 * corresponding to this surrogate pair is returned. Otherwise, 9426 * the {@code char} value at {@code (index - 1)} is 9427 * returned. 
9428 * 9429 * @param seq the {@code CharSequence} instance 9430 * @param index the index following the code point that should be returned 9431 * @return the Unicode code point value before the given index. 9432 * @throws NullPointerException if {@code seq} is null. 9433 * @throws IndexOutOfBoundsException if the {@code index} 9434 * argument is less than 1 or greater than {@link 9435 * CharSequence#length() seq.length()}. 9436 * @since 1.5 9437 */ 9438 public static int codePointBefore(CharSequence seq, int index) { 9439 char c2 = seq.charAt(--index); 9440 if (isLowSurrogate(c2) && index > 0) { 9441 char c1 = seq.charAt(--index); 9442 if (isHighSurrogate(c1)) { 9443 return toCodePoint(c1, c2); 9444 } 9445 } 9446 return c2; 9447 } 9448 9449 /** 9450 * Returns the code point preceding the given index of the 9451 * {@code char} array. If the {@code char} value at 9452 * {@code (index - 1)} in the {@code char} array is in 9453 * the low-surrogate range, {@code (index - 2)} is not 9454 * negative, and the {@code char} value at {@code (index - 2)} 9455 * in the {@code char} array is in the 9456 * high-surrogate range, then the supplementary code point 9457 * corresponding to this surrogate pair is returned. Otherwise, 9458 * the {@code char} value at {@code (index - 1)} is 9459 * returned. 9460 * 9461 * @param a the {@code char} array 9462 * @param index the index following the code point that should be returned 9463 * @return the Unicode code point value before the given index. 9464 * @throws NullPointerException if {@code a} is null. 
9465 * @throws IndexOutOfBoundsException if the {@code index} 9466 * argument is less than 1 or greater than the length of the 9467 * {@code char} array 9468 * @since 1.5 9469 */ 9470 public static int codePointBefore(char[] a, int index) { 9471 return codePointBeforeImpl(a, index, 0); 9472 } 9473 9474 /** 9475 * Returns the code point preceding the given index of the 9476 * {@code char} array, where only array elements with 9477 * {@code index} greater than or equal to {@code start} 9478 * can be used. If the {@code char} value at {@code (index - 1)} 9479 * in the {@code char} array is in the 9480 * low-surrogate range, {@code (index - 2)} is not less than 9481 * {@code start}, and the {@code char} value at 9482 * {@code (index - 2)} in the {@code char} array is in 9483 * the high-surrogate range, then the supplementary code point 9484 * corresponding to this surrogate pair is returned. Otherwise, 9485 * the {@code char} value at {@code (index - 1)} is 9486 * returned. 9487 * 9488 * @param a the {@code char} array 9489 * @param index the index following the code point that should be returned 9490 * @param start the index of the first array element in the 9491 * {@code char} array 9492 * @return the Unicode code point value before the given index. 9493 * @throws NullPointerException if {@code a} is null. 9494 * @throws IndexOutOfBoundsException if the {@code index} 9495 * argument is not greater than the {@code start} argument or 9496 * is greater than the length of the {@code char} array, or 9497 * if the {@code start} argument is negative or not less than 9498 * the length of the {@code char} array. 
9499 * @since 1.5 9500 */ 9501 public static int codePointBefore(char[] a, int index, int start) { 9502 if (index <= start || start < 0 || index > a.length) { 9503 throw new IndexOutOfBoundsException(); 9504 } 9505 return codePointBeforeImpl(a, index, start); 9506 } 9507 9508 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 9509 static int codePointBeforeImpl(char[] a, int index, int start) { 9510 char c2 = a[--index]; 9511 if (isLowSurrogate(c2) && index > start) { 9512 char c1 = a[--index]; 9513 if (isHighSurrogate(c1)) { 9514 return toCodePoint(c1, c2); 9515 } 9516 } 9517 return c2; 9518 } 9519 9520 /** 9521 * Returns the leading surrogate (a 9522 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9523 * high surrogate code unit</a>) of the 9524 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9525 * surrogate pair</a> 9526 * representing the specified supplementary character (Unicode 9527 * code point) in the UTF-16 encoding. If the specified character 9528 * is not a 9529 * <a href="Character.html#supplementary">supplementary character</a>, 9530 * an unspecified {@code char} is returned. 9531 * 9532 * <p>If 9533 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9534 * is {@code true}, then 9535 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9536 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9537 * are also always {@code true}. 
9538 * 9539 * @param codePoint a supplementary character (Unicode code point) 9540 * @return the leading surrogate code unit used to represent the 9541 * character in the UTF-16 encoding 9542 * @since 1.7 9543 */ 9544 public static char highSurrogate(int codePoint) { 9545 return (char) ((codePoint >>> 10) 9546 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9547 } 9548 9549 /** 9550 * Returns the trailing surrogate (a 9551 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9552 * low surrogate code unit</a>) of the 9553 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9554 * surrogate pair</a> 9555 * representing the specified supplementary character (Unicode 9556 * code point) in the UTF-16 encoding. If the specified character 9557 * is not a 9558 * <a href="Character.html#supplementary">supplementary character</a>, 9559 * an unspecified {@code char} is returned. 9560 * 9561 * <p>If 9562 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9563 * is {@code true}, then 9564 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9565 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9566 * are also always {@code true}. 9567 * 9568 * @param codePoint a supplementary character (Unicode code point) 9569 * @return the trailing surrogate code unit used to represent the 9570 * character in the UTF-16 encoding 9571 * @since 1.7 9572 */ 9573 public static char lowSurrogate(int codePoint) { 9574 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9575 } 9576 9577 /** 9578 * Converts the specified character (Unicode code point) to its 9579 * UTF-16 representation. If the specified code point is a BMP 9580 * (Basic Multilingual Plane or Plane 0) value, the same value is 9581 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9582 * specified code point is a supplementary character, its 9583 * surrogate values are stored in {@code dst[dstIndex]} 9584 * (high-surrogate) and {@code dst[dstIndex+1]} 9585 * (low-surrogate), and 2 is returned. 9586 * 9587 * @param codePoint the character (Unicode code point) to be converted. 9588 * @param dst an array of {@code char} in which the 9589 * {@code codePoint}'s UTF-16 value is stored. 9590 * @param dstIndex the start index into the {@code dst} 9591 * array where the converted value is stored. 9592 * @return 1 if the code point is a BMP code point, 2 if the 9593 * code point is a supplementary code point. 9594 * @throws IllegalArgumentException if the specified 9595 * {@code codePoint} is not a valid Unicode code point. 9596 * @throws NullPointerException if the specified {@code dst} is null. 9597 * @throws IndexOutOfBoundsException if {@code dstIndex} 9598 * is negative or not less than {@code dst.length}, or if 9599 * {@code dst} at {@code dstIndex} doesn't have enough 9600 * array element(s) to store the resulting {@code char} 9601 * value(s). (If {@code dstIndex} is equal to 9602 * {@code dst.length-1} and the specified 9603 * {@code codePoint} is a supplementary character, the 9604 * high-surrogate value is not stored in 9605 * {@code dst[dstIndex]}.) 9606 * @since 1.5 9607 */ 9608 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9609 if (isBmpCodePoint(codePoint)) { 9610 dst[dstIndex] = (char) codePoint; 9611 return 1; 9612 } else if (isValidCodePoint(codePoint)) { 9613 toSurrogates(codePoint, dst, dstIndex); 9614 return 2; 9615 } else { 9616 throw new IllegalArgumentException( 9617 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9618 } 9619 } 9620 9621 /** 9622 * Converts the specified character (Unicode code point) to its 9623 * UTF-16 representation stored in a {@code char} array. 
If 9624 * the specified code point is a BMP (Basic Multilingual Plane or 9625 * Plane 0) value, the resulting {@code char} array has 9626 * the same value as {@code codePoint}. If the specified code 9627 * point is a supplementary code point, the resulting 9628 * {@code char} array has the corresponding surrogate pair. 9629 * 9630 * @param codePoint a Unicode code point 9631 * @return a {@code char} array having 9632 * {@code codePoint}'s UTF-16 representation. 9633 * @throws IllegalArgumentException if the specified 9634 * {@code codePoint} is not a valid Unicode code point. 9635 * @since 1.5 9636 */ 9637 public static char[] toChars(int codePoint) { 9638 if (isBmpCodePoint(codePoint)) { 9639 return new char[] { (char) codePoint }; 9640 } else if (isValidCodePoint(codePoint)) { 9641 char[] result = new char[2]; 9642 toSurrogates(codePoint, result, 0); 9643 return result; 9644 } else { 9645 throw new IllegalArgumentException( 9646 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9647 } 9648 } 9649 9650 static void toSurrogates(int codePoint, char[] dst, int index) { 9651 // We write elements "backwards" to guarantee all-or-nothing 9652 dst[index+1] = lowSurrogate(codePoint); 9653 dst[index] = highSurrogate(codePoint); 9654 } 9655 9656 /** 9657 * Returns the number of Unicode code points in the text range of 9658 * the specified char sequence. The text range begins at the 9659 * specified {@code beginIndex} and extends to the 9660 * {@code char} at index {@code endIndex - 1}. Thus the 9661 * length (in {@code char}s) of the text range is 9662 * {@code endIndex-beginIndex}. Unpaired surrogates within 9663 * the text range count as one code point each. 9664 * 9665 * @param seq the char sequence 9666 * @param beginIndex the index to the first {@code char} of 9667 * the text range. 9668 * @param endIndex the index after the last {@code char} of 9669 * the text range. 
9670 * @return the number of Unicode code points in the specified text 9671 * range 9672 * @throws NullPointerException if {@code seq} is null. 9673 * @throws IndexOutOfBoundsException if the 9674 * {@code beginIndex} is negative, or {@code endIndex} 9675 * is larger than the length of the given sequence, or 9676 * {@code beginIndex} is larger than {@code endIndex}. 9677 * @since 1.5 9678 */ 9679 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9680 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 9681 int n = endIndex - beginIndex; 9682 for (int i = beginIndex; i < endIndex; ) { 9683 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9684 isLowSurrogate(seq.charAt(i))) { 9685 n--; 9686 i++; 9687 } 9688 } 9689 return n; 9690 } 9691 9692 /** 9693 * Returns the number of Unicode code points in a subarray of the 9694 * {@code char} array argument. The {@code offset} 9695 * argument is the index of the first {@code char} of the 9696 * subarray and the {@code count} argument specifies the 9697 * length of the subarray in {@code char}s. Unpaired 9698 * surrogates within the subarray count as one code point each. 9699 * 9700 * @param a the {@code char} array 9701 * @param offset the index of the first {@code char} in the 9702 * given {@code char} array 9703 * @param count the length of the subarray in {@code char}s 9704 * @return the number of Unicode code points in the specified subarray 9705 * @throws NullPointerException if {@code a} is null. 9706 * @throws IndexOutOfBoundsException if {@code offset} or 9707 * {@code count} is negative, or if {@code offset + 9708 * count} is larger than the length of the given array. 
9709 * @since 1.5 9710 */ 9711 public static int codePointCount(char[] a, int offset, int count) { 9712 Objects.checkFromIndexSize(offset, count, a.length); 9713 return codePointCountImpl(a, offset, count); 9714 } 9715 9716 static int codePointCountImpl(char[] a, int offset, int count) { 9717 int endIndex = offset + count; 9718 int n = count; 9719 for (int i = offset; i < endIndex; ) { 9720 if (isHighSurrogate(a[i++]) && i < endIndex && 9721 isLowSurrogate(a[i])) { 9722 n--; 9723 i++; 9724 } 9725 } 9726 return n; 9727 } 9728 9729 /** 9730 * Returns the index within the given char sequence that is offset 9731 * from the given {@code index} by {@code codePointOffset} 9732 * code points. Unpaired surrogates within the text range given by 9733 * {@code index} and {@code codePointOffset} count as 9734 * one code point each. 9735 * 9736 * @param seq the char sequence 9737 * @param index the index to be offset 9738 * @param codePointOffset the offset in code points 9739 * @return the index within the char sequence 9740 * @throws NullPointerException if {@code seq} is null. 9741 * @throws IndexOutOfBoundsException if {@code index} 9742 * is negative or larger than the length of the char sequence, 9743 * or if {@code codePointOffset} is positive and the 9744 * subsequence starting with {@code index} has fewer than 9745 * {@code codePointOffset} code points, or if 9746 * {@code codePointOffset} is negative and the subsequence 9747 * before {@code index} has fewer than the absolute value 9748 * of {@code codePointOffset} code points. 
9749 * @since 1.5 9750 */ 9751 public static int offsetByCodePoints(CharSequence seq, int index, 9752 int codePointOffset) { 9753 int length = seq.length(); 9754 if (index < 0 || index > length) { 9755 throw new IndexOutOfBoundsException(); 9756 } 9757 9758 int x = index; 9759 if (codePointOffset >= 0) { 9760 int i; 9761 for (i = 0; x < length && i < codePointOffset; i++) { 9762 if (isHighSurrogate(seq.charAt(x++)) && x < length && 9763 isLowSurrogate(seq.charAt(x))) { 9764 x++; 9765 } 9766 } 9767 if (i < codePointOffset) { 9768 throw new IndexOutOfBoundsException(); 9769 } 9770 } else { 9771 int i; 9772 for (i = codePointOffset; x > 0 && i < 0; i++) { 9773 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 9774 isHighSurrogate(seq.charAt(x-1))) { 9775 x--; 9776 } 9777 } 9778 if (i < 0) { 9779 throw new IndexOutOfBoundsException(); 9780 } 9781 } 9782 return x; 9783 } 9784 9785 /** 9786 * Returns the index within the given {@code char} subarray 9787 * that is offset from the given {@code index} by 9788 * {@code codePointOffset} code points. The 9789 * {@code start} and {@code count} arguments specify a 9790 * subarray of the {@code char} array. Unpaired surrogates 9791 * within the text range given by {@code index} and 9792 * {@code codePointOffset} count as one code point each. 9793 * 9794 * @param a the {@code char} array 9795 * @param start the index of the first {@code char} of the 9796 * subarray 9797 * @param count the length of the subarray in {@code char}s 9798 * @param index the index to be offset 9799 * @param codePointOffset the offset in code points 9800 * @return the index within the subarray 9801 * @throws NullPointerException if {@code a} is null. 
9802 * @throws IndexOutOfBoundsException 9803 * if {@code start} or {@code count} is negative, 9804 * or if {@code start + count} is larger than the length of 9805 * the given array, 9806 * or if {@code index} is less than {@code start} or 9807 * larger then {@code start + count}, 9808 * or if {@code codePointOffset} is positive and the text range 9809 * starting with {@code index} and ending with {@code start + count - 1} 9810 * has fewer than {@code codePointOffset} code 9811 * points, 9812 * or if {@code codePointOffset} is negative and the text range 9813 * starting with {@code start} and ending with {@code index - 1} 9814 * has fewer than the absolute value of 9815 * {@code codePointOffset} code points. 9816 * @since 1.5 9817 */ 9818 public static int offsetByCodePoints(char[] a, int start, int count, 9819 int index, int codePointOffset) { 9820 if (count > a.length-start || start < 0 || count < 0 9821 || index < start || index > start+count) { 9822 throw new IndexOutOfBoundsException(); 9823 } 9824 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9825 } 9826 9827 static int offsetByCodePointsImpl(char[]a, int start, int count, 9828 int index, int codePointOffset) { 9829 int x = index; 9830 if (codePointOffset >= 0) { 9831 int limit = start + count; 9832 int i; 9833 for (i = 0; x < limit && i < codePointOffset; i++) { 9834 if (isHighSurrogate(a[x++]) && x < limit && 9835 isLowSurrogate(a[x])) { 9836 x++; 9837 } 9838 } 9839 if (i < codePointOffset) { 9840 throw new IndexOutOfBoundsException(); 9841 } 9842 } else { 9843 int i; 9844 for (i = codePointOffset; x > start && i < 0; i++) { 9845 if (isLowSurrogate(a[--x]) && x > start && 9846 isHighSurrogate(a[x-1])) { 9847 x--; 9848 } 9849 } 9850 if (i < 0) { 9851 throw new IndexOutOfBoundsException(); 9852 } 9853 } 9854 return x; 9855 } 9856 9857 /** 9858 * Determines if the specified character is a lowercase character. 
9859 * <p> 9860 * A character is lowercase if its general category type, provided 9861 * by {@code Character.getType(ch)}, is 9862 * {@code LOWERCASE_LETTER}, or it has contributory property 9863 * Other_Lowercase as defined by the Unicode Standard. 9864 * <p> 9865 * The following are examples of lowercase characters: 9866 * <blockquote><pre> 9867 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9868 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9869 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9870 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9871 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9872 * </pre></blockquote> 9873 * <p> Many other Unicode characters are lowercase too. 9874 * 9875 * <p><b>Note:</b> This method cannot handle <a 9876 * href="#supplementary"> supplementary characters</a>. To support 9877 * all Unicode characters, including supplementary characters, use 9878 * the {@link #isLowerCase(int)} method. 9879 * 9880 * @param ch the character to be tested. 9881 * @return {@code true} if the character is lowercase; 9882 * {@code false} otherwise. 9883 * @see Character#isLowerCase(char) 9884 * @see Character#isTitleCase(char) 9885 * @see Character#toLowerCase(char) 9886 * @see Character#getType(char) 9887 */ 9888 public static boolean isLowerCase(char ch) { 9889 return isLowerCase((int)ch); 9890 } 9891 9892 /** 9893 * Determines if the specified character (Unicode code point) is a 9894 * lowercase character. 9895 * <p> 9896 * A character is lowercase if its general category type, provided 9897 * by {@link Character#getType getType(codePoint)}, is 9898 * {@code LOWERCASE_LETTER}, or it has contributory property 9899 * Other_Lowercase as defined by the Unicode Standard. 
9900 * <p> 9901 * The following are examples of lowercase characters: 9902 * <blockquote><pre> 9903 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9904 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9905 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9906 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9907 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9908 * </pre></blockquote> 9909 * <p> Many other Unicode characters are lowercase too. 9910 * 9911 * @param codePoint the character (Unicode code point) to be tested. 9912 * @return {@code true} if the character is lowercase; 9913 * {@code false} otherwise. 9914 * @see Character#isLowerCase(int) 9915 * @see Character#isTitleCase(int) 9916 * @see Character#toLowerCase(int) 9917 * @see Character#getType(int) 9918 * @since 1.5 9919 */ 9920 public static boolean isLowerCase(int codePoint) { 9921 return CharacterData.of(codePoint).isLowerCase(codePoint); 9922 } 9923 9924 /** 9925 * Determines if the specified character is an uppercase character. 9926 * <p> 9927 * A character is uppercase if its general category type, provided by 9928 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9929 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9930 * <p> 9931 * The following are examples of uppercase characters: 9932 * <blockquote><pre> 9933 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9934 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9935 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9936 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9937 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9938 * </pre></blockquote> 9939 * <p> Many other Unicode characters are uppercase too. 
9940 * 9941 * <p><b>Note:</b> This method cannot handle <a 9942 * href="#supplementary"> supplementary characters</a>. To support 9943 * all Unicode characters, including supplementary characters, use 9944 * the {@link #isUpperCase(int)} method. 9945 * 9946 * @param ch the character to be tested. 9947 * @return {@code true} if the character is uppercase; 9948 * {@code false} otherwise. 9949 * @see Character#isLowerCase(char) 9950 * @see Character#isTitleCase(char) 9951 * @see Character#toUpperCase(char) 9952 * @see Character#getType(char) 9953 * @since 1.0 9954 */ 9955 public static boolean isUpperCase(char ch) { 9956 return isUpperCase((int)ch); 9957 } 9958 9959 /** 9960 * Determines if the specified character (Unicode code point) is an uppercase character. 9961 * <p> 9962 * A character is uppercase if its general category type, provided by 9963 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 9964 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9965 * <p> 9966 * The following are examples of uppercase characters: 9967 * <blockquote><pre> 9968 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9969 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9970 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9971 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9972 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9973 * </pre></blockquote> 9974 * <p> Many other Unicode characters are uppercase too. 9975 * 9976 * @param codePoint the character (Unicode code point) to be tested. 9977 * @return {@code true} if the character is uppercase; 9978 * {@code false} otherwise. 
9979 * @see Character#isLowerCase(int) 9980 * @see Character#isTitleCase(int) 9981 * @see Character#toUpperCase(int) 9982 * @see Character#getType(int) 9983 * @since 1.5 9984 */ 9985 public static boolean isUpperCase(int codePoint) { 9986 return CharacterData.of(codePoint).isUpperCase(codePoint); 9987 } 9988 9989 /** 9990 * Determines if the specified character is a titlecase character. 9991 * <p> 9992 * A character is a titlecase character if its general 9993 * category type, provided by {@code Character.getType(ch)}, 9994 * is {@code TITLECASE_LETTER}. 9995 * <p> 9996 * Some characters look like pairs of Latin letters. For example, there 9997 * is an uppercase letter that looks like "LJ" and has a corresponding 9998 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9999 * is the appropriate form to use when rendering a word in lowercase 10000 * with initial capitals, as for a book title. 10001 * <p> 10002 * These are some of the Unicode characters for which this method returns 10003 * {@code true}: 10004 * <ul> 10005 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10006 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10007 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10008 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10009 * </ul> 10010 * <p> Many other Unicode characters are titlecase too. 10011 * 10012 * <p><b>Note:</b> This method cannot handle <a 10013 * href="#supplementary"> supplementary characters</a>. To support 10014 * all Unicode characters, including supplementary characters, use 10015 * the {@link #isTitleCase(int)} method. 10016 * 10017 * @param ch the character to be tested. 10018 * @return {@code true} if the character is titlecase; 10019 * {@code false} otherwise. 
10020 * @see Character#isLowerCase(char) 10021 * @see Character#isUpperCase(char) 10022 * @see Character#toTitleCase(char) 10023 * @see Character#getType(char) 10024 * @since 1.0.2 10025 */ 10026 public static boolean isTitleCase(char ch) { 10027 return isTitleCase((int)ch); 10028 } 10029 10030 /** 10031 * Determines if the specified character (Unicode code point) is a titlecase character. 10032 * <p> 10033 * A character is a titlecase character if its general 10034 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10035 * is {@code TITLECASE_LETTER}. 10036 * <p> 10037 * Some characters look like pairs of Latin letters. For example, there 10038 * is an uppercase letter that looks like "LJ" and has a corresponding 10039 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10040 * is the appropriate form to use when rendering a word in lowercase 10041 * with initial capitals, as for a book title. 10042 * <p> 10043 * These are some of the Unicode characters for which this method returns 10044 * {@code true}: 10045 * <ul> 10046 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10047 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10048 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10049 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10050 * </ul> 10051 * <p> Many other Unicode characters are titlecase too. 10052 * 10053 * @param codePoint the character (Unicode code point) to be tested. 10054 * @return {@code true} if the character is titlecase; 10055 * {@code false} otherwise. 10056 * @see Character#isLowerCase(int) 10057 * @see Character#isUpperCase(int) 10058 * @see Character#toTitleCase(int) 10059 * @see Character#getType(int) 10060 * @since 1.5 10061 */ 10062 public static boolean isTitleCase(int codePoint) { 10063 return getType(codePoint) == Character.TITLECASE_LETTER; 10064 } 10065 10066 /** 10067 * Determines if the specified character is a digit. 
10068 * <p> 10069 * A character is a digit if its general category type, provided 10070 * by {@code Character.getType(ch)}, is 10071 * {@code DECIMAL_DIGIT_NUMBER}. 10072 * <p> 10073 * Some Unicode character ranges that contain digits: 10074 * <ul> 10075 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10076 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10077 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10078 * Arabic-Indic digits 10079 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10080 * Extended Arabic-Indic digits 10081 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10082 * Devanagari digits 10083 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10084 * Fullwidth digits 10085 * </ul> 10086 * 10087 * Many other character ranges contain digits as well. 10088 * 10089 * <p><b>Note:</b> This method cannot handle <a 10090 * href="#supplementary"> supplementary characters</a>. To support 10091 * all Unicode characters, including supplementary characters, use 10092 * the {@link #isDigit(int)} method. 10093 * 10094 * @param ch the character to be tested. 10095 * @return {@code true} if the character is a digit; 10096 * {@code false} otherwise. 10097 * @see Character#digit(char, int) 10098 * @see Character#forDigit(int, int) 10099 * @see Character#getType(char) 10100 */ 10101 public static boolean isDigit(char ch) { 10102 return isDigit((int)ch); 10103 } 10104 10105 /** 10106 * Determines if the specified character (Unicode code point) is a digit. 10107 * <p> 10108 * A character is a digit if its general category type, provided 10109 * by {@link Character#getType(int) getType(codePoint)}, is 10110 * {@code DECIMAL_DIGIT_NUMBER}. 
10111 * <p> 10112 * Some Unicode character ranges that contain digits: 10113 * <ul> 10114 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10115 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10116 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10117 * Arabic-Indic digits 10118 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10119 * Extended Arabic-Indic digits 10120 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10121 * Devanagari digits 10122 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10123 * Fullwidth digits 10124 * </ul> 10125 * 10126 * Many other character ranges contain digits as well. 10127 * 10128 * @param codePoint the character (Unicode code point) to be tested. 10129 * @return {@code true} if the character is a digit; 10130 * {@code false} otherwise. 10131 * @see Character#forDigit(int, int) 10132 * @see Character#getType(int) 10133 * @since 1.5 10134 */ 10135 public static boolean isDigit(int codePoint) { 10136 return CharacterData.of(codePoint).isDigit(codePoint); 10137 } 10138 10139 /** 10140 * Determines if a character is defined in Unicode. 10141 * <p> 10142 * A character is defined if at least one of the following is true: 10143 * <ul> 10144 * <li>It has an entry in the UnicodeData file. 10145 * <li>It has a value in a range defined by the UnicodeData file. 10146 * </ul> 10147 * 10148 * <p><b>Note:</b> This method cannot handle <a 10149 * href="#supplementary"> supplementary characters</a>. To support 10150 * all Unicode characters, including supplementary characters, use 10151 * the {@link #isDefined(int)} method. 10152 * 10153 * @param ch the character to be tested 10154 * @return {@code true} if the character has a defined meaning 10155 * in Unicode; {@code false} otherwise. 
10156 * @see Character#isDigit(char) 10157 * @see Character#isLetter(char) 10158 * @see Character#isLetterOrDigit(char) 10159 * @see Character#isLowerCase(char) 10160 * @see Character#isTitleCase(char) 10161 * @see Character#isUpperCase(char) 10162 * @since 1.0.2 10163 */ 10164 public static boolean isDefined(char ch) { 10165 return isDefined((int)ch); 10166 } 10167 10168 /** 10169 * Determines if a character (Unicode code point) is defined in Unicode. 10170 * <p> 10171 * A character is defined if at least one of the following is true: 10172 * <ul> 10173 * <li>It has an entry in the UnicodeData file. 10174 * <li>It has a value in a range defined by the UnicodeData file. 10175 * </ul> 10176 * 10177 * @param codePoint the character (Unicode code point) to be tested. 10178 * @return {@code true} if the character has a defined meaning 10179 * in Unicode; {@code false} otherwise. 10180 * @see Character#isDigit(int) 10181 * @see Character#isLetter(int) 10182 * @see Character#isLetterOrDigit(int) 10183 * @see Character#isLowerCase(int) 10184 * @see Character#isTitleCase(int) 10185 * @see Character#isUpperCase(int) 10186 * @since 1.5 10187 */ 10188 public static boolean isDefined(int codePoint) { 10189 return getType(codePoint) != Character.UNASSIGNED; 10190 } 10191 10192 /** 10193 * Determines if the specified character is a letter. 10194 * <p> 10195 * A character is considered to be a letter if its general 10196 * category type, provided by {@code Character.getType(ch)}, 10197 * is any of the following: 10198 * <ul> 10199 * <li> {@code UPPERCASE_LETTER} 10200 * <li> {@code LOWERCASE_LETTER} 10201 * <li> {@code TITLECASE_LETTER} 10202 * <li> {@code MODIFIER_LETTER} 10203 * <li> {@code OTHER_LETTER} 10204 * </ul> 10205 * 10206 * Not all letters have case. Many characters are 10207 * letters but are neither uppercase nor lowercase nor titlecase. 10208 * 10209 * <p><b>Note:</b> This method cannot handle <a 10210 * href="#supplementary"> supplementary characters</a>. 
To support 10211 * all Unicode characters, including supplementary characters, use 10212 * the {@link #isLetter(int)} method. 10213 * 10214 * @param ch the character to be tested. 10215 * @return {@code true} if the character is a letter; 10216 * {@code false} otherwise. 10217 * @see Character#isDigit(char) 10218 * @see Character#isJavaIdentifierStart(char) 10219 * @see Character#isJavaLetter(char) 10220 * @see Character#isJavaLetterOrDigit(char) 10221 * @see Character#isLetterOrDigit(char) 10222 * @see Character#isLowerCase(char) 10223 * @see Character#isTitleCase(char) 10224 * @see Character#isUnicodeIdentifierStart(char) 10225 * @see Character#isUpperCase(char) 10226 */ 10227 public static boolean isLetter(char ch) { 10228 return isLetter((int)ch); 10229 } 10230 10231 /** 10232 * Determines if the specified character (Unicode code point) is a letter. 10233 * <p> 10234 * A character is considered to be a letter if its general 10235 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10236 * is any of the following: 10237 * <ul> 10238 * <li> {@code UPPERCASE_LETTER} 10239 * <li> {@code LOWERCASE_LETTER} 10240 * <li> {@code TITLECASE_LETTER} 10241 * <li> {@code MODIFIER_LETTER} 10242 * <li> {@code OTHER_LETTER} 10243 * </ul> 10244 * 10245 * Not all letters have case. Many characters are 10246 * letters but are neither uppercase nor lowercase nor titlecase. 10247 * 10248 * @param codePoint the character (Unicode code point) to be tested. 10249 * @return {@code true} if the character is a letter; 10250 * {@code false} otherwise. 
10251 * @see Character#isDigit(int) 10252 * @see Character#isJavaIdentifierStart(int) 10253 * @see Character#isLetterOrDigit(int) 10254 * @see Character#isLowerCase(int) 10255 * @see Character#isTitleCase(int) 10256 * @see Character#isUnicodeIdentifierStart(int) 10257 * @see Character#isUpperCase(int) 10258 * @since 1.5 10259 */ 10260 public static boolean isLetter(int codePoint) { 10261 return ((((1 << Character.UPPERCASE_LETTER) | 10262 (1 << Character.LOWERCASE_LETTER) | 10263 (1 << Character.TITLECASE_LETTER) | 10264 (1 << Character.MODIFIER_LETTER) | 10265 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10266 != 0; 10267 } 10268 10269 /** 10270 * Determines if the specified character is a letter or digit. 10271 * <p> 10272 * A character is considered to be a letter or digit if either 10273 * {@code Character.isLetter(char ch)} or 10274 * {@code Character.isDigit(char ch)} returns 10275 * {@code true} for the character. 10276 * 10277 * <p><b>Note:</b> This method cannot handle <a 10278 * href="#supplementary"> supplementary characters</a>. To support 10279 * all Unicode characters, including supplementary characters, use 10280 * the {@link #isLetterOrDigit(int)} method. 10281 * 10282 * @param ch the character to be tested. 10283 * @return {@code true} if the character is a letter or digit; 10284 * {@code false} otherwise. 10285 * @see Character#isDigit(char) 10286 * @see Character#isJavaIdentifierPart(char) 10287 * @see Character#isJavaLetter(char) 10288 * @see Character#isJavaLetterOrDigit(char) 10289 * @see Character#isLetter(char) 10290 * @see Character#isUnicodeIdentifierPart(char) 10291 * @since 1.0.2 10292 */ 10293 public static boolean isLetterOrDigit(char ch) { 10294 return isLetterOrDigit((int)ch); 10295 } 10296 10297 /** 10298 * Determines if the specified character (Unicode code point) is a letter or digit. 
10299 * <p> 10300 * A character is considered to be a letter or digit if either 10301 * {@link #isLetter(int) isLetter(codePoint)} or 10302 * {@link #isDigit(int) isDigit(codePoint)} returns 10303 * {@code true} for the character. 10304 * 10305 * @param codePoint the character (Unicode code point) to be tested. 10306 * @return {@code true} if the character is a letter or digit; 10307 * {@code false} otherwise. 10308 * @see Character#isDigit(int) 10309 * @see Character#isJavaIdentifierPart(int) 10310 * @see Character#isLetter(int) 10311 * @see Character#isUnicodeIdentifierPart(int) 10312 * @since 1.5 10313 */ 10314 public static boolean isLetterOrDigit(int codePoint) { 10315 return ((((1 << Character.UPPERCASE_LETTER) | 10316 (1 << Character.LOWERCASE_LETTER) | 10317 (1 << Character.TITLECASE_LETTER) | 10318 (1 << Character.MODIFIER_LETTER) | 10319 (1 << Character.OTHER_LETTER) | 10320 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10321 != 0; 10322 } 10323 10324 /** 10325 * Determines if the specified character is permissible as the first 10326 * character in a Java identifier. 10327 * <p> 10328 * A character may start a Java identifier if and only if 10329 * one of the following conditions is true: 10330 * <ul> 10331 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10332 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10333 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10334 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10335 * </ul> 10336 * 10337 * @param ch the character to be tested. 10338 * @return {@code true} if the character may start a Java 10339 * identifier; {@code false} otherwise. 
10340 * @see Character#isJavaLetterOrDigit(char) 10341 * @see Character#isJavaIdentifierStart(char) 10342 * @see Character#isJavaIdentifierPart(char) 10343 * @see Character#isLetter(char) 10344 * @see Character#isLetterOrDigit(char) 10345 * @see Character#isUnicodeIdentifierStart(char) 10346 * @since 1.0.2 10347 * @deprecated Replaced by isJavaIdentifierStart(char). 10348 */ 10349 @Deprecated(since="1.1") 10350 public static boolean isJavaLetter(char ch) { 10351 return isJavaIdentifierStart(ch); 10352 } 10353 10354 /** 10355 * Determines if the specified character may be part of a Java 10356 * identifier as other than the first character. 10357 * <p> 10358 * A character may be part of a Java identifier if and only if one 10359 * of the following conditions is true: 10360 * <ul> 10361 * <li> it is a letter 10362 * <li> it is a currency symbol (such as {@code '$'}) 10363 * <li> it is a connecting punctuation character (such as {@code '_'}) 10364 * <li> it is a digit 10365 * <li> it is a numeric letter (such as a Roman numeral character) 10366 * <li> it is a combining mark 10367 * <li> it is a non-spacing mark 10368 * <li> {@code isIdentifierIgnorable} returns 10369 * {@code true} for the character. 10370 * </ul> 10371 * 10372 * @param ch the character to be tested. 10373 * @return {@code true} if the character may be part of a 10374 * Java identifier; {@code false} otherwise. 10375 * @see Character#isJavaLetter(char) 10376 * @see Character#isJavaIdentifierStart(char) 10377 * @see Character#isJavaIdentifierPart(char) 10378 * @see Character#isLetter(char) 10379 * @see Character#isLetterOrDigit(char) 10380 * @see Character#isUnicodeIdentifierPart(char) 10381 * @see Character#isIdentifierIgnorable(char) 10382 * @since 1.0.2 10383 * @deprecated Replaced by isJavaIdentifierPart(char). 
10384 */ 10385 @Deprecated(since="1.1") 10386 public static boolean isJavaLetterOrDigit(char ch) { 10387 return isJavaIdentifierPart(ch); 10388 } 10389 10390 /** 10391 * Determines if the specified character (Unicode code point) is alphabetic. 10392 * <p> 10393 * A character is considered to be alphabetic if its general category type, 10394 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10395 * the following: 10396 * <ul> 10397 * <li> {@code UPPERCASE_LETTER} 10398 * <li> {@code LOWERCASE_LETTER} 10399 * <li> {@code TITLECASE_LETTER} 10400 * <li> {@code MODIFIER_LETTER} 10401 * <li> {@code OTHER_LETTER} 10402 * <li> {@code LETTER_NUMBER} 10403 * </ul> 10404 * or it has contributory property Other_Alphabetic as defined by the 10405 * Unicode Standard. 10406 * 10407 * @param codePoint the character (Unicode code point) to be tested. 10408 * @return {@code true} if the character is a Unicode alphabet 10409 * character, {@code false} otherwise. 10410 * @since 1.7 10411 */ 10412 public static boolean isAlphabetic(int codePoint) { 10413 return (((((1 << Character.UPPERCASE_LETTER) | 10414 (1 << Character.LOWERCASE_LETTER) | 10415 (1 << Character.TITLECASE_LETTER) | 10416 (1 << Character.MODIFIER_LETTER) | 10417 (1 << Character.OTHER_LETTER) | 10418 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10419 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10420 } 10421 10422 /** 10423 * Determines if the specified character (Unicode code point) is a CJKV 10424 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10425 * the Unicode Standard. 10426 * 10427 * @param codePoint the character (Unicode code point) to be tested. 10428 * @return {@code true} if the character is a Unicode ideograph 10429 * character, {@code false} otherwise. 
10430 * @since 1.7 10431 */ 10432 public static boolean isIdeographic(int codePoint) { 10433 return CharacterData.of(codePoint).isIdeographic(codePoint); 10434 } 10435 10436 /** 10437 * Determines if the specified character is 10438 * permissible as the first character in a Java identifier. 10439 * <p> 10440 * A character may start a Java identifier if and only if 10441 * one of the following conditions is true: 10442 * <ul> 10443 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10444 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10445 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10446 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10447 * </ul> 10448 * 10449 * <p><b>Note:</b> This method cannot handle <a 10450 * href="#supplementary"> supplementary characters</a>. To support 10451 * all Unicode characters, including supplementary characters, use 10452 * the {@link #isJavaIdentifierStart(int)} method. 10453 * 10454 * @param ch the character to be tested. 10455 * @return {@code true} if the character may start a Java identifier; 10456 * {@code false} otherwise. 10457 * @see Character#isJavaIdentifierPart(char) 10458 * @see Character#isLetter(char) 10459 * @see Character#isUnicodeIdentifierStart(char) 10460 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10461 * @since 1.1 10462 */ 10463 @SuppressWarnings("doclint:reference") // cross-module links 10464 public static boolean isJavaIdentifierStart(char ch) { 10465 return isJavaIdentifierStart((int)ch); 10466 } 10467 10468 /** 10469 * Determines if the character (Unicode code point) is 10470 * permissible as the first character in a Java identifier. 
10471 * <p> 10472 * A character may start a Java identifier if and only if 10473 * one of the following conditions is true: 10474 * <ul> 10475 * <li> {@link #isLetter(int) isLetter(codePoint)} 10476 * returns {@code true} 10477 * <li> {@link #getType(int) getType(codePoint)} 10478 * returns {@code LETTER_NUMBER} 10479 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10480 * <li> the referenced character is a connecting punctuation character 10481 * (such as {@code '_'}). 10482 * </ul> 10483 * 10484 * @param codePoint the character (Unicode code point) to be tested. 10485 * @return {@code true} if the character may start a Java identifier; 10486 * {@code false} otherwise. 10487 * @see Character#isJavaIdentifierPart(int) 10488 * @see Character#isLetter(int) 10489 * @see Character#isUnicodeIdentifierStart(int) 10490 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10491 * @since 1.5 10492 */ 10493 @SuppressWarnings("doclint:reference") // cross-module links 10494 public static boolean isJavaIdentifierStart(int codePoint) { 10495 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10496 } 10497 10498 /** 10499 * Determines if the specified character may be part of a Java 10500 * identifier as other than the first character. 
10501 * <p> 10502 * A character may be part of a Java identifier if any of the following 10503 * conditions are true: 10504 * <ul> 10505 * <li> it is a letter 10506 * <li> it is a currency symbol (such as {@code '$'}) 10507 * <li> it is a connecting punctuation character (such as {@code '_'}) 10508 * <li> it is a digit 10509 * <li> it is a numeric letter (such as a Roman numeral character) 10510 * <li> it is a combining mark 10511 * <li> it is a non-spacing mark 10512 * <li> {@code isIdentifierIgnorable} returns 10513 * {@code true} for the character 10514 * </ul> 10515 * 10516 * <p><b>Note:</b> This method cannot handle <a 10517 * href="#supplementary"> supplementary characters</a>. To support 10518 * all Unicode characters, including supplementary characters, use 10519 * the {@link #isJavaIdentifierPart(int)} method. 10520 * 10521 * @param ch the character to be tested. 10522 * @return {@code true} if the character may be part of a 10523 * Java identifier; {@code false} otherwise. 10524 * @see Character#isIdentifierIgnorable(char) 10525 * @see Character#isJavaIdentifierStart(char) 10526 * @see Character#isLetterOrDigit(char) 10527 * @see Character#isUnicodeIdentifierPart(char) 10528 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10529 * @since 1.1 10530 */ 10531 @SuppressWarnings("doclint:reference") // cross-module links 10532 public static boolean isJavaIdentifierPart(char ch) { 10533 return isJavaIdentifierPart((int)ch); 10534 } 10535 10536 /** 10537 * Determines if the character (Unicode code point) may be part of a Java 10538 * identifier as other than the first character. 
10539 * <p> 10540 * A character may be part of a Java identifier if any of the following 10541 * conditions are true: 10542 * <ul> 10543 * <li> it is a letter 10544 * <li> it is a currency symbol (such as {@code '$'}) 10545 * <li> it is a connecting punctuation character (such as {@code '_'}) 10546 * <li> it is a digit 10547 * <li> it is a numeric letter (such as a Roman numeral character) 10548 * <li> it is a combining mark 10549 * <li> it is a non-spacing mark 10550 * <li> {@link #isIdentifierIgnorable(int) 10551 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10552 * the code point 10553 * </ul> 10554 * 10555 * @param codePoint the character (Unicode code point) to be tested. 10556 * @return {@code true} if the character may be part of a 10557 * Java identifier; {@code false} otherwise. 10558 * @see Character#isIdentifierIgnorable(int) 10559 * @see Character#isJavaIdentifierStart(int) 10560 * @see Character#isLetterOrDigit(int) 10561 * @see Character#isUnicodeIdentifierPart(int) 10562 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10563 * @since 1.5 10564 */ 10565 @SuppressWarnings("doclint:reference") // cross-module links 10566 public static boolean isJavaIdentifierPart(int codePoint) { 10567 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10568 } 10569 10570 /** 10571 * Determines if the specified character is permissible as the 10572 * first character in a Unicode identifier. 10573 * <p> 10574 * A character may start a Unicode identifier if and only if 10575 * one of the following conditions is true: 10576 * <ul> 10577 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10578 * <li> {@link #getType(char) getType(ch)} returns 10579 * {@code LETTER_NUMBER}. 10580 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10581 * {@code Other_ID_Start}</a> character. 
10582 * </ul> 10583 * <p> 10584 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10585 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10586 * with the following profile of UAX31: 10587 * <pre> 10588 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10589 * </pre> 10590 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10591 * compatibility. 10592 * 10593 * <p><b>Note:</b> This method cannot handle <a 10594 * href="#supplementary"> supplementary characters</a>. To support 10595 * all Unicode characters, including supplementary characters, use 10596 * the {@link #isUnicodeIdentifierStart(int)} method. 10597 * 10598 * @param ch the character to be tested. 10599 * @return {@code true} if the character may start a Unicode 10600 * identifier; {@code false} otherwise. 10601 * 10602 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10603 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10604 * @see Character#isJavaIdentifierStart(char) 10605 * @see Character#isLetter(char) 10606 * @see Character#isUnicodeIdentifierPart(char) 10607 * @since 1.1 10608 */ 10609 public static boolean isUnicodeIdentifierStart(char ch) { 10610 return isUnicodeIdentifierStart((int)ch); 10611 } 10612 10613 /** 10614 * Determines if the specified character (Unicode code point) is permissible as the 10615 * first character in a Unicode identifier. 10616 * <p> 10617 * A character may start a Unicode identifier if and only if 10618 * one of the following conditions is true: 10619 * <ul> 10620 * <li> {@link #isLetter(int) isLetter(codePoint)} 10621 * returns {@code true} 10622 * <li> {@link #getType(int) getType(codePoint)} 10623 * returns {@code LETTER_NUMBER}. 10624 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10625 * {@code Other_ID_Start}</a> character. 
10626 * </ul> 10627 * <p> 10628 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10629 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10630 * with the following profile of UAX31: 10631 * <pre> 10632 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10633 * </pre> 10634 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10635 * compatibility. 10636 * 10637 * @param codePoint the character (Unicode code point) to be tested. 10638 * @return {@code true} if the character may start a Unicode 10639 * identifier; {@code false} otherwise. 10640 * 10641 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10642 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10643 * @see Character#isJavaIdentifierStart(int) 10644 * @see Character#isLetter(int) 10645 * @see Character#isUnicodeIdentifierPart(int) 10646 * @since 1.5 10647 */ 10648 public static boolean isUnicodeIdentifierStart(int codePoint) { 10649 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10650 } 10651 10652 /** 10653 * Determines if the specified character may be part of a Unicode 10654 * identifier as other than the first character. 10655 * <p> 10656 * A character may be part of a Unicode identifier if and only if 10657 * one of the following statements is true: 10658 * <ul> 10659 * <li> it is a letter 10660 * <li> it is a connecting punctuation character (such as {@code '_'}) 10661 * <li> it is a digit 10662 * <li> it is a numeric letter (such as a Roman numeral character) 10663 * <li> it is a combining mark 10664 * <li> it is a non-spacing mark 10665 * <li> {@code isIdentifierIgnorable} returns 10666 * {@code true} for this character. 10667 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10668 * {@code Other_ID_Start}</a> character. 
10669 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10670 * {@code Other_ID_Continue}</a> character. 10671 * </ul> 10672 * <p> 10673 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10674 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10675 * with the following profile of UAX31: 10676 * <pre> 10677 * Continue := Start + ID_Continue + ignorable 10678 * Medial := empty 10679 * ignorable := isIdentifierIgnorable(char) returns true for the character 10680 * </pre> 10681 * {@code ignorable} is added to {@code Continue} for backward 10682 * compatibility. 10683 * 10684 * <p><b>Note:</b> This method cannot handle <a 10685 * href="#supplementary"> supplementary characters</a>. To support 10686 * all Unicode characters, including supplementary characters, use 10687 * the {@link #isUnicodeIdentifierPart(int)} method. 10688 * 10689 * @param ch the character to be tested. 10690 * @return {@code true} if the character may be part of a 10691 * Unicode identifier; {@code false} otherwise. 10692 * 10693 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10694 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10695 * @see Character#isIdentifierIgnorable(char) 10696 * @see Character#isJavaIdentifierPart(char) 10697 * @see Character#isLetterOrDigit(char) 10698 * @see Character#isUnicodeIdentifierStart(char) 10699 * @since 1.1 10700 */ 10701 public static boolean isUnicodeIdentifierPart(char ch) { 10702 return isUnicodeIdentifierPart((int)ch); 10703 } 10704 10705 /** 10706 * Determines if the specified character (Unicode code point) may be part of a Unicode 10707 * identifier as other than the first character. 
10708 * <p> 10709 * A character may be part of a Unicode identifier if and only if 10710 * one of the following statements is true: 10711 * <ul> 10712 * <li> it is a letter 10713 * <li> it is a connecting punctuation character (such as {@code '_'}) 10714 * <li> it is a digit 10715 * <li> it is a numeric letter (such as a Roman numeral character) 10716 * <li> it is a combining mark 10717 * <li> it is a non-spacing mark 10718 * <li> {@code isIdentifierIgnorable} returns 10719 * {@code true} for this character. 10720 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10721 * {@code Other_ID_Start}</a> character. 10722 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10723 * {@code Other_ID_Continue}</a> character. 10724 * </ul> 10725 * <p> 10726 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10727 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10728 * with the following profile of UAX31: 10729 * <pre> 10730 * Continue := Start + ID_Continue + ignorable 10731 * Medial := empty 10732 * ignorable := isIdentifierIgnorable(int) returns true for the character 10733 * </pre> 10734 * {@code ignorable} is added to {@code Continue} for backward 10735 * compatibility. 10736 * 10737 * @param codePoint the character (Unicode code point) to be tested. 10738 * @return {@code true} if the character may be part of a 10739 * Unicode identifier; {@code false} otherwise. 
10740 * 10741 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10742 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10743 * @see Character#isIdentifierIgnorable(int) 10744 * @see Character#isJavaIdentifierPart(int) 10745 * @see Character#isLetterOrDigit(int) 10746 * @see Character#isUnicodeIdentifierStart(int) 10747 * @since 1.5 10748 */ 10749 public static boolean isUnicodeIdentifierPart(int codePoint) { 10750 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 10751 } 10752 10753 /** 10754 * Determines if the specified character should be regarded as 10755 * an ignorable character in a Java identifier or a Unicode identifier. 10756 * <p> 10757 * The following Unicode characters are ignorable in a Java identifier 10758 * or a Unicode identifier: 10759 * <ul> 10760 * <li>ISO control characters that are not whitespace 10761 * <ul> 10762 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10763 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10764 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10765 * </ul> 10766 * 10767 * <li>all characters that have the {@code FORMAT} general 10768 * category value 10769 * </ul> 10770 * 10771 * <p><b>Note:</b> This method cannot handle <a 10772 * href="#supplementary"> supplementary characters</a>. To support 10773 * all Unicode characters, including supplementary characters, use 10774 * the {@link #isIdentifierIgnorable(int)} method. 10775 * 10776 * @param ch the character to be tested. 10777 * @return {@code true} if the character is an ignorable control 10778 * character that may be part of a Java or Unicode identifier; 10779 * {@code false} otherwise. 
10780 * @see Character#isJavaIdentifierPart(char) 10781 * @see Character#isUnicodeIdentifierPart(char) 10782 * @since 1.1 10783 */ 10784 public static boolean isIdentifierIgnorable(char ch) { 10785 return isIdentifierIgnorable((int)ch); 10786 } 10787 10788 /** 10789 * Determines if the specified character (Unicode code point) should be regarded as 10790 * an ignorable character in a Java identifier or a Unicode identifier. 10791 * <p> 10792 * The following Unicode characters are ignorable in a Java identifier 10793 * or a Unicode identifier: 10794 * <ul> 10795 * <li>ISO control characters that are not whitespace 10796 * <ul> 10797 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10798 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10799 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10800 * </ul> 10801 * 10802 * <li>all characters that have the {@code FORMAT} general 10803 * category value 10804 * </ul> 10805 * 10806 * @param codePoint the character (Unicode code point) to be tested. 10807 * @return {@code true} if the character is an ignorable control 10808 * character that may be part of a Java or Unicode identifier; 10809 * {@code false} otherwise. 10810 * @see Character#isJavaIdentifierPart(int) 10811 * @see Character#isUnicodeIdentifierPart(int) 10812 * @since 1.5 10813 */ 10814 public static boolean isIdentifierIgnorable(int codePoint) { 10815 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 10816 } 10817 10818 /** 10819 * Determines if the specified character (Unicode code point) is an Emoji. 10820 * <p> 10821 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 10822 * property, defined in 10823 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10824 * Unicode Emoji (Technical Standard #51)</a>. 10825 * 10826 * @param codePoint the character (Unicode code point) to be tested. 
10827 * @return {@code true} if the character is an Emoji; 10828 * {@code false} otherwise. 10829 * @since 21 10830 */ 10831 public static boolean isEmoji(int codePoint) { 10832 return CharacterData.of(codePoint).isEmoji(codePoint); 10833 } 10834 10835 /** 10836 * Determines if the specified character (Unicode code point) has the 10837 * Emoji Presentation property by default. 10838 * <p> 10839 * A character is considered to have the Emoji Presentation property if and 10840 * only if it has the {@code Emoji_Presentation} property, defined in 10841 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10842 * Unicode Emoji (Technical Standard #51)</a>. 10843 * 10844 * @param codePoint the character (Unicode code point) to be tested. 10845 * @return {@code true} if the character has the Emoji Presentation 10846 * property; {@code false} otherwise. 10847 * @since 21 10848 */ 10849 public static boolean isEmojiPresentation(int codePoint) { 10850 return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 10851 } 10852 10853 /** 10854 * Determines if the specified character (Unicode code point) is an 10855 * Emoji Modifier. 10856 * <p> 10857 * A character is considered to be an Emoji Modifier if and only if it has 10858 * the {@code Emoji_Modifier} property, defined in 10859 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10860 * Unicode Emoji (Technical Standard #51)</a>. 10861 * 10862 * @param codePoint the character (Unicode code point) to be tested. 10863 * @return {@code true} if the character is an Emoji Modifier; 10864 * {@code false} otherwise. 10865 * @since 21 10866 */ 10867 public static boolean isEmojiModifier(int codePoint) { 10868 return CharacterData.of(codePoint).isEmojiModifier(codePoint); 10869 } 10870 10871 /** 10872 * Determines if the specified character (Unicode code point) is an 10873 * Emoji Modifier Base. 
10874 * <p> 10875 * A character is considered to be an Emoji Modifier Base if and only if it has 10876 * the {@code Emoji_Modifier_Base} property, defined in 10877 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10878 * Unicode Emoji (Technical Standard #51)</a>. 10879 * 10880 * @param codePoint the character (Unicode code point) to be tested. 10881 * @return {@code true} if the character is an Emoji Modifier Base; 10882 * {@code false} otherwise. 10883 * @since 21 10884 */ 10885 public static boolean isEmojiModifierBase(int codePoint) { 10886 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 10887 } 10888 10889 /** 10890 * Determines if the specified character (Unicode code point) is an 10891 * Emoji Component. 10892 * <p> 10893 * A character is considered to be an Emoji Component if and only if it has 10894 * the {@code Emoji_Component} property, defined in 10895 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10896 * Unicode Emoji (Technical Standard #51)</a>. 10897 * 10898 * @param codePoint the character (Unicode code point) to be tested. 10899 * @return {@code true} if the character is an Emoji Component; 10900 * {@code false} otherwise. 10901 * @since 21 10902 */ 10903 public static boolean isEmojiComponent(int codePoint) { 10904 return CharacterData.of(codePoint).isEmojiComponent(codePoint); 10905 } 10906 10907 /** 10908 * Determines if the specified character (Unicode code point) is 10909 * an Extended Pictographic. 10910 * <p> 10911 * A character is considered to be an Extended Pictographic if and only if it has 10912 * the {@code Extended_Pictographic} property, defined in 10913 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10914 * Unicode Emoji (Technical Standard #51)</a>. 10915 * 10916 * @param codePoint the character (Unicode code point) to be tested. 
10917 * @return {@code true} if the character is an Extended Pictographic; 10918 * {@code false} otherwise. 10919 * @since 21 10920 */ 10921 public static boolean isExtendedPictographic(int codePoint) { 10922 return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 10923 } 10924 10925 /** 10926 * Converts the character argument to lowercase using case 10927 * mapping information from the UnicodeData file. 10928 * <p> 10929 * Note that 10930 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 10931 * does not always return {@code true} for some ranges of 10932 * characters, particularly those that are symbols or ideographs. 10933 * 10934 * <p>In general, {@link String#toLowerCase()} should be used to map 10935 * characters to lowercase. {@code String} case mapping methods 10936 * have several benefits over {@code Character} case mapping methods. 10937 * {@code String} case mapping methods can perform locale-sensitive 10938 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10939 * the {@code Character} case mapping methods cannot. 10940 * 10941 * <p><b>Note:</b> This method cannot handle <a 10942 * href="#supplementary"> supplementary characters</a>. To support 10943 * all Unicode characters, including supplementary characters, use 10944 * the {@link #toLowerCase(int)} method. 10945 * 10946 * @param ch the character to be converted. 10947 * @return the lowercase equivalent of the character, if any; 10948 * otherwise, the character itself. 10949 * @see Character#isLowerCase(char) 10950 * @see String#toLowerCase() 10951 */ 10952 public static char toLowerCase(char ch) { 10953 return (char)toLowerCase((int)ch); 10954 } 10955 10956 /** 10957 * Converts the character (Unicode code point) argument to 10958 * lowercase using case mapping information from the UnicodeData 10959 * file. 
10960 * 10961 * <p> Note that 10962 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 10963 * does not always return {@code true} for some ranges of 10964 * characters, particularly those that are symbols or ideographs. 10965 * 10966 * <p>In general, {@link String#toLowerCase()} should be used to map 10967 * characters to lowercase. {@code String} case mapping methods 10968 * have several benefits over {@code Character} case mapping methods. 10969 * {@code String} case mapping methods can perform locale-sensitive 10970 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10971 * the {@code Character} case mapping methods cannot. 10972 * 10973 * @param codePoint the character (Unicode code point) to be converted. 10974 * @return the lowercase equivalent of the character (Unicode code 10975 * point), if any; otherwise, the character itself. 10976 * @see Character#isLowerCase(int) 10977 * @see String#toLowerCase() 10978 * 10979 * @since 1.5 10980 */ 10981 public static int toLowerCase(int codePoint) { 10982 return CharacterData.of(codePoint).toLowerCase(codePoint); 10983 } 10984 10985 /** 10986 * Converts the character argument to uppercase using case mapping 10987 * information from the UnicodeData file. 10988 * <p> 10989 * Note that 10990 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 10991 * does not always return {@code true} for some ranges of 10992 * characters, particularly those that are symbols or ideographs. 10993 * 10994 * <p>In general, {@link String#toUpperCase()} should be used to map 10995 * characters to uppercase. {@code String} case mapping methods 10996 * have several benefits over {@code Character} case mapping methods. 10997 * {@code String} case mapping methods can perform locale-sensitive 10998 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10999 * the {@code Character} case mapping methods cannot. 
11000 * 11001 * <p><b>Note:</b> This method cannot handle <a 11002 * href="#supplementary"> supplementary characters</a>. To support 11003 * all Unicode characters, including supplementary characters, use 11004 * the {@link #toUpperCase(int)} method. 11005 * 11006 * @param ch the character to be converted. 11007 * @return the uppercase equivalent of the character, if any; 11008 * otherwise, the character itself. 11009 * @see Character#isUpperCase(char) 11010 * @see String#toUpperCase() 11011 */ 11012 public static char toUpperCase(char ch) { 11013 return (char)toUpperCase((int)ch); 11014 } 11015 11016 /** 11017 * Converts the character (Unicode code point) argument to 11018 * uppercase using case mapping information from the UnicodeData 11019 * file. 11020 * 11021 * <p>Note that 11022 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11023 * does not always return {@code true} for some ranges of 11024 * characters, particularly those that are symbols or ideographs. 11025 * 11026 * <p>In general, {@link String#toUpperCase()} should be used to map 11027 * characters to uppercase. {@code String} case mapping methods 11028 * have several benefits over {@code Character} case mapping methods. 11029 * {@code String} case mapping methods can perform locale-sensitive 11030 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11031 * the {@code Character} case mapping methods cannot. 11032 * 11033 * @param codePoint the character (Unicode code point) to be converted. 11034 * @return the uppercase equivalent of the character, if any; 11035 * otherwise, the character itself. 11036 * @see Character#isUpperCase(int) 11037 * @see String#toUpperCase() 11038 * 11039 * @since 1.5 11040 */ 11041 public static int toUpperCase(int codePoint) { 11042 return CharacterData.of(codePoint).toUpperCase(codePoint); 11043 } 11044 11045 /** 11046 * Converts the character argument to titlecase using case mapping 11047 * information from the UnicodeData file. 
If a character has no 11048 * explicit titlecase mapping and is not itself a titlecase char 11049 * according to UnicodeData, then the uppercase mapping is 11050 * returned as an equivalent titlecase mapping. If the 11051 * {@code char} argument is already a titlecase 11052 * {@code char}, the same {@code char} value will be 11053 * returned. 11054 * <p> 11055 * Note that 11056 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11057 * does not always return {@code true} for some ranges of 11058 * characters. 11059 * 11060 * <p><b>Note:</b> This method cannot handle <a 11061 * href="#supplementary"> supplementary characters</a>. To support 11062 * all Unicode characters, including supplementary characters, use 11063 * the {@link #toTitleCase(int)} method. 11064 * 11065 * @param ch the character to be converted. 11066 * @return the titlecase equivalent of the character, if any; 11067 * otherwise, the character itself. 11068 * @see Character#isTitleCase(char) 11069 * @see Character#toLowerCase(char) 11070 * @see Character#toUpperCase(char) 11071 * @since 1.0.2 11072 */ 11073 public static char toTitleCase(char ch) { 11074 return (char)toTitleCase((int)ch); 11075 } 11076 11077 /** 11078 * Converts the character (Unicode code point) argument to titlecase using case mapping 11079 * information from the UnicodeData file. If a character has no 11080 * explicit titlecase mapping and is not itself a titlecase char 11081 * according to UnicodeData, then the uppercase mapping is 11082 * returned as an equivalent titlecase mapping. If the 11083 * character argument is already a titlecase 11084 * character, the same character value will be 11085 * returned. 11086 * 11087 * <p>Note that 11088 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11089 * does not always return {@code true} for some ranges of 11090 * characters. 11091 * 11092 * @param codePoint the character (Unicode code point) to be converted. 
11093 * @return the titlecase equivalent of the character, if any; 11094 * otherwise, the character itself. 11095 * @see Character#isTitleCase(int) 11096 * @see Character#toLowerCase(int) 11097 * @see Character#toUpperCase(int) 11098 * @since 1.5 11099 */ 11100 public static int toTitleCase(int codePoint) { 11101 return CharacterData.of(codePoint).toTitleCase(codePoint); 11102 } 11103 11104 /** 11105 * Returns the numeric value of the character {@code ch} in the 11106 * specified radix. 11107 * <p> 11108 * If the radix is not in the range {@code MIN_RADIX} ≤ 11109 * {@code radix} ≤ {@code MAX_RADIX} or if the 11110 * value of {@code ch} is not a valid digit in the specified 11111 * radix, {@code -1} is returned. A character is a valid digit 11112 * if at least one of the following is true: 11113 * <ul> 11114 * <li>The method {@code isDigit} is {@code true} of the character 11115 * and the Unicode decimal digit value of the character (or its 11116 * single-character decomposition) is less than the specified radix. 11117 * In this case the decimal digit value is returned. 11118 * <li>The character is one of the uppercase Latin letters 11119 * {@code 'A'} through {@code 'Z'} and its code is less than 11120 * {@code radix + 'A' - 10}. 11121 * In this case, {@code ch - 'A' + 10} 11122 * is returned. 11123 * <li>The character is one of the lowercase Latin letters 11124 * {@code 'a'} through {@code 'z'} and its code is less than 11125 * {@code radix + 'a' - 10}. 11126 * In this case, {@code ch - 'a' + 10} 11127 * is returned. 11128 * <li>The character is one of the fullwidth uppercase Latin letters A 11129 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11130 * and its code is less than 11131 * {@code radix + '\u005CuFF21' - 10}. 11132 * In this case, {@code ch - '\u005CuFF21' + 10} 11133 * is returned. 
11134 * <li>The character is one of the fullwidth lowercase Latin letters a 11135 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11136 * and its code is less than 11137 * {@code radix + '\u005CuFF41' - 10}. 11138 * In this case, {@code ch - '\u005CuFF41' + 10} 11139 * is returned. 11140 * </ul> 11141 * 11142 * <p><b>Note:</b> This method cannot handle <a 11143 * href="#supplementary"> supplementary characters</a>. To support 11144 * all Unicode characters, including supplementary characters, use 11145 * the {@link #digit(int, int)} method. 11146 * 11147 * @param ch the character to be converted. 11148 * @param radix the radix. 11149 * @return the numeric value represented by the character in the 11150 * specified radix. 11151 * @see Character#forDigit(int, int) 11152 * @see Character#isDigit(char) 11153 */ 11154 public static int digit(char ch, int radix) { 11155 return digit((int)ch, radix); 11156 } 11157 11158 /** 11159 * Returns the numeric value of the specified character (Unicode 11160 * code point) in the specified radix. 11161 * 11162 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11163 * {@code radix} ≤ {@code MAX_RADIX} or if the 11164 * character is not a valid digit in the specified 11165 * radix, {@code -1} is returned. A character is a valid digit 11166 * if at least one of the following is true: 11167 * <ul> 11168 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11169 * and the Unicode decimal digit value of the character (or its 11170 * single-character decomposition) is less than the specified radix. 11171 * In this case the decimal digit value is returned. 11172 * <li>The character is one of the uppercase Latin letters 11173 * {@code 'A'} through {@code 'Z'} and its code is less than 11174 * {@code radix + 'A' - 10}. 11175 * In this case, {@code codePoint - 'A' + 10} 11176 * is returned. 
11177 * <li>The character is one of the lowercase Latin letters 11178 * {@code 'a'} through {@code 'z'} and its code is less than 11179 * {@code radix + 'a' - 10}. 11180 * In this case, {@code codePoint - 'a' + 10} 11181 * is returned. 11182 * <li>The character is one of the fullwidth uppercase Latin letters A 11183 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11184 * and its code is less than 11185 * {@code radix + '\u005CuFF21' - 10}. 11186 * In this case, 11187 * {@code codePoint - '\u005CuFF21' + 10} 11188 * is returned. 11189 * <li>The character is one of the fullwidth lowercase Latin letters a 11190 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11191 * and its code is less than 11192 * {@code radix + '\u005CuFF41'- 10}. 11193 * In this case, 11194 * {@code codePoint - '\u005CuFF41' + 10} 11195 * is returned. 11196 * </ul> 11197 * 11198 * @param codePoint the character (Unicode code point) to be converted. 11199 * @param radix the radix. 11200 * @return the numeric value represented by the character in the 11201 * specified radix. 11202 * @see Character#forDigit(int, int) 11203 * @see Character#isDigit(int) 11204 * @since 1.5 11205 */ 11206 public static int digit(int codePoint, int radix) { 11207 return CharacterData.of(codePoint).digit(codePoint, radix); 11208 } 11209 11210 /** 11211 * Returns the {@code int} value that the specified Unicode 11212 * character represents. For example, the character 11213 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11214 * an int with a value of 50. 11215 * <p> 11216 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11217 * {@code '\u005Cu005A'}), lowercase 11218 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11219 * full width variant ({@code '\u005CuFF21'} through 11220 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11221 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11222 * through 35. 
This is independent of the Unicode specification, 11223 * which does not assign numeric values to these {@code char} 11224 * values. 11225 * <p> 11226 * If the character does not have a numeric value, then -1 is returned. 11227 * If the character has a numeric value that cannot be represented as a 11228 * nonnegative integer (for example, a fractional value), then -2 11229 * is returned. 11230 * 11231 * <p><b>Note:</b> This method cannot handle <a 11232 * href="#supplementary"> supplementary characters</a>. To support 11233 * all Unicode characters, including supplementary characters, use 11234 * the {@link #getNumericValue(int)} method. 11235 * 11236 * @param ch the character to be converted. 11237 * @return the numeric value of the character, as a nonnegative {@code int} 11238 * value; -2 if the character has a numeric value but the value 11239 * can not be represented as a nonnegative {@code int} value; 11240 * -1 if the character has no numeric value. 11241 * @see Character#forDigit(int, int) 11242 * @see Character#isDigit(char) 11243 * @since 1.1 11244 */ 11245 public static int getNumericValue(char ch) { 11246 return getNumericValue((int)ch); 11247 } 11248 11249 /** 11250 * Returns the {@code int} value that the specified 11251 * character (Unicode code point) represents. For example, the character 11252 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11253 * an {@code int} with a value of 50. 11254 * <p> 11255 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11256 * {@code '\u005Cu005A'}), lowercase 11257 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11258 * full width variant ({@code '\u005CuFF21'} through 11259 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11260 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11261 * through 35. This is independent of the Unicode specification, 11262 * which does not assign numeric values to these {@code char} 11263 * values. 
11264 * <p> 11265 * If the character does not have a numeric value, then -1 is returned. 11266 * If the character has a numeric value that cannot be represented as a 11267 * nonnegative integer (for example, a fractional value), then -2 11268 * is returned. 11269 * 11270 * @param codePoint the character (Unicode code point) to be converted. 11271 * @return the numeric value of the character, as a nonnegative {@code int} 11272 * value; -2 if the character has a numeric value but the value 11273 * can not be represented as a nonnegative {@code int} value; 11274 * -1 if the character has no numeric value. 11275 * @see Character#forDigit(int, int) 11276 * @see Character#isDigit(int) 11277 * @since 1.5 11278 */ 11279 public static int getNumericValue(int codePoint) { 11280 return CharacterData.of(codePoint).getNumericValue(codePoint); 11281 } 11282 11283 /** 11284 * Determines if the specified character is ISO-LATIN-1 white space. 11285 * This method returns {@code true} for the following five 11286 * characters only: 11287 * <table class="striped"> 11288 * <caption style="display:none">truechars</caption> 11289 * <thead> 11290 * <tr><th scope="col">Character 11291 * <th scope="col">Code 11292 * <th scope="col">Name 11293 * </thead> 11294 * <tbody> 11295 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11296 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11297 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11298 * <td>{@code NEW LINE}</td></tr> 11299 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11300 * <td>{@code FORM FEED}</td></tr> 11301 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11302 * <td>{@code CARRIAGE RETURN}</td></tr> 11303 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11304 * <td>{@code SPACE}</td></tr> 11305 * </tbody> 11306 * </table> 11307 * 11308 * @param ch the character to be tested. 
11309 * @return {@code true} if the character is ISO-LATIN-1 white 11310 * space; {@code false} otherwise. 11311 * @see Character#isSpaceChar(char) 11312 * @see Character#isWhitespace(char) 11313 * @deprecated Replaced by isWhitespace(char). 11314 */ 11315 @Deprecated(since="1.1") 11316 public static boolean isSpace(char ch) { 11317 return (ch <= 0x0020) && 11318 (((((1L << 0x0009) | 11319 (1L << 0x000A) | 11320 (1L << 0x000C) | 11321 (1L << 0x000D) | 11322 (1L << 0x0020)) >> ch) & 1L) != 0); 11323 } 11324 11325 11326 /** 11327 * Determines if the specified character is a Unicode space character. 11328 * A character is considered to be a space character if and only if 11329 * it is specified to be a space character by the Unicode Standard. This 11330 * method returns true if the character's general category type is any of 11331 * the following: 11332 * <ul> 11333 * <li> {@code SPACE_SEPARATOR} 11334 * <li> {@code LINE_SEPARATOR} 11335 * <li> {@code PARAGRAPH_SEPARATOR} 11336 * </ul> 11337 * 11338 * <p><b>Note:</b> This method cannot handle <a 11339 * href="#supplementary"> supplementary characters</a>. To support 11340 * all Unicode characters, including supplementary characters, use 11341 * the {@link #isSpaceChar(int)} method. 11342 * 11343 * @param ch the character to be tested. 11344 * @return {@code true} if the character is a space character; 11345 * {@code false} otherwise. 11346 * @see Character#isWhitespace(char) 11347 * @since 1.1 11348 */ 11349 public static boolean isSpaceChar(char ch) { 11350 return isSpaceChar((int)ch); 11351 } 11352 11353 /** 11354 * Determines if the specified character (Unicode code point) is a 11355 * Unicode space character. A character is considered to be a 11356 * space character if and only if it is specified to be a space 11357 * character by the Unicode Standard. 
This method returns true if 11358 * the character's general category type is any of the following: 11359 * 11360 * <ul> 11361 * <li> {@link #SPACE_SEPARATOR} 11362 * <li> {@link #LINE_SEPARATOR} 11363 * <li> {@link #PARAGRAPH_SEPARATOR} 11364 * </ul> 11365 * 11366 * @param codePoint the character (Unicode code point) to be tested. 11367 * @return {@code true} if the character is a space character; 11368 * {@code false} otherwise. 11369 * @see Character#isWhitespace(int) 11370 * @since 1.5 11371 */ 11372 public static boolean isSpaceChar(int codePoint) { 11373 return ((((1 << Character.SPACE_SEPARATOR) | 11374 (1 << Character.LINE_SEPARATOR) | 11375 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11376 != 0; 11377 } 11378 11379 /** 11380 * Determines if the specified character is white space according to Java. 11381 * A character is a Java whitespace character if and only if it satisfies 11382 * one of the following criteria: 11383 * <ul> 11384 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11385 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11386 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11387 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11388 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11389 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11390 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11391 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11392 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11393 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11394 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11395 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11396 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11397 * </ul> 11398 * 11399 * <p><b>Note:</b> This method cannot handle <a 11400 * href="#supplementary"> supplementary characters</a>. 
To support 11401 * all Unicode characters, including supplementary characters, use 11402 * the {@link #isWhitespace(int)} method. 11403 * 11404 * @param ch the character to be tested. 11405 * @return {@code true} if the character is a Java whitespace 11406 * character; {@code false} otherwise. 11407 * @see Character#isSpaceChar(char) 11408 * @since 1.1 11409 */ 11410 public static boolean isWhitespace(char ch) { 11411 return isWhitespace((int)ch); 11412 } 11413 11414 /** 11415 * Determines if the specified character (Unicode code point) is 11416 * white space according to Java. A character is a Java 11417 * whitespace character if and only if it satisfies one of the 11418 * following criteria: 11419 * <ul> 11420 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11421 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11422 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11423 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11424 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11425 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11426 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11427 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11428 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11429 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11430 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11431 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11432 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11433 * </ul> 11434 * 11435 * @param codePoint the character (Unicode code point) to be tested. 11436 * @return {@code true} if the character is a Java whitespace 11437 * character; {@code false} otherwise. 
11438 * @see Character#isSpaceChar(int) 11439 * @since 1.5 11440 */ 11441 public static boolean isWhitespace(int codePoint) { 11442 return CharacterData.of(codePoint).isWhitespace(codePoint); 11443 } 11444 11445 /** 11446 * Determines if the specified character is an ISO control 11447 * character. A character is considered to be an ISO control 11448 * character if its code is in the range {@code '\u005Cu0000'} 11449 * through {@code '\u005Cu001F'} or in the range 11450 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11451 * 11452 * <p><b>Note:</b> This method cannot handle <a 11453 * href="#supplementary"> supplementary characters</a>. To support 11454 * all Unicode characters, including supplementary characters, use 11455 * the {@link #isISOControl(int)} method. 11456 * 11457 * @param ch the character to be tested. 11458 * @return {@code true} if the character is an ISO control character; 11459 * {@code false} otherwise. 11460 * 11461 * @see Character#isSpaceChar(char) 11462 * @see Character#isWhitespace(char) 11463 * @since 1.1 11464 */ 11465 public static boolean isISOControl(char ch) { 11466 return isISOControl((int)ch); 11467 } 11468 11469 /** 11470 * Determines if the referenced character (Unicode code point) is an ISO control 11471 * character. A character is considered to be an ISO control 11472 * character if its code is in the range {@code '\u005Cu0000'} 11473 * through {@code '\u005Cu001F'} or in the range 11474 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11475 * 11476 * @param codePoint the character (Unicode code point) to be tested. 11477 * @return {@code true} if the character is an ISO control character; 11478 * {@code false} otherwise. 
11479 * @see Character#isSpaceChar(int) 11480 * @see Character#isWhitespace(int) 11481 * @since 1.5 11482 */ 11483 public static boolean isISOControl(int codePoint) { 11484 // Optimized form of: 11485 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11486 // (codePoint >= 0x7F && codePoint <= 0x9F); 11487 return codePoint <= 0x9F && 11488 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11489 } 11490 11491 /** 11492 * Returns a value indicating a character's general category. 11493 * 11494 * <p><b>Note:</b> This method cannot handle <a 11495 * href="#supplementary"> supplementary characters</a>. To support 11496 * all Unicode characters, including supplementary characters, use 11497 * the {@link #getType(int)} method. 11498 * 11499 * @param ch the character to be tested. 11500 * @return a value of type {@code int} representing the 11501 * character's general category. 11502 * @see Character#COMBINING_SPACING_MARK 11503 * @see Character#CONNECTOR_PUNCTUATION 11504 * @see Character#CONTROL 11505 * @see Character#CURRENCY_SYMBOL 11506 * @see Character#DASH_PUNCTUATION 11507 * @see Character#DECIMAL_DIGIT_NUMBER 11508 * @see Character#ENCLOSING_MARK 11509 * @see Character#END_PUNCTUATION 11510 * @see Character#FINAL_QUOTE_PUNCTUATION 11511 * @see Character#FORMAT 11512 * @see Character#INITIAL_QUOTE_PUNCTUATION 11513 * @see Character#LETTER_NUMBER 11514 * @see Character#LINE_SEPARATOR 11515 * @see Character#LOWERCASE_LETTER 11516 * @see Character#MATH_SYMBOL 11517 * @see Character#MODIFIER_LETTER 11518 * @see Character#MODIFIER_SYMBOL 11519 * @see Character#NON_SPACING_MARK 11520 * @see Character#OTHER_LETTER 11521 * @see Character#OTHER_NUMBER 11522 * @see Character#OTHER_PUNCTUATION 11523 * @see Character#OTHER_SYMBOL 11524 * @see Character#PARAGRAPH_SEPARATOR 11525 * @see Character#PRIVATE_USE 11526 * @see Character#SPACE_SEPARATOR 11527 * @see Character#START_PUNCTUATION 11528 * @see Character#SURROGATE 11529 * @see Character#TITLECASE_LETTER 11530 * @see 
Character#UNASSIGNED 11531 * @see Character#UPPERCASE_LETTER 11532 * @since 1.1 11533 */ 11534 public static int getType(char ch) { 11535 return getType((int)ch); 11536 } 11537 11538 /** 11539 * Returns a value indicating a character's general category. 11540 * 11541 * @param codePoint the character (Unicode code point) to be tested. 11542 * @return a value of type {@code int} representing the 11543 * character's general category. 11544 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11545 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11546 * @see Character#CONTROL CONTROL 11547 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11548 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11549 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11550 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11551 * @see Character#END_PUNCTUATION END_PUNCTUATION 11552 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11553 * @see Character#FORMAT FORMAT 11554 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11555 * @see Character#LETTER_NUMBER LETTER_NUMBER 11556 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11557 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11558 * @see Character#MATH_SYMBOL MATH_SYMBOL 11559 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11560 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11561 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11562 * @see Character#OTHER_LETTER OTHER_LETTER 11563 * @see Character#OTHER_NUMBER OTHER_NUMBER 11564 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11565 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11566 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11567 * @see Character#PRIVATE_USE PRIVATE_USE 11568 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11569 * @see Character#START_PUNCTUATION START_PUNCTUATION 11570 * @see Character#SURROGATE SURROGATE 11571 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11572 * @see 
Character#UNASSIGNED UNASSIGNED 11573 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11574 * @since 1.5 11575 */ 11576 public static int getType(int codePoint) { 11577 return CharacterData.of(codePoint).getType(codePoint); 11578 } 11579 11580 /** 11581 * Determines the character representation for a specific digit in 11582 * the specified radix. If the value of {@code radix} is not a 11583 * valid radix, or the value of {@code digit} is not a valid 11584 * digit in the specified radix, the null character 11585 * ({@code '\u005Cu0000'}) is returned. 11586 * <p> 11587 * The {@code radix} argument is valid if it is greater than or 11588 * equal to {@code MIN_RADIX} and less than or equal to 11589 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11590 * {@code 0 <= digit < radix}. 11591 * <p> 11592 * If the digit is less than 10, then 11593 * {@code '0' + digit} is returned. Otherwise, the value 11594 * {@code 'a' + digit - 10} is returned. 11595 * 11596 * @param digit the number to convert to a character. 11597 * @param radix the radix. 11598 * @return the {@code char} representation of the specified digit 11599 * in the specified radix. 11600 * @see Character#MIN_RADIX 11601 * @see Character#MAX_RADIX 11602 * @see Character#digit(char, int) 11603 */ 11604 public static char forDigit(int digit, int radix) { 11605 if ((digit >= radix) || (digit < 0)) { 11606 return '\0'; 11607 } 11608 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11609 return '\0'; 11610 } 11611 if (digit < 10) { 11612 return (char)('0' + digit); 11613 } 11614 return (char)('a' - 10 + digit); 11615 } 11616 11617 /** 11618 * Returns the Unicode directionality property for the given 11619 * character. Character directionality is used to calculate the 11620 * visual ordering of text. The directionality value of undefined 11621 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 
11622 * 11623 * <p><b>Note:</b> This method cannot handle <a 11624 * href="#supplementary"> supplementary characters</a>. To support 11625 * all Unicode characters, including supplementary characters, use 11626 * the {@link #getDirectionality(int)} method. 11627 * 11628 * @param ch {@code char} for which the directionality property 11629 * is requested. 11630 * @return the directionality property of the {@code char} value. 11631 * 11632 * @see Character#DIRECTIONALITY_UNDEFINED 11633 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11634 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11635 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11636 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11637 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11638 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11639 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11640 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11641 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11642 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11643 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11644 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11645 * @see Character#DIRECTIONALITY_WHITESPACE 11646 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11647 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11648 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11649 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11650 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11651 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11652 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11653 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11654 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11655 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11656 * @since 1.4 11657 */ 11658 public static byte getDirectionality(char ch) { 11659 return getDirectionality((int)ch); 11660 } 11661 11662 /** 11663 * Returns the Unicode 
directionality property for the given 11664 * character (Unicode code point). Character directionality is 11665 * used to calculate the visual ordering of text. The 11666 * directionality value of undefined character is {@link 11667 * #DIRECTIONALITY_UNDEFINED}. 11668 * 11669 * @param codePoint the character (Unicode code point) for which 11670 * the directionality property is requested. 11671 * @return the directionality property of the character. 11672 * 11673 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11674 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11675 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11676 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11677 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11678 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11679 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11680 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11681 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11682 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11683 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11684 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11685 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11686 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11687 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11688 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11689 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11690 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11691 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11692 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11693 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11694 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11695 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11696 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11697 * @since 1.5 11698 */ 11699 public static byte getDirectionality(int codePoint) { 11700 return CharacterData.of(codePoint).getDirectionality(codePoint); 11701 } 11702 11703 /** 11704 * Determines whether the character is mirrored according to the 11705 * Unicode specification. Mirrored characters should have their 11706 * glyphs horizontally mirrored when displayed in text that is 11707 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11708 * PARENTHESIS is semantically defined to be an <i>opening 11709 * parenthesis</i>. This will appear as a "(" in text that is 11710 * left-to-right but as a ")" in text that is right-to-left. 11711 * 11712 * <p><b>Note:</b> This method cannot handle <a 11713 * href="#supplementary"> supplementary characters</a>. To support 11714 * all Unicode characters, including supplementary characters, use 11715 * the {@link #isMirrored(int)} method. 11716 * 11717 * @param ch {@code char} for which the mirrored property is requested 11718 * @return {@code true} if the char is mirrored, {@code false} 11719 * if the {@code char} is not mirrored or is not defined. 
11720 * @since 1.4 11721 */ 11722 public static boolean isMirrored(char ch) { 11723 return isMirrored((int)ch); 11724 } 11725 11726 /** 11727 * Determines whether the specified character (Unicode code point) 11728 * is mirrored according to the Unicode specification. Mirrored 11729 * characters should have their glyphs horizontally mirrored when 11730 * displayed in text that is right-to-left. For example, 11731 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 11732 * defined to be an <i>opening parenthesis</i>. This will appear 11733 * as a "(" in text that is left-to-right but as a ")" in text 11734 * that is right-to-left. 11735 * 11736 * @param codePoint the character (Unicode code point) to be tested. 11737 * @return {@code true} if the character is mirrored, {@code false} 11738 * if the character is not mirrored or is not defined. 11739 * @since 1.5 11740 */ 11741 public static boolean isMirrored(int codePoint) { 11742 return CharacterData.of(codePoint).isMirrored(codePoint); 11743 } 11744 11745 /** 11746 * Compares two {@code Character} objects numerically. 11747 * 11748 * @param anotherCharacter the {@code Character} to be compared. 11749 * @return the value {@code 0} if the argument {@code Character} 11750 * is equal to this {@code Character}; a value less than 11751 * {@code 0} if this {@code Character} is numerically less 11752 * than the {@code Character} argument; and a value greater than 11753 * {@code 0} if this {@code Character} is numerically greater 11754 * than the {@code Character} argument (unsigned comparison). 11755 * Note that this is strictly a numerical comparison; it is not 11756 * locale-dependent. 11757 * @since 1.2 11758 */ 11759 public int compareTo(Character anotherCharacter) { 11760 return compare(this.value, anotherCharacter.value); 11761 } 11762 11763 /** 11764 * Compares two {@code char} values numerically. 
11765 * The value returned is identical to what would be returned by: 11766 * <pre> 11767 * Character.valueOf(x).compareTo(Character.valueOf(y)) 11768 * </pre> 11769 * 11770 * @param x the first {@code char} to compare 11771 * @param y the second {@code char} to compare 11772 * @return the value {@code 0} if {@code x == y}; 11773 * a value less than {@code 0} if {@code x < y}; and 11774 * a value greater than {@code 0} if {@code x > y} 11775 * @since 1.7 11776 */ 11777 public static int compare(char x, char y) { 11778 return x - y; 11779 } 11780 11781 /** 11782 * Converts the character (Unicode code point) argument to uppercase using 11783 * information from the UnicodeData file. 11784 * 11785 * @param codePoint the character (Unicode code point) to be converted. 11786 * @return either the uppercase equivalent of the character, if 11787 * any, or an error flag ({@code Character.ERROR}) 11788 * that indicates that a 1:M {@code char} mapping exists. 11789 * @see Character#isLowerCase(char) 11790 * @see Character#isUpperCase(char) 11791 * @see Character#toLowerCase(char) 11792 * @see Character#toTitleCase(char) 11793 * @since 1.4 11794 */ 11795 static int toUpperCaseEx(int codePoint) { 11796 assert isValidCodePoint(codePoint); 11797 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 11798 } 11799 11800 /** 11801 * Converts the character (Unicode code point) argument to uppercase using case 11802 * mapping information from the SpecialCasing file in the Unicode 11803 * specification. If a character has no explicit uppercase 11804 * mapping, then the {@code char} itself is returned in the 11805 * {@code char[]}. 11806 * 11807 * @param codePoint the character (Unicode code point) to be converted. 11808 * @return a {@code char[]} with the uppercased character. 11809 * @since 1.4 11810 */ 11811 static char[] toUpperCaseCharArray(int codePoint) { 11812 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
11813 assert isBmpCodePoint(codePoint); 11814 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 11815 } 11816 11817 /** 11818 * The number of bits used to represent a {@code char} value in unsigned 11819 * binary form, constant {@code 16}. 11820 * 11821 * @since 1.5 11822 */ 11823 public static final int SIZE = 16; 11824 11825 /** 11826 * The number of bytes used to represent a {@code char} value in unsigned 11827 * binary form. 11828 * 11829 * @since 1.8 11830 */ 11831 public static final int BYTES = SIZE / Byte.SIZE; 11832 11833 /** 11834 * Returns the value obtained by reversing the order of the bytes in the 11835 * specified {@code char} value. 11836 * 11837 * @param ch The {@code char} of which to reverse the byte order. 11838 * @return the value obtained by reversing (or, equivalently, swapping) 11839 * the bytes in the specified {@code char} value. 11840 * @since 1.5 11841 */ 11842 @IntrinsicCandidate 11843 public static char reverseBytes(char ch) { 11844 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 11845 } 11846 11847 /** 11848 * Returns the name of the specified character 11849 * {@code codePoint}, or null if the code point is 11850 * {@link #UNASSIGNED unassigned}. 11851 * <p> 11852 * If the specified character is not assigned a name by 11853 * the <i>UnicodeData</i> file (part of the Unicode Character 11854 * Database maintained by the Unicode Consortium), the returned 11855 * name is the same as the result of the expression: 11856 * 11857 * <blockquote>{@code 11858 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11859 * + " " 11860 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11861 * 11862 * }</blockquote> 11863 * 11864 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 11865 * returned by this method follows the naming scheme in the 11866 * "Unicode Name Property" section of the Unicode Standard. 
For other 11867 * code points, such as Hangul/Ideographs, The name generation rule above 11868 * differs from the one defined in the Unicode Standard. 11869 * 11870 * @param codePoint the character (Unicode code point) 11871 * 11872 * @return the name of the specified character, or null if 11873 * the code point is unassigned. 11874 * 11875 * @throws IllegalArgumentException if the specified 11876 * {@code codePoint} is not a valid Unicode 11877 * code point. 11878 * 11879 * @since 1.7 11880 */ 11881 public static String getName(int codePoint) { 11882 if (!isValidCodePoint(codePoint)) { 11883 throw new IllegalArgumentException( 11884 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 11885 } 11886 String name = CharacterName.getInstance().getName(codePoint); 11887 if (name != null) 11888 return name; 11889 if (getType(codePoint) == UNASSIGNED) 11890 return null; 11891 UnicodeBlock block = UnicodeBlock.of(codePoint); 11892 if (block != null) 11893 return block.toString().replace('_', ' ') + " " 11894 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11895 // should never come here 11896 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11897 } 11898 11899 /** 11900 * Returns the code point value of the Unicode character specified by 11901 * the given character name. 11902 * <p> 11903 * If a character is not assigned a name by the <i>UnicodeData</i> 11904 * file (part of the Unicode Character Database maintained by the Unicode 11905 * Consortium), its name is defined as the result of the expression: 11906 * 11907 * <blockquote>{@code 11908 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11909 * + " " 11910 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11911 * 11912 * }</blockquote> 11913 * <p> 11914 * The {@code name} matching is case insensitive, with any leading and 11915 * trailing whitespace character removed. 
11916 * 11917 * For the code points in the <i>UnicodeData</i> file, this method 11918 * recognizes the name which conforms to the name defined in the 11919 * "Unicode Name Property" section in the Unicode Standard. For other 11920 * code points, this method recognizes the name generated with 11921 * {@link #getName(int)} method. 11922 * 11923 * @param name the character name 11924 * 11925 * @return the code point value of the character specified by its name. 11926 * 11927 * @throws IllegalArgumentException if the specified {@code name} 11928 * is not a valid character name. 11929 * @throws NullPointerException if {@code name} is {@code null} 11930 * 11931 * @since 9 11932 */ 11933 public static int codePointOf(String name) { 11934 name = name.trim().toUpperCase(Locale.ROOT); 11935 int cp = CharacterName.getInstance().getCodePoint(name); 11936 if (cp != -1) 11937 return cp; 11938 try { 11939 int off = name.lastIndexOf(' '); 11940 if (off != -1) { 11941 cp = Integer.parseInt(name, off + 1, name.length(), 16); 11942 if (isValidCodePoint(cp) && name.equals(getName(cp))) 11943 return cp; 11944 } 11945 } catch (Exception x) {} 11946 throw new IllegalArgumentException("Unrecognized character name :" + name); 11947 } 11948 }