| // RuleBasedCollator.java - Concrete class for locale-based string compare. |
| |
| /* Copyright (C) 1999, 2000 Free Software Foundation |
| |
| This file is part of libgcj. |
| |
| This software is copyrighted work licensed under the terms of the |
| Libgcj License. Please consult the file "LIBGCJ_LICENSE" for |
| details. */ |
| |
| package java.text; |
| |
| import java.util.Enumeration; |
| import java.util.Hashtable; |
| import java.util.Vector; |
| |
| /** |
| * @author Tom Tromey <tromey@cygnus.com> |
| * @date March 25, 1999 |
| */ |
| /* Written using "Java Class Libraries", 2nd edition, plus online |
| * API docs for JDK 1.2 from http://www.javasoft.com. |
| * Status: Believed complete and correct |
| */ |
| |
| final class RBCElement |
| { |
| String key; |
| char relation; |
| |
| RBCElement (String key, char relation) |
| { |
| this.key = key; |
| this.relation = relation; |
| } |
| } |
| |
| public class RuleBasedCollator extends Collator |
| { |
| public Object clone () |
| { |
| return new RuleBasedCollator (this); |
| } |
| |
| // A helper for CollationElementIterator.next(). |
| int ceiNext (CollationElementIterator cei) |
| { |
| if (cei.lookahead_set) |
| { |
| cei.lookahead_set = false; |
| return cei.lookahead; |
| } |
| |
| int save = cei.index; |
| int max = cei.text.length(); |
| String s = null; |
| |
| // It is possible to have a case where `abc' has a mapping, but |
| // neither `ab' nor `abd' do. In this case we must treat `abd' as |
| // nothing special. |
| boolean found = false; |
| |
| int i; |
| for (i = save + 1; i <= max; ++i) |
| { |
| s = cei.text.substring(save, i); |
| if (prefixes.get(s) == null) |
| break; |
| found = true; |
| } |
| // Assume s != null. |
| |
| Object obj = map.get(s); |
| // The special case. |
| while (found && obj == null && s.length() > 1) |
| { |
| --i; |
| s = cei.text.substring(save, i); |
| obj = map.get(s); |
| } |
| |
| // Update state. |
| cei.index = i; |
| |
| if (obj == null) |
| { |
| // This idea, and the values, come from JDK. |
| // assert (s.length() == 1) |
| cei.lookahead_set = true; |
| cei.lookahead = s.charAt(0) << 8; |
| return 0x7fff << 16; |
| } |
| |
| return ((Integer) obj).intValue(); |
| } |
| |
| // A helper for compareTo() that returns the next character that has |
| // a nonzero ordering at the indicated strength. This is also used |
| // in CollationKey. |
| static final int next (CollationElementIterator iter, int strength) |
| { |
| while (true) |
| { |
| int os = iter.next(); |
| if (os == CollationElementIterator.NULLORDER) |
| return os; |
| int c = 0; |
| switch (strength) |
| { |
| case PRIMARY: |
| c = os & ~0xffff; |
| break; |
| case SECONDARY: |
| c = os & ~0x00ff; |
| break; |
| case TERTIARY: |
| case IDENTICAL: |
| c = os; |
| break; |
| } |
| if (c != 0) |
| return c; |
| } |
| } |
| |
| public int compare (String source, String target) |
| { |
| CollationElementIterator cs, ct; |
| |
| cs = new CollationElementIterator (source, this); |
| ct = new CollationElementIterator (target, this); |
| |
| while (true) |
| { |
| int os = next (cs, strength); |
| int ot = next (ct, strength); |
| |
| if (os == CollationElementIterator.NULLORDER |
| && ot == CollationElementIterator.NULLORDER) |
| break; |
| else if (os == CollationElementIterator.NULLORDER) |
| { |
| // Source string is shorter, so return "less than". |
| return -1; |
| } |
| else if (ot == CollationElementIterator.NULLORDER) |
| { |
| // Target string is shorter, so return "greater than". |
| return 1; |
| } |
| |
| if (os != ot) |
| return os - ot; |
| } |
| |
| return 0; |
| } |
| |
| public boolean equals (Object obj) |
| { |
| if (! (obj instanceof RuleBasedCollator) || ! super.equals(obj)) |
| return false; |
| RuleBasedCollator rbc = (RuleBasedCollator) obj; |
| // FIXME: this is probably wrong. Instead we should compare maps |
| // directly. |
| return (frenchAccents == rbc.frenchAccents |
| && rules.equals(rbc.rules)); |
| } |
| |
| public CollationElementIterator getCollationElementIterator (String source) |
| { |
| StringBuffer expand = new StringBuffer (source.length()); |
| int max = source.length(); |
| for (int i = 0; i < max; ++i) |
| decomposeCharacter (source.charAt(i), expand); |
| return new CollationElementIterator (expand.toString(), this); |
| } |
| |
| public CollationElementIterator getCollationElementIterator (CharacterIterator source) |
| { |
| StringBuffer expand = new StringBuffer (); |
| for (char c = source.first (); |
| c != CharacterIterator.DONE; |
| c = source.next ()) |
| decomposeCharacter (c, expand); |
| |
| return new CollationElementIterator (expand.toString(), this); |
| } |
| |
| public CollationKey getCollationKey (String source) |
| { |
| return new CollationKey (getCollationElementIterator (source), source, |
| strength); |
| } |
| |
| public String getRules () |
| { |
| return rules; |
| } |
| |
| public int hashCode () |
| { |
| return (frenchAccents ? 1231 : 1237 |
| ^ rules.hashCode() |
| ^ map.hashCode() |
| ^ prefixes.hashCode()); |
| } |
| |
| private final boolean is_special (char c) |
| { |
| // Rules from JCL book. |
| return ((c >= 0x0009 && c <= 0x000d) |
| || (c >= 0x0020 && c <= 0x002f) |
| || (c >= 0x003a && c <= 0x0040) |
| || (c >= 0x005b && c <= 0x0060) |
| || (c >= 0x007b && c <= 0x007e)); |
| } |
| |
| private final int text_argument (String rules, int index, |
| StringBuffer result) |
| { |
| result.setLength(0); |
| int len = rules.length(); |
| while (index < len) |
| { |
| char c = rules.charAt(index); |
| if (c == '\'' && index + 2 < len |
| && rules.charAt(index + 2) == '\'' |
| && is_special (rules.charAt(index + 1))) |
| index += 2; |
| else if (is_special (c) || Character.isWhitespace(c)) |
| return index; |
| result.append(c); |
| ++index; |
| } |
| return index; |
| } |
| |
| public RuleBasedCollator (String rules) throws ParseException |
| { |
| this.rules = rules; |
| this.frenchAccents = false; |
| |
| // We keep each rule in order in a vector. At the end we traverse |
| // the vector and compute collation values from it. |
| int insertion_index = 0; |
| Vector vec = new Vector (); |
| |
| StringBuffer argument = new StringBuffer (); |
| |
| int len = rules.length(); |
| for (int index = 0; index < len; ++index) |
| { |
| char c = rules.charAt(index); |
| |
| // Just skip whitespace. |
| if (Character.isWhitespace(c)) |
| continue; |
| |
| // Modifier. |
| if (c == '@') |
| { |
| frenchAccents = true; |
| continue; |
| } |
| |
| // Check for relation or reset operator. |
| if (! (c == '<' || c == ';' || c == ',' || c == '=' || c == '&')) |
| throw new ParseException ("invalid character", index); |
| |
| ++index; |
| while (index < len) |
| { |
| if (! Character.isWhitespace(rules.charAt(index))) |
| break; |
| ++index; |
| } |
| if (index == len) |
| throw new ParseException ("missing argument", index); |
| |
| int save = index; |
| index = text_argument (rules, index, argument); |
| if (argument.length() == 0) |
| throw new ParseException ("invalid character", save); |
| String arg = argument.toString(); |
| int item_index = vec.indexOf(arg); |
| if (c != '&') |
| { |
| // If the argument already appears in the vector, then we |
| // must remove it in order to re-order. |
| if (item_index != -1) |
| { |
| vec.removeElementAt(item_index); |
| if (insertion_index >= item_index) |
| --insertion_index; |
| } |
| RBCElement r = new RBCElement (arg, c); |
| vec.insertElementAt(r, insertion_index); |
| ++insertion_index; |
| } |
| else |
| { |
| // Reset. |
| if (item_index == -1) |
| throw |
| new ParseException ("argument to reset not previously seen", |
| save); |
| insertion_index = item_index + 1; |
| } |
| |
| // Ugly: in this case the resulting INDEX comes from |
| // text_argument, which returns the index of the next |
| // character we should examine. |
| --index; |
| } |
| |
| // Now construct a hash table that maps strings onto their |
| // collation values. |
| int primary = 0; |
| int secondary = 0; |
| int tertiary = 0; |
| this.map = new Hashtable (); |
| this.prefixes = new Hashtable (); |
| Enumeration e = vec.elements(); |
| while (e.hasMoreElements()) |
| { |
| RBCElement r = (RBCElement) e.nextElement(); |
| switch (r.relation) |
| { |
| case '<': |
| ++primary; |
| secondary = 0; |
| tertiary = 0; |
| break; |
| case ';': |
| ++secondary; |
| tertiary = 0; |
| break; |
| case ',': |
| ++tertiary; |
| break; |
| case '=': |
| break; |
| } |
| // This must match CollationElementIterator. |
| map.put(r.key, new Integer (primary << 16 |
| | secondary << 8 | tertiary)); |
| |
| // Make a map of all lookaheads we might need. |
| for (int i = r.key.length() - 1; i >= 1; --i) |
| prefixes.put(r.key.substring(0, i), Boolean.TRUE); |
| } |
| } |
| |
| // This is a helper for clone. |
| private RuleBasedCollator (RuleBasedCollator other) |
| { |
| frenchAccents = other.frenchAccents; |
| rules = other.rules; |
| decmp = other.decmp; |
| strength = other.strength; |
| map = other.map; |
| prefixes = other.prefixes; |
| } |
| |
| // True if we are using French-style accent ordering. |
| private boolean frenchAccents; |
| |
| // It's easier to just save the rules than to try to recreate them. |
| private String rules; |
| |
| // This maps strings onto collation values. |
| private Hashtable map; |
| // An entry in this hash means that more lookahead is required for |
| // the prefix string. |
| private Hashtable prefixes; |
| } |