| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| TranslateFunction |
|
| 3.8333333333333335;3.833 |
| 1 | /* | |
| 2 | * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $ | |
| 3 | * $Revision: 1.10 $ | |
| 4 | * $Date: 2006/02/05 21:47:41 $ | |
| 5 | * | |
| 6 | * ==================================================================== | |
| 7 | * | |
| 8 | * Copyright 2000-2002 bob mcwhirter & James Strachan. | |
| 9 | * All rights reserved. | |
| 10 | * | |
| 11 | * Redistribution and use in source and binary forms, with or without | |
| 12 | * modification, are permitted provided that the following conditions are | |
| 13 | * met: | |
| 14 | * | |
| 15 | * * Redistributions of source code must retain the above copyright | |
| 16 | * notice, this list of conditions and the following disclaimer. | |
| 17 | * | |
| 18 | * * Redistributions in binary form must reproduce the above copyright | |
| 19 | * notice, this list of conditions and the following disclaimer in the | |
| 20 | * documentation and/or other materials provided with the distribution. | |
| 21 | * | |
| 22 | * * Neither the name of the Jaxen Project nor the names of its | |
| 23 | * contributors may be used to endorse or promote products derived | |
| 24 | * from this software without specific prior written permission. | |
| 25 | * | |
| 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | |
| 27 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
| 28 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | |
| 29 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | |
| 30 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 31 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 32 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 33 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
| 34 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 35 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 36 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 37 | * | |
| 38 | * ==================================================================== | |
| 39 | * This software consists of voluntary contributions made by many | |
| 40 | * individuals on behalf of the Jaxen Project and was originally | |
| 41 | * created by bob mcwhirter <bob@werken.com> and | |
| 42 | * James Strachan <jstrachan@apache.org>. For more information on the | |
| 43 | * Jaxen Project, please see <http://www.jaxen.org/>. | |
| 44 | * | |
| 45 | * $Id: TranslateFunction.java,v 1.10 2006/02/05 21:47:41 elharo Exp $ | |
| 46 | */ | |
| 47 | ||
| 48 | ||
| 49 | package org.jaxen.function; | |
| 50 | ||
| 51 | import java.util.HashMap; | |
| 52 | import java.util.List; | |
| 53 | import java.util.Map; | |
| 54 | ||
| 55 | import org.jaxen.Context; | |
| 56 | import org.jaxen.Function; | |
| 57 | import org.jaxen.FunctionCallException; | |
| 58 | import org.jaxen.Navigator; | |
| 59 | ||
| 60 | /** | |
| 61 | * <p> | |
| 62 | * <b>4.2</b> | |
| 63 | * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code> | |
| 64 | * </p> | |
| 65 | * | |
| 66 | * <blockquote src="http://www.w3.org/TR/xpath#function-translate"> | |
| 67 | * <p> | |
| 68 | * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function | |
| 69 | * returns the first argument string with occurrences of characters in | |
| 70 | * the second argument string replaced by the character at the | |
| 71 | * corresponding position in the third argument string. For example, | |
| 72 | * <code>translate("bar","abc","ABC")</code> returns the string | |
| 73 | * <code>BAr</code>. If there is a character in the second argument | |
| 74 | * string with no character at a corresponding position in the third | |
| 75 | * argument string (because the second argument string is longer than | |
| 76 | * the third argument string), then occurrences of that character in the | |
| 77 | * first argument string are removed. For example, | |
| 78 | * <code>translate("--aaa--","abc-","ABC")</code> returns | |
| 79 | * <code>"AAA"</code>. If a character occurs more than once in the | |
| 80 | * second argument string, then the first occurrence determines the | |
| 81 | * replacement character. If the third argument string is longer than | |
| 82 | * the second argument string, then excess characters are ignored. | |
| 83 | * </p> | |
| 84 | * | |
| 85 | * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a | |
| 86 | * sufficient solution for case conversion in all languages. A future | |
| 87 | * version of XPath may provide additional functions for case | |
| 88 | * conversion.</blockquote> | |
| 89 | * | |
| 90 | * </blockquote> | |
| 91 | * | |
| 92 | * @author Jan Dvorak ( jan.dvorak @ mathan.cz ) | |
| 93 | * | |
| 94 | * @see <a href="http://www.w3.org/TR/xpath#function-translate" | |
| 95 | * target="_top">Section 4.2 of the XPath Specification</a> | |
| 96 | */ | |
| 97 | public class TranslateFunction implements Function | |
| 98 | { | |
| 99 | ||
| 100 | /* The translation is done thru a HashMap. Performance tip (for anyone | |
| 101 | * who needs to improve the performance of this particular function): | |
| 102 | * Cache the HashMaps, once they are constructed. */ | |
| 103 | ||
| 104 | /** | |
| 105 | * Create a new <code>TranslateFunction</code> object. | |
| 106 | */ | |
| 107 | 106 | public TranslateFunction() {} |
| 108 | ||
| 109 | ||
| 110 | /** Returns a copy of the first argument in which | |
| 111 | * characters found in the second argument are replaced by | |
| 112 | * corresponding characters from the third argument. | |
| 113 | * | |
| 114 | * @param context the context at the point in the | |
| 115 | * expression when the function is called | |
| 116 | * @param args a list that contains exactly three items | |
| 117 | * | |
| 118 | * @return a <code>String</code> built from <code>args.get(0)</code> | |
| 119 | * in which occurrences of characters in <code>args.get(1)</code> | |
| 120 | * are replaced by the corresponding characters in <code>args.get(2)</code> | |
| 121 | * | |
| 122 | * @throws FunctionCallException if <code>args</code> does not have exactly three items | |
| 123 | */ | |
| 124 | public Object call(Context context, | |
| 125 | List args) throws FunctionCallException | |
| 126 | { | |
| 127 | 102 | if (args.size() == 3) { |
| 128 | 100 | return evaluate( args.get(0), |
| 129 | args.get(1), | |
| 130 | args.get(2), | |
| 131 | context.getNavigator() ); | |
| 132 | } | |
| 133 | ||
| 134 | 2 | throw new FunctionCallException( "translate() requires three arguments." ); |
| 135 | } | |
| 136 | ||
| 137 | /** | |
| 138 | * Returns a copy of <code>strArg</code> in which | |
| 139 | * characters found in <code>fromArg</code> are replaced by | |
| 140 | * corresponding characters from <code>toArg</code>. | |
| 141 | * If necessary each argument is first converted to it string-value | |
| 142 | * as if by the XPath <code>string()</code> function. | |
| 143 | * | |
| 144 | * @param strArg the base string | |
| 145 | * @param fromArg the characters to be replaced | |
| 146 | * @param toArg the characters they will be replaced by | |
| 147 | * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments. | |
| 148 | * | |
| 149 | * @return a copy of <code>strArg</code> in which | |
| 150 | * characters found in <code>fromArg</code> are replaced by | |
| 151 | * corresponding characters from <code>toArg</code> | |
| 152 | * | |
| 153 | * @throws FunctionCallException if one of the arguments is a malformed Unicode string; | |
| 154 | * that is, if surrogate characters don't line up properly | |
| 155 | * | |
| 156 | */ | |
| 157 | public static String evaluate(Object strArg, | |
| 158 | Object fromArg, | |
| 159 | Object toArg, | |
| 160 | Navigator nav) throws FunctionCallException | |
| 161 | { | |
| 162 | 100 | String inStr = StringFunction.evaluate( strArg, nav ); |
| 163 | 100 | String fromStr = StringFunction.evaluate( fromArg, nav ); |
| 164 | 100 | String toStr = StringFunction.evaluate( toArg, nav ); |
| 165 | ||
| 166 | // Initialize the mapping in a HashMap | |
| 167 | 100 | Map characterMap = new HashMap(); |
| 168 | 100 | String[] fromCharacters = toUnicodeCharacters(fromStr); |
| 169 | 100 | String[] toCharacters = toUnicodeCharacters(toStr); |
| 170 | 96 | int fromLen = fromCharacters.length; |
| 171 | 96 | int toLen = toCharacters.length; |
| 172 | 376 | for ( int i = 0; i < fromLen; i++ ) { |
| 173 | 280 | String cFrom = fromCharacters[i]; |
| 174 | 280 | if ( characterMap.containsKey( cFrom ) ) { |
| 175 | // We've seen the character before, ignore | |
| 176 | 8 | continue; |
| 177 | } | |
| 178 | ||
| 179 | 272 | if ( i < toLen ) { |
| 180 | // Will change | |
| 181 | 224 | characterMap.put( cFrom, toCharacters[i] ); |
| 182 | 224 | } |
| 183 | else { | |
| 184 | // Will delete | |
| 185 | 48 | characterMap.put( cFrom, null ); |
| 186 | } | |
| 187 | } | |
| 188 | ||
| 189 | // Process the input string thru the map | |
| 190 | 96 | StringBuffer outStr = new StringBuffer( inStr.length() ); |
| 191 | 96 | String[] inCharacters = toUnicodeCharacters(inStr); |
| 192 | 96 | int inLen = inCharacters.length; |
| 193 | 438 | for ( int i = 0; i < inLen; i++ ) { |
| 194 | 342 | String cIn = inCharacters[i]; |
| 195 | 342 | if ( characterMap.containsKey( cIn ) ) { |
| 196 | 274 | String cTo = (String) characterMap.get( cIn ); |
| 197 | 274 | if ( cTo != null ) { |
| 198 | 226 | outStr.append( cTo ); |
| 199 | } | |
| 200 | 274 | } |
| 201 | else { | |
| 202 | 68 | outStr.append( cIn ); |
| 203 | } | |
| 204 | } | |
| 205 | ||
| 206 | 96 | return outStr.toString(); |
| 207 | } | |
| 208 | ||
| 209 | private static String[] toUnicodeCharacters(String s) throws FunctionCallException { | |
| 210 | ||
| 211 | 296 | String[] result = new String[s.length()]; |
| 212 | 296 | int stringLength = 0; |
| 213 | 1180 | for (int i = 0; i < s.length(); i++) { |
| 214 | 888 | char c1 = s.charAt(i); |
| 215 | 888 | if (isHighSurrogate(c1)) { |
| 216 | try { | |
| 217 | 18 | char c2 = s.charAt(i+1); |
| 218 | 18 | if (isLowSurrogate(c2)) { |
| 219 | 14 | result[stringLength] = (c1 + "" + c2).intern(); |
| 220 | 14 | i++; |
| 221 | 14 | } |
| 222 | else { | |
| 223 | 4 | throw new FunctionCallException("Mismatched surrogate pair in translate function"); |
| 224 | } | |
| 225 | } | |
| 226 | 0 | catch (StringIndexOutOfBoundsException ex) { |
| 227 | 0 | throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function"); |
| 228 | 14 | } |
| 229 | } | |
| 230 | else { | |
| 231 | 870 | result[stringLength]=String.valueOf(c1).intern(); |
| 232 | } | |
| 233 | 884 | stringLength++; |
| 234 | } | |
| 235 | ||
| 236 | 292 | if (stringLength == result.length) return result; |
| 237 | ||
| 238 | // trim array | |
| 239 | 14 | String[] trimmed = new String[stringLength]; |
| 240 | 14 | System.arraycopy(result, 0, trimmed, 0, stringLength); |
| 241 | 14 | return trimmed; |
| 242 | ||
| 243 | } | |
| 244 | ||
| 245 | private static boolean isHighSurrogate(char c) { | |
| 246 | 888 | return c >= 0xD800 && c <= 0xDBFF; |
| 247 | } | |
| 248 | ||
| 249 | private static boolean isLowSurrogate(char c) { | |
| 250 | 18 | return c >= 0xDC00 && c <= 0xDFFF; |
| 251 | } | |
| 252 | ||
| 253 | } |