| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| SubstringFunction |
|
| 9.333333333333334;9.333 |
| 1 | /* | |
| 2 | * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/SubstringFunction.java,v 1.16 2006/02/05 21:47:41 elharo Exp $ | |
| 3 | * $Revision: 1.16 $ | |
| 4 | * $Date: 2006/02/05 21:47:41 $ | |
| 5 | * | |
| 6 | * ==================================================================== | |
| 7 | * | |
| 8 | * Copyright 2000-2002 bob mcwhirter & James Strachan. | |
| 9 | * All rights reserved. | |
| 10 | * | |
| 11 | * | |
| 12 | * Redistribution and use in source and binary forms, with or without | |
| 13 | * modification, are permitted provided that the following conditions are | |
| 14 | * met: | |
| 15 | * | |
| 16 | * * Redistributions of source code must retain the above copyright | |
| 17 | * notice, this list of conditions and the following disclaimer. | |
| 18 | * | |
| 19 | * * Redistributions in binary form must reproduce the above copyright | |
| 20 | * notice, this list of conditions and the following disclaimer in the | |
| 21 | * documentation and/or other materials provided with the distribution. | |
| 22 | * | |
| 23 | * * Neither the name of the Jaxen Project nor the names of its | |
| 24 | * contributors may be used to endorse or promote products derived | |
| 25 | * from this software without specific prior written permission. | |
| 26 | * | |
| 27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | |
| 28 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
| 29 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | |
| 30 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | |
| 31 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 32 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 33 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 34 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
| 35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 38 | * | |
| 39 | * ==================================================================== | |
| 40 | * This software consists of voluntary contributions made by many | |
| 41 | * individuals on behalf of the Jaxen Project and was originally | |
| 42 | * created by bob mcwhirter <bob@werken.com> and | |
| 43 | * James Strachan <jstrachan@apache.org>. For more information on the | |
| 44 | * Jaxen Project, please see <http://www.jaxen.org/>. | |
| 45 | * | |
| 46 | */ | |
| 47 | package org.jaxen.function; | |
| 48 | ||
| 49 | import java.util.List; | |
| 50 | ||
| 51 | import org.jaxen.Context; | |
| 52 | import org.jaxen.Function; | |
| 53 | import org.jaxen.FunctionCallException; | |
| 54 | import org.jaxen.Navigator; | |
| 55 | /** | |
| 56 | * <p> | |
| 57 | * <b>4.2</b> | |
| 58 | * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code> | |
| 59 | * </p> | |
| 60 | * | |
| 61 | * <blockquote src="http://www.w3.org/TR/xpath"> | |
| 62 | * <p>The <b>substring</b> function returns the | |
| 63 | * substring of the first argument starting at the position specified in | |
| 64 | * the second argument with length specified in the third argument. For | |
| 65 | * example, | |
| 66 | * | |
| 67 | * <code>substring("12345",2,3)</code> returns <code>"234"</code>. | |
| 68 | * If the third argument is not specified, it returns the substring | |
| 69 | * starting at the position specified in the second argument and | |
| 70 | * continuing to the end of the string. For example, | |
| 71 | * <code>substring("12345",2)</code> returns <code>"2345"</code>. | |
| 72 | * </p> | |
| 73 | * | |
| 74 | * <p> | |
| 75 | * More precisely, each character in the string (see <a | |
| 76 | * href="http://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a | |
| 77 | * numeric position: the position of the first character is 1, the | |
| 78 | * position of the second character is 2 and so on. | |
| 79 | * </p> | |
| 80 | * | |
| 81 | * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in | |
| 82 | * which the <code>String.substring</code> method treats the position | |
| 83 | * of the first character as 0.</blockquote> | |
| 84 | * | |
| 85 | * <p> | |
| 86 | * The returned substring contains those characters for which the | |
| 87 | * position of the character is greater than or equal to the rounded | |
| 88 | * value of the second argument and, if the third argument is specified, | |
| 89 | * less than the sum of the rounded value of the second argument and the | |
| 90 | * rounded value of the third argument; the comparisons and addition | |
| 91 | * used for the above follow the standard IEEE 754 rules; rounding is | |
| 92 | * done as if by a call to the <b><a href="#function-round">round</a></b> | |
| 93 | * function. The following examples illustrate various unusual cases: | |
| 94 | * </p> | |
| 95 | * | |
| 96 | * <ul> | |
| 97 | * | |
| 98 | * <li> | |
| 99 | * <p> | |
| 100 | * <code>substring("12345", 1.5, 2.6)</code> returns | |
| 101 | * <code>"234"</code> | |
| 102 | * </p> | |
| 103 | * </li> | |
| 104 | * | |
| 105 | * <li> | |
| 106 | * <p> | |
| 107 | * <code>substring("12345", 0, 3)</code> returns <code>"12"</code> | |
| 108 | * | |
| 109 | * </p> | |
| 110 | * </li> | |
| 111 | * | |
| 112 | * <li> | |
| 113 | * <p> | |
| 114 | * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code> | |
| 115 | * </p> | |
| 116 | * </li> | |
| 117 | * | |
| 118 | * <li> | |
| 119 | * <p> | |
| 120 | * <code>substring("12345", 1, 0 div 0)</code> returns | |
| 121 | * | |
| 122 | * <code>""</code> | |
| 123 | * </p> | |
| 124 | * </li> | |
| 125 | * | |
| 126 | * <li> | |
| 127 | * <p> | |
| 128 | * <code>substring("12345", -42, 1 div 0)</code> returns | |
| 129 | * <code>"12345"</code> | |
| 130 | * </p> | |
| 131 | * </li> | |
| 132 | * | |
| 133 | * <li> | |
| 134 | * <p> | |
| 135 | * | |
| 136 | * <code>substring("12345", -1 div 0, 1 div 0)</code> returns | |
| 137 | * <code>""</code></p></li></ul></blockquote> | |
| 138 | * | |
| 139 | * @author bob mcwhirter (bob @ werken.com) | |
| 140 | * | |
| 141 | * @see <a href="http://www.w3.org/TR/xpath#function-substring" | |
| 142 | * target="_top">Section 4.2 of the XPath Specification</a> | |
| 143 | */ | |
| 144 | public class SubstringFunction implements Function | |
| 145 | { | |
| 146 | ||
| 147 | /** | |
| 148 | * Create a new <code>SubstringFunction</code> object. | |
| 149 | */ | |
| 150 | 106 | public SubstringFunction() {} |
| 151 | ||
| 152 | ||
| 153 | /** Returns a substring of an XPath string-value by character index. | |
| 154 | * | |
| 155 | * @param context the context at the point in the | |
| 156 | * expression when the function is called | |
| 157 | * @param args a list that contains two or three items | |
| 158 | * | |
| 159 | * @return a <code>String</code> containing the specifed character subsequence of | |
| 160 | * the original string or the string-value of the context node | |
| 161 | * | |
| 162 | * @throws FunctionCallException if <code>args</code> has more than three | |
| 163 | * or less than two items | |
| 164 | */ | |
| 165 | public Object call(Context context, | |
| 166 | List args) throws FunctionCallException | |
| 167 | { | |
| 168 | 104 | final int argc = args.size(); |
| 169 | 104 | if (argc < 2 || argc > 3){ |
| 170 | 4 | throw new FunctionCallException( "substring() requires two or three arguments." ); |
| 171 | } | |
| 172 | ||
| 173 | 100 | final Navigator nav = context.getNavigator(); |
| 174 | ||
| 175 | 100 | final String str = StringFunction.evaluate(args.get(0), nav ); |
| 176 | // The spec doesn't really address this case | |
| 177 | 100 | if (str == null) { |
| 178 | 0 | return ""; |
| 179 | } | |
| 180 | ||
| 181 | 100 | final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue(); |
| 182 | ||
| 183 | 100 | if (stringLength == 0) { |
| 184 | 2 | return ""; |
| 185 | } | |
| 186 | ||
| 187 | 98 | Double d1 = NumberFunction.evaluate(args.get(1), nav); |
| 188 | ||
| 189 | 98 | if (d1.isNaN()){ |
| 190 | 10 | return ""; |
| 191 | } | |
| 192 | // Round the value and subtract 1 as Java strings are zero based | |
| 193 | 88 | int start = RoundFunction.evaluate(d1, nav).intValue() - 1; |
| 194 | ||
| 195 | 88 | int substringLength = stringLength; |
| 196 | 88 | if (argc == 3){ |
| 197 | 74 | Double d2 = NumberFunction.evaluate(args.get(2), nav); |
| 198 | ||
| 199 | 74 | if (!d2.isNaN()){ |
| 200 | 64 | substringLength = RoundFunction.evaluate(d2, nav ).intValue(); |
| 201 | 64 | } |
| 202 | else { | |
| 203 | 10 | substringLength = 0; |
| 204 | } | |
| 205 | } | |
| 206 | ||
| 207 | 88 | if (substringLength < 0) return ""; |
| 208 | ||
| 209 | 84 | int end = start + substringLength; |
| 210 | 84 | if (argc == 2) end = stringLength; |
| 211 | ||
| 212 | // negative start is treated as 0 | |
| 213 | 84 | if ( start < 0){ |
| 214 | 24 | start = 0; |
| 215 | 24 | } |
| 216 | 60 | else if (start > stringLength){ |
| 217 | 10 | return ""; |
| 218 | } | |
| 219 | ||
| 220 | 74 | if (end > stringLength){ |
| 221 | 22 | end = stringLength; |
| 222 | 22 | } |
| 223 | 52 | else if (end < start) return ""; |
| 224 | ||
| 225 | 72 | if (stringLength == str.length()) { |
| 226 | // easy case; no surrogate pairs | |
| 227 | 66 | return str.substring(start, end); |
| 228 | } | |
| 229 | else { | |
| 230 | 6 | return unicodeSubstring(str, start, end); |
| 231 | } | |
| 232 | ||
| 233 | } | |
| 234 | ||
| 235 | private static String unicodeSubstring(String s, int start, int end) { | |
| 236 | ||
| 237 | 6 | StringBuffer result = new StringBuffer(s.length()); |
| 238 | 22 | for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) { |
| 239 | 16 | char c = s.charAt(jChar); |
| 240 | 16 | if (uChar >= start) result.append(c); |
| 241 | 16 | if (c >= 0xD800) { // get the low surrogate |
| 242 | // ???? we could check here that this is indeed a low surroagte | |
| 243 | // we could also catch StringIndexOutOfBoundsException | |
| 244 | 6 | jChar++; |
| 245 | 6 | if (uChar >= start) result.append(s.charAt(jChar)); |
| 246 | } | |
| 247 | } | |
| 248 | 6 | return result.toString(); |
| 249 | } | |
| 250 | } |