1 /*
2 * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/StringLengthFunction.java,v 1.12 2006/02/05 21:47:41 elharo Exp $
3 * $Revision: 1.12 $
4 * $Date: 2006/02/05 21:47:41 $
5 *
6 * ====================================================================
7 *
8 * Copyright 2000-2002 bob mcwhirter & James Strachan.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions are
13 * met:
14 *
15 * * Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 *
18 * * Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * * Neither the name of the Jaxen Project nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
27 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
29 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
30 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 *
38 * ====================================================================
39 * This software consists of voluntary contributions made by many
40 * individuals on behalf of the Jaxen Project and was originally
41 * created by bob mcwhirter <bob@werken.com> and
42 * James Strachan <jstrachan@apache.org>. For more information on the
43 * Jaxen Project, please see <http://www.jaxen.org/>.
44 *
45 * $Id: StringLengthFunction.java,v 1.12 2006/02/05 21:47:41 elharo Exp $
46 */
47
48
49 package org.jaxen.function;
50
51 import java.util.List;
52
53 import org.jaxen.Context;
54 import org.jaxen.Function;
55 import org.jaxen.FunctionCallException;
56 import org.jaxen.Navigator;
57
58 /***
59 * <p><b>4.2</b> <code><i>number</i> string-length(<i>string</i>)</code></p>
60 *
61 * <p>
62 * The <b>string-length</b> function returns the number of <strong>Unicode characters</strong>
63 * in its argument. This is <strong>not</strong> necessarily
64 * the same as the number <strong>Java chars</strong>
65 * in the corresponding Java string. In particular, if the Java <code>String</code>
66 * contains surrogate pairs each such pair will be counted as only one character
67 * by this function. If the argument is omitted,
68 * it returns the length of the string-value of the context node.
69 * </p>
70 *
71 * @author bob mcwhirter (bob @ werken.com)
72 * @see <a href="http://www.w3.org/TR/xpath#function-string-length" target="_top">Section
73 * 4.2 of the XPath Specification</a>
74 */
75 public class StringLengthFunction implements Function
76 {
77
78
79 /***
80 * Create a new <code>StringLengthFunction</code> object.
81 */
82 public StringLengthFunction() {}
83
84
85 /***
86 * <p>
87 * Returns the number of Unicode characters in the string-value of the argument.
88 * </p>
89 *
90 * @param context the context at the point in the
91 * expression when the function is called
92 * @param args a list containing the item whose string-value is to be counted.
93 * If empty, the length of the context node's string-value is returned.
94 *
95 * @return a <code>Double</code> giving the number of Unicode characters
96 *
97 * @throws FunctionCallException if args has more than one item
98 */
99 public Object call(Context context,
100 List args) throws FunctionCallException
101 {
102 if (args.size() == 0)
103 {
104 return evaluate( context.getNodeSet(),
105 context.getNavigator() );
106 }
107 else if (args.size() == 1)
108 {
109 return evaluate( args.get(0),
110 context.getNavigator() );
111 }
112
113 throw new FunctionCallException( "string-length() requires one argument." );
114 }
115
116 /***
117 * <p>
118 * Returns the number of Unicode characters in the string-value of
119 * an object.
120 * </p>
121 *
122 * @param obj the object whose string-value is counted
123 * @param nav used to calculate the string-values of the first two arguments
124 *
125 * @return a <code>Double</code> giving the number of Unicode characters
126 *
127 * @throws FunctionCallException if the string contains mismatched surrogates
128 */
129 public static Double evaluate(Object obj, Navigator nav) throws FunctionCallException
130 {
131 String str = StringFunction.evaluate( obj, nav );
132 // String.length() counts UTF-16 code points; not Unicode characters
133 char[] data = str.toCharArray();
134 int length = 0;
135 for (int i = 0; i < data.length; i++) {
136 char c = data[i];
137 length++;
138 // if this is a high surrogate; assume the next character is
139 // is a low surrogate and skip it
140 if (c >= 0xD800) {
141 try {
142 char low = data[i+1];
143 if (low < 0xDC00 || low > 0xDFFF) {
144 throw new FunctionCallException("Bad surrogate pair in string " + str);
145 }
146 i++; // increment past low surrogate
147 }
148 catch (ArrayIndexOutOfBoundsException ex) {
149 throw new FunctionCallException("Bad surrogate pair in string " + str);
150 }
151 }
152 }
153 return new Double(length);
154 }
155
156 }