File Source: Utilities.java
1 package org.apache.roller.weblogger.util;
2
3 import java.io.BufferedInputStream;
4 import java.io.BufferedOutputStream;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.FileOutputStream;
8 import java.io.IOException;
9 import java.io.InputStream;
10 import java.io.OutputStream;
11 import java.io.UnsupportedEncodingException;
12 import java.net.URLDecoder;
13 import java.net.URLEncoder;
14 import java.security.MessageDigest;
15 import java.util.Arrays;
16 import java.util.Collections;
17 import java.util.List;
18 import java.util.Locale;
19 import java.util.NoSuchElementException;
20 import java.util.StringTokenizer;
21 import java.util.regex.Matcher;
22 import java.util.regex.Pattern;
23
24 import org.apache.commons.lang.StringEscapeUtils;
25 import org.apache.commons.lang.StringUtils;
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.commons.codec.binary.Base64;
29 import org.apache.roller.util.RegexUtil;
30
31
32 /**
33 * General purpose utilities, not for use in templates.
34 */
/*
P/P * Method: void org.apache.roller.weblogger.util.Utilities()
*/
35 public class Utilities {
36 /** The <code>Log</code> instance for this class. */
/*
P/P * Method: org.apache.roller.weblogger.util.Utilities__static_init
*
* Postconditions:
* init'ed(BR_TAG_PATTERN)
* init'ed(CLOSING_A_TAG_PATTERN)
* init'ed(CLOSING_BLOCKQUOTE_TAG_PATTERN)
* init'ed(CLOSING_B_TAG_PATTERN)
* init'ed(CLOSING_I_TAG_PATTERN)
* init'ed(CLOSING_LI_TAG_PATTERN)
* init'ed(CLOSING_OL_TAG_PATTERN)
* init'ed(CLOSING_PRE_TAG_PATTERN)
* init'ed(CLOSING_P_TAG_PATTERN)
* init'ed(CLOSING_UL_TAG_PATTERN)
* ...
*/
37 private static Log mLogger = LogFactory.getLog(Utilities.class);
38
39 public final static String TAG_SPLIT_CHARS = " ,\n\r\f\t";
40
41 private static Pattern mLinkPattern =
42 Pattern.compile("<a href=.*?>", Pattern.CASE_INSENSITIVE);
43 private static final Pattern OPENING_B_TAG_PATTERN =
44 Pattern.compile("<b>", Pattern.CASE_INSENSITIVE);
45 private static final Pattern CLOSING_B_TAG_PATTERN =
46 Pattern.compile("</b>", Pattern.CASE_INSENSITIVE);
47 private static final Pattern OPENING_I_TAG_PATTERN =
48 Pattern.compile("<i>", Pattern.CASE_INSENSITIVE);
49 private static final Pattern CLOSING_I_TAG_PATTERN =
50 Pattern.compile("</i>", Pattern.CASE_INSENSITIVE);
51 private static final Pattern OPENING_BLOCKQUOTE_TAG_PATTERN =
52 Pattern.compile("<blockquote>", Pattern.CASE_INSENSITIVE);
53 private static final Pattern CLOSING_BLOCKQUOTE_TAG_PATTERN =
54 Pattern.compile("</blockquote>", Pattern.CASE_INSENSITIVE);
55 private static final Pattern BR_TAG_PATTERN =
56 Pattern.compile("<br */*>", Pattern.CASE_INSENSITIVE);
57 private static final Pattern OPENING_P_TAG_PATTERN =
58 Pattern.compile("<p>", Pattern.CASE_INSENSITIVE);
59 private static final Pattern CLOSING_P_TAG_PATTERN =
60 Pattern.compile("</p>", Pattern.CASE_INSENSITIVE);
61 private static final Pattern OPENING_PRE_TAG_PATTERN =
62 Pattern.compile("<pre>", Pattern.CASE_INSENSITIVE);
63 private static final Pattern CLOSING_PRE_TAG_PATTERN =
64 Pattern.compile("</pre>", Pattern.CASE_INSENSITIVE);
65 private static final Pattern OPENING_UL_TAG_PATTERN =
66 Pattern.compile("<ul>", Pattern.CASE_INSENSITIVE);
67 private static final Pattern CLOSING_UL_TAG_PATTERN =
68 Pattern.compile("</ul>", Pattern.CASE_INSENSITIVE);
69 private static final Pattern OPENING_OL_TAG_PATTERN =
70 Pattern.compile("<ol>", Pattern.CASE_INSENSITIVE);
71 private static final Pattern CLOSING_OL_TAG_PATTERN =
72 Pattern.compile("</ol>", Pattern.CASE_INSENSITIVE);
73 private static final Pattern OPENING_LI_TAG_PATTERN =
74 Pattern.compile("<li>", Pattern.CASE_INSENSITIVE);
75 private static final Pattern CLOSING_LI_TAG_PATTERN =
76 Pattern.compile("</li>", Pattern.CASE_INSENSITIVE);
77 private static final Pattern CLOSING_A_TAG_PATTERN =
78 Pattern.compile("</a>", Pattern.CASE_INSENSITIVE);
79 private static final Pattern OPENING_A_TAG_PATTERN =
80 Pattern.compile("<a href=.*?>", Pattern.CASE_INSENSITIVE);
81 private static final Pattern QUOTE_PATTERN =
82 Pattern.compile(""", Pattern.CASE_INSENSITIVE);
83
84
85 //------------------------------------------------------------------------
86 /** Strip jsessionid off of a URL */
87 public static String stripJsessionId( String url ) {
88 // Strip off jsessionid found in referer URL
/*
P/P * Method: String stripJsessionId(String)
*
* Preconditions:
* url != null
*
* Postconditions:
* init'ed(java.lang.String:substring(...)._tainted)
* init'ed(java.lang.StringBuilder:toString(...)._tainted)
* return_value == One-of{url, &java.lang.String:substring(...), &java.lang.StringBuilder:toString(...)}
* return_value != null
*
* Test Vectors:
* java.lang.String:indexOf(...)@89: {-1}, {-231..-2, 0..232-1}
* java.lang.String:indexOf(...)@91: {-231..-2, 0..232-1}, {-1}
*/
89 int startPos = url.indexOf(";jsessionid=");
90 if ( startPos != -1 ) {
91 int endPos = url.indexOf("?",startPos);
92 if ( endPos == -1 ) {
93 url = url.substring(0,startPos);
94 } else {
95 url = url.substring(0,startPos)
96 + url.substring(endPos,url.length());
97 }
98 }
99 return url;
100 }
101
102 //------------------------------------------------------------------------
103 /**
104 * Escape, but do not replace HTML.
105 * The default behaviour is to escape ampersands.
106 */
107 public static String escapeHTML(String s) {
/*
P/P * Method: String escapeHTML(String)
*
* Postconditions:
* init'ed(return_value)
*/
108 return escapeHTML(s, true);
109 }
110
111 //------------------------------------------------------------------------
112 /**
113 * Escape, but do not replace HTML.
114 * @param escapeAmpersand Optionally escape
115 * ampersands (&).
116 */
117 public static String escapeHTML(String s, boolean escapeAmpersand) {
118 // got to do amp's first so we don't double escape
/*
P/P * Method: String escapeHTML(String, bool)
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* escapeAmpersand: {0}, {1}
*/
119 if (escapeAmpersand) {
120 s = StringUtils.replace(s, "&", "&");
121 }
122 s = StringUtils.replace(s, " ", " ");
123 s = StringUtils.replace(s, "\"", """);
124 s = StringUtils.replace(s, "<", "<");
125 s = StringUtils.replace(s, ">", ">");
126 return s;
127 }
128
129 public static String unescapeHTML(String str) {
/*
P/P * Method: String unescapeHTML(String)
*
* Postconditions:
* init'ed(return_value)
*/
130 return StringEscapeUtils.unescapeHtml(str);
131 }
132
133 //------------------------------------------------------------------------
134 /**
135 * Remove occurences of html, defined as any text
136 * between the characters "<" and ">". Replace
137 * any HTML tags with a space.
138 */
139 public static String removeHTML(String str) {
/*
P/P * Method: String removeHTML(String)
*
* Postconditions:
* return_value != null
*/
140 return removeHTML(str, true);
141 }
142
143 /**
144 * Remove occurences of html, defined as any text
145 * between the characters "<" and ">".
146 * Optionally replace HTML tags with a space.
147 *
148 * @param str
149 * @param addSpace
150 * @return
151 */
152 public static String removeHTML(String str, boolean addSpace) {
/*
P/P * Method: String removeHTML(String, bool)
*
* Presumptions:
* java.lang.String:indexOf(...)@168 <= 232-2
*
* Postconditions:
* return_value != null
*
* Test Vectors:
* addSpace: {0}, {1}
* str: Inverse{null}, Addr_Set{null}
* java.lang.String:indexOf(...)@156: {-231..-2, 0..232-1}, {-1}
* java.lang.String:indexOf(...)@168: {-231..-1}, {0..232-2}
*/
153 if (str == null) return "";
154 StringBuffer ret = new StringBuffer(str.length());
155 int start = 0;
156 int beginTag = str.indexOf("<");
157 int endTag = 0;
158 if (beginTag == -1)
159 return str;
160
161 while (beginTag >= start) {
162 if (beginTag > 0) {
163 ret.append(str.substring(start, beginTag));
164
165 // replace each tag with a space (looks better)
166 if (addSpace) ret.append(" ");
167 }
168 endTag = str.indexOf(">", beginTag);
169
170 // if endTag found move "cursor" forward
171 if (endTag > -1) {
172 start = endTag + 1;
173 beginTag = str.indexOf("<", start);
174 }
175 // if no endTag found, get rest of str and break
176 else {
177 ret.append(str.substring(beginTag));
178 break;
179 }
180 }
181 // append everything after the last endTag
182 if (endTag > -1 && endTag + 1 < str.length()) {
183 ret.append(str.substring(endTag + 1));
184 }
185 return ret.toString().trim();
186 }
187
188 //------------------------------------------------------------------------
189 /** Run both removeHTML and escapeHTML on a string.
190 * @param s String to be run through removeHTML and escapeHTML.
191 * @return String with HTML removed and HTML special characters escaped.
192 */
193 public static String removeAndEscapeHTML( String s ) {
/*
P/P * Method: String removeAndEscapeHTML(String)
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* s: Inverse{null}, Addr_Set{null}
*/
194 if ( s==null ) return "";
195 else return Utilities.escapeHTML( Utilities.removeHTML(s) );
196 }
197
198 //------------------------------------------------------------------------
199 /**
200 * Autoformat.
201 */
202 public static String autoformat(String s) {
/*
P/P * Method: String autoformat(String)
*
* Postconditions:
* init'ed(return_value)
*/
203 String ret = StringUtils.replace(s, "\n", "<br />");
204 return ret;
205 }
206
207
208 /**
209 * Code (stolen from Pebble) to add rel="nofollow" string to all links in HTML.
210 */
211 public static String addNofollow(String html) {
/*
P/P * Method: String addNofollow(String)
*
* Preconditions:
* (soft) mLinkPattern != null
*
* Presumptions:
* java.util.regex.Pattern:matcher(...)@215 != null
*
* Postconditions:
* init'ed(java.lang.StringBuffer:toString(...)._tainted)
* return_value == One-of{html, &java.lang.StringBuffer:toString(...)}
* init'ed(return_value)
*
* Test Vectors:
* html: Addr_Set{null}, Inverse{null}
* java.lang.String:indexOf(...)@222: {-231..-2, 0..232-1}, {-1}
* java.lang.String:length(...)@212: {1..232-1}, {0}
* java.util.regex.Matcher:find(...)@217: {0}, {1}
*/
212 if (html == null || html.length() == 0) {
213 return html;
214 }
215 Matcher m = mLinkPattern.matcher(html);
216 StringBuffer buf = new StringBuffer();
217 while (m.find()) {
218 int start = m.start();
219 int end = m.end();
220 String link = html.substring(start, end);
221 buf.append(html.substring(0, start));
222 if (link.indexOf("rel=\"nofollow\"") == -1) {
223 buf.append(
224 link.substring(0, link.length() - 1) + " rel=\"nofollow\">");
225 } else {
226 buf.append(link);
227 }
228 html = html.substring(end, html.length());
229 m = mLinkPattern.matcher(html);
230 }
231 buf.append(html);
232 return buf.toString();
233 }
234
235
236 //------------------------------------------------------------------------
237 /**
238 * Replaces occurences of non-alphanumeric characters with an underscore.
239 */
240 public static String replaceNonAlphanumeric(String str) {
/*
P/P * Method: String replaceNonAlphanumeric(String)
*
* Preconditions:
* str != null
*
* Postconditions:
* java.lang.StringBuffer:toString(...)._tainted == 0
* return_value == &java.lang.StringBuffer:toString(...)
*/
241 return replaceNonAlphanumeric(str, '_');
242 }
243
244 //------------------------------------------------------------------------
245 /**
246 * Replaces occurences of non-alphanumeric characters with a
247 * supplied char.
248 */
249 public static String replaceNonAlphanumeric(String str, char subst) {
/*
P/P * Method: String replaceNonAlphanumeric(String, char)
*
* Preconditions:
* str != null
*
* Presumptions:
* testChars.length@251 <= 232-1
*
* Postconditions:
* java.lang.StringBuffer:toString(...)._tainted == 0
* return_value == &java.lang.StringBuffer:toString(...)
*
* Test Vectors:
* java.lang.Character:isLetterOrDigit(...)@253: {0}, {1}
*/
250 StringBuffer ret = new StringBuffer(str.length());
251 char[] testChars = str.toCharArray();
252 for (int i = 0; i < testChars.length; i++) {
+ 253 if (Character.isLetterOrDigit(testChars[i])) {
+ 254 ret.append(testChars[i]);
255 } else {
256 ret.append( subst );
257 }
258 }
259 return ret.toString();
260 }
261
262 //------------------------------------------------------------------------
263 /**
264 * Remove occurences of non-alphanumeric characters.
265 */
266 public static String removeNonAlphanumeric(String str) {
/*
P/P * Method: String removeNonAlphanumeric(String)
*
* Preconditions:
* str != null
*
* Presumptions:
* testChars.length@268 <= 232-1
*
* Postconditions:
* java.lang.StringBuffer:toString(...)._tainted == 0
* return_value == &java.lang.StringBuffer:toString(...)
*/
267 StringBuffer ret = new StringBuffer(str.length());
268 char[] testChars = str.toCharArray();
269 for (int i = 0; i < testChars.length; i++) {
270 // MR: Allow periods in page links
+ 271 if (Character.isLetterOrDigit(testChars[i]) ||
272 testChars[i] == '.') {
+ 273 ret.append(testChars[i]);
274 }
275 }
276 return ret.toString();
277 }
278
279 //------------------------------------------------------------------------
280 /**
281 * @param stringArray
282 * @param delim
283 * @return
284 */
285 public static String stringArrayToString(String[] stringArray, String delim) {
/*
P/P * Method: String stringArrayToString(String[], String)
*
* Preconditions:
* stringArray != null
* stringArray.length <= 232-1
* (soft) init'ed(stringArray[...])
*
* Postconditions:
* init'ed(java.lang.StringBuilder:toString(...)._tainted)
* init'ed(return_value)
*
* Test Vectors:
* java.lang.String:length(...)@288: {0}, {1..232-1}
*/
286 String ret = "";
287 for (int i = 0; i < stringArray.length; i++) {
+ 288 if (ret.length() > 0)
289 ret = ret + delim + stringArray[i];
290 else
291 ret = stringArray[i];
292 }
293 return ret;
294 }
295
296 //--------------------------------------------------------------------------
297 /** Convert string to string array. */
298 public static String[] stringToStringArray(String instr, String delim)
299 throws NoSuchElementException, NumberFormatException {
/*
P/P * Method: String[] stringToStringArray(String, String)
*
* Presumptions:
* java.util.StringTokenizer:countTokens(...)@301 >= 1
*
* Postconditions:
* init'ed(java.util.StringTokenizer:nextToken(...)._tainted)
* return_value == &new String[](stringToStringArray#2)
* new String[](stringToStringArray#2) num objects == 1
* (soft) return_value.length in 1..232-1
* return_value[...] == &java.util.StringTokenizer:nextToken(...)
*
* Test Vectors:
* java.util.StringTokenizer:hasMoreTokens(...)@304: {0}, {1}
*/
300 StringTokenizer toker = new StringTokenizer(instr, delim);
301 String stringArray[] = new String[toker.countTokens()];
302 int i = 0;
303
304 while (toker.hasMoreTokens()) {
+ 305 stringArray[i++] = toker.nextToken();
306 }
307 return stringArray;
308 }
309
310 //--------------------------------------------------------------------------
311 /** Convert string to integer array. */
312 public static int[] stringToIntArray(String instr, String delim)
313 throws NoSuchElementException, NumberFormatException {
/*
P/P * Method: int[] stringToIntArray(String, String)
*
* Presumptions:
* java.util.StringTokenizer:countTokens(...)@315 >= 1
*
* Postconditions:
* return_value == &new int[](stringToIntArray#2)
* new int[](stringToIntArray#2) num objects == 1
* (soft) return_value.length in 1..232-1
* init'ed(return_value[...])
*
* Test Vectors:
* java.util.StringTokenizer:hasMoreTokens(...)@318: {0}, {1}
*/
314 StringTokenizer toker = new StringTokenizer(instr, delim);
315 int intArray[] = new int[toker.countTokens()];
316 int i = 0;
317
318 while (toker.hasMoreTokens()) {
319 String sInt = toker.nextToken();
320 int nInt = Integer.parseInt(sInt);
+ 321 intArray[i++] = new Integer(nInt).intValue();
322 }
323 return intArray;
324 }
325
326 //-------------------------------------------------------------------
327 /** Convert integer array to a string. */
328 public static String intArrayToString(int[] intArray) {
/*
P/P * Method: String intArrayToString(int[])
*
* Preconditions:
* intArray != null
* intArray.length <= 232-1
* (soft) init'ed(intArray[...])
*
* Postconditions:
* java.lang.Integer:toString(...)._tainted == 0
* java.lang.StringBuilder:toString(...)._tainted == 0
* return_value in Addr_Set{&"",&java.lang.StringBuilder:toString(...),&java.lang.Integer:toString(...)}
*
* Test Vectors:
* java.lang.String:length(...)@331: {0}, {1..232-1}
*/
329 String ret = "";
330 for (int i = 0; i < intArray.length; i++) {
331 if (ret.length() > 0)
332 ret = ret + "," + Integer.toString(intArray[i]);
333 else
334 ret = Integer.toString(intArray[i]);
335 }
336 return ret;
337 }
338
339 //------------------------------------------------------------------------
340 public static void copyFile(File from, File to) throws IOException {
/*
P/P * Method: void copyFile(File, File)
*
* Preconditions:
* from != null
*/
341 InputStream in = null;
342 OutputStream out = null;
343
344 try {
345 in = new FileInputStream(from);
346 } catch (IOException ex) {
347 throw new IOException(
348 "Utilities.copyFile: opening input stream '"
349 + from.getPath()
350 + "', "
351 + ex.getMessage());
352 }
353
354 try {
355 out = new FileOutputStream(to);
356 } catch (Exception ex) {
357 try {
358 in.close();
359 } catch (IOException ex1) {
360 }
361 throw new IOException(
362 "Utilities.copyFile: opening output stream '"
363 + to.getPath()
364 + "', "
365 + ex.getMessage());
366 }
367
368 copyInputToOutput(in, out, from.length());
369 }
370
371 //------------------------------------------------------------------------
372 /**
373 * Utility method to copy an input stream to an output stream.
374 * Wraps both streams in buffers. Ensures right numbers of bytes copied.
375 */
376 public static void copyInputToOutput(
377 InputStream input,
378 OutputStream output,
379 long byteCount)
380 throws IOException {
381 int bytes;
382 long length;
383
/*
P/P * Method: void copyInputToOutput(InputStream, OutputStream, long)
*/
384 BufferedInputStream in = new BufferedInputStream(input);
385 BufferedOutputStream out = new BufferedOutputStream(output);
386
387 byte[] buffer;
388 buffer = new byte[8192];
389
390 for (length = byteCount; length > 0;) {
391 bytes = (int) (length > 8192 ? 8192 : length);
392
393 try {
394 bytes = in.read(buffer, 0, bytes);
395 } catch (IOException ex) {
396 try {
397 in.close();
398 out.close();
399 } catch (IOException ex1) {
400 }
401 throw new IOException(
402 "Reading input stream, " + ex.getMessage());
403 }
404
405 if (bytes < 0)
406 break;
407
408 length -= bytes;
409
410 try {
411 out.write(buffer, 0, bytes);
412 } catch (IOException ex) {
413 try {
414 in.close();
415 out.close();
416 } catch (IOException ex1) {
417 }
418 throw new IOException(
419 "Writing output stream, " + ex.getMessage());
420 }
421 }
422
423 try {
424 in.close();
425 out.close();
426 } catch (IOException ex) {
427 throw new IOException("Closing file streams, " + ex.getMessage());
428 }
429 }
430
431 //------------------------------------------------------------------------
432 public static void copyInputToOutput(
433 InputStream input,
434 OutputStream output)
435 throws IOException {
/*
P/P * Method: void copyInputToOutput(InputStream, OutputStream)
*
* Test Vectors:
* java.io.BufferedInputStream:read(...)@440: {-1}, {-231..-2, 0..232-1}
*/
436 BufferedInputStream in = new BufferedInputStream(input);
437 BufferedOutputStream out = new BufferedOutputStream(output);
438 byte buffer[] = new byte[8192];
439 for (int count = 0; count != -1;) {
440 count = in.read(buffer, 0, 8192);
441 if (count != -1)
442 out.write(buffer, 0, count);
443 }
444
445 try {
446 in.close();
447 out.close();
448 } catch (IOException ex) {
449 throw new IOException("Closing file streams, " + ex.getMessage());
450 }
451 }
452
453 /**
454 * Encode a string using algorithm specified in web.xml and return the
455 * resulting encrypted password. If exception, the plain credentials
456 * string is returned
457 *
458 * @param password Password or other credentials to use in authenticating
459 * this username
460 * @param algorithm Algorithm used to do the digest
461 *
462 * @return encypted password based on the algorithm.
463 */
464 public static String encodePassword(String password, String algorithm) {
/*
P/P * Method: String encodePassword(String, String)
*
* Preconditions:
* password != null
* (soft) mLogger != null
*
* Presumptions:
* encodedPassword.length@484 <= 232-1
* java.security.MessageDigest:digest(...)@484 != null
* java.security.MessageDigest:getInstance(...)@471 != null
*
* Postconditions:
* java.lang.StringBuffer:toString(...)._tainted == 0
* return_value == One-of{password, &java.lang.StringBuffer:toString(...)}
* return_value != null
*/
465 byte[] unencodedPassword = password.getBytes();
466
467 MessageDigest md = null;
468
469 try {
470 // first create an instance, given the provider
471 md = MessageDigest.getInstance(algorithm);
472 } catch (Exception e) {
473 mLogger.error("Exception: " + e);
474 return password;
475 }
476
477 md.reset();
478
479 // call the update method one or more times
480 // (useful when you don't know the size of your data, eg. stream)
481 md.update(unencodedPassword);
482
483 // now calculate the hash
484 byte[] encodedPassword = md.digest();
485
486 StringBuffer buf = new StringBuffer();
487
488 for (int i = 0; i < encodedPassword.length; i++) {
+ 489 if ((encodedPassword[i] & 0xff) < 0x10) {
490 buf.append("0");
491 }
492
+ 493 buf.append(Long.toString(encodedPassword[i] & 0xff, 16));
494 }
495
496 return buf.toString();
497 }
498
499 /**
500 * Encode a string using Base64 encoding. Used when storing passwords
501 * as cookies.
502 *
503 * This is weak encoding in that anyone can use the decodeString
504 * routine to reverse the encoding.
505 *
506 * @param str
507 * @return String
508 * @throws IOException
509 */
510 public static String encodeString(String str) throws IOException {
/*
P/P * Method: String encodeString(String)
*
* Preconditions:
* str != null
*
* Postconditions:
* return_value != null
*/
+ 511 Base64 base64 = new Base64();
512 String encodedStr = new String(base64.encodeBase64(str.getBytes()));
513 return (encodedStr.trim());
514 }
515
516 /**
517 * Decode a string using Base64 encoding.
518 *
519 * @param str
520 * @return String
521 * @throws IOException
522 */
523 public static String decodeString(String str) throws IOException {
/*
P/P * Method: String decodeString(String)
*
* Preconditions:
* str != null
*
* Postconditions:
* return_value == &new String(decodeString#2)
* new String(decodeString#2) num objects == 1
*/
+ 524 Base64 base64 = new Base64();
525 String value = new String(base64.decodeBase64(str.getBytes()));
526 return (value);
527 }
528
529 /**
530 * Strips HTML and truncates.
531 */
532 public static String truncate(
533 String str, int lower, int upper, String appendToEnd) {
534 // strip markup from the string
/*
P/P * Method: String truncate(String, int, int, String)
*
* Postconditions:
* init'ed(java.lang.String:substring(...)._tainted)
* init'ed(java.lang.StringBuilder:toString(...)._tainted)
* return_value != null
*
* Test Vectors:
* lower - upper: {-6_442_450_943..0}, {1..6_442_450_943}
*/
535 String str2 = removeHTML(str, false);
536
537 // quickly adjust the upper if it is set lower than 'lower'
538 if (upper < lower) {
539 upper = lower;
540 }
541
542 // now determine if the string fits within the upper limit
543 // if it does, go straight to return, do not pass 'go' and collect $200
544 if(str2.length() > upper) {
545 // the magic location int
546 int loc;
547
548 // first we determine where the next space appears after lower
549 loc = str2.lastIndexOf(' ', upper);
550
551 // now we'll see if the location is greater than the lower limit
552 if(loc >= lower) {
553 // yes it was, so we'll cut it off here
554 str2 = str2.substring(0, loc);
555 } else {
556 // no it wasnt, so we'll cut it off at the upper limit
557 str2 = str2.substring(0, upper);
+ 558 loc = upper;
559 }
560
561 // the string was truncated, so we append the appendToEnd String
562 str2 = str2 + appendToEnd;
563 }
564
565 return str2;
566 }
567
568 /**
569 * This method based on code from the String taglib at Apache Jakarta:
570 * http://cvs.apache.org/viewcvs/jakarta-taglibs/string/src/org/apache/taglibs/string/util/StringW.java?rev=1.16&content-type=text/vnd.viewcvs-markup
571 * Copyright (c) 1999 The Apache Software Foundation.
572 * Author: timster@mac.com
573 *
574 * @param str
575 * @param lower
576 * @param upper
577 * @param appendToEnd
578 * @return
579 */
580 public static String truncateNicely(String str, int lower, int upper, String appendToEnd) {
581 // strip markup from the string
/*
P/P * Method: String truncateNicely(String, int, int, String)
*
* Preconditions:
* str != null
*
* Presumptions:
* java.lang.String:indexOf(...)@619 + java.lang.String:length(...)@619 in -231..232-1
* java.lang.String:lastIndexOf(...)@613 <= 232-2
*
* Postconditions:
* init'ed(java.lang.StringBuilder:toString(...)._tainted)
* return_value == One-of{str, &java.lang.StringBuilder:toString(...)}
* return_value != null
*
* Test Vectors:
* lower - upper: {-6_442_450_943..0}, {1..6_442_450_943}
*/
582 String str2 = removeHTML(str, false);
583 boolean diff = (str2.length() < str.length());
584
585 // quickly adjust the upper if it is set lower than 'lower'
586 if(upper < lower) {
587 upper = lower;
588 }
589
590 // now determine if the string fits within the upper limit
591 // if it does, go straight to return, do not pass 'go' and collect $200
592 if(str2.length() > upper) {
593 // the magic location int
594 int loc;
595
596 // first we determine where the next space appears after lower
597 loc = str2.lastIndexOf(' ', upper);
598
599 // now we'll see if the location is greater than the lower limit
600 if(loc >= lower) {
601 // yes it was, so we'll cut it off here
602 str2 = str2.substring(0, loc);
603 } else {
604 // no it wasnt, so we'll cut it off at the upper limit
605 str2 = str2.substring(0, upper);
606 loc = upper;
607 }
608
609 // HTML was removed from original str
610 if (diff) {
611
612 // location of last space in truncated string
613 loc = str2.lastIndexOf(' ', loc);
614
615 // get last "word" in truncated string (add 1 to loc to eliminate space
616 String str3 = str2.substring(loc+1);
617
618 // find this fragment in original str, from 'loc' position
619 loc = str.indexOf(str3, loc) + str3.length();
620
621 // get truncated string from original str, given new 'loc'
622 str2 = str.substring(0, loc);
623
624 // get all the HTML from original str after loc
625 str3 = extractHTML(str.substring(loc));
626
627 // remove any tags which generate visible HTML
628 // This call is unecessary, all HTML has already been stripped
629 //str3 = removeVisibleHTMLTags(str3);
630
631 // append the appendToEnd String and
632 // add extracted HTML back onto truncated string
633 str = str2 + appendToEnd + str3;
634 } else {
635 // the string was truncated, so we append the appendToEnd String
636 str = str2 + appendToEnd;
637 }
638
639 }
640
641 return str;
642 }
643
644 public static String truncateText(String str, int lower, int upper, String appendToEnd) {
645 // strip markup from the string
/*
P/P * Method: String truncateText(String, int, int, String)
*
* Preconditions:
* str != null
*
* Postconditions:
* init'ed(java.lang.StringBuilder:toString(...)._tainted)
* return_value == One-of{str, &java.lang.StringBuilder:toString(...)}
* return_value != null
*
* Test Vectors:
* lower - upper: {-6_442_450_943..0}, {1..6_442_450_943}
*/
646 String str2 = removeHTML(str, false);
+ 647 boolean diff = (str2.length() < str.length());
648
649 // quickly adjust the upper if it is set lower than 'lower'
650 if(upper < lower) {
651 upper = lower;
652 }
653
654 // now determine if the string fits within the upper limit
655 // if it does, go straight to return, do not pass 'go' and collect $200
656 if(str2.length() > upper) {
657 // the magic location int
658 int loc;
659
660 // first we determine where the next space appears after lower
661 loc = str2.lastIndexOf(' ', upper);
662
663 // now we'll see if the location is greater than the lower limit
664 if(loc >= lower) {
665 // yes it was, so we'll cut it off here
666 str2 = str2.substring(0, loc);
667 } else {
668 // no it wasnt, so we'll cut it off at the upper limit
669 str2 = str2.substring(0, upper);
+ 670 loc = upper;
671 }
672 // the string was truncated, so we append the appendToEnd String
673 str = str2 + appendToEnd;
674 }
675 return str;
676 }
677
678 /**
679 * @param str
680 * @return
681 */
682 private static String stripLineBreaks(String str) {
683 // TODO: use a string buffer, ignore case !
/*
P/P * Method: String stripLineBreaks(String)
*
* Preconditions:
* str != null
*
* Postconditions:
* return_value != null
*/
684 str = str.replaceAll("<br>", "");
685 str = str.replaceAll("<br/>", "");
686 str = str.replaceAll("<br />", "");
687 str = str.replaceAll("<p></p>", "");
688 str = str.replaceAll("<p/>","");
689 str = str.replaceAll("<p />","");
690 return str;
691 }
692
693 /**
694 * Need need to get rid of any user-visible HTML tags once all text has been
695 * removed such as <BR>. This sounds like a better approach than removing
696 * all HTML tags and taking the chance to leave some tags un-closed.
697 *
698 * WARNING: this method has serious performance problems a
699 *
700 * @author Alexis Moussine-Pouchkine (alexis.moussine-pouchkine@france.sun.com)
701 * @author Lance Lavandowska
702 * @param str the String object to modify
703 * @return the new String object without the HTML "visible" tags
704 */
705 private static String removeVisibleHTMLTags(String str) {
/*
P/P * Method: String removeVisibleHTMLTags(String)
*
* Preconditions:
* str != null
*
* Presumptions:
* java.lang.StringBuffer:indexOf(...)@714 + java.lang.String:length(...)@716 in -231..232-1
* java.lang.StringBuffer:indexOf(...)@714 + java.lang.String:length(...)@717 in -231..232-1
* java.lang.StringBuffer:indexOf(...)@720 <= 232-2
* java.lang.StringBuffer:indexOf(...)@738 <= 232-2
* java.lang.StringBuffer:indexOf(...)@741 + java.lang.String:length(...)@744 in -231..232-1
* ...
*
* Postconditions:
* init'ed(java.lang.StringBuffer:toString(...)._tainted)
* return_value == &java.lang.StringBuffer:toString(...)
*
* Test Vectors:
* java.lang.String:endsWith(...)@715: {0}, {1}
* java.lang.StringBuffer:charAt(...)@750: {0..46, 48..216-1}, {47}
* java.lang.StringBuffer:indexOf(...)@714: {-1}, {-231..-2, 0..232-1}
* java.lang.StringBuffer:indexOf(...)@720: {-231..-1}, {0..232-2}
* java.lang.StringBuffer:indexOf(...)@738: {-231..-1}, {0..232-3}
* java.lang.StringBuffer:indexOf(...)@741: {-231..-1}, {0..232-1}
* java.lang.StringBuffer:indexOf(...)@749: {-231..-2, 0..232-2}, {-1}
*/
706 str = stripLineBreaks(str);
707 StringBuffer result = new StringBuffer(str);
708 StringBuffer lcresult = new StringBuffer(str.toLowerCase());
709
710 // <img should take care of smileys
711 String[] visibleTags = {"<img"}; // are there others to add?
712 int stringIndex;
713 for ( int j = 0 ; j < visibleTags.length ; j++ ) {
714 while ( (stringIndex = lcresult.indexOf(visibleTags[j])) != -1 ) {
715 if ( visibleTags[j].endsWith(">") ) {
716 result.delete(stringIndex, stringIndex+visibleTags[j].length() );
717 lcresult.delete(stringIndex, stringIndex+visibleTags[j].length() );
718 } else {
719 // need to delete everything up until next closing '>', for <img for instance
720 int endIndex = result.indexOf(">", stringIndex);
721 if (endIndex > -1) {
722 // only delete it if we find the end! If we don't the HTML may be messed up, but we
723 // can't safely delete anything.
724 result.delete(stringIndex, endIndex + 1 );
725 lcresult.delete(stringIndex, endIndex + 1 );
726 }
727 }
728 }
729 }
730
731 // TODO: This code is buggy by nature. It doesn't deal with nesting of tags properly.
732 // remove certain elements with open & close tags
733 String[] openCloseTags = {"li", "a", "div", "h1", "h2", "h3", "h4"}; // more ?
734 for (int j = 0; j < openCloseTags.length; j++) {
735 // could this be better done with a regular expression?
736 String closeTag = "</"+openCloseTags[j]+">";
737 int lastStringIndex = 0;
738 while ( (stringIndex = lcresult.indexOf( "<"+openCloseTags[j], lastStringIndex)) > -1) {
739 lastStringIndex = stringIndex;
740 // Try to find the matching closing tag (ignores possible nesting!)
741 int endIndex = lcresult.indexOf(closeTag, stringIndex);
742 if (endIndex > -1) {
743 // If we found it delete it.
744 result.delete(stringIndex, endIndex+closeTag.length());
745 lcresult.delete(stringIndex, endIndex+closeTag.length());
746 } else {
747 // Try to see if it is a self-closed empty content tag, i.e. closed with />.
748 endIndex = lcresult.indexOf(">", stringIndex);
749 int nextStart = lcresult.indexOf("<", stringIndex+1);
750 if (endIndex > stringIndex && lcresult.charAt(endIndex-1) == '/' && (endIndex < nextStart || nextStart == -1)) {
751 // Looks like it, so remove it.
752 result.delete(stringIndex, endIndex + 1);
753 lcresult.delete(stringIndex, endIndex + 1);
754
755 }
756 }
757 }
758 }
759
760 return result.toString();
761 }
762
763 /**
764 * Extract (keep) JUST the HTML from the String.
765 * @param str
766 * @return
767 */
768 public static String extractHTML(String str) {
/*
P/P * Method: String extractHTML(String)
*
* Presumptions:
* java.lang.String:indexOf(...)@778 <= 232-2
*
* Postconditions:
* init'ed(java.lang.StringBuffer:toString(...)._tainted)
* return_value == One-of{&"", str, &java.lang.StringBuffer:toString(...)}
* return_value != null
*
* Test Vectors:
* str: Inverse{null}, Addr_Set{null}
* java.lang.String:indexOf(...)@772: {-231..-2, 0..232-1}, {-1}
* java.lang.String:indexOf(...)@778: {-231..-1}, {0..232-2}
*/
769 if (str == null) return "";
770 StringBuffer ret = new StringBuffer(str.length());
771 int start = 0;
772 int beginTag = str.indexOf("<");
773 int endTag = 0;
774 if (beginTag == -1)
775 return str;
776
777 while (beginTag >= start) {
778 endTag = str.indexOf(">", beginTag);
779
780 // if endTag found, keep tag
781 if (endTag > -1) {
782 ret.append( str.substring(beginTag, endTag+1) );
783
784 // move start forward and find another tag
785 start = endTag + 1;
786 beginTag = str.indexOf("<", start);
787 }
788 // if no endTag found, break
789 else {
790 break;
791 }
792 }
793 return ret.toString();
794 }
795
796
797 public static String hexEncode(String str) {
/*
P/P * Method: String hexEncode(String)
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* org.apache.commons.lang.StringUtils:isEmpty(...)@798: {0}, {1}
*/
798 if (StringUtils.isEmpty(str)) return str;
799
800 return RegexUtil.encode(str);
801 }
802
803 public static String encodeEmail(String str) {
/*
P/P * Method: String encodeEmail(String)
*
* Postconditions:
* init'ed(return_value)
*/
804 return str!=null ? RegexUtil.encodeEmail(str) : null;
805 }
806
807 /**
808 * URL encoding.
809 * @param s a string to be URL-encoded
810 * @return URL encoding of s using character encoding UTF-8; null if s is null.
811 */
812 public static final String encode(String s) {
813 try {
/*
P/P * Method: String encode(String)
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* s: Addr_Set{null}, Inverse{null}
*/
814 if (s != null)
815 return URLEncoder.encode(s, "UTF-8");
816 else
817 return s;
818 } catch (UnsupportedEncodingException e) {
819 // Java Spec requires UTF-8 be in all Java environments, so this should not happen
820 return s;
821 }
822 }
823
824 /**
825 * URL decoding.
826 * @param s a URL-encoded string to be URL-decoded
827 * @return URL decoded value of s using character encoding UTF-8; null if s is null.
828 */
829 public static final String decode(String s) {
830 try {
/*
P/P * Method: String decode(String)
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* s: Addr_Set{null}, Inverse{null}
*/
831 if (s != null)
832 return URLDecoder.decode(s, "UTF-8");
833 else
834 return s;
835 } catch (UnsupportedEncodingException e) {
836 // Java Spec requires UTF-8 be in all Java environments, so this should not happen
837 return s;
838 }
839 }
840
841 /**
842 * @param string
843 * @return
844 */
845 public static int stringToInt(String string) {
846 try {
/*
P/P * Method: int stringToInt(String)
*
* Preconditions:
* (soft) mLogger != null
*
* Presumptions:
* java.lang.Integer:valueOf(...)@847 != null
*
* Postconditions:
* init'ed(return_value)
*/
847 return Integer.valueOf(string).intValue();
848 } catch (NumberFormatException e) {
849 mLogger.debug("Invalid Integer:" + string);
850 }
851 return 0;
852 }
853
854 /**
855 * Convert a byte array into a Base64 string (as used in mime formats)
856 */
857 public static String toBase64(byte[] aValue) {
858
/*
P/P * Method: String toBase64(byte[])
*
* Preconditions:
* aValue != null
* (soft) aValue.length in {0, 3..232-1}
* (soft) init'ed(aValue[...])
*
* Postconditions:
* java.lang.StringBuffer:toString(...)._tainted == 0
* return_value == &java.lang.StringBuffer:toString(...)
*/
+ 859 final String m_strBase64Chars =
860 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
861
862 int byte1;
863 int byte2;
864 int byte3;
865 int iByteLen = aValue.length;
866 StringBuffer tt = new StringBuffer();
867
868 for (int i = 0; i < iByteLen; i += 3) {
869 boolean bByte2 = (i + 1) < iByteLen;
870 boolean bByte3 = (i + 2) < iByteLen;
871 byte1 = aValue[i] & 0xFF;
872 byte2 = (bByte2) ? (aValue[i + 1] & 0xFF) : 0;
873 byte3 = (bByte3) ? (aValue[i + 2] & 0xFF) : 0;
874
875 tt.append(m_strBase64Chars.charAt(byte1 / 4));
876 tt.append(m_strBase64Chars.charAt((byte2 / 16) + ((byte1 & 0x3) * 16)));
877 tt.append(((bByte2) ? m_strBase64Chars.charAt((byte3 / 64) + ((byte2 & 0xF) * 4)) : '='));
878 tt.append(((bByte3) ? m_strBase64Chars.charAt(byte3 & 0x3F) : '='));
879 }
880
881 return tt.toString();
882 }
883
884 /**
885 * @param tag
886 * @return
887 */
888 public static String stripInvalidTagCharacters(String tag) {
/*
P/P * Method: String stripInvalidTagCharacters(String)
*
* Preconditions:
* tag != null
*
* Presumptions:
* charArray.length@893 <= 232-1
*
* Postconditions:
* java.lang.StringBuffer:toString(...)._tainted == 0
* return_value == &java.lang.StringBuffer:toString(...)
*
* Test Vectors:
* java.lang.Character:isUnicodeIdentifierPart(...)@904: {1}, {0}
* java.lang.Character:isUnicodeIdentifierStart(...)@904: {0}, {1}
*/
889 if (tag == null)
890 throw new NullPointerException();
891
892 StringBuffer sb = new StringBuffer();
893 char[] charArray = tag.toCharArray();
894 for (int i = 0; i < charArray.length; i++) {
+ 895 char c = charArray[i];
896
897 // fast-path exclusions quotes and commas are obvious
+ 898 switch (c) {
899 case 34: // "
900 case 44: // ,
901 continue;
902 }
903
+ 904 if ((33 <= c && c <= 126) || Character.isUnicodeIdentifierPart(c)
905 || Character.isUnicodeIdentifierStart(c)) {
+ 906 sb.append(charArray[i]);
907 }
908 }
909 return sb.toString();
910 }
911
912 public static String normalizeTag(String tag, Locale locale) {
/*
P/P * Method: String normalizeTag(String, Locale)
*
* Preconditions:
* tag != null
*
* Postconditions:
* return_value != null
*
* Test Vectors:
* locale: Inverse{null}, Addr_Set{null}
*/
913 tag = Utilities.stripInvalidTagCharacters(tag);
914 return locale == null ? tag.toLowerCase() : tag.toLowerCase(locale);
915 }
916
917 /**
918 * @param tags
919 * @return
920 */
921 public static List splitStringAsTags(String tags) {
/*
P/P * Method: List splitStringAsTags(String)
*
* Presumptions:
* init'ed(java.util.Collections.EMPTY_LIST)
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* org.apache.commons.lang.StringUtils:split(...)@922: Inverse{null}, Addr_Set{null}
*/
922 String[] tagsarr = StringUtils.split(tags, TAG_SPLIT_CHARS);
923 if(tagsarr == null)
924 return Collections.EMPTY_LIST;
925 return Arrays.asList(tagsarr);
926 }
927
928
929 /**
930 * Transforms the given String into a subset of HTML displayable on a web
931 * page. The subset includes {@code <b>}, {@code <i>}, {@code <p>}, {@code <br>},
932 * {@code <pre>} and {@code <a href>} (and their corresponding end tags).
933 * The code below also rewrites blockquote, ul, ol and li tags.
934 * @param s the String to transform; may be null
935 * @return the transformed String, or null if s is null
936 */
937 public static String transformToHTMLSubset(String s) {
938
/*
P/P * Method: String transformToHTMLSubset(String)
*
* Presumptions:
* java.util.regex.Matcher:replaceAll(...)@985 != null
* java.util.regex.Pattern:compile(...)@43 != null
* java.util.regex.Pattern:compile(...)@45 != null
* java.util.regex.Pattern:compile(...)@47 != null
* java.util.regex.Pattern:compile(...)@49 != null
* ...
*
* Postconditions:
* init'ed(java.lang.StringBuilder:toString(...)._tainted)
* init'ed(return_value)
*
* Test Vectors:
* s: Inverse{null}, Addr_Set{null}
* java.util.regex.Matcher:find(...)@965: {0}, {1}
*/
939 if (s == null) {
940 return null;
941 }
942
// Each call replaces every match of one precompiled pattern with the literal
// shown. The pattern definitions are outside this view; presumably each one
// matches the entity-escaped form of the tag it restores -- TODO confirm.
943 s = replace(s, OPENING_B_TAG_PATTERN, "<b>");
944 s = replace(s, CLOSING_B_TAG_PATTERN, "</b>");
945 s = replace(s, OPENING_I_TAG_PATTERN, "<i>");
946 s = replace(s, CLOSING_I_TAG_PATTERN, "</i>");
947 s = replace(s, OPENING_BLOCKQUOTE_TAG_PATTERN, "<blockquote>");
948 s = replace(s, CLOSING_BLOCKQUOTE_TAG_PATTERN, "</blockquote>");
949 s = replace(s, BR_TAG_PATTERN, "<br />");
950 s = replace(s, OPENING_P_TAG_PATTERN, "<p>");
951 s = replace(s, CLOSING_P_TAG_PATTERN, "</p>");
952 s = replace(s, OPENING_PRE_TAG_PATTERN, "<pre>");
953 s = replace(s, CLOSING_PRE_TAG_PATTERN, "</pre>");
954 s = replace(s, OPENING_UL_TAG_PATTERN, "<ul>");
955 s = replace(s, CLOSING_UL_TAG_PATTERN, "</ul>");
956 s = replace(s, OPENING_OL_TAG_PATTERN, "<ol>");
957 s = replace(s, CLOSING_OL_TAG_PATTERN, "</ol>");
958 s = replace(s, OPENING_LI_TAG_PATTERN, "<li>");
959 s = replace(s, CLOSING_LI_TAG_PATTERN, "</li>");
960 s = replace(s, QUOTE_PATTERN, "\"");
961
962 // HTTP links
963 s = replace(s, CLOSING_A_TAG_PATTERN, "</a>");
964 Matcher m = OPENING_A_TAG_PATTERN.matcher(s);
// Rewrite one opening link per iteration: drop the first 4 and last 4
// characters of the match and wrap what remains in '<' and '>'. The 4-character
// trim presumably removes an escaped angle bracket at each end -- TODO confirm
// against the OPENING_A_TAG_PATTERN definition.
965 while (m.find()) {
966 int start = m.start();
967 int end = m.end();
968 String link = s.substring(start, end);
969 link = "<" + link.substring(4, link.length() - 4) + ">";
970 s = s.substring(0, start) + link + s.substring(end, s.length());
// s has changed, so a fresh matcher is created over the new text.
971 m = OPENING_A_TAG_PATTERN.matcher(s);
972 }
973
974 // escaped angle brackets
// NOTE(review): as shown, each of the three calls below replaces a string with
// itself and so has no effect. This listing appears to have had HTML entities
// decoded during extraction; confirm the real literals in the original source.
975 s = s.replaceAll("<", "<");
976 s = s.replaceAll(">", ">");
977 s = s.replaceAll("&#", "&#");
978
979 return s;
980 }
981
982
983 private static String replace(String string, Pattern pattern, String replacement) {
/*
P/P * Method: String replace(String, Pattern, String)
*
* Preconditions:
* pattern != null
*
* Presumptions:
* java.util.regex.Pattern:matcher(...)@984 != null
*
* Postconditions:
* init'ed(return_value)
*/
984 Matcher m = pattern.matcher(string);
985 return m.replaceAll(replacement);
986 }
987
988 }
SofCheck Inspector Build Version : 2.18479
| Utilities.java |
2009-Jan-02 14:25:28 |
| Utilities.class |
2009-Sep-04 03:12:32 |