File Source: Blacklist.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. The ASF licenses this file to You
4 * under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License. For additional information regarding
15 * copyright in this work, please see the NOTICE file in the top level
16 * directory of this distribution.
17 */
18 /* Created on Nov 11, 2003 */
19 package org.apache.roller.weblogger.util;
20
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23 import java.io.BufferedReader;
24 import java.io.FileInputStream;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.File;
29 import java.io.FileOutputStream;
30 import java.net.HttpURLConnection;
31 import java.net.URL;
32 import java.text.ParseException;
33 import java.text.SimpleDateFormat;
34 import java.util.ArrayList;
35 import java.util.Date;
36 import java.util.Iterator;
37 import java.util.LinkedList;
38 import java.util.List;
39 import java.util.StringTokenizer;
40 import java.util.regex.Matcher;
41 import java.util.regex.Pattern;
42 import java.util.regex.PatternSyntaxException;
43 import org.apache.roller.weblogger.config.WebloggerConfig;
44 import org.apache.commons.lang.StringUtils;
45 import org.apache.roller.util.DateUtil;
46
47 /**
48 * Loads MT-Blacklist style blacklist from disk and allows callers to test
49 * strings against the blacklist and (optionally) additional blacklists.
50 * <br />
51 * First looks for blacklist.txt in uploads directory, then in classpath
52 * as /blacklist.txt. Download from web feature disabled.
53 * <br />
54 * Blacklist is formatted one entry per line.
55 * Any line that begins with # is considered to be a comment.
56 * Any line that begins with ( is considered to be a regex expression.
57 * <br />
58 * For more information on the (discontinued) MT-Blacklist service:
59 * http://www.jayallen.org/projects/mt-blacklist.
60 *
61 * @author Lance Lavandowska
62 * @author Allen Gilliland
63 */
64 public class Blacklist {
65
/*
P/P * Method: org.apache.roller.weblogger.util.Blacklist__static_init
*
* Presumptions:
* blacklist.blacklistRegex init'ed
* blacklist.blacklistStr init'ed
* org.apache.commons.logging.LogFactory:getLog(...)@66 != null
*
* Postconditions:
* blacklist == &new Blacklist(Blacklist__static_init#1)
* blacklistURL == null
* (soft) mLogger != null
* new Blacklist(Blacklist__static_init#1) num objects == 1
* new LinkedList(Blacklist#1) num objects == 1
* new LinkedList(Blacklist#2) num objects == 1
* blacklist.blacklistRegex == &new LinkedList(Blacklist#2)
* blacklist.blacklistStr == &new LinkedList(Blacklist#1)
* init'ed(blacklist.lastModified)
* new Date(loadBlacklistFromFile#3) num objects <= 1
*/
/** Commons-logging logger shared by every method of this class. */
private static Log mLogger = LogFactory.getLog(Blacklist.class);

/** The one shared instance; assigned by the static initializer below. */
private static Blacklist blacklist;

/** Name of the blacklist file looked up inside the uploads directory. */
private static final String blacklistFile = "blacklist.txt";

/** Marker that precedes the timestamp inside a blacklist comment line. */
private static final String lastUpdateStr = "Last update:";

/** We no longer have a blacklist update URL, so this is always null. */
private static final String blacklistURL = null;

/** Timestamp associated with the currently loaded rules; null before any load. */
private Date lastModified = null;

/** Plain string rules; elements are added as String by the rule reader. */
private List blacklistStr = new LinkedList();

/** Regex rules; elements are added as pre-compiled Pattern by the rule reader. */
private List blacklistRegex = new LinkedList();

// Create and populate the singleton when the class is loaded.
// The logger field above must already be initialized at this point.
static {
    mLogger.info("Initializing MT Blacklist");
    blacklist = new Blacklist();
    blacklist.loadBlacklistFromFile(null);
}
85
/**
 * Private constructor: instances are only created from inside this class
 * (the static initializer builds the shared one).
 */
private Blacklist() {
    // nothing to do here; every field is set by its own initializer
}
89
90 /** Singleton factory method. */
91 public static Blacklist getBlacklist() {
/*
P/P * Method: Blacklist getBlacklist()
*
* Preconditions:
* init'ed(blacklist)
*
* Postconditions:
* return_value == blacklist
* init'ed(return_value)
*/
92 return blacklist;
93 }
94
95 /** Updated MT blacklist if necessary. */
96 public static void checkForUpdate() {
/*
P/P * Method: void checkForUpdate()
*
* Preconditions:
* blacklist != null
*
* Postconditions:
* possibly_updated(blacklist.lastModified)
* new Date(loadBlacklistFromFile#3) num objects == 0
*/
97 getBlacklist().update();
98 }
99
100 /** Non-Static update method. */
101 public void update() {
/*
P/P * Method: void update()
*
* Postconditions:
* possibly_updated(this.lastModified)
* new Date(loadBlacklistFromFile#3) num objects == 0
*/
+ 102 if (this.blacklistURL != null) {
+ 103 boolean blacklist_updated = this.downloadBlacklist();
104 if (blacklist_updated) {
105 this.loadBlacklistFromFile(null);
106 }
107 }
108 }
109
110 /** Download the MT blacklist from the web to our uploads directory. */
111 private boolean downloadBlacklist() {
112
/*
P/P * Method: bool downloadBlacklist()
*
* Preconditions:
* mLogger != null
* (soft) init'ed(this.lastModified)
*
* Presumptions:
* init'ed(java.io.File.separator)
* java.net.HttpURLConnection:getInputStream(...)@154 != null
* java.net.URL:openConnection(...)@118 != null
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* this.lastModified: Addr_Set{null}, Inverse{null}
* java.io.InputStream:read(...)@165: {-231..0}, {1..232-1}
* java.net.HttpURLConnection:getResponseCode(...)@131: {-231..199, 201..303, 305..232-1}, {304}, {200}
*/
113 boolean blacklist_updated = false;
114 try {
115 mLogger.debug("Attempting to download MT blacklist");
116
117 URL url = new URL(blacklistURL);
118 HttpURLConnection connection =
119 (HttpURLConnection) url.openConnection();
120
121 // after spending way too much time debugging i've discovered
122 // that the blacklist server is selective based on the User-Agent
123 // header. without this header set i always get a 403 response :(
124 connection.setRequestProperty("User-Agent", "Mozilla/5.0");
125
126 if (this.lastModified != null) {
127 connection.setRequestProperty("If-Modified-Since",
128 DateUtil.formatRfc822(this.lastModified));
129 }
130
131 int responseCode = connection.getResponseCode();
132
133 mLogger.debug("HttpConnection response = "+responseCode);
134
135 // did the connection return NotModified? If so, no need to parse
136 if (responseCode == HttpURLConnection.HTTP_NOT_MODIFIED) {
137 mLogger.debug("MT blacklist site says we are current");
138 return false;
139 }
140
141 // did the connection return a LastModified header?
142 long lastModifiedLong =
143 connection.getHeaderFieldDate("Last-Modified", -1);
144
145 // if the file is newer than our current then we need do update it
146 if (responseCode == HttpURLConnection.HTTP_OK &&
147 (this.lastModified == null ||
148 this.lastModified.getTime() < lastModifiedLong)) {
149
+ 150 mLogger.debug("my last modified = "+this.lastModified.getTime());
151 mLogger.debug("MT last modified = "+lastModifiedLong);
152
153 // save the new blacklist
154 InputStream instream = connection.getInputStream();
155
156 String uploadDir = WebloggerConfig.getProperty("uploads.dir");
157 String path = uploadDir + File.separator + blacklistFile;
158 FileOutputStream outstream = new FileOutputStream(path);
159
160 mLogger.debug("writing updated MT blacklist to "+path);
161
162 // read from url and write to file
163 byte[] buf = new byte[4096];
164 int length = 0;
165 while((length = instream.read(buf)) > 0)
166 outstream.write(buf, 0, length);
167
168 outstream.close();
169 instream.close();
170
171 blacklist_updated = true;
172
173 mLogger.debug("MT blacklist download completed.");
174
175 } else {
176 mLogger.debug("blacklist *NOT* saved, assuming we are current");
177 }
178
179 } catch (Exception e) {
180 mLogger.error("error downloading blacklist", e);
181 }
182
183 return blacklist_updated;
184 }
185
186 /**
187 * Load the MT blacklist from the file system.
188 * We look for a previously downloaded version of the blacklist first and
189 * if it's not found then we load the default blacklist packed with Roller.
190 * Only public for purposes of unit testing.
191 */
192 public void loadBlacklistFromFile(String blacklistFilePath) {
193
/*
P/P * Method: void loadBlacklistFromFile(String)
*
* Preconditions:
* mLogger != null
* (soft) init'ed(this.lastModified)
* (soft) this.blacklistRegex != null
* (soft) this.blacklistStr != null
*
* Presumptions:
* init'ed(java.io.File.separator)
* java.lang.Object:getClass(...)@216 != null
*
* Postconditions:
* (soft) init'ed(this.lastModified)
* new Date(loadBlacklistFromFile#3) num objects <= 1
*
* Test Vectors:
* blacklistFilePath: Inverse{null}, Addr_Set{null}
* this.lastModified: Addr_Set{null}, Inverse{null}
*/
194 InputStream txtStream = null;
195 try {
196 String path = blacklistFilePath;
197 if (path == null) {
198 String uploadDir = WebloggerConfig.getProperty("uploads.dir");
199 path = uploadDir + File.separator + blacklistFile;
200 }
201 File blacklistFile = new File(path);
202
203 // check our lastModified date to see if we need to re-read the file
204 if (this.lastModified != null &&
205 this.lastModified.getTime() >= blacklistFile.lastModified()) {
206 mLogger.debug("Blacklist is current, no need to load again");
207 return;
208 } else {
209 this.lastModified = new Date(blacklistFile.lastModified());
210 }
211 txtStream = new FileInputStream(blacklistFile);
212 mLogger.info("Loading blacklist from "+path);
213
214 } catch (Exception e) {
215 // Roller keeps a copy in the webapp just in case
216 txtStream = getClass().getResourceAsStream("/blacklist.txt");
217 mLogger.warn(
218 "Couldn't find downloaded blacklist, loaded blacklist.txt from classpath instead");
219 }
220
221 if (txtStream != null) {
222 readFromStream(txtStream, false);
223 } else {
224 mLogger.error("Couldn't load a blacklist file from anywhere, "
225 + "this means blacklist checking is disabled for now.");
226 }
227 mLogger.info("Number of blacklist string rules: "+blacklistStr.size());
228 mLogger.info("Number of blacklist regex rules: "+blacklistRegex.size());
229 }
230
231 /**
232 * Read in the InputStream for rules.
233 * @param txtStream
234 */
235 private String readFromStream(InputStream txtStream, boolean saveStream) {
236 String line;
/*
P/P * Method: String readFromStream(InputStream, bool)
*
* Preconditions:
* (soft) mLogger != null
* (soft) this.blacklistRegex != null
* (soft) this.blacklistStr != null
*
* Postconditions:
* init'ed(java.lang.StringBuffer:toString(...)._tainted)
* return_value == &java.lang.StringBuffer:toString(...)
* possibly_updated(this.lastModified)
*
* Test Vectors:
* saveStream: {0}, {1}
* java.lang.String:startsWith(...)@243: {0}, {1}
*/
237 StringBuffer buf = new StringBuffer();
238 BufferedReader in = null;
239 try {
240 in = new BufferedReader(
241 new InputStreamReader( txtStream, "UTF-8" ) );
242 while ((line = in.readLine()) != null) {
243 if (line.startsWith("#")) {
244 readComment(line);
245 } else {
246 readRule(line);
247 }
248
249 if (saveStream) buf.append(line).append("\n");
250 }
251 } catch (Exception e) {
252 mLogger.error(e);
253 } finally {
254 try {
255 if (in != null) in.close();
256 } catch (IOException e1) {
257 mLogger.error(e1);
258 }
259 }
260 return buf.toString();
261 }
262
263 private void readRule(String str) {
/*
P/P * Method: void readRule(String)
*
* Preconditions:
* (soft) str != null
* (soft) this.blacklistRegex != null
* (soft) this.blacklistStr != null
*
* Presumptions:
* java.lang.String:indexOf(...)@270 >= -231+1
*
* Test Vectors:
* java.lang.String:indexOf(...)@268: {-231..0}, {1..232-1}
* java.lang.String:indexOf(...)@274: {-231..-1}, {0..232-1}
* org.apache.commons.lang.StringUtils:isEmpty(...)@264: {0}, {1}
* org.apache.commons.lang.StringUtils:isNotEmpty(...)@278: {0}, {1}
*/
264 if (StringUtils.isEmpty(str)) return; // bad condition
265
266 String rule = str.trim();
267
268 if (str.indexOf("#") > 0) // line has a comment
269 {
270 int commentLoc = str.indexOf("#");
271 rule = str.substring(0, commentLoc-1).trim(); // strip comment
272 }
273
274 if (rule.indexOf( "(" ) > -1) // regex rule
275 {
276 // pre-compile patterns since they will be frequently used
277 blacklistRegex.add(Pattern.compile(rule));
278 } else if (StringUtils.isNotEmpty(rule)) {
279 blacklistStr.add(rule);
280 }
281 }
282
283 /** Read comment and try to parse out "Last update" value */
284 private void readComment(String str) {
/*
P/P * Method: void readComment(String)
*
* Preconditions:
* str != null
* (soft) mLogger != null
*
* Presumptions:
* java.lang.String:indexOf(...)@285 + java.lang.String:length(...)@287 in -231..232-1
*
* Postconditions:
* possibly_updated(this.lastModified)
*
* Test Vectors:
* java.lang.String:indexOf(...)@285: {-231..-1}, {0..232-1}
*/
285 int lastUpdatePos = str.indexOf(lastUpdateStr);
286 if (lastUpdatePos > -1) {
287 str = str.substring(lastUpdatePos + lastUpdateStr.length());
288 str = str.trim();
289 try {
290 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
291 lastModified = DateUtil.parse(str, sdf);
292 } catch (ParseException e) {
293 mLogger.debug("ParseException reading " + str);
294 }
295 }
296 }
297
298 /**
299 * Does the String argument match any of the rules in the built-in blacklist?
300 */
301 public boolean isBlacklisted(String str) {
/*
P/P * Method: bool isBlacklisted(String)
*
* Preconditions:
* (soft) mLogger != null
* (soft) this.blacklistRegex != null
* (soft) this.blacklistStr != null
*
* Postconditions:
* init'ed(return_value)
*/
302 return isBlacklisted(str, null, null);
303 }
304
305 /**
306 * Does the String argument match any of the rules in the built-in blacklist
307 * plus additional blacklists provided by caller?
308 * @param str String to be checked against blacklist
309 * @param moreStringRules Additional string rules to consider
310 * @param moreRegexRules Additional regex rules to consider
311 */
312 public boolean isBlacklisted(
313 String str, List moreStringRules, List moreRegexRules) {
/*
P/P * Method: bool isBlacklisted(String, List, List)
*
* Preconditions:
* (soft) mLogger != null
* (soft) this.blacklistRegex != null
* (soft) this.blacklistStr != null
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* moreRegexRules: Addr_Set{null}, Inverse{null}
* moreStringRules: Addr_Set{null}, Inverse{null}
* str: Addr_Set{null}, Inverse{null}
* java.util.List:size(...)@322: {-231..0}, {1..232-1}
* java.util.List:size(...)@331: {-231..0}, {1..232-1}
* org.apache.commons.lang.StringUtils:isEmpty(...)@314: {0}, {1}
*/
314 if (str == null || StringUtils.isEmpty(str)) return false;
315
316 // First iterate over blacklist, doing indexOf.
317 // Then iterate over blacklistRegex and test.
318 // As soon as there is a hit in either case return true
319
320 // test plain String.indexOf
321 List stringRules = blacklistStr;
322 if (moreStringRules != null && moreStringRules.size() > 0) {
323 stringRules = new ArrayList();
324 stringRules.addAll(moreStringRules);
325 stringRules.addAll(blacklistStr);
326 }
327 if (testStringRules(str, stringRules)) return true;
328
329 // test regex blacklisted
330 List regexRules = blacklistRegex;
331 if (moreRegexRules != null && moreRegexRules.size() > 0) {
332 regexRules = new ArrayList();
333 regexRules.addAll(moreRegexRules);
334 regexRules.addAll(blacklistRegex);
335 }
336 return testRegExRules(str, regexRules);
337 }
338
339 /**
340 * Test string only against rules provided by caller, NOT against built-in blacklist.
341 * @param str String to be checked against rules
342 * @param moreStringRules String rules to consider
343 * @param moreRegexRules Regex rules to consider
344 */
345 public static boolean matchesRulesOnly(
346 String str, List stringRules, List regexRules) {
/*
P/P * Method: bool matchesRulesOnly(String, List, List)
*
* Preconditions:
* stringRules != null
* (soft) mLogger != null
* (soft) regexRules != null
* (soft) str != null
*
* Postconditions:
* init'ed(return_value)
*/
347 if (testStringRules(str, stringRules)) return true;
348 return testRegExRules(str, regexRules);
349 }
350
351 /** Test String against the RegularExpression rules. */
352 private static boolean testRegExRules(String str, List regexRules) {
/*
P/P * Method: bool testRegExRules(String, List)
*
* Preconditions:
* regexRules != null
* (soft) mLogger != null
*
* Presumptions:
* java.util.Iterator:next(...)@357 != null
* java.util.regex.Pattern:matcher(...)@361 != null
* java.util.regex.Pattern:matcher(...)@368 != null
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* java.util.Iterator:hasNext(...)@356: {0}, {1}
* java.util.regex.Matcher:find(...)@362: {0}, {1}
* java.util.regex.Matcher:find(...)@368: {0}, {1}
* org.apache.commons.logging.Log:isDebugEnabled(...)@360: {0}, {1}
*/
353 boolean hit = false;
354 Pattern testPattern = null;
355 Iterator iter = regexRules.iterator();
356 while (iter.hasNext()) {
357 testPattern = (Pattern)iter.next();
358
359 // want to see what it is matching on, but only in debug mode
360 if (mLogger.isDebugEnabled()) {
361 Matcher matcher = testPattern.matcher(str);
362 if (matcher.find()) {
363 mLogger.debug(matcher.group()
364 + " matched by " + testPattern.pattern());
365 return true;
366 }
367 } else {
368 if (testPattern.matcher(str).find()) {
369 return true;
370 }
371 }
372 }
373 return hit;
374 }
375
376 /**
377 * Tests the source text against the String rules. Each String rule is
378 * first treated as a word-boundary, case insensitive regular expression.
379 * If a PatternSyntaxException is encountered, a simple contains test
380 * is performed.
381 *
382 * @param source The text in which to apply the matching rules.
383 * @param rules A list a simple matching rules.
384 *
385 * @return true if a match was found, otherwise false
386 */
387 private static boolean testStringRules(String source, List rules) {
/*
P/P * Method: bool testStringRules(String, List)
*
* Preconditions:
* rules != null
* (soft) mLogger != null
* (soft) source != null
*
* Presumptions:
* java.util.regex.Pattern:compile(...)@402 != null
* java.util.regex.Pattern:matcher(...)@406 != null
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* java.util.Iterator:hasNext(...)@390: {0}, {1}
*/
388 boolean matches = false;
389
390 for (Object ruleObj : rules) {
391 String rule;
392 rule = (String) ruleObj;
393
394 try {
395 StringBuilder patternBuilder;
396 patternBuilder = new StringBuilder();
397 patternBuilder.append("\\b(");
398 patternBuilder.append(rule);
399 patternBuilder.append(")\\b");
400
401 Pattern pattern;
402 pattern = Pattern.compile(patternBuilder.toString(),
403 Pattern.CASE_INSENSITIVE);
404
405 Matcher matcher;
406 matcher = pattern.matcher(source);
407
408 matches = matcher.find();
409 if (matches) {
410 break;
411 }
412 }
+ 413 catch (PatternSyntaxException e) {
414 matches = source.contains(rule);
415 if (matches) {
416 break;
417 }
418 }
419 finally {
420 if (matches) {
421 // Log the matched rule in debug mode
422 if (mLogger.isDebugEnabled()) {
423 mLogger.debug("matched:" + rule + ":");
424 }
425 }
426 }
427 }
428
429 return matches;
430 }
431
432 /** Utility method to populate lists based a blacklist in string form */
433 public static void populateSpamRules(
434 String blacklist, List stringRules, List regexRules, String addendum) {
/*
P/P * Method: void populateSpamRules(String, List, List, String)
*
* Preconditions:
* (soft) regexRules != null
* (soft) stringRules != null
*
* Test Vectors:
* java.lang.String:startsWith(...)@441: {0}, {1}
* java.lang.String:startsWith(...)@442: {0}, {1}
* java.util.StringTokenizer:hasMoreTokens(...)@439: {0}, {1}
*/
435 String weblogWords = blacklist;
436 weblogWords = (weblogWords == null) ? "" : weblogWords;
437 String siteWords = (addendum != null) ? addendum : "";
438 StringTokenizer toker = new StringTokenizer(siteWords + weblogWords,"\n");
439 while (toker.hasMoreTokens()) {
440 String token = toker.nextToken().trim();
441 if (token.startsWith("#")) continue;
442 if (token.startsWith("(")) {
443 regexRules.add(Pattern.compile(token));
444 } else {
445 stringRules.add(token);
446 }
447 }
448 }
449
450 /** Return pretty list of String and RegEx rules. */
451 public String toString() {
/*
P/P * Method: String toString()
*
* Preconditions:
* init'ed(this.blacklistRegex)
* init'ed(this.blacklistStr)
*
* Postconditions:
* java.lang.StringBuffer:toString(...)._tainted == 0
* return_value == &java.lang.StringBuffer:toString(...)
*/
452 StringBuffer buf = new StringBuffer("blacklist ");
453 buf.append(blacklistStr).append("\n");
454 buf.append("Regex blacklist ").append(blacklistRegex);
455 return buf.toString();
456 }
457 }
SofCheck Inspector Build Version : 2.18479
| Blacklist.java |
2009-Jan-02 14:24:56 |
| Blacklist.class |
2009-Sep-04 03:12:32 |