File Source: contentspamlistener.java
1 /*
2 * Copyright (c) 2003-2006, Simon Brown
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * - Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * - Neither the name of Pebble nor the names of its contributors may
17 * be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32 package net.sourceforge.pebble.event.response;
33
34 import net.sourceforge.pebble.PluginProperties;
35 import net.sourceforge.pebble.domain.Response;
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38
39 import java.util.regex.Matcher;
40 import java.util.regex.Pattern;
41
42 /**
43 * Checks comment and TrackBack content for specified regexes and sets
44 * the state of such responses to pending. This includes all user-definable
45 * fields of the response : title, content, source name and source link. The
46 * spam score is incremented for each field that exceeds the threshold.
47 *
48 * @author Simon Brown
49 */
/*
P/P * Method: void net.sourceforge.pebble.event.response.ContentSpamListener()
*/
50 public class ContentSpamListener extends BlogEntryResponseListenerSupport {
51
52 /** the log used by this class */
/*
P/P * Method: net.sourceforge.pebble.event.response.ContentSpamListener__static_init
*
* Postconditions:
* init'ed(log)
*/
53 private static final Log log = LogFactory.getLog(ContentSpamListener.class);
54
55 /** the default regex list */
56 public static final String DEFAULT_REGEX_LIST = "cialis, viagra, poker, casino, xanax, holdem, hold-em, hold em, sex, craps, fuck, shit, teenage, phentermine, blackjack, roulette, gambling, pharmacy, carisoprodol, pills, penis, penis enlargement, anal, hentai, anime, vicodin, massage, nude, ejaculation, porn, gay, naked, girl, teens, babe, masturbating, squirt, incest, fetish, discount, cheap, interesdting, levitra, government, grants, loan, &\\#.*;, kasino, slots, play, bingo, mortgage, baccarat";
57
58 /** the default threshold for the number of content matches */
59 public static final int DEFAULT_THRESHOLD = 0;
60
61 /** the name of the regex list property */
62 public static final String REGEX_LIST_KEY = "ContentSpamListener.regexList";
63
64 /** the name of the threshold property */
65 public static final String THRESHOLD_KEY = "ContentSpamListener.threshold";
66
67 /**
68 * Called when a comment or TrackBack has been added.
69 *
70 * @param response a Response
71 */
72 protected void blogEntryResponseAdded(Response response) {
/*
P/P * Method: void blogEntryResponseAdded(Response)
*
* Preconditions:
* response != null
*
* Presumptions:
* java.lang.Integer:parseInt(...)@85 <= 232-2
* org.apache.commons.logging.LogFactory:getLog(...)@53 != null
* regexes.length@77 <= 232-1
* regexes[...] != null
* regexes[...]@77 != null
*
* Preconditions:
* response.blogEntry.blog.pluginProperties != null
* response.blogEntry != null
* response.blogEntry.blog != null
* init'ed(response.title)
* (soft) response.spamScore <= 232-5
* (soft) init'ed(response.author)
* (soft) init'ed(response.blogName)
* (soft) init'ed(response.body)
* (soft) init'ed(response.excerpt)
* (soft) init'ed(response.url)
* ...
*
* Postconditions:
* response.spamScore == One-of{old response.spamScore, old response.spamScore + 1, One-of{old response.spamScore, old response.spamScore + 1} + 1, One-of{old response.spamScore, old response.spamScore + 1, One-of{old response.spamScore, old response.spamScore + 1} + 1} + 1, One-o...
* (soft) init'ed(response.spamScore)
* response.state == One-of{old response.state, &net.sourceforge.pebble.domain.State__static_init.new State(State__static_init#3)}
*
* Test Vectors:
* net.sourceforge.pebble.PluginProperties:getProperty(...)@74: Addr_Set{null}, Inverse{null}
* net.sourceforge.pebble.PluginProperties:hasProperty(...)@83: {0}, {1}
*/
73 PluginProperties props = response.getBlogEntry().getBlog().getPluginProperties();
74 String regexList = props.getProperty(REGEX_LIST_KEY);
75 String regexes[] = null;
76 if (regexList != null) {
77 regexes = regexList.split(",");
78 } else {
79 regexes = new String[0];
80 }
81
82 int threshold = DEFAULT_THRESHOLD;
83 if (props.hasProperty(THRESHOLD_KEY)) {
84 try {
85 threshold = Integer.parseInt(props.getProperty(THRESHOLD_KEY));
86 } catch (NumberFormatException nfe) {
87 log.error(nfe.getMessage());
88 // do nothing, the value has already been defaulted
89 }
90 }
91
92 // check each of the fields in turn
93 if (!contentWithinThreshold(response.getTitle(), regexes, threshold)) {
94 log.info(response.getTitle() + " marked as pending : threshold for title exceeded");
95 response.setPending();
96 response.incrementSpamScore();
97 }
98 if (!contentWithinThreshold(response.getSourceName(), regexes, threshold)) {
99 log.info(response.getTitle() + " marked as pending : threshold for source name exceeded");
100 response.setPending();
101 response.incrementSpamScore();
102 }
103 if (!contentWithinThreshold(response.getSourceLink(), regexes, threshold)) {
104 log.info(response.getTitle() + " marked as pending : threshold for source link exceeded");
105 response.setPending();
106 response.incrementSpamScore();
107 }
108 if (!contentWithinThreshold(response.getContent(), regexes, threshold)) {
109 log.info(response.getTitle() + " marked as pending : threshold for content exceeded");
110 response.setPending();
111 response.incrementSpamScore();
112 }
113 }
114
115 private boolean contentWithinThreshold(String content, String regexes[], int threshold) {
/*
P/P * Method: bool contentWithinThreshold(String, String[], int)
*
* Preconditions:
* (soft) regexes != null
* (soft) regexes.length <= 232-1
* (soft) regexes[...] != null
* (soft) threshold <= 232-2
*
* Presumptions:
* java.util.regex.Pattern:compile(...)@122 != null
* java.util.regex.Pattern:matcher(...)@123 != null
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* content: Addr_Set{null}, Inverse{null}
* java.lang.String:length(...)@116: {1..232-1}, {0}
* java.util.regex.Matcher:find(...)@124: {1}, {0}
*/
116 if (content == null || content.trim().length() == 0) {
117 return true;
118 }
119
120 int count = 0;
121 for (int i = 0; i < regexes.length; i++) {
122 Pattern p = Pattern.compile(regexes[i].trim(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
123 Matcher m = p.matcher(content);
124 while (m.find()) {
125 count++;
126
127 if (count > threshold) {
128 return false;
129 }
130 }
131 }
132
133 return true;
134 }
135
136 }
SofCheck Inspector Build Version : 2.22510
| contentspamlistener.java |
2010-Jun-25 19:40:32 |
| contentspamlistener.class |
2010-Jul-19 20:23:38 |