File Source: contentspamlistener.java

     1  /*
     2   * Copyright (c) 2003-2006, Simon Brown
     3   * All rights reserved.
     4   *
     5   * Redistribution and use in source and binary forms, with or without
     6   * modification, are permitted provided that the following conditions are met:
     7   *
     8   *   - Redistributions of source code must retain the above copyright
     9   *     notice, this list of conditions and the following disclaimer.
    10   *
    11   *   - Redistributions in binary form must reproduce the above copyright
    12   *     notice, this list of conditions and the following disclaimer in
    13   *     the documentation and/or other materials provided with the
    14   *     distribution.
    15   *
    16   *   - Neither the name of Pebble nor the names of its contributors may
    17   *     be used to endorse or promote products derived from this software
    18   *     without specific prior written permission.
    19   *
    20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    30   * POSSIBILITY OF SUCH DAMAGE.
    31   */
    32  package net.sourceforge.pebble.event.response;
    33  
    34  import net.sourceforge.pebble.PluginProperties;
    35  import net.sourceforge.pebble.domain.Response;
    36  import org.apache.commons.logging.Log;
    37  import org.apache.commons.logging.LogFactory;
    38  
    39  import java.util.regex.Matcher;
    40  import java.util.regex.Pattern;
    41  
    42  /**
    43   * Checks comment and TrackBack content for specified regexs and sets
    44   * the state of such responses to pending. This includes all user-definable
    45   * fields of the response : title, content, source name and source link. The
    46   * spam score is incremented for each field that exceeds the threshold. 
    47   *
    48   * @author Simon Brown
    49   */
         /* 
    P/P   *  Method: void net.sourceforge.pebble.event.response.ContentSpamListener()
          */
    50  public class ContentSpamListener extends BlogEntryResponseListenerSupport {
    51  
    52    /** the log used by this class */
           /* 
    P/P     *  Method: net.sourceforge.pebble.event.response.ContentSpamListener__static_init
            * 
            *  Postconditions:
            *    init'ed(log)
            */
    53    private static final Log log = LogFactory.getLog(ContentSpamListener.class);
    54  
    55    /** the default regex list */
    56    public static final String DEFAULT_REGEX_LIST = "cialis, viagra, poker, casino, xanax, holdem, hold-em, hold em, sex, craps, fuck, shit, teenage, phentermine, blackjack, roulette, gambling, pharmacy, carisoprodol, pills, penis, penis enlargement, anal, hentai, anime, vicodin, massage, nude, ejaculation, porn, gay, naked, girl, teens, babe, masturbating, squirt, incest, fetish, discount, cheap, interesdting, levitra, government, grants, loan, &\\#.*;, kasino, slots, play, bingo, mortgage, baccarat";
    57    
    58    /** the default threshold for the number of content matches */
    59    public static final int DEFAULT_THRESHOLD = 0;
    60  
    61    /** the name of the regex list property */
    62    public static final String REGEX_LIST_KEY = "ContentSpamListener.regexList";
    63  
    64    /** the name of the threshold property */
    65    public static final String THRESHOLD_KEY = "ContentSpamListener.threshold";
    66  
    67    /**
    68     * Called when a comment or TrackBack has been added.
    69     *
    70     * @param response a Response
    71     */
    72    protected void blogEntryResponseAdded(Response response) {
             /* 
    P/P       *  Method: void blogEntryResponseAdded(Response)
              * 
              *  Preconditions:
              *    response != null
              * 
              *  Presumptions:
              *    java.lang.Integer:parseInt(...)@85 <= 232-2
              *    org.apache.commons.logging.LogFactory:getLog(...)@53 != null
              *    regexes.length@77 <= 232-1
              *    regexes[...] != null
              *    regexes[...]@77 != null
              * 
              *  Preconditions:
              *    response.blogEntry.blog.pluginProperties != null
              *    response.blogEntry != null
              *    response.blogEntry.blog != null
              *    init'ed(response.title)
              *    (soft) response.spamScore <= 232-5
              *    (soft) init'ed(response.author)
              *    (soft) init'ed(response.blogName)
              *    (soft) init'ed(response.body)
              *    (soft) init'ed(response.excerpt)
              *    (soft) init'ed(response.url)
              *    ...
              * 
              *  Postconditions:
              *    response.spamScore == One-of{old response.spamScore, old response.spamScore + 1, One-of{old response.spamScore, old response.spamScore + 1} + 1, One-of{old response.spamScore, old response.spamScore + 1, One-of{old response.spamScore, old response.spamScore + 1} + 1} + 1, One-o...
              *    (soft) init'ed(response.spamScore)
              *    response.state == One-of{old response.state, &net.sourceforge.pebble.domain.State__static_init.new State(State__static_init#3)}
              * 
              *  Test Vectors:
              *    net.sourceforge.pebble.PluginProperties:getProperty(...)@74: Addr_Set{null}, Inverse{null}
              *    net.sourceforge.pebble.PluginProperties:hasProperty(...)@83: {0}, {1}
              */
    73      PluginProperties props = response.getBlogEntry().getBlog().getPluginProperties();
    74      String regexList = props.getProperty(REGEX_LIST_KEY);
    75      String regexes[] = null;
    76      if (regexList != null) {
    77        regexes = regexList.split(",");
    78      } else {
    79        regexes = new String[0];
    80      }
    81  
    82      int threshold = DEFAULT_THRESHOLD;
    83      if (props.hasProperty(THRESHOLD_KEY)) {
    84        try {
    85          threshold = Integer.parseInt(props.getProperty(THRESHOLD_KEY));
    86        } catch (NumberFormatException nfe) {
    87          log.error(nfe.getMessage());
    88          // do nothing, the value has already been defaulted
    89        }
    90      }
    91  
    92      // check each of the fields in turn
    93      if (!contentWithinThreshold(response.getTitle(), regexes, threshold)) {
    94        log.info(response.getTitle() + " marked as pending : threshold for title exceeded");
    95        response.setPending();
    96        response.incrementSpamScore();
    97      }
    98      if (!contentWithinThreshold(response.getSourceName(), regexes, threshold)) {
    99        log.info(response.getTitle() + " marked as pending : threshold for source name exceeded");
   100        response.setPending();
   101        response.incrementSpamScore();
   102      }
   103      if (!contentWithinThreshold(response.getSourceLink(), regexes, threshold)) {
   104        log.info(response.getTitle() + " marked as pending : threshold for source link exceeded");
   105        response.setPending();
   106        response.incrementSpamScore();
   107      }
   108      if (!contentWithinThreshold(response.getContent(), regexes, threshold)) {
   109        log.info(response.getTitle() + " marked as pending : threshold for content exceeded");
   110        response.setPending();
   111        response.incrementSpamScore();
   112      }
   113    }
   114  
   115    private boolean contentWithinThreshold(String content, String regexes[], int threshold) {
             /* 
    P/P       *  Method: bool contentWithinThreshold(String, String[], int)
              * 
              *  Preconditions:
              *    (soft) regexes != null
              *    (soft) regexes.length <= 232-1
              *    (soft) regexes[...] != null
              *    (soft) threshold <= 232-2
              * 
              *  Presumptions:
              *    java.util.regex.Pattern:compile(...)@122 != null
              *    java.util.regex.Pattern:matcher(...)@123 != null
              * 
              *  Postconditions:
              *    init'ed(return_value)
              * 
              *  Test Vectors:
              *    content: Addr_Set{null}, Inverse{null}
              *    java.lang.String:length(...)@116: {1..232-1}, {0}
              *    java.util.regex.Matcher:find(...)@124: {1}, {0}
              */
   116      if (content == null || content.trim().length() == 0) {
   117        return true;
   118      }
   119  
   120      int count = 0;
   121      for (int i = 0; i < regexes.length; i++) {
   122        Pattern p = Pattern.compile(regexes[i].trim(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
   123        Matcher m = p.matcher(content);
   124        while (m.find()) {
   125          count++;
   126  
   127          if (count > threshold) {
   128            return false;
   129          }
   130        }
   131      }
   132  
   133      return true;
   134    }
   135  
   136  }








SofCheck Inspector Build Version : 2.22510
contentspamlistener.java 2010-Jun-25 19:40:32
contentspamlistener.class 2010-Jul-19 20:23:38