File Source: linkspamlistener.java

     1  /*
     2   * Copyright (c) 2003-2006, Simon Brown
     3   * All rights reserved.
     4   *
     5   * Redistribution and use in source and binary forms, with or without
     6   * modification, are permitted provided that the following conditions are met:
     7   *
     8   *   - Redistributions of source code must retain the above copyright
     9   *     notice, this list of conditions and the following disclaimer.
    10   *
    11   *   - Redistributions in binary form must reproduce the above copyright
    12   *     notice, this list of conditions and the following disclaimer in
    13   *     the documentation and/or other materials provided with the
    14   *     distribution.
    15   *
    16   *   - Neither the name of Pebble nor the names of its contributors may
    17   *     be used to endorse or promote products derived from this software
    18   *     without specific prior written permission.
    19   *
    20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    30   * POSSIBILITY OF SUCH DAMAGE.
    31   */
    32  package net.sourceforge.pebble.event.response;
    33  
    34  import net.sourceforge.pebble.PluginProperties;
    35  import net.sourceforge.pebble.domain.Comment;
    36  import net.sourceforge.pebble.domain.Response;
    37  import org.apache.commons.logging.Log;
    38  import org.apache.commons.logging.LogFactory;
    39  
    40  import java.util.regex.Matcher;
    41  import java.util.regex.Pattern;
    42  
    43  /**
    44   * Checks comment and TrackBack content for a large number of links and sets
    45   * the state of such responses to pending.
    46   *
    47   * @author Simon Brown
    48   */
         /* 
    P/P   *  Method: void net.sourceforge.pebble.event.response.LinkSpamListener()
          */
    49  public class LinkSpamListener extends BlogEntryResponseListenerSupport {
    50  
    51    /** the log used by this class */
           /* 
    P/P     *  Method: net.sourceforge.pebble.event.response.LinkSpamListener__static_init
            * 
            *  Postconditions:
            *    init'ed(HTML_LINK_PATTERN)
            *    init'ed(log)
            */
    52    private static final Log log = LogFactory.getLog(LinkSpamListener.class);
    53  
    54    /** the pattern used to find HTML links */
    55    private static final Pattern HTML_LINK_PATTERN = Pattern.compile("<a.*?href=.*?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    56  
    57    /** the default threshold for the number of links allowed */
    58    public static final int DEFAULT_THRESHOLD = 3;
    59  
    60    /** the name of the threshold property for comments */
    61    public static final String COMMENT_THRESHOLD_KEY = "LinkSpamListener.commentThreshold";
    62  
    63    /** the name of the threshold property for TrackBacks */
    64    public static final String TRACKBACK_THRESHOLD_KEY = "LinkSpamListener.trackbackThreshold";
    65  
    66    /**
    67     * Called when a comment or TrackBack has been added.
    68     *
    69     * @param response a Response
    70     */
    71    protected void blogEntryResponseAdded(Response response) {
             /* 
    P/P       *  Method: void blogEntryResponseAdded(Response)
              * 
              *  Preconditions:
              *    response != null
              * 
              *  Presumptions:
              *    java.util.regex.Pattern:compile(...)@55 != null
              *    java.util.regex.Pattern:matcher(...)@74 != null
              *    org.apache.commons.logging.LogFactory:getLog(...)@52 != null
              * 
              *  Test Vectors:
              *    java.util.regex.Matcher:find(...)@76: {1}, {0}
              * 
              *  Preconditions:
              *    (soft) response.spamScore <= 232-2
              *    (soft) response.blogEntry != null
              *    (soft) response.blogEntry.blog.pluginProperties != null
              *    (soft) response.blogEntry.blog != null
              *    (soft) init'ed(response.body)
              *    (soft) init'ed(response.excerpt)
              *    (soft) init'ed(response.title)
              * 
              *  Postconditions:
              *    response.spamScore == One-of{old response.spamScore, old response.spamScore + 1}
              *    (soft) init'ed(response.spamScore)
              *    response.state == One-of{old response.state, &net.sourceforge.pebble.domain.State__static_init.new State(State__static_init#3)}
              * 
              *  Test Vectors:
              *    net.sourceforge.pebble.PluginProperties:hasProperty(...)@88: {0}, {1}
              */
    72      String content = response.getContent();
    73      if (content != null) {
    74        Matcher m = HTML_LINK_PATTERN.matcher(content);
    75        int count = 0;
    76        while (m.find()) {
    77          count++;
    78        }
    79  
    80        PluginProperties props = response.getBlogEntry().getBlog().getPluginProperties();
    81        String propertyName = "";
    82        if (response instanceof Comment) {
    83          propertyName = COMMENT_THRESHOLD_KEY;
    84        } else {
    85          propertyName = TRACKBACK_THRESHOLD_KEY;
    86        }
    87        int threshold = DEFAULT_THRESHOLD;
    88        if (props.hasProperty(propertyName)) {
    89          try {
    90            threshold = Integer.parseInt(props.getProperty(propertyName));
    91          } catch (NumberFormatException nfe) {
    92            log.error(nfe.getMessage());
    93            // do nothing, the value has already been defaulted
    94          }
    95        }
    96  
    97        if (count > threshold) {
    98          log.info(response.getTitle() + " marked as pending : number of links is " + count + ", threshold is " + threshold);
    99          response.setPending();
   100          response.incrementSpamScore();
   101        }
   102      }
   103    }
   104  
   105  }








SofCheck Inspector Build Version : 2.22510
linkspamlistener.java 2010-Jun-25 19:40:32
linkspamlistener.class 2010-Jul-19 20:23:38