File Source: referer.java

     1  /*
     2   * Copyright (c) 2003-2006, Simon Brown
     3   * All rights reserved.
     4   *
     5   * Redistribution and use in source and binary forms, with or without
     6   * modification, are permitted provided that the following conditions are met:
     7   *
     8   *   - Redistributions of source code must retain the above copyright
     9   *     notice, this list of conditions and the following disclaimer.
    10   *
    11   *   - Redistributions in binary form must reproduce the above copyright
    12   *     notice, this list of conditions and the following disclaimer in
    13   *     the documentation and/or other materials provided with the
    14   *     distribution.
    15   *
    16   *   - Neither the name of Pebble nor the names of its contributors may
    17   *     be used to endorse or promote products derived from this software
    18   *     without specific prior written permission.
    19   *
    20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    23   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    30   * POSSIBILITY OF SUCH DAMAGE.
    31   */
    32  package net.sourceforge.pebble.logging;
    33  
    34  import java.net.URLDecoder;
    35  import java.util.regex.Matcher;
    36  import java.util.regex.Pattern;
    37  
    38  /**
    39   * Represents a referer URL along with a count of how many times
    40   * it has been refered from.
    41   *
    42   * @author    Simon Brown
    43   */
    44  public class Referer extends CountedUrl {
    45  
    46    /** regular expression to pull out the query from a Google referer */
           /* 
    P/P     *  Method: net.sourceforge.pebble.logging.Referer__static_init
            * 
            *  Postconditions:
            *    init'ed(BING_QUERY_STRING_PATTERN)
            *    init'ed(GOOGLE_IMAGES_QUERY_STRING_PATTERN)
            *    init'ed(GOOGLE_QUERY_STRING_PATTERN)
            *    init'ed(JAVABLOGS_HOT_ENTRIES_PATTERN)
            *    init'ed(JAVABLOGS_WELCOME_PATTERN)
            *    init'ed(MSN_QUERY_STRING_PATTERN)
            *    init'ed(YAHOO_QUERY_STRING_PATTERN)
            */
    47    private static final Pattern GOOGLE_QUERY_STRING_PATTERN = Pattern.compile("[?&]q=[^&]+&*");
    48  
    49    /** the prefix for all Google referers */
    50    private static final String GOOGLE_PREFIX = "http://www.google.";
    51  
    52    /** regular expression to pull out the query from a Google referer */
    53    private static final Pattern GOOGLE_IMAGES_QUERY_STRING_PATTERN = Pattern.compile("[?&]prev=[^&]+&*");
    54  
    55    /** the prefix for all Google imagesearch referers */
    56    private static final String GOOGLE_IMAGES_PREFIX = "http://images.google.";
    57  
    58    /** regular expression to pull out the query from a Yahoo! referer */
    59    private static final Pattern YAHOO_QUERY_STRING_PATTERN = Pattern.compile("[?&]p=[^&]+&*");
    60  
    61    /** the prefix for all Yahoo! referers */
    62    private static final String YAHOO_PREFIX = "http://search.yahoo.";
    63  
    64    /** regular expression to pull out the query from an MSN referer */
    65    private static final Pattern MSN_QUERY_STRING_PATTERN = Pattern.compile("[?&]q=[^&]+&*");
    66  
    67    /** the prefix for all MSN referers */
    68    private static final String MSN_PREFIX = "http://search.msn.";
    69  
    70    /** the prefix for all MSN Beta referers */
    71    private static final String MSN_BETA_PREFIX = "http://beta.search.msn.";
    72  
    73    /** regular expression to pull out the query from an BING referer */
    74    private static final Pattern BING_QUERY_STRING_PATTERN = Pattern.compile("[?&]q=[^&]+&*");
    75  
    76    /** the prefix for all BING referers */
    77    private static final String BING_PREFIX = "http://www.bing.com";
    78  
    79    /** pattern for java.blogs welcome page referers */
    80    private static final Pattern JAVABLOGS_WELCOME_PATTERN = Pattern.compile(".*javablogs.com/Welcome.*");
    81  
    82    /** pattern for java.blogs hot entries page referers */
    83    private static final Pattern JAVABLOGS_HOT_ENTRIES_PATTERN = Pattern.compile(".*javablogs.com/ViewHotBlogEntries.*");
    84  
    85    /**
    86     * Creates a new instance representing the specified url.
    87     *
    88     * @param url   the url as a String
    89     */
    90    public Referer(String url) {
             /* 
    P/P       *  Method: void net.sourceforge.pebble.logging.Referer(String)
              * 
              *  Postconditions:
              *    init'ed(this.blog)
              *    init'ed(this.fileDownload)
              *    this.logEntries == &new LinkedList(CountedUrl#1)
              *    possibly_updated(this.name)
              *    init'ed(this.newsFeed)
              *    init'ed(this.pageView)
              *    init'ed(this.url)
              *    new LinkedList(CountedUrl#1) num objects == 1
              */
    91      super(url);
    92    }
    93  
    94    protected void setUrl(String url) {
             /* 
    P/P       *  Method: void setUrl(String)
              * 
              *  Presumptions:
              *    java.util.regex.Pattern:compile(...)@47 != null
              *    java.util.regex.Pattern:compile(...)@53 != null
              *    java.util.regex.Pattern:compile(...)@59 != null
              *    java.util.regex.Pattern:compile(...)@65 != null
              *    java.util.regex.Pattern:compile(...)@74 != null
              *    ...
              * 
              *  Postconditions:
              *    this.name != null
              *    this.url == url
              *    init'ed(this.url)
              * 
              *  Test Vectors:
              *    url: Addr_Set{null}, Inverse{null}
              *    java.lang.String:equalsIgnoreCase(...)@100: {0}, {1}
              *    java.lang.String:equalsIgnoreCase(...)@105: {0}, {1}
              *    java.lang.String:equalsIgnoreCase(...)@111: {0}, {1}
              *    java.lang.String:equalsIgnoreCase(...)@116: {0}, {1}
              *    java.lang.String:equalsIgnoreCase(...)@121: {0}, {1}
              *    java.lang.String:equalsIgnoreCase(...)@126: {0}, {1}
              *    java.lang.String:length(...)@97: {1..232-1}, {0}
              *    java.util.regex.Matcher:matches(...)@130: {0}, {1}
              *    java.util.regex.Matcher:matches(...)@132: {0}, {1}
              */
    95      super.setUrl(url);
    96  
    97      if (url == null || url.length() == 0) {
    98        setName("None");
    99      } else if (url.length() > GOOGLE_PREFIX.length() &&
   100          url.substring(0, GOOGLE_PREFIX.length()).equalsIgnoreCase(GOOGLE_PREFIX)) {
   101  
   102        String query = extractQuery(GOOGLE_QUERY_STRING_PATTERN, url);
   103        setName("Google : " + query);
   104      } else if (url.length() > GOOGLE_IMAGES_PREFIX.length() &&
   105          url.substring(0, GOOGLE_IMAGES_PREFIX.length()).equalsIgnoreCase(GOOGLE_IMAGES_PREFIX)) {
   106  
   107        String query = extractQuery(GOOGLE_IMAGES_QUERY_STRING_PATTERN, url);
   108        query = extractQuery(GOOGLE_QUERY_STRING_PATTERN, query);
   109        setName("Google Images : " + query);
   110      } else if (url.length() > YAHOO_PREFIX.length() &&
   111          url.substring(0, YAHOO_PREFIX.length()).equalsIgnoreCase(YAHOO_PREFIX)) {
   112  
   113        String query = extractQuery(YAHOO_QUERY_STRING_PATTERN, url);
   114        setName("Yahoo! : " + query);
   115      } else if (url.length() > MSN_PREFIX.length() &&
   116          url.substring(0, MSN_PREFIX.length()).equalsIgnoreCase(MSN_PREFIX)) {
   117  
   118        String query = extractQuery(MSN_QUERY_STRING_PATTERN, url);
   119        setName("MSN : " + query);
   120      } else if (url.length() > MSN_BETA_PREFIX.length() &&
   121          url.substring(0, MSN_BETA_PREFIX.length()).equalsIgnoreCase(MSN_BETA_PREFIX)) {
   122  
   123        String query = extractQuery(MSN_QUERY_STRING_PATTERN, url);
   124        setName("MSN beta : " + query);
   125      } else if (url.length() >= BING_PREFIX.length() &&
   126          url.substring(0, BING_PREFIX.length()).equalsIgnoreCase(BING_PREFIX)) {
   127  
   128        String query = extractQuery(BING_QUERY_STRING_PATTERN, url);
   129        setName("Bing : " + query);
   130      } else if (JAVABLOGS_WELCOME_PATTERN.matcher(url).matches()) {
   131        setName("java.blogs : Welcome");
   132      } else if (JAVABLOGS_HOT_ENTRIES_PATTERN.matcher(url).matches()) {
   133        setName("java.blogs : Hot Entries");
   134      } else {
   135        setName(url);
   136      }
   137    }
   138  
   139    private String extractQuery(Pattern pattern, String url) {
             /* 
    P/P       *  Method: String extractQuery(Pattern, String)
              * 
              *  Preconditions:
              *    pattern != null
              *    (soft) url != null
              * 
              *  Presumptions:
              *    java.util.regex.Matcher:start(...)@143 <= 232-4
              *    java.util.regex.Pattern:matcher(...)@140 != null
              * 
              *  Postconditions:
              *    init'ed(return_value)
              * 
              *  Test Vectors:
              *    java.lang.String:endsWith(...)@146: {0}, {1}
              *    java.util.regex.Matcher:find(...)@142: {0}, {1}
              */
   140      Matcher m = pattern.matcher(url);
   141      String query = "";
   142      if (m.find()) {
   143        int start = m.start();
   144        int end = m.end();
   145        query = url.substring(start+3, end);
   146        if (query.endsWith("&")) {
   147          query = query.substring(0, query.length()-1);
   148        }
   149        try {
   150          query = URLDecoder.decode(query, "UTF-8");
   151        } catch (Exception e) {
   152        }
   153      }
   154  
   155      return query;
   156    }
   157  
   158    /**
   159     * Gets a regex expression that will filter out other referers with the same domain.
   160     *
   161     * @return  a regex as a String
   162     */
   163    public String getDomainFilter() {
   164      // and set the domain name
             /* 
    P/P       *  Method: String getDomainFilter()
              * 
              *  Preconditions:
              *    init'ed(this.url)
              * 
              *  Presumptions:
              *    java.lang.String:indexOf(...)@169 <= 232-4
              * 
              *  Postconditions:
              *    init'ed(return_value)
              * 
              *  Test Vectors:
              *    this.url: Inverse{null}, Addr_Set{null}
              *    java.lang.String:indexOf(...)@169: {-231..-2, 0..232-4}, {-1}
              *    java.lang.String:indexOf(...)@175: {-231..-1}, {0..232-1}
              *    java.lang.String:indexOf(...)@181: {-231..-1}, {0..232-1}
              */
   165      if (getUrl() == null) {
   166        return null;
   167      }
   168  
   169      int index = getUrl().indexOf("://");
   170      if (index == -1) {
   171        return getUrl();
   172      }
   173      
   174      String domainName = getUrl().substring(index+3);
   175      index = domainName.indexOf("/");
   176  
   177      if (index > -1) {
   178        domainName = domainName.substring(0, index);
   179      }
   180  
   181      if (domainName.indexOf(":") > -1) {
   182        // the domain name still has a port number so remove it
   183        domainName = domainName.substring(0, domainName.indexOf(":"));
   184      }
   185  
   186      return ".*" + domainName + ".*";
   187    }
   188  
   189  }








SofCheck Inspector Build Version : 2.22510
referer.java 2010-Jun-25 19:40:32
referer.class 2010-Jul-19 20:23:38