File Source: referer.java
1 /*
2 * Copyright (c) 2003-2006, Simon Brown
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * - Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * - Neither the name of Pebble nor the names of its contributors may
17 * be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32 package net.sourceforge.pebble.logging;
33
34 import java.net.URLDecoder;
35 import java.util.regex.Matcher;
36 import java.util.regex.Pattern;
37
38 /**
39 * Represents a referer URL along with a count of how many times
40 * it has been referred from.
41 *
42 * @author Simon Brown
43 */
44 public class Referer extends CountedUrl {
45
46 /** regular expression to pull out the q parameter (the query) from a Google referer */
/*
P/P * Method: net.sourceforge.pebble.logging.Referer__static_init
*
* Postconditions:
* init'ed(BING_QUERY_STRING_PATTERN)
* init'ed(GOOGLE_IMAGES_QUERY_STRING_PATTERN)
* init'ed(GOOGLE_QUERY_STRING_PATTERN)
* init'ed(JAVABLOGS_HOT_ENTRIES_PATTERN)
* init'ed(JAVABLOGS_WELCOME_PATTERN)
* init'ed(MSN_QUERY_STRING_PATTERN)
* init'ed(YAHOO_QUERY_STRING_PATTERN)
*/
47 private static final Pattern GOOGLE_QUERY_STRING_PATTERN = Pattern.compile("[?&]q=[^&]+&*");
48
/*
 * NOTE(review): every *_PREFIX constant in this class starts with "http://".
 * setUrl compares the beginning of the url against these prefixes, so a
 * referer using "https://" is not recognised by any of the prefix checks.
 * Confirm whether that is still the desired behaviour.
 */
49 /** the prefix for all Google referers */
50 private static final String GOOGLE_PREFIX = "http://www.google.";
51
52 /** regular expression to pull out the prev parameter from a Google Images referer */
53 private static final Pattern GOOGLE_IMAGES_QUERY_STRING_PATTERN = Pattern.compile("[?&]prev=[^&]+&*");
54
55 /** the prefix for all Google imagesearch referers */
56 private static final String GOOGLE_IMAGES_PREFIX = "http://images.google.";
57
58 /** regular expression to pull out the p parameter (the query) from a Yahoo! referer */
59 private static final Pattern YAHOO_QUERY_STRING_PATTERN = Pattern.compile("[?&]p=[^&]+&*");
60
61 /** the prefix for all Yahoo! referers */
62 private static final String YAHOO_PREFIX = "http://search.yahoo.";
63
64 /** regular expression to pull out the q parameter (the query) from an MSN referer */
65 private static final Pattern MSN_QUERY_STRING_PATTERN = Pattern.compile("[?&]q=[^&]+&*");
66
67 /** the prefix for all MSN referers */
68 private static final String MSN_PREFIX = "http://search.msn.";
69
70 /** the prefix for all MSN Beta referers */
71 private static final String MSN_BETA_PREFIX = "http://beta.search.msn.";
72
73 /** regular expression to pull out the q parameter (the query) from a Bing referer */
74 private static final Pattern BING_QUERY_STRING_PATTERN = Pattern.compile("[?&]q=[^&]+&*");
75
76 /** the prefix for all Bing referers */
77 private static final String BING_PREFIX = "http://www.bing.com";
78
/*
 * NOTE(review): in the two java.blogs patterns below the "." in "javablogs.com"
 * is an unescaped regex metacharacter, so it matches any single character,
 * not only a literal dot.
 */
79 /** pattern for java.blogs welcome page referers */
80 private static final Pattern JAVABLOGS_WELCOME_PATTERN = Pattern.compile(".*javablogs.com/Welcome.*");
81
82 /** pattern for java.blogs hot entries page referers */
83 private static final Pattern JAVABLOGS_HOT_ENTRIES_PATTERN = Pattern.compile(".*javablogs.com/ViewHotBlogEntries.*");
84
84
85 /**
86 * Creates a new instance representing the specified url.
* <p>
* All work is delegated to the CountedUrl constructor.
* NOTE(review): the analyzer postconditions below list this.name as possibly
* updated, so the superclass constructor presumably ends up calling
* setUrl(String) - confirm in CountedUrl.
87 *
88 * @param url the url as a String
89 */
90 public Referer(String url) {
/*
P/P * Method: void net.sourceforge.pebble.logging.Referer(String)
*
* Postconditions:
* init'ed(this.blog)
* init'ed(this.fileDownload)
* this.logEntries == &new LinkedList(CountedUrl#1)
* possibly_updated(this.name)
* init'ed(this.newsFeed)
* init'ed(this.pageView)
* init'ed(this.url)
* new LinkedList(CountedUrl#1) num objects == 1
*/
91 super(url);
92 }
93
/**
 * Stores the url via the superclass and then derives a display name from it.
 * <p>
 * The name is chosen by the first rule that applies:
 * <ul>
 * <li>null or empty url: "None"</li>
 * <li>url starting (case-insensitively) with a known search engine prefix:
 *     the engine label followed by the query extracted from the url; if no
 *     query parameter is found the label is followed by an empty string</li>
 * <li>url matching one of the java.blogs patterns: a fixed label</li>
 * <li>anything else: the url itself</li>
 * </ul>
 *
 * @param url the referer url as a String, may be null
 */
94 protected void setUrl(String url) {
/*
P/P * Method: void setUrl(String)
*
* Presumptions:
* java.util.regex.Pattern:compile(...)@47 != null
* java.util.regex.Pattern:compile(...)@53 != null
* java.util.regex.Pattern:compile(...)@59 != null
* java.util.regex.Pattern:compile(...)@65 != null
* java.util.regex.Pattern:compile(...)@74 != null
* ...
*
* Postconditions:
* this.name != null
* this.url == url
* init'ed(this.url)
*
* Test Vectors:
* url: Addr_Set{null}, Inverse{null}
* java.lang.String:equalsIgnoreCase(...)@100: {0}, {1}
* java.lang.String:equalsIgnoreCase(...)@105: {0}, {1}
* java.lang.String:equalsIgnoreCase(...)@111: {0}, {1}
* java.lang.String:equalsIgnoreCase(...)@116: {0}, {1}
* java.lang.String:equalsIgnoreCase(...)@121: {0}, {1}
* java.lang.String:equalsIgnoreCase(...)@126: {0}, {1}
* java.lang.String:length(...)@97: {1..2^32-1}, {0}
* java.util.regex.Matcher:matches(...)@130: {0}, {1}
* java.util.regex.Matcher:matches(...)@132: {0}, {1}
*/
95 super.setUrl(url);
96
97 if (url == null || url.length() == 0) {
98 setName("None");
// the length guard keeps substring(0, prefix length) in range; the Google,
// Google Images, Yahoo! and MSN checks require the url to be strictly
// longer than the prefix
99 } else if (url.length() > GOOGLE_PREFIX.length() &&
100 url.substring(0, GOOGLE_PREFIX.length()).equalsIgnoreCase(GOOGLE_PREFIX)) {
101
102 String query = extractQuery(GOOGLE_QUERY_STRING_PATTERN, url);
103 setName("Google : " + query);
104 } else if (url.length() > GOOGLE_IMAGES_PREFIX.length() &&
105 url.substring(0, GOOGLE_IMAGES_PREFIX.length()).equalsIgnoreCase(GOOGLE_IMAGES_PREFIX)) {
106
// two-step extraction: the prev parameter is pulled out (and URL-decoded by
// extractQuery) first, then the q parameter is pulled out of that value
107 String query = extractQuery(GOOGLE_IMAGES_QUERY_STRING_PATTERN, url);
108 query = extractQuery(GOOGLE_QUERY_STRING_PATTERN, query);
109 setName("Google Images : " + query);
110 } else if (url.length() > YAHOO_PREFIX.length() &&
111 url.substring(0, YAHOO_PREFIX.length()).equalsIgnoreCase(YAHOO_PREFIX)) {
112
113 String query = extractQuery(YAHOO_QUERY_STRING_PATTERN, url);
114 setName("Yahoo! : " + query);
115 } else if (url.length() > MSN_PREFIX.length() &&
116 url.substring(0, MSN_PREFIX.length()).equalsIgnoreCase(MSN_PREFIX)) {
117
118 String query = extractQuery(MSN_QUERY_STRING_PATTERN, url);
119 setName("MSN : " + query);
120 } else if (url.length() > MSN_BETA_PREFIX.length() &&
121 url.substring(0, MSN_BETA_PREFIX.length()).equalsIgnoreCase(MSN_BETA_PREFIX)) {
122
123 String query = extractQuery(MSN_QUERY_STRING_PATTERN, url);
124 setName("MSN beta : " + query);
// NOTE(review): unlike the checks above this one uses >=, so a url that is
// exactly the Bing prefix is also accepted (and is named "Bing : " with an
// empty query) - confirm whether the difference is intentional
125 } else if (url.length() >= BING_PREFIX.length() &&
126 url.substring(0, BING_PREFIX.length()).equalsIgnoreCase(BING_PREFIX)) {
127
128 String query = extractQuery(BING_QUERY_STRING_PATTERN, url);
129 setName("Bing : " + query);
130 } else if (JAVABLOGS_WELCOME_PATTERN.matcher(url).matches()) {
131 setName("java.blogs : Welcome");
132 } else if (JAVABLOGS_HOT_ENTRIES_PATTERN.matcher(url).matches()) {
133 setName("java.blogs : Hot Entries");
134 } else {
// not a recognised source: the url doubles as the name
135 setName(url);
136 }
137 }
138
/**
 * Pulls a query parameter value out of a url using the given pattern.
 * <p>
 * The first region of url matched by pattern is taken, its first three
 * characters are dropped, a single trailing "&" is removed if present, and
 * the remainder is URL-decoded as UTF-8.
 *
 * @param pattern the regular expression locating the parameter within url
 * @param url     the text to search
 * @return the extracted (and, when decoding succeeds, decoded) value, or an
 *         empty string if pattern does not match anywhere in url
 */
139 private String extractQuery(Pattern pattern, String url) {
/*
P/P * Method: String extractQuery(Pattern, String)
*
* Preconditions:
* pattern != null
* (soft) url != null
*
* Presumptions:
* java.util.regex.Matcher:start(...)@143 <= 2^32-4
* java.util.regex.Pattern:matcher(...)@140 != null
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* java.lang.String:endsWith(...)@146: {0}, {1}
* java.util.regex.Matcher:find(...)@142: {0}, {1}
*/
140 Matcher m = pattern.matcher(url);
141 String query = "";
142 if (m.find()) {
143 int start = m.start();
144 int end = m.end();
// start+3 skips the delimiter, a one-character parameter name and '='
// (e.g. "?q=" or "&p=").
// NOTE(review): for the longer "prev=" pattern this leaves "ev=" at the
// front of the value; setUrl then re-extracts the q parameter from that
// value, so the leftover is not visible there - confirm this is intended.
145 query = url.substring(start+3, end);
// the pattern may include the "&" that follows the value; drop one of them
146 if (query.endsWith("&")) {
147 query = query.substring(0, query.length()-1);
148 }
149 try {
150 query = URLDecoder.decode(query, "UTF-8");
151 } catch (Exception e) {
// any exception from decoding is swallowed; query keeps its undecoded value
152 }
153 }
154
155 return query;
156 }
157
158 /**
159 * Gets a regex expression that will filter out other referers with the same domain.
* <p>
* The domain is the part of the url after the first "://", cut at the first
* "/" (if any) and then at the first ":" (if any, to drop a port number).
* The result is that domain with ".*" added before and after it.
* If the url contains no "://" the url itself is returned unchanged.
* <p>
* NOTE(review): the domain is concatenated into the regex without quoting,
* so its "." characters match any character - confirm callers rely on this
* exact string before changing it.
160 *
161 * @return a regex as a String, or null if the url is null
162 */
163 public String getDomainFilter() {
164 // derive the domain name from the url
/*
P/P * Method: String getDomainFilter()
*
* Preconditions:
* init'ed(this.url)
*
* Presumptions:
* java.lang.String:indexOf(...)@169 <= 2^32-4
*
* Postconditions:
* init'ed(return_value)
*
* Test Vectors:
* this.url: Inverse{null}, Addr_Set{null}
* java.lang.String:indexOf(...)@169: {-2^31..-2, 0..2^32-4}, {-1}
* java.lang.String:indexOf(...)@175: {-2^31..-1}, {0..2^32-1}
* java.lang.String:indexOf(...)@181: {-2^31..-1}, {0..2^32-1}
*/
165 if (getUrl() == null) {
166 return null;
167 }
168
169 int index = getUrl().indexOf("://");
170 if (index == -1) {
// no scheme separator: the url is returned as-is, without the ".*" wrapping
171 return getUrl();
172 }
173
// skip past the three characters of "://"
174 String domainName = getUrl().substring(index+3);
175 index = domainName.indexOf("/");
176
177 if (index > -1) {
// drop the path and everything after it
178 domainName = domainName.substring(0, index);
179 }
180
181 if (domainName.indexOf(":") > -1) {
182 // the domain name still has a port number so remove it
183 domainName = domainName.substring(0, domainName.indexOf(":"));
184 }
185
186 return ".*" + domainName + ".*";
187 }
188
189 }
SofCheck Inspector Build Version : 2.22510
| referer.java |
2010-Jun-25 19:40:32 |
| referer.class |
2010-Jul-19 20:23:38 |