/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */
27  package org.apache.hc.client5.http.impl.cache;
28  
29  import java.util.ArrayList;
30  import java.util.Date;
31  import java.util.List;
32  import java.util.regex.Matcher;
33  import java.util.regex.Pattern;
34  
35  import org.apache.hc.client5.http.utils.DateUtils;
36  import org.apache.hc.core5.http.Header;
37  
38  /** This class provides for parsing and understanding Warning headers. As
39   * the Warning header can be multi-valued, but the values can contain
40   * separators like commas inside quoted strings, we cannot use the regular
41   * {@link Header#getElements()} call to access the values.
42   */
43  class WarningValue {
44  
45      private int offs;
46      private int init_offs;
47      private final String src;
48      private int warnCode;
49      private String warnAgent;
50      private String warnText;
51      private Date warnDate;
52  
53      WarningValue(final String s) {
54          this(s, 0);
55      }
56  
57      WarningValue(final String s, final int offs) {
58          this.offs = this.init_offs = offs;
59          this.src = s;
60          consumeWarnValue();
61      }
62  
63      /** Returns an array of the parseable warning values contained
64       * in the given header value, which is assumed to be a
65       * Warning header. Improperly formatted warning values will be
66       * skipped, in keeping with the philosophy of "ignore what you
67       * cannot understand."
68       * @param h Warning {@link Header} to parse
69       * @return array of {@code WarnValue} objects
70       */
71      public static WarningValue[] getWarningValues(final Header h) {
72          final List<WarningValue> out = new ArrayList<>();
73          final String src = h.getValue();
74          int offs = 0;
75          while(offs < src.length()) {
76              try {
77                  final WarningValue/http/impl/cache/WarningValue.html#WarningValue">WarningValue wv = new WarningValue(src, offs);
78                  out.add(wv);
79                  offs = wv.offs;
80              } catch (final IllegalArgumentException e) {
81                  final int nextComma = src.indexOf(',', offs);
82                  if (nextComma == -1) {
83                      break;
84                  }
85                  offs = nextComma + 1;
86              }
87          }
88          final WarningValue[] wvs = {};
89          return out.toArray(wvs);
90      }
91  
92      /*
93       * LWS            = [CRLF] 1*( SP | HT )
94       * CRLF           = CR LF
95       */
96      protected void consumeLinearWhitespace() {
97          while(offs < src.length()) {
98              switch(src.charAt(offs)) {
99              case '\r':
100                 if (offs+2 >= src.length()
101                     || src.charAt(offs+1) != '\n'
102                     || (src.charAt(offs+2) != ' '
103                         && src.charAt(offs+2) != '\t')) {
104                     return;
105                 }
106                 offs += 2;
107                 break;
108             case ' ':
109             case '\t':
110                 break;
111             default:
112                 return;
113             }
114             offs++;
115         }
116     }
117 
118     /*
119      * CHAR           = <any US-ASCII character (octets 0 - 127)>
120      */
121     private boolean isChar(final char c) {
122         final int i = c;
123         return (i >= 0 && i <= 127);
124     }
125 
126     /*
127      * CTL            = <any US-ASCII control character
128                         (octets 0 - 31) and DEL (127)>
129      */
130     private boolean isControl(final char c) {
131         final int i = c;
132         return (i == 127 || (i >=0 && i <= 31));
133     }
134 
135     /*
136      * separators     = "(" | ")" | "<" | ">" | "@"
137      *                | "," | ";" | ":" | "\" | <">
138      *                | "/" | "[" | "]" | "?" | "="
139      *                | "{" | "}" | SP | HT
140      */
141     private boolean isSeparator(final char c) {
142         return (c == '(' || c == ')' || c == '<' || c == '>'
143                 || c == '@' || c == ',' || c == ';' || c == ':'
144                 || c == '\\' || c == '\"' || c == '/'
145                 || c == '[' || c == ']' || c == '?' || c == '='
146                 || c == '{' || c == '}' || c == ' ' || c == '\t');
147     }
148 
149     /*
150      * token          = 1*<any CHAR except CTLs or separators>
151      */
152     protected void consumeToken() {
153         if (!isTokenChar(src.charAt(offs))) {
154             parseError();
155         }
156         while(offs < src.length()) {
157             if (!isTokenChar(src.charAt(offs))) {
158                 break;
159             }
160             offs++;
161         }
162     }
163 
164     private boolean isTokenChar(final char c) {
165         return (isChar(c) && !isControl(c) && !isSeparator(c));
166     }
167 
168     private static final String TOPLABEL = "\\p{Alpha}([\\p{Alnum}-]*\\p{Alnum})?";
169     private static final String DOMAINLABEL = "\\p{Alnum}([\\p{Alnum}-]*\\p{Alnum})?";
170     private static final String HOSTNAME = "(" + DOMAINLABEL + "\\.)*" + TOPLABEL + "\\.?";
171     private static final String IPV4ADDRESS = "\\d+\\.\\d+\\.\\d+\\.\\d+";
172     private static final String HOST = "(" + HOSTNAME + ")|(" + IPV4ADDRESS + ")";
173     private static final String PORT = "\\d*";
174     private static final String HOSTPORT = "(" + HOST + ")(\\:" + PORT + ")?";
175     private static final Pattern HOSTPORT_PATTERN = Pattern.compile(HOSTPORT);
176 
177     protected void consumeHostPort() {
178         final Matcher m = HOSTPORT_PATTERN.matcher(src.substring(offs));
179         if (!m.find()) {
180             parseError();
181         }
182         if (m.start() != 0) {
183             parseError();
184         }
185         offs += m.end();
186     }
187 
188 
189     /*
190      * warn-agent = ( host [ ":" port ] ) | pseudonym
191      * pseudonym         = token
192      */
193     protected void consumeWarnAgent() {
194         final int curr_offs = offs;
195         try {
196             consumeHostPort();
197             warnAgent = src.substring(curr_offs, offs);
198             consumeCharacter(' ');
199             return;
200         } catch (final IllegalArgumentException e) {
201             offs = curr_offs;
202         }
203         consumeToken();
204         warnAgent = src.substring(curr_offs, offs);
205         consumeCharacter(' ');
206     }
207 
208     /*
209      * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
210      * qdtext         = <any TEXT except <">>
211      */
212     protected void consumeQuotedString() {
213         if (src.charAt(offs) != '\"') {
214             parseError();
215         }
216         offs++;
217         boolean foundEnd = false;
218         while(offs < src.length() && !foundEnd) {
219             final char c = src.charAt(offs);
220             if (offs + 1 < src.length() && c == '\\'
221                 && isChar(src.charAt(offs+1))) {
222                 offs += 2;    // consume quoted-pair
223             } else if (c == '\"') {
224                 foundEnd = true;
225                 offs++;
226             } else if (c != '\"' && !isControl(c)) {
227                 offs++;
228             } else {
229                 parseError();
230             }
231         }
232         if (!foundEnd) {
233             parseError();
234         }
235     }
236 
237     /*
238      * warn-text  = quoted-string
239      */
240     protected void consumeWarnText() {
241         final int curr = offs;
242         consumeQuotedString();
243         warnText = src.substring(curr, offs);
244     }
245 
246     private static final String MONTH = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec";
247     private static final String WEEKDAY = "Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday";
248     private static final String WKDAY = "Mon|Tue|Wed|Thu|Fri|Sat|Sun";
249     private static final String TIME = "\\d{2}:\\d{2}:\\d{2}";
250     private static final String DATE3 = "(" + MONTH + ") ( |\\d)\\d";
251     private static final String DATE2 = "\\d{2}-(" + MONTH + ")-\\d{2}";
252     private static final String DATE1 = "\\d{2} (" + MONTH + ") \\d{4}";
253     private static final String ASCTIME_DATE = "(" + WKDAY + ") (" + DATE3 + ") (" + TIME + ") \\d{4}";
254     private static final String RFC850_DATE = "(" + WEEKDAY + "), (" + DATE2 + ") (" + TIME + ") GMT";
255     private static final String RFC1123_DATE = "(" + WKDAY + "), (" + DATE1 + ") (" + TIME + ") GMT";
256     private static final String HTTP_DATE = "(" + RFC1123_DATE + ")|(" + RFC850_DATE + ")|(" + ASCTIME_DATE + ")";
257     private static final String WARN_DATE = "\"(" + HTTP_DATE + ")\"";
258     private static final Pattern WARN_DATE_PATTERN = Pattern.compile(WARN_DATE);
259 
260     /*
261      * warn-date  = <"> HTTP-date <">
262      */
263     protected void consumeWarnDate() {
264         final int curr = offs;
265         final Matcher m = WARN_DATE_PATTERN.matcher(src.substring(offs));
266         if (!m.lookingAt()) {
267             parseError();
268         }
269         offs += m.end();
270         warnDate = DateUtils.parseDate(src.substring(curr+1,offs-1));
271     }
272 
273     /*
274      * warning-value = warn-code SP warn-agent SP warn-text [SP warn-date]
275      */
276     protected void consumeWarnValue() {
277         consumeLinearWhitespace();
278         consumeWarnCode();
279         consumeWarnAgent();
280         consumeWarnText();
281         if (offs + 1 < src.length() && src.charAt(offs) == ' ' && src.charAt(offs+1) == '\"') {
282             consumeCharacter(' ');
283             consumeWarnDate();
284         }
285         consumeLinearWhitespace();
286         if (offs != src.length()) {
287             consumeCharacter(',');
288         }
289     }
290 
291     protected void consumeCharacter(final char c) {
292         if (offs + 1 > src.length()
293             || c != src.charAt(offs)) {
294             parseError();
295         }
296         offs++;
297     }
298 
299     /*
300      * warn-code  = 3DIGIT
301      */
302     protected void consumeWarnCode() {
303         if (offs + 4 > src.length()
304             || !Character.isDigit(src.charAt(offs))
305             || !Character.isDigit(src.charAt(offs + 1))
306             || !Character.isDigit(src.charAt(offs + 2))
307             || src.charAt(offs + 3) != ' ') {
308             parseError();
309         }
310         warnCode = Integer.parseInt(src.substring(offs,offs+3));
311         offs += 4;
312     }
313 
314     private void parseError() {
315         final String s = src.substring(init_offs);
316         throw new IllegalArgumentException("Bad warn code \"" + s + "\"");
317     }
318 
319     /** Returns the 3-digit code associated with this warning.
320      * @return {@code int}
321      */
322     public int getWarnCode() { return warnCode; }
323 
324     /** Returns the "warn-agent" string associated with this warning,
325      * which is either the name or pseudonym of the server that added
326      * this particular Warning header.
327      * @return {@link String}
328      */
329     public String getWarnAgent() { return warnAgent; }
330 
331     /** Returns the human-readable warning text for this warning. Note
332      * that the original quoted-string is returned here, including
333      * escaping for any contained characters. In other words, if the
334      * header was:
335      * <pre>
336      *   Warning: 110 fred "Response is stale"
337      * </pre>
338      * then this method will return {@code "\"Response is stale\""}
339      * (surrounding quotes included).
340      * @return {@link String}
341      */
342     public String getWarnText() { return warnText; }
343 
344     /** Returns the date and time when this warning was added, or
345      * {@code null} if a warning date was not supplied in the
346      * header.
347      * @return {@link Date}
348      */
349     public Date getWarnDate() { return warnDate; }
350 
351     /** Formats a {@code WarningValue} as a {@link String}
352      * suitable for including in a header. For example, you can:
353      * <pre>
354      *   WarningValue wv = ...;
355      *   HttpResponse resp = ...;
356      *   resp.addHeader("Warning", wv.toString());
357      * </pre>
358      * @return {@link String}
359      */
360     @Override
361     public String toString() {
362         if (warnDate != null) {
363             return String.format("%d %s %s \"%s\"", Integer.valueOf(warnCode),
364                     warnAgent, warnText, DateUtils.formatDate(warnDate));
365         } else {
366             return String.format("%d %s %s", Integer.valueOf(warnCode), warnAgent, warnText);
367         }
368     }
369 
370 }