/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.hc.core5.http.message;

import java.util.BitSet;

import org.apache.hc.core5.annotation.Contract;
import org.apache.hc.core5.annotation.ThreadingBehavior;
import org.apache.hc.core5.http.Chars;
import org.apache.hc.core5.util.Args;

37  /**
38   * Low level parser for header field elements. The parsing routines of this class are designed
39   * to produce near zero intermediate garbage and make no intermediate copies of input data.
40   * <p>
41   * This class is immutable and thread safe.
42   *
43   * @since 4.4
44   */
45  @Contract(threading = ThreadingBehavior.IMMUTABLE)
46  public class TokenParser {
47  
48      public static BitSet INIT_BITSET(final int ... b) {
49          final BitSet bitset = new BitSet();
50          for (final int aB : b) {
51              bitset.set(aB);
52          }
53          return bitset;
54      }
55  
56      /** Double quote */
57      public static final char DQUOTE = '\"';
58  
59      /** Backward slash / escape character */
60      public static final char ESCAPE = '\\';
61  
62      public static boolean isWhitespace(final char ch) {
63          return ch == Chars.SP || ch == Chars.HT || ch == Chars.CR || ch == Chars.LF;
64      }
65  
66      public static final TokenParserage/TokenParser.html#TokenParser">TokenParser INSTANCE = new TokenParser();
67  
68      /**
69       * Extracts from the sequence of chars a token terminated with any of the given delimiters
70       * discarding semantically insignificant whitespace characters.
71       *
72       * @param buf buffer with the sequence of chars to be parsed
73       * @param cursor defines the bounds and current position of the buffer
74       * @param delimiters set of delimiting characters. Can be {@code null} if the token
75       *  is not delimited by any character.
76       */
77      public String parseToken(final CharSequence buf, final ParserCursor cursor, final BitSet delimiters) {
78          Args.notNull(buf, "Char sequence");
79          Args.notNull(cursor, "Parser cursor");
80          final StringBuilder dst = new StringBuilder();
81          boolean whitespace = false;
82          while (!cursor.atEnd()) {
83              final char current = buf.charAt(cursor.getPos());
84              if (delimiters != null && delimiters.get(current)) {
85                  break;
86              } else if (isWhitespace(current)) {
87                  skipWhiteSpace(buf, cursor);
88                  whitespace = true;
89              } else {
90                  if (whitespace && dst.length() > 0) {
91                      dst.append(' ');
92                  }
93                  copyContent(buf, cursor, delimiters, dst);
94                  whitespace = false;
95              }
96          }
97          return dst.toString();
98      }
99  
100     /**
101      * Extracts from the sequence of chars a value which can be enclosed in quote marks and
102      * terminated with any of the given delimiters discarding semantically insignificant
103      * whitespace characters.
104      *
105      * @param buf buffer with the sequence of chars to be parsed
106      * @param cursor defines the bounds and current position of the buffer
107      * @param delimiters set of delimiting characters. Can be {@code null} if the value
108      *  is not delimited by any character.
109      */
110     public String parseValue(final CharSequence buf, final ParserCursor cursor, final BitSet delimiters) {
111         Args.notNull(buf, "Char sequence");
112         Args.notNull(cursor, "Parser cursor");
113         final StringBuilder dst = new StringBuilder();
114         boolean whitespace = false;
115         while (!cursor.atEnd()) {
116             final char current = buf.charAt(cursor.getPos());
117             if (delimiters != null && delimiters.get(current)) {
118                 break;
119             } else if (isWhitespace(current)) {
120                 skipWhiteSpace(buf, cursor);
121                 whitespace = true;
122             } else if (current == DQUOTE) {
123                 if (whitespace && dst.length() > 0) {
124                     dst.append(' ');
125                 }
126                 copyQuotedContent(buf, cursor, dst);
127                 whitespace = false;
128             } else {
129                 if (whitespace && dst.length() > 0) {
130                     dst.append(' ');
131                 }
132                 copyUnquotedContent(buf, cursor, delimiters, dst);
133                 whitespace = false;
134             }
135         }
136         return dst.toString();
137     }
138 
139     /**
140      * Skips semantically insignificant whitespace characters and moves the cursor to the closest
141      * non-whitespace character.
142      *
143      * @param buf buffer with the sequence of chars to be parsed
144      * @param cursor defines the bounds and current position of the buffer
145      */
146     public void skipWhiteSpace(final CharSequence buf, final ParserCursor cursor) {
147         Args.notNull(buf, "Char sequence");
148         Args.notNull(cursor, "Parser cursor");
149         int pos = cursor.getPos();
150         final int indexFrom = cursor.getPos();
151         final int indexTo = cursor.getUpperBound();
152         for (int i = indexFrom; i < indexTo; i++) {
153             final char current = buf.charAt(i);
154             if (!isWhitespace(current)) {
155                 break;
156             }
157             pos++;
158         }
159         cursor.updatePos(pos);
160     }
161 
162     /**
163      * Transfers content into the destination buffer until a whitespace character or any of
164      * the given delimiters is encountered.
165      *
166      * @param buf buffer with the sequence of chars to be parsed
167      * @param cursor defines the bounds and current position of the buffer
168      * @param delimiters set of delimiting characters. Can be {@code null} if the value
169      *  is delimited by a whitespace only.
170      * @param dst destination buffer
171      */
172     public void copyContent(final CharSequence buf, final ParserCursor cursor, final BitSet delimiters,
173             final StringBuilder dst) {
174         Args.notNull(buf, "Char sequence");
175         Args.notNull(cursor, "Parser cursor");
176         Args.notNull(dst, "String builder");
177         int pos = cursor.getPos();
178         final int indexFrom = cursor.getPos();
179         final int indexTo = cursor.getUpperBound();
180         for (int i = indexFrom; i < indexTo; i++) {
181             final char current = buf.charAt(i);
182             if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
183                 break;
184             }
185             pos++;
186             dst.append(current);
187         }
188         cursor.updatePos(pos);
189     }
190 
191     /**
192      * Transfers content into the destination buffer until a whitespace character,  a quote,
193      * or any of the given delimiters is encountered.
194      *
195      * @param buf buffer with the sequence of chars to be parsed
196      * @param cursor defines the bounds and current position of the buffer
197      * @param delimiters set of delimiting characters. Can be {@code null} if the value
198      *  is delimited by a whitespace or a quote only.
199      * @param dst destination buffer
200      */
201     public void copyUnquotedContent(final CharSequence buf, final ParserCursor cursor,
202             final BitSet delimiters, final StringBuilder dst) {
203         Args.notNull(buf, "Char sequence");
204         Args.notNull(cursor, "Parser cursor");
205         Args.notNull(dst, "String builder");
206         int pos = cursor.getPos();
207         final int indexFrom = cursor.getPos();
208         final int indexTo = cursor.getUpperBound();
209         for (int i = indexFrom; i < indexTo; i++) {
210             final char current = buf.charAt(i);
211             if ((delimiters != null && delimiters.get(current))
212                     || isWhitespace(current) || current == DQUOTE) {
213                 break;
214             }
215             pos++;
216             dst.append(current);
217         }
218         cursor.updatePos(pos);
219     }
220 
221     /**
222      * Transfers content enclosed with quote marks into the destination buffer.
223      *
224      * @param buf buffer with the sequence of chars to be parsed
225      * @param cursor defines the bounds and current position of the buffer
226      * @param dst destination buffer
227      */
228     public void copyQuotedContent(final CharSequence buf, final ParserCursor cursor,
229             final StringBuilder dst) {
230         Args.notNull(buf, "Char sequence");
231         Args.notNull(cursor, "Parser cursor");
232         Args.notNull(dst, "String builder");
233         if (cursor.atEnd()) {
234             return;
235         }
236         int pos = cursor.getPos();
237         int indexFrom = cursor.getPos();
238         final int indexTo = cursor.getUpperBound();
239         char current = buf.charAt(pos);
240         if (current != DQUOTE) {
241             return;
242         }
243         pos++;
244         indexFrom++;
245         boolean escaped = false;
246         for (int i = indexFrom; i < indexTo; i++, pos++) {
247             current = buf.charAt(i);
248             if (escaped) {
249                 if (current != DQUOTE && current != ESCAPE) {
250                     dst.append(ESCAPE);
251                 }
252                 dst.append(current);
253                 escaped = false;
254             } else {
255                 if (current == DQUOTE) {
256                     pos++;
257                     break;
258                 }
259                 if (current == ESCAPE) {
260                     escaped = true;
261                 } else if (current != Chars.CR && current != Chars.LF) {
262                     dst.append(current);
263                 }
264             }
265         }
266         cursor.updatePos(pos);
267     }
268 
269 }