1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package org.apache.hc.core5.util;
29
30 import java.util.BitSet;
31
32 import org.apache.hc.core5.annotation.Contract;
33 import org.apache.hc.core5.annotation.ThreadingBehavior;
34
35
36
37
38
39
40
41
42
43
44 @Contract(threading = ThreadingBehavior.IMMUTABLE)
45 public class Tokenizer {
46
47 public static class Cursor {
48
49 private final int lowerBound;
50 private final int upperBound;
51 private int pos;
52
53 public Cursor(final int lowerBound, final int upperBound) {
54 super();
55 Args.notNegative(lowerBound, "lowerBound");
56 Args.check(lowerBound <= upperBound, "lowerBound cannot be greater than upperBound");
57 this.lowerBound = lowerBound;
58 this.upperBound = upperBound;
59 this.pos = lowerBound;
60 }
61
62 public int getLowerBound() {
63 return this.lowerBound;
64 }
65
66 public int getUpperBound() {
67 return this.upperBound;
68 }
69
70 public int getPos() {
71 return this.pos;
72 }
73
74 public void updatePos(final int pos) {
75 if (pos < this.lowerBound) {
76 throw new IndexOutOfBoundsException("pos: "+pos+" < lowerBound: "+this.lowerBound);
77 }
78 if (pos > this.upperBound) {
79 throw new IndexOutOfBoundsException("pos: "+pos+" > upperBound: "+this.upperBound);
80 }
81 this.pos = pos;
82 }
83
84 public boolean atEnd() {
85 return this.pos >= this.upperBound;
86 }
87
88 @Override
89 public String toString() {
90 final StringBuilder buffer = new StringBuilder();
91 buffer.append('[');
92 buffer.append(this.lowerBound);
93 buffer.append('>');
94 buffer.append(this.pos);
95 buffer.append('>');
96 buffer.append(this.upperBound);
97 buffer.append(']');
98 return buffer.toString();
99 }
100
101 }
102
103 public static BitSet INIT_BITSET(final int ... b) {
104 final BitSet bitset = new BitSet();
105 for (final int aB : b) {
106 bitset.set(aB);
107 }
108 return bitset;
109 }
110
111
112 public static final char DQUOTE = '\"';
113
114
115 public static final char ESCAPE = '\\';
116
117 public static final int CR = 13;
118 public static final int LF = 10;
119 public static final int SP = 32;
120 public static final int HT = 9;
121
122 public static boolean isWhitespace(final char ch) {
123 return ch == SP || ch == HT || ch == CR || ch == LF;
124 }
125
126 public static final Tokenizerizer.html#Tokenizer">Tokenizer INSTANCE = new Tokenizer();
127
128
129
130
131
132
133
134
135
136
137 public String parseContent(final CharSequence buf, final Cursor cursor, final BitSet delimiters) {
138 Args.notNull(buf, "Char sequence");
139 Args.notNull(cursor, "Parser cursor");
140 final StringBuilder dst = new StringBuilder();
141 copyContent(buf, cursor, delimiters, dst);
142 return dst.toString();
143 }
144
145
146
147
148
149
150
151
152
153
154 public String parseToken(final CharSequence buf, final Cursor cursor, final BitSet delimiters) {
155 Args.notNull(buf, "Char sequence");
156 Args.notNull(cursor, "Parser cursor");
157 final StringBuilder dst = new StringBuilder();
158 boolean whitespace = false;
159 while (!cursor.atEnd()) {
160 final char current = buf.charAt(cursor.getPos());
161 if (delimiters != null && delimiters.get(current)) {
162 break;
163 } else if (isWhitespace(current)) {
164 skipWhiteSpace(buf, cursor);
165 whitespace = true;
166 } else {
167 if (whitespace && dst.length() > 0) {
168 dst.append(' ');
169 }
170 copyContent(buf, cursor, delimiters, dst);
171 whitespace = false;
172 }
173 }
174 return dst.toString();
175 }
176
177
178
179
180
181
182
183
184
185
186
187 public String parseValue(final CharSequence buf, final Cursor cursor, final BitSet delimiters) {
188 Args.notNull(buf, "Char sequence");
189 Args.notNull(cursor, "Parser cursor");
190 final StringBuilder dst = new StringBuilder();
191 boolean whitespace = false;
192 while (!cursor.atEnd()) {
193 final char current = buf.charAt(cursor.getPos());
194 if (delimiters != null && delimiters.get(current)) {
195 break;
196 } else if (isWhitespace(current)) {
197 skipWhiteSpace(buf, cursor);
198 whitespace = true;
199 } else if (current == DQUOTE) {
200 if (whitespace && dst.length() > 0) {
201 dst.append(' ');
202 }
203 copyQuotedContent(buf, cursor, dst);
204 whitespace = false;
205 } else {
206 if (whitespace && dst.length() > 0) {
207 dst.append(' ');
208 }
209 copyUnquotedContent(buf, cursor, delimiters, dst);
210 whitespace = false;
211 }
212 }
213 return dst.toString();
214 }
215
216
217
218
219
220
221
222
223 public void skipWhiteSpace(final CharSequence buf, final Cursor cursor) {
224 Args.notNull(buf, "Char sequence");
225 Args.notNull(cursor, "Parser cursor");
226 int pos = cursor.getPos();
227 final int indexFrom = cursor.getPos();
228 final int indexTo = cursor.getUpperBound();
229 for (int i = indexFrom; i < indexTo; i++) {
230 final char current = buf.charAt(i);
231 if (!isWhitespace(current)) {
232 break;
233 }
234 pos++;
235 }
236 cursor.updatePos(pos);
237 }
238
239
240
241
242
243
244
245
246
247
248
249 public void copyContent(final CharSequence buf, final Cursor cursor, final BitSet delimiters,
250 final StringBuilder dst) {
251 Args.notNull(buf, "Char sequence");
252 Args.notNull(cursor, "Parser cursor");
253 Args.notNull(dst, "String builder");
254 int pos = cursor.getPos();
255 final int indexFrom = cursor.getPos();
256 final int indexTo = cursor.getUpperBound();
257 for (int i = indexFrom; i < indexTo; i++) {
258 final char current = buf.charAt(i);
259 if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
260 break;
261 }
262 pos++;
263 dst.append(current);
264 }
265 cursor.updatePos(pos);
266 }
267
268
269
270
271
272
273
274
275
276
277
278 public void copyUnquotedContent(final CharSequence buf, final Cursor cursor,
279 final BitSet delimiters, final StringBuilder dst) {
280 Args.notNull(buf, "Char sequence");
281 Args.notNull(cursor, "Parser cursor");
282 Args.notNull(dst, "String builder");
283 int pos = cursor.getPos();
284 final int indexFrom = cursor.getPos();
285 final int indexTo = cursor.getUpperBound();
286 for (int i = indexFrom; i < indexTo; i++) {
287 final char current = buf.charAt(i);
288 if ((delimiters != null && delimiters.get(current))
289 || isWhitespace(current) || current == DQUOTE) {
290 break;
291 }
292 pos++;
293 dst.append(current);
294 }
295 cursor.updatePos(pos);
296 }
297
298
299
300
301
302
303
304
305 public void copyQuotedContent(final CharSequence buf, final Cursor cursor,
306 final StringBuilder dst) {
307 Args.notNull(buf, "Char sequence");
308 Args.notNull(cursor, "Parser cursor");
309 Args.notNull(dst, "String builder");
310 if (cursor.atEnd()) {
311 return;
312 }
313 int pos = cursor.getPos();
314 int indexFrom = cursor.getPos();
315 final int indexTo = cursor.getUpperBound();
316 char current = buf.charAt(pos);
317 if (current != DQUOTE) {
318 return;
319 }
320 pos++;
321 indexFrom++;
322 boolean escaped = false;
323 for (int i = indexFrom; i < indexTo; i++, pos++) {
324 current = buf.charAt(i);
325 if (escaped) {
326 if (current != DQUOTE && current != ESCAPE) {
327 dst.append(ESCAPE);
328 }
329 dst.append(current);
330 escaped = false;
331 } else {
332 if (current == DQUOTE) {
333 pos++;
334 break;
335 }
336 if (current == ESCAPE) {
337 escaped = true;
338 } else if (current != CR && current != LF) {
339 dst.append(current);
340 }
341 }
342 }
343 cursor.updatePos(pos);
344 }
345
346 }