1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package org.apache.hc.core5.util;
29
30 import java.util.BitSet;
31
32 import org.apache.hc.core5.annotation.Contract;
33 import org.apache.hc.core5.annotation.ThreadingBehavior;
34
35
36
37
38
39
40
41
42
43
44 @Contract(threading = ThreadingBehavior.IMMUTABLE)
45 public class Tokenizer {
46
47 public static class Cursor {
48
49 private final int lowerBound;
50 private final int upperBound;
51 private int pos;
52
53 public Cursor(final int lowerBound, final int upperBound) {
54 super();
55 Args.notNegative(lowerBound, "lowerBound");
56 Args.check(lowerBound <= upperBound, "lowerBound cannot be greater than upperBound");
57 this.lowerBound = lowerBound;
58 this.upperBound = upperBound;
59 this.pos = lowerBound;
60 }
61
62 public int getLowerBound() {
63 return this.lowerBound;
64 }
65
66 public int getUpperBound() {
67 return this.upperBound;
68 }
69
70 public int getPos() {
71 return this.pos;
72 }
73
74 public void updatePos(final int pos) {
75 Args.check(pos >= this.lowerBound, "pos: %s < lowerBound: %s", pos, this.lowerBound);
76 Args.check(pos <= this.upperBound, "pos: %s > upperBound: %s", pos, this.upperBound);
77 this.pos = pos;
78 }
79
80 public boolean atEnd() {
81 return this.pos >= this.upperBound;
82 }
83
84 @Override
85 public String toString() {
86 final StringBuilder buffer = new StringBuilder();
87 buffer.append('[');
88 buffer.append(this.lowerBound);
89 buffer.append('>');
90 buffer.append(this.pos);
91 buffer.append('>');
92 buffer.append(this.upperBound);
93 buffer.append(']');
94 return buffer.toString();
95 }
96
97 }
98
99
100
101
102 @Deprecated
103 public static BitSet INIT_BITSET(final int ... b) {
104 final BitSet bitset = new BitSet();
105 for (final int aB : b) {
106 bitset.set(aB);
107 }
108 return bitset;
109 }
110
111
112 public static final char DQUOTE = '\"';
113
114
115 public static final char ESCAPE = '\\';
116
117 public static final int CR = 13;
118 public static final int LF = 10;
119 public static final int SP = 32;
120 public static final int HT = 9;
121
122 public static boolean isWhitespace(final char ch) {
123 return ch == SP || ch == HT || ch == CR || ch == LF;
124 }
125
126
127
128
129
130
/**
 * Predicate over a single character. The parsing and copying methods of this
 * class stop at the first character for which {@link #test(char)} returns
 * {@code true}.
 */
@FunctionalInterface
public interface Delimiter {

    /**
     * @param ch the character to examine.
     * @return {@code true} if {@code ch} is to be treated as a delimiter.
     */
    boolean test(char ch);

}
137
138
139
140
141 public static Delimiter delimiters(final BitSet delimiters) {
142 return delimiters::get;
143 }
144
145
146
147
148 public static Delimiter delimiters(final char... delimiters) {
149 return ch -> {
150 for (final char delimiter : delimiters) {
151 if (delimiter == ch) {
152 return true;
153 }
154 }
155 return false;
156 };
157 }
158
159
160
161
162 public static Delimiter delimiters(final char delimiter) {
163 return ch -> ch == delimiter;
164 }
165
166
167
168
169 public static Delimiter delimiters(final char delimiter1, final char delimiter2) {
170 return ch -> ch == delimiter1 || ch == delimiter2;
171 }
172
173
174
175
176 public static Delimiter delimiters(final char delimiter1, final char delimiter2, final char delimiter3) {
177 return ch -> ch == delimiter1 || ch == delimiter2 || ch == delimiter3;
178 }
179
180 public static final Tokenizer INSTANCE = new Tokenizer();
181
182
183
184
185
186
187
188
189
190
191 public String parseContent(final CharSequence buf, final Cursor cursor, final Delimiter delimiterPredicate) {
192 Args.notNull(buf, "Char sequence");
193 Args.notNull(cursor, "Parser cursor");
194 final StringBuilder dst = new StringBuilder();
195 copyContent(buf, cursor, delimiterPredicate, dst);
196 return dst.toString();
197 }
198
199
200
201
202 @Deprecated
203 public String parseContent(final CharSequence buf, final Cursor cursor, final BitSet bitSet) {
204 return parseContent(buf, cursor, bitSet != null ? bitSet::get : null);
205 }
206
207
208
209
210
211
212
213
214
215
216 public String parseToken(final CharSequence buf, final Cursor cursor, final Delimiter delimiterPredicate) {
217 Args.notNull(buf, "Char sequence");
218 Args.notNull(cursor, "Parser cursor");
219 final StringBuilder dst = new StringBuilder();
220 boolean whitespace = false;
221 while (!cursor.atEnd()) {
222 final char current = buf.charAt(cursor.getPos());
223 if (delimiterPredicate != null && delimiterPredicate.test(current)) {
224 break;
225 } else if (isWhitespace(current)) {
226 skipWhiteSpace(buf, cursor);
227 whitespace = true;
228 } else {
229 if (whitespace && dst.length() > 0) {
230 dst.append(' ');
231 }
232 copyContent(buf, cursor, delimiterPredicate, dst);
233 whitespace = false;
234 }
235 }
236 return dst.toString();
237 }
238
239
240
241
242 @Deprecated
243 public String parseToken(final CharSequence buf, final Cursor cursor, final BitSet bitSet) {
244 return parseToken(buf, cursor, bitSet != null ? bitSet::get : null);
245 }
246
247
248
249
250
251
252
253
254
255
256
257 public String parseValue(final CharSequence buf, final Cursor cursor, final Delimiter delimiterPredicate) {
258 Args.notNull(buf, "Char sequence");
259 Args.notNull(cursor, "Parser cursor");
260 final StringBuilder dst = new StringBuilder();
261 boolean whitespace = false;
262 while (!cursor.atEnd()) {
263 final char current = buf.charAt(cursor.getPos());
264 if (delimiterPredicate != null && delimiterPredicate.test(current)) {
265 break;
266 } else if (isWhitespace(current)) {
267 skipWhiteSpace(buf, cursor);
268 whitespace = true;
269 } else if (current == DQUOTE) {
270 if (whitespace && dst.length() > 0) {
271 dst.append(' ');
272 }
273 copyQuotedContent(buf, cursor, dst);
274 whitespace = false;
275 } else {
276 if (whitespace && dst.length() > 0) {
277 dst.append(' ');
278 }
279 copyUnquotedContent(buf, cursor, delimiterPredicate, dst);
280 whitespace = false;
281 }
282 }
283 return dst.toString();
284 }
285
286
287
288
289 @Deprecated
290 public String parseValue(final CharSequence buf, final Cursor cursor, final BitSet bitSet) {
291 return parseValue(buf, cursor, bitSet != null ? bitSet::get : null);
292 }
293
294
295
296
297
298
299
300
301 public void skipWhiteSpace(final CharSequence buf, final Cursor cursor) {
302 Args.notNull(buf, "Char sequence");
303 Args.notNull(cursor, "Parser cursor");
304 int pos = cursor.getPos();
305 final int indexFrom = cursor.getPos();
306 final int indexTo = cursor.getUpperBound();
307 for (int i = indexFrom; i < indexTo; i++) {
308 final char current = buf.charAt(i);
309 if (!isWhitespace(current)) {
310 break;
311 }
312 pos++;
313 }
314 cursor.updatePos(pos);
315 }
316
317
318
319
320
321
322
323
324
325
326
327 public void copyContent(final CharSequence buf, final Cursor cursor, final Delimiter delimiterPredicate,
328 final StringBuilder dst) {
329 Args.notNull(buf, "Char sequence");
330 Args.notNull(cursor, "Parser cursor");
331 Args.notNull(dst, "String builder");
332 int pos = cursor.getPos();
333 final int indexFrom = cursor.getPos();
334 final int indexTo = cursor.getUpperBound();
335 for (int i = indexFrom; i < indexTo; i++) {
336 final char current = buf.charAt(i);
337 if ((delimiterPredicate != null && delimiterPredicate.test(current)) || isWhitespace(current)) {
338 break;
339 }
340 pos++;
341 dst.append(current);
342 }
343 cursor.updatePos(pos);
344 }
345
346
347
348
349 @Deprecated
350 public void copyContent(final CharSequence buf, final Cursor cursor, final BitSet bitSet,
351 final StringBuilder dst) {
352 copyContent(buf, cursor, bitSet != null ? bitSet::get : null, dst);
353 }
354
355
356
357
358
359
360
361
362
363
364
365 public void copyUnquotedContent(final CharSequence buf, final Cursor cursor,
366 final Delimiter delimiterPredicate, final StringBuilder dst) {
367 Args.notNull(buf, "Char sequence");
368 Args.notNull(cursor, "Parser cursor");
369 Args.notNull(dst, "String builder");
370 int pos = cursor.getPos();
371 final int indexFrom = cursor.getPos();
372 final int indexTo = cursor.getUpperBound();
373 for (int i = indexFrom; i < indexTo; i++) {
374 final char current = buf.charAt(i);
375 if ((delimiterPredicate != null && delimiterPredicate.test(current))
376 || isWhitespace(current) || current == DQUOTE) {
377 break;
378 }
379 pos++;
380 dst.append(current);
381 }
382 cursor.updatePos(pos);
383 }
384
385
386
387
388 @Deprecated
389 public void copyUnquotedContent(final CharSequence buf, final Cursor cursor,
390 final BitSet bitSet, final StringBuilder dst) {
391 copyUnquotedContent(buf, cursor, bitSet != null ? bitSet::get : null, dst);
392 }
393
394
395
396
397
398
399
400
401 public void copyQuotedContent(final CharSequence buf, final Cursor cursor,
402 final StringBuilder dst) {
403 Args.notNull(buf, "Char sequence");
404 Args.notNull(cursor, "Parser cursor");
405 Args.notNull(dst, "String builder");
406 if (cursor.atEnd()) {
407 return;
408 }
409 int pos = cursor.getPos();
410 int indexFrom = cursor.getPos();
411 final int indexTo = cursor.getUpperBound();
412 char current = buf.charAt(pos);
413 if (current != DQUOTE) {
414 return;
415 }
416 pos++;
417 indexFrom++;
418 boolean escaped = false;
419 for (int i = indexFrom; i < indexTo; i++, pos++) {
420 current = buf.charAt(i);
421 if (escaped) {
422 if (current != DQUOTE && current != ESCAPE) {
423 dst.append(ESCAPE);
424 }
425 dst.append(current);
426 escaped = false;
427 } else {
428 if (current == DQUOTE) {
429 pos++;
430 break;
431 }
432 if (current == ESCAPE) {
433 escaped = true;
434 } else if (current != CR && current != LF) {
435 dst.append(current);
436 }
437 }
438 }
439 cursor.updatePos(pos);
440 }
441
442 }