View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.hc.core5.net;
29  
30  import java.nio.ByteBuffer;
31  import java.nio.CharBuffer;
32  import java.nio.charset.Charset;
33  import java.nio.charset.StandardCharsets;
34  import java.util.BitSet;
35  
/**
 * Percent-encoding.
 * <p>
 * Offers static helpers that percent-encode and percent-decode character
 * sequences using a caller supplied {@link Charset}, and two preconfigured
 * instances, {@link #RFC3986} and {@link #RFC5987}, that always use UTF-8 and
 * differ only in the set of characters they leave unescaped.
 * </p>
 *
 * @since 5.1
 */
public class PercentCodec {

    /** The RFC 3986 {@code gen-delims} characters: {@code : / ? # [ ] @}. */
    static final BitSet GEN_DELIMS = new BitSet(256);
    /** The RFC 3986 {@code sub-delims} characters: {@code ! $ & ' ( ) * + , ; =}. */
    static final BitSet SUB_DELIMS = new BitSet(256);
    /** The RFC 3986 {@code unreserved} characters: ASCII letters, digits and {@code - . _ ~}. */
    static final BitSet UNRESERVED = new BitSet(256);
    /** Union of {@link #SUB_DELIMS} and {@link #UNRESERVED}. */
    static final BitSet URIC = new BitSet(256);

    static {
        GEN_DELIMS.set(':');
        GEN_DELIMS.set('/');
        GEN_DELIMS.set('?');
        GEN_DELIMS.set('#');
        GEN_DELIMS.set('[');
        GEN_DELIMS.set(']');
        GEN_DELIMS.set('@');

        SUB_DELIMS.set('!');
        SUB_DELIMS.set('$');
        SUB_DELIMS.set('&');
        SUB_DELIMS.set('\'');
        SUB_DELIMS.set('(');
        SUB_DELIMS.set(')');
        SUB_DELIMS.set('*');
        SUB_DELIMS.set('+');
        SUB_DELIMS.set(',');
        SUB_DELIMS.set(';');
        SUB_DELIMS.set('=');

        // alphabetic characters
        for (int i = 'a'; i <= 'z'; i++) {
            UNRESERVED.set(i);
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            UNRESERVED.set(i);
        }
        // numeric characters
        for (int i = '0'; i <= '9'; i++) {
            UNRESERVED.set(i);
        }
        UNRESERVED.set('-');
        UNRESERVED.set('.');
        UNRESERVED.set('_');
        UNRESERVED.set('~');
        URIC.or(SUB_DELIMS);
        URIC.or(UNRESERVED);
    }

    /** Characters left unescaped by {@link #RFC5987}: the RFC 5987 {@code attr-char} set. */
    static final BitSet RFC5987_UNRESERVED = new BitSet(256);

    static {
        // Alphanumeric characters
        for (int i = 'a'; i <= 'z'; i++) {
            RFC5987_UNRESERVED.set(i);
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            RFC5987_UNRESERVED.set(i);
        }
        for (int i = '0'; i <= '9'; i++) {
            RFC5987_UNRESERVED.set(i);
        }

        // Additional characters as per RFC 5987 attr-char
        RFC5987_UNRESERVED.set('!');
        RFC5987_UNRESERVED.set('#');
        RFC5987_UNRESERVED.set('$');
        RFC5987_UNRESERVED.set('&');
        RFC5987_UNRESERVED.set('+');
        RFC5987_UNRESERVED.set('-');
        RFC5987_UNRESERVED.set('.');
        RFC5987_UNRESERVED.set('^');
        RFC5987_UNRESERVED.set('_');
        RFC5987_UNRESERVED.set('`');
        RFC5987_UNRESERVED.set('|');
        RFC5987_UNRESERVED.set('~');
    }

    private static final int RADIX = 16;

    /** Highest code unit that is copied to the decode buffer as a single byte. */
    private static final int ASCII_MAX = 0x7F;

    /**
     * Percent-encodes {@code content} and appends the result to {@code buf}.
     * <p>
     * The content is first converted to bytes with {@code charset}; every byte
     * whose value is set in {@code safechars} is appended as a character, every
     * other byte is appended as {@code %} followed by two upper-case hex digits.
     * </p>
     *
     * @param buf the buffer to append to.
     * @param content the text to encode; nothing is appended when {@code null}.
     * @param charset the charset used to obtain bytes; UTF-8 when {@code null}.
     * @param safechars byte values that are emitted without escaping.
     * @param blankAsPlus when {@code true} a blank that is not in
     *                    {@code safechars} is written as {@code +} instead of
     *                    {@code %20}.
     */
    static void encode(final StringBuilder buf, final CharSequence content, final Charset charset,
                       final BitSet safechars, final boolean blankAsPlus) {
        if (content == null) {
            return;
        }
        final CharBuffer cb = CharBuffer.wrap(content);
        final ByteBuffer bb = (charset != null ? charset : StandardCharsets.UTF_8).encode(cb);
        while (bb.hasRemaining()) {
            final int b = bb.get() & 0xff;
            if (safechars.get(b)) {
                buf.append((char) b);
            } else if (blankAsPlus && b == ' ') {
                buf.append('+');
            } else {
                buf.append('%');
                buf.append(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)));
                buf.append(Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)));
            }
        }
    }

    /**
     * Percent-encodes {@code content} into {@code buf}, leaving only the
     * {@link #UNRESERVED} characters unescaped.
     *
     * @param buf the buffer to append to.
     * @param content the text to encode; nothing is appended when {@code null}.
     * @param charset the charset used to obtain bytes; UTF-8 when {@code null}.
     * @param blankAsPlus when {@code true} blanks are written as {@code +}.
     */
    static void encode(final StringBuilder buf, final CharSequence content, final Charset charset, final boolean blankAsPlus) {
        encode(buf, content, charset, UNRESERVED, blankAsPlus);
    }

    /**
     * Percent-encodes {@code content} into {@code buf}, leaving only the
     * {@link #UNRESERVED} characters unescaped. Blanks are written as
     * {@code %20}.
     *
     * @param buf the buffer to append to.
     * @param content the text to encode; nothing is appended when {@code null}.
     * @param charset the charset used to obtain bytes; UTF-8 when {@code null}.
     */
    public static void encode(final StringBuilder buf, final CharSequence content, final Charset charset) {
        encode(buf, content, charset, UNRESERVED, false);
    }

    /**
     * Percent-encodes {@code content}, leaving only the {@link #UNRESERVED}
     * characters unescaped. Blanks are written as {@code %20}.
     *
     * @param content the text to encode.
     * @param charset the charset used to obtain bytes; UTF-8 when {@code null}.
     * @return the encoded text, or {@code null} when {@code content} is {@code null}.
     */
    public static String encode(final CharSequence content, final Charset charset) {
        if (content == null) {
            return null;
        }
        final StringBuilder buf = new StringBuilder();
        encode(buf, content, charset, UNRESERVED, false);
        return buf.toString();
    }

    /**
     * Decodes percent-encoded text.
     * <p>
     * Every {@code %XX} triplet with two ASCII hex digits contributes the byte
     * {@code 0xXX}. A {@code %} that is not followed by two ASCII hex digits is
     * copied through unchanged together with the two characters that follow it
     * (a trailing {@code %} or {@code %X} is copied as is). ASCII characters
     * contribute one byte each; characters outside ASCII are converted to bytes
     * with {@code charset} rather than being truncated to their low eight bits.
     * The collected bytes are finally decoded with {@code charset}.
     * </p>
     *
     * @param content the text to decode.
     * @param charset the charset of the encoded bytes; UTF-8 when {@code null}.
     * @param plusAsBlank when {@code true} a {@code +} outside an escape
     *                    sequence is decoded as a blank.
     * @return the decoded text, or {@code null} when {@code content} is {@code null}.
     */
    static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) {
        if (content == null) {
            return null;
        }
        final Charset cs = charset != null ? charset : StandardCharsets.UTF_8;
        final int len = content.length();
        // One byte per input char is enough unless non-ASCII literals are
        // present; putLiteral grows the buffer in that case.
        ByteBuffer bb = ByteBuffer.allocate(len);
        int i = 0;
        while (i < len) {
            final char c = content.charAt(i);
            if (c == '%' && len - i > 2) {
                final int u = hexValue(content.charAt(i + 1));
                final int l = hexValue(content.charAt(i + 2));
                if (u != -1 && l != -1) {
                    bb.put((byte) ((u << 4) + l));
                    i += 3;
                } else {
                    // Malformed escape: keep all three characters verbatim. Do not
                    // cut a surrogate pair in half when the third char starts one.
                    int end = i + 3;
                    if (end < len && Character.isHighSurrogate(content.charAt(end - 1))
                            && Character.isLowSurrogate(content.charAt(end))) {
                        end++;
                    }
                    bb = putLiteral(bb, content, i, end, cs);
                    i = end;
                }
            } else if (plusAsBlank && c == '+') {
                bb.put((byte) ' ');
                i++;
            } else {
                int end = i + 1;
                if (c > ASCII_MAX) {
                    // Take the whole non-ASCII run so that surrogate pairs are
                    // encoded together.
                    while (end < len && content.charAt(end) > ASCII_MAX) {
                        end++;
                    }
                }
                bb = putLiteral(bb, content, i, end, cs);
                i = end;
            }
        }
        bb.flip();
        return cs.decode(bb).toString();
    }

    /**
     * Returns the value of an ASCII hex digit, or {@code -1} for any other
     * character (including non-ASCII Unicode digits).
     */
    private static int hexValue(final char c) {
        return c <= ASCII_MAX ? Character.digit(c, RADIX) : -1;
    }

    /**
     * Copies {@code content[start, end)} into {@code bb} without interpreting
     * escapes: ASCII characters as single bytes, runs of non-ASCII characters
     * as their encoding in {@code charset}.
     *
     * @return the buffer holding the output, which is a new, larger buffer when
     *         {@code bb} had to grow.
     */
    private static ByteBuffer putLiteral(final ByteBuffer bb, final CharSequence content,
                                         final int start, final int end, final Charset charset) {
        ByteBuffer out = bb;
        int i = start;
        while (i < end) {
            final char c = content.charAt(i);
            if (c <= ASCII_MAX) {
                out.put((byte) c);
                i++;
            } else {
                int j = i + 1;
                while (j < end && content.charAt(j) > ASCII_MAX) {
                    j++;
                }
                final ByteBuffer encoded = charset.encode(CharBuffer.wrap(content, i, j));
                // Keep one byte in reserve for every character still to be processed.
                out = ensureRemaining(out, encoded.remaining() + (content.length() - j));
                out.put(encoded);
                i = j;
            }
        }
        return out;
    }

    /**
     * Returns a buffer with at least {@code needed} bytes remaining, copying
     * the bytes written so far into a larger buffer when necessary.
     */
    private static ByteBuffer ensureRemaining(final ByteBuffer bb, final int needed) {
        if (bb.remaining() >= needed) {
            return bb;
        }
        final ByteBuffer grown = ByteBuffer.allocate(Math.max(bb.capacity() * 2, bb.position() + needed));
        bb.flip();
        grown.put(bb);
        return grown;
    }

    /**
     * Decodes percent-encoded text; {@code +} is left unchanged.
     *
     * @param content the text to decode.
     * @param charset the charset of the encoded bytes; UTF-8 when {@code null}.
     * @return the decoded text, or {@code null} when {@code content} is {@code null}.
     */
    public static String decode(final CharSequence content, final Charset charset) {
        return decode(content, charset, false);
    }

    /** Codec that leaves the {@link #UNRESERVED} characters unescaped. */
    public static final PercentCodec RFC3986 = new PercentCodec(UNRESERVED);
    /** Codec that leaves the {@link #RFC5987_UNRESERVED} characters unescaped. */
    public static final PercentCodec RFC5987 = new PercentCodec(RFC5987_UNRESERVED);

    /** Byte values this instance emits without escaping. */
    private final BitSet unreserved;

    private PercentCodec(final BitSet unreserved) {
        this.unreserved = unreserved;
    }

    /**
     * Creates a codec that leaves the {@link #UNRESERVED} characters unescaped.
     */
    public PercentCodec() {
        this.unreserved = UNRESERVED;
    }

    /**
     * Percent-encodes {@code content} as UTF-8 and appends the result to
     * {@code buf}, leaving this codec's unreserved characters unescaped.
     *
     * @param buf the buffer to append to.
     * @param content the text to encode; nothing is appended when {@code null}.
     * @since 5.3
     */
    public void encode(final StringBuilder buf, final CharSequence content) {
        encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
    }

    /**
     * Percent-encodes {@code content} as UTF-8, leaving this codec's
     * unreserved characters unescaped.
     *
     * @param content the text to encode.
     * @return the encoded text, or {@code null} when {@code content} is {@code null}.
     * @since 5.3
     */
    public String encode(final CharSequence content) {
        if (content == null) {
            return null;
        }
        final StringBuilder buf = new StringBuilder();
        encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
        return buf.toString();
    }

    /**
     * Decodes percent-encoded UTF-8 text; {@code +} is left unchanged.
     *
     * @param content the text to decode.
     * @return the decoded text, or {@code null} when {@code content} is {@code null}.
     * @since 5.3
     */
    public String decode(final CharSequence content) {
        return decode(content, StandardCharsets.UTF_8, false);
    }

}