View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.hc.client5.http.utils;
28  
29  import java.net.URI;
30  import java.net.URISyntaxException;
31  import java.util.ArrayList;
32  import java.util.Iterator;
33  import java.util.List;
34  import java.util.Locale;
35  import java.util.Stack;
36  
37  import org.apache.hc.core5.http.HttpHost;
38  import org.apache.hc.core5.net.URIAuthority;
39  import org.apache.hc.core5.net.URIBuilder;
40  import org.apache.hc.core5.util.Args;
41  import org.apache.hc.core5.util.TextUtils;
42  
43  /**
 * A collection of utilities for {@link URI URIs}, to work around
 * bugs within the class or for ease-of-use features.
46   *
47   * @since 4.0
48   */
49  public class URIUtils {
50  
51      /**
52       * A convenience method for creating a new {@link URI} whose scheme, host
53       * and port are taken from the target host, but whose path, query and
54       * fragment are taken from the existing URI. The fragment is only used if
55       * dropFragment is false. The path is set to "/" if not explicitly specified.
56       *
57       * @param uri
58       *            Contains the path, query and fragment to use.
59       * @param target
60       *            Contains the scheme, host and port to use.
61       * @param dropFragment
62       *            True if the fragment should not be copied.
63       *
64       * @throws URISyntaxException
65       *             If the resulting URI is invalid.
66       */
67      public static URI rewriteURI(
68              final URI uri,
69              final HttpHost target,
70              final boolean dropFragment) throws URISyntaxException {
71          Args.notNull(uri, "URI");
72          if (uri.isOpaque()) {
73              return uri;
74          }
75          final URIBuilder uribuilder = new URIBuilder(uri);
76          if (target != null) {
77              uribuilder.setScheme(target.getSchemeName());
78              uribuilder.setHost(target.getHostName());
79              uribuilder.setPort(target.getPort());
80          } else {
81              uribuilder.setScheme(null);
82              uribuilder.setHost((String) null);
83              uribuilder.setPort(-1);
84          }
85          if (dropFragment) {
86              uribuilder.setFragment(null);
87          }
88          final List<String> originalPathSegments = uribuilder.getPathSegments();
89          final List<String> pathSegments = new ArrayList<>(originalPathSegments);
90          for (final Iterator<String> it = pathSegments.iterator(); it.hasNext(); ) {
91              final String pathSegment = it.next();
92              if (pathSegment.isEmpty() && it.hasNext()) {
93                  it.remove();
94              }
95          }
96          if (pathSegments.size() != originalPathSegments.size()) {
97              uribuilder.setPathSegments(pathSegments);
98          }
99          if (pathSegments.isEmpty()) {
100             uribuilder.setPathSegments("");
101         }
102         return uribuilder.build();
103     }
104 
105     /**
106      * A convenience method for
107      * {@link URIUtils#rewriteURI(URI, HttpHost, boolean)} that always keeps the
108      * fragment.
109      */
110     public static URI rewriteURI(
111             final URI uri,
112             final HttpHost target) throws URISyntaxException {
113         return rewriteURI(uri, target, false);
114     }
115 
116     /**
117      * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
118      * query are taken from the existing URI, dropping any fragment or user-information.
119      * The path is set to "/" if not explicitly specified. The existing URI is returned
120      * unmodified if it has no fragment or user-information and has a path.
121      *
122      * @param uri
123      *            original URI.
124      * @throws URISyntaxException
125      *             If the resulting URI is invalid.
126      */
127     public static URI rewriteURI(final URI uri) throws URISyntaxException {
128         Args.notNull(uri, "URI");
129         if (uri.isOpaque()) {
130             return uri;
131         }
132         final URIBuilder uribuilder = new URIBuilder(uri);
133         if (uribuilder.getUserInfo() != null) {
134             uribuilder.setUserInfo(null);
135         }
136         if (TextUtils.isEmpty(uribuilder.getPath())) {
137             uribuilder.setPath("/");
138         }
139         if (uribuilder.getHost() != null) {
140             uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
141         }
142         uribuilder.setFragment(null);
143         return uribuilder.build();
144     }
145 
146     /**
147      * Resolves a URI reference against a base URI. Work-around for bug in
148      * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
149      *
150      * @param baseURI the base URI
151      * @param reference the URI reference
152      * @return the resulting URI
153      */
154     public static URI resolve(final URI baseURI, final String reference) {
155         return resolve(baseURI, URI.create(reference));
156     }
157 
158     /**
159      * Resolves a URI reference against a base URI. Work-around for bugs in
160      * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
161      *
162      * @param baseURI the base URI
163      * @param reference the URI reference
164      * @return the resulting URI
165      */
166     public static URI resolve(final URI baseURI, final URI reference) {
167         Args.notNull(baseURI, "Base URI");
168         Args.notNull(reference, "Reference URI");
169         final String s = reference.toASCIIString();
170         if (s.startsWith("?")) {
171             String baseUri = baseURI.toASCIIString();
172             final int i = baseUri.indexOf('?');
173             baseUri = i > -1 ? baseUri.substring(0, i) : baseUri;
174             return URI.create(baseUri + s);
175         }
176         final boolean emptyReference = s.isEmpty();
177         URI resolved;
178         if (emptyReference) {
179             resolved = baseURI.resolve(URI.create("#"));
180             final String resolvedString = resolved.toASCIIString();
181             resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#')));
182         } else {
183             resolved = baseURI.resolve(reference);
184         }
185         try {
186             return normalizeSyntax(resolved);
187         } catch (final URISyntaxException ex) {
188             throw new IllegalArgumentException(ex);
189         }
190     }
191 
192     /**
193      * Removes dot segments according to RFC 3986, section 5.2.4 and
194      * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
195      *
196      * @param uri the original URI
197      * @return the URI without dot segments
198      */
199     static URI normalizeSyntax(final URI uri) throws URISyntaxException {
200         if (uri.isOpaque() || uri.getAuthority() == null) {
201             // opaque and file: URIs
202             return uri;
203         }
204         Args.check(uri.isAbsolute(), "Base URI must be absolute");
205         final URIBuilder builder = new URIBuilder(uri);
206         final String path = builder.getPath();
207         if (path != null && !path.equals("/")) {
208             final String[] inputSegments = path.split("/");
209             final Stack<String> outputSegments = new Stack<>();
210             for (final String inputSegment : inputSegments) {
211                 if ((inputSegment.isEmpty()) || (".".equals(inputSegment))) {
212                     // Do nothing
213                 } else if ("..".equals(inputSegment)) {
214                     if (!outputSegments.isEmpty()) {
215                         outputSegments.pop();
216                     }
217                 } else {
218                     outputSegments.push(inputSegment);
219                 }
220             }
221             final StringBuilder outputBuffer = new StringBuilder();
222             for (final String outputSegment : outputSegments) {
223                 outputBuffer.append('/').append(outputSegment);
224             }
225             if (path.lastIndexOf('/') == path.length() - 1) {
226                 // path.endsWith("/") || path.equals("")
227                 outputBuffer.append('/');
228             }
229             builder.setPath(outputBuffer.toString());
230         }
231         if (builder.getScheme() != null) {
232             builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT));
233         }
234         if (builder.getHost() != null) {
235             builder.setHost(builder.getHost().toLowerCase(Locale.ROOT));
236         }
237         return builder.build();
238     }
239 
240     /**
241      * Extracts target host from the given {@link URI}.
242      *
243      * @param uri
244      * @return the target host if the URI is absolute or {@code null} if the URI is
245      * relative or does not contain a valid host name.
246      *
247      * @since 4.1
248      */
249     public static HttpHost extractHost(final URI uri) {
250         if (uri == null) {
251             return null;
252         }
253         if (uri.isAbsolute()) {
254             if (uri.getHost() == null) { // normal parse failed; let's do it ourselves
255                 // authority does not seem to care about the valid character-set for host names
256                 if (uri.getAuthority() != null) {
257                     String content = uri.getAuthority();
258                     // Strip off any leading user credentials
259                     int at = content.indexOf('@');
260                     if (at != -1) {
261                         content = content.substring(at + 1);
262                     }
263                     final String scheme = uri.getScheme();
264                     final String hostname;
265                     final int port;
266                     at = content.indexOf(":");
267                     if (at != -1) {
268                         hostname = content.substring(0, at);
269                         try {
270                             final String portText = content.substring(at + 1);
271                             port = !TextUtils.isEmpty(portText) ? Integer.parseInt(portText) : -1;
272                         } catch (final NumberFormatException ex) {
273                             return null;
274                         }
275                     } else {
276                         hostname = content;
277                         port = -1;
278                     }
279                     try {
280                         return new HttpHost(scheme, hostname, port);
281                     } catch (final IllegalArgumentException ex) {
282                         return null;
283                     }
284                 }
285             } else {
286                 return new HttpHost(uri.getScheme(), uri.getHost(), uri.getPort());
287             }
288         }
289         return null;
290     }
291 
292     /**
293      * Derives the interpreted (absolute) URI that was used to generate the last
294      * request. This is done by extracting the request-uri and target origin for
295      * the last request and scanning all the redirect locations for the last
296      * fragment identifier, then combining the result into a {@link URI}.
297      *
298      * @param originalURI
299      *            original request before any redirects
300      * @param target
301      *            if the last URI is relative, it is resolved against this target,
302      *            or {@code null} if not available.
303      * @param redirects
304      *            collection of redirect locations since the original request
305      *            or {@code null} if not available.
306      * @return interpreted (absolute) URI
307      */
308     public static URI resolve(
309             final URI originalURI,
310             final HttpHost target,
311             final List<URI> redirects) throws URISyntaxException {
312         Args.notNull(originalURI, "Request URI");
313         final URIBuilder uribuilder;
314         if (redirects == null || redirects.isEmpty()) {
315             uribuilder = new URIBuilder(originalURI);
316         } else {
317             uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
318             String frag = uribuilder.getFragment();
319             // read interpreted fragment identifier from redirect locations
320             for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
321                 frag = redirects.get(i).getFragment();
322             }
323             uribuilder.setFragment(frag);
324         }
325         // read interpreted fragment identifier from original request
326         if (uribuilder.getFragment() == null) {
327             uribuilder.setFragment(originalURI.getFragment());
328         }
329         // last target origin
330         if (target != null && !uribuilder.isAbsolute()) {
331             uribuilder.setScheme(target.getSchemeName());
332             uribuilder.setHost(target.getHostName());
333             uribuilder.setPort(target.getPort());
334         }
335         return uribuilder.build();
336     }
337 
338     /**
339      * Convenience factory method for {@link URI} instances.
340      *
341      * @since 5.0
342      */
343     public static URI create(final HttpHost host, final String path) throws URISyntaxException {
344         final URIBuilder builder = new URIBuilder(path);
345         if (host != null) {
346             builder.setHost(host.getHostName()).setPort(host.getPort()).setScheme(host.getSchemeName());
347         }
348         return builder.build();
349     }
350 
351     /**
352      * Convenience factory method for {@link URI} instances.
353      *
354      * @since 5.0
355      */
356     public static URI create(final String scheme, final URIAuthority host, final String path) throws URISyntaxException {
357         final URIBuilder builder = new URIBuilder(path);
358         if (scheme != null) {
359             builder.setScheme(scheme);
360         }
361         if (host != null) {
362             builder.setHost(host.getHostName()).setPort(host.getPort());
363         }
364         return builder.build();
365     }
366 
    /**
     * Private constructor: this class only exposes static utility methods
     * and should not be instantiated.
     */
    private URIUtils() {
    }
372 
373 }