View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.OutputStream;
23  import java.io.OutputStreamWriter;
24  import java.io.Reader;
25  import java.io.Writer;
26  
27  import org.codehaus.plexus.util.ReaderFactory;
28  import org.codehaus.plexus.util.StringUtils;
29  import org.codehaus.plexus.util.WriterFactory;
30  import org.codehaus.plexus.util.xml.pull.MXParser;
31  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
32  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
33  
34  /**
35   * Common XML utilities methods.
36   *
37   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
38   *
39   * @since 1.5.7
40   */
41  public class XmlUtil
42  {
43      /** The default line indenter size i.e. 2. */
44      public static final int DEFAULT_INDENTATION_SIZE = 2;
45  
46      /** The default line separator ("\n" on UNIX) */
47      public static final String DEFAULT_LINE_SEPARATOR = System.getProperty( "line.separator" );
48  
49      /**
50       * Determines if a given File shall be handled as XML.
51       *
52       * @param f not null file
53       * @return <code>true</code> if the given file has XML content, <code>false</code> otherwise.
54       */
55      public static boolean isXml( File f )
56      {
57          if ( f == null )
58          {
59              throw new IllegalArgumentException( "f could not be null." );
60          }
61  
62          if ( !f.isFile() )
63          {
64              throw new IllegalArgumentException( "The file '" + f.getAbsolutePath() + "' is not a file." );
65          }
66  
67          try ( Reader reader = ReaderFactory.newXmlReader( f ) )
68          {
69              XmlPullParser parser = new MXParser();
70              parser.setInput( reader );
71              parser.nextToken();
72              return true;
73          }
74          catch ( Exception e )
75          {
76              return false;
77          }
78      }
79  
80      /**
81       * Pretty format the input reader. For instance, the following input:
82       * 
83       * <pre>
84       * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
85       * </pre>
86       * 
87       * becomes
88       * 
89       * <pre>
90       * &lt;div&gt;
91       *   &lt;b&gt;content&lt;/b&gt;
92       * &lt;/div&gt;
93       * </pre>
94       *
95       * @param reader not null
96       * @param writer not null
97       * @throws IOException if any or invalid xml content
98       * @see #prettyFormat(Reader, Writer, int, String)
99       * @see ReaderFactory to read an xml content
100      * @see WriterFactory to write an xml content
101      */
102     public static void prettyFormat( Reader reader, Writer writer )
103         throws IOException
104     {
105         prettyFormat( reader, writer, DEFAULT_INDENTATION_SIZE, DEFAULT_LINE_SEPARATOR );
106     }
107 
108     /**
109      * Pretty format the input reader. For instance, the following input:
110      * 
111      * <pre>
112      * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
113      * </pre>
114      * 
115      * becomes
116      * 
117      * <pre>
118      * &lt;div&gt;
119      *   &lt;b&gt;content&lt;/b&gt;
120      * &lt;/div&gt;
121      * </pre>
122      *
123      * @param reader not null
124      * @param writer not null
125      * @param indentSize positive number for the indentation
126      * @param lineSeparator the wanted line separator
127      * @throws IOException if any or invalid xml content
128      * @see ReaderFactory to read an xml content
129      * @see WriterFactory to write an xml content
130      */
131     public static void prettyFormat( Reader reader, Writer writer, int indentSize, String lineSeparator )
132         throws IOException
133     {
134         if ( reader == null )
135         {
136             throw new IllegalArgumentException( "The reader is null" );
137         }
138         if ( writer == null )
139         {
140             throw new IllegalArgumentException( "The writer is null" );
141         }
142         if ( indentSize < 0 )
143         {
144             indentSize = 0;
145         }
146 
147         PrettyPrintXMLWriter xmlWriter = new PrettyPrintXMLWriter( writer );
148         xmlWriter.setLineIndenter( StringUtils.repeat( " ", indentSize ) );
149         xmlWriter.setLineSeparator( lineSeparator );
150 
151         XmlPullParser parser = new MXParser();
152         try
153         {
154             parser.setInput( reader );
155 
156             prettyFormatInternal( parser, xmlWriter );
157         }
158         catch ( XmlPullParserException e )
159         {
160             throw new IOException( "Unable to parse the XML: " + e.getMessage() );
161         }
162     }
163 
164     /**
165      * Pretty format the input stream. For instance, the following input:
166      * 
167      * <pre>
168      * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
169      * </pre>
170      * 
171      * becomes
172      * 
173      * <pre>
174      * &lt;div&gt;
175      *   &lt;b&gt;content&lt;/b&gt;
176      * &lt;/div&gt;
177      * </pre>
178      *
179      * @param is not null
180      * @param os not null
181      * @throws IOException if any or invalid xml content
182      * @see #prettyFormat(InputStream, OutputStream, int, String)
183      */
184     public static void prettyFormat( InputStream is, OutputStream os )
185         throws IOException
186     {
187         prettyFormat( is, os, DEFAULT_INDENTATION_SIZE, DEFAULT_LINE_SEPARATOR );
188     }
189 
190     /**
191      * Pretty format the input stream. For instance, the following input:
192      * 
193      * <pre>
194      * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
195      * </pre>
196      * 
197      * becomes
198      * 
199      * <pre>
200      * &lt;div&gt;
201      *   &lt;b&gt;content&lt;/b&gt;
202      * &lt;/div&gt;
203      * </pre>
204      *
205      * @param is not null
206      * @param os not null
207      * @param indentSize positive number for the indentation
208      * @param lineSeparator the wanted line separator
209      * @throws IOException if any or invalid xml content
210      */
211     public static void prettyFormat( InputStream is, OutputStream os, int indentSize, String lineSeparator )
212         throws IOException
213     {
214         if ( is == null )
215         {
216             throw new IllegalArgumentException( "The is is null" );
217         }
218         if ( os == null )
219         {
220             throw new IllegalArgumentException( "The os is null" );
221         }
222         if ( indentSize < 0 )
223         {
224             indentSize = 0;
225         }
226 
227         try ( Reader reader = ReaderFactory.newXmlReader( is );
228               Writer writer = new OutputStreamWriter( os ) )
229         {
230             final PrettyPrintXMLWriter xmlWriter = new PrettyPrintXMLWriter( writer );
231             xmlWriter.setLineIndenter( StringUtils.repeat( " ", indentSize ) );
232             xmlWriter.setLineSeparator( lineSeparator );
233 
234             final XmlPullParser parser = new MXParser();
235             parser.setInput( reader );
236 
237             prettyFormatInternal( parser, xmlWriter );
238         }
239         catch ( XmlPullParserException e )
240         {
241             throw new IOException( "Unable to parse the XML: " + e.getMessage() );
242         }
243     }
244 
245     /**
246      * @param parser not null
247      * @param writer not null
248      * @throws XmlPullParserException if any
249      * @throws IOException if any
250      */
251     private static void prettyFormatInternal( XmlPullParser parser, PrettyPrintXMLWriter writer )
252         throws XmlPullParserException, IOException
253     {
254         boolean hasTag = false;
255         boolean hasComment = false;
256         int eventType = parser.getEventType();
257         while ( eventType != XmlPullParser.END_DOCUMENT )
258         {
259             if ( eventType == XmlPullParser.START_TAG )
260             {
261                 hasTag = true;
262                 if ( hasComment )
263                 {
264                     writer.writeText( writer.getLineIndenter() );
265                     hasComment = false;
266                 }
267                 writer.startElement( parser.getName() );
268                 for ( int i = 0; i < parser.getAttributeCount(); i++ )
269                 {
270                     String key = parser.getAttributeName( i );
271                     String value = parser.getAttributeValue( i );
272                     writer.addAttribute( key, value );
273                 }
274             }
275             else if ( eventType == XmlPullParser.TEXT )
276             {
277                 String text = parser.getText();
278                 if ( !text.trim().equals( "" ) )
279                 {
280                     text = StringUtils.removeDuplicateWhitespace( text );
281                     writer.writeText( text );
282                 }
283             }
284             else if ( eventType == XmlPullParser.END_TAG )
285             {
286                 hasTag = false;
287                 writer.endElement();
288             }
289             else if ( eventType == XmlPullParser.COMMENT )
290             {
291                 hasComment = true;
292                 if ( !hasTag )
293                 {
294                     writer.writeMarkup( writer.getLineSeparator() );
295                     for ( int i = 0; i < writer.getDepth(); i++ )
296                     {
297                         writer.writeMarkup( writer.getLineIndenter() );
298                     }
299                 }
300                 writer.writeMarkup( "<!--" + parser.getText().trim() + " -->" );
301                 if ( !hasTag )
302                 {
303                     writer.writeMarkup( writer.getLineSeparator() );
304                     for ( int i = 0; i < writer.getDepth() - 1; i++ )
305                     {
306                         writer.writeMarkup( writer.getLineIndenter() );
307                     }
308                 }
309             }
310             else if ( eventType == XmlPullParser.DOCDECL )
311             {
312                 writer.writeMarkup( "<!DOCTYPE" + parser.getText() + ">" );
313                 writer.endOfLine();
314             }
315             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
316             {
317                 writer.writeMarkup( "<?" + parser.getText() + "?>" );
318                 writer.endOfLine();
319             }
320             else if ( eventType == XmlPullParser.CDSECT )
321             {
322                 writer.writeMarkup( "<![CDATA[" + parser.getText() + "]]>" );
323             }
324             else if ( eventType == XmlPullParser.ENTITY_REF )
325             {
326                 writer.writeMarkup( "&" + parser.getName() + ";" );
327             }
328 
329             eventType = parser.nextToken();
330         }
331     }
332 }