View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.io.IOException;
21  import java.io.OutputStream;
22  import java.io.OutputStreamWriter;
23  import java.io.StringWriter;
24  import java.io.Writer;
25  import java.nio.file.Files;
26  import java.util.Locale;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  /**
31   * Character stream that handles (or at least attempts to) all the necessary Voodo to figure out the charset encoding of
32   * the XML document written to the stream.
33   * 
34   * @author <a href="mailto:hboutemy@codehaus.org">Herve Boutemy</a>
35   *
36   * @since 1.4.4
37   */
38  public class XmlStreamWriter
39      extends Writer
40  {
41      private static final int BUFFER_SIZE = 4096;
42  
43      private StringWriter xmlPrologWriter = new StringWriter( BUFFER_SIZE );
44  
45      private OutputStream out;
46  
47      private Writer writer;
48  
49      private String encoding;
50  
51      public XmlStreamWriter( OutputStream out )
52      {
53          this.out = out;
54      }
55  
56      public XmlStreamWriter( File file )
57          throws IOException
58      {
59          this( Files.newOutputStream( file.toPath() ) );
60      }
61  
62      public String getEncoding()
63      {
64          return encoding;
65      }
66  
67      @Override
68      public void close()
69          throws IOException
70      {
71          if ( writer == null )
72          {
73              encoding = "UTF-8";
74              writer = new OutputStreamWriter( out, encoding );
75              writer.write( xmlPrologWriter.toString() );
76          }
77          writer.close();
78      }
79  
80      @Override
81      public void flush()
82          throws IOException
83      {
84          if ( writer != null )
85          {
86              writer.flush();
87          }
88      }
89  
90      private void detectEncoding( char[] cbuf, int off, int len )
91          throws IOException
92      {
93          int size = len;
94          StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
95          if ( xmlProlog.length() + len > BUFFER_SIZE )
96          {
97              size = BUFFER_SIZE - xmlProlog.length();
98          }
99          xmlPrologWriter.write( cbuf, off, size );
100 
101         // try to determine encoding
102         if ( xmlProlog.length() >= 5 )
103         {
104             if ( xmlProlog.substring( 0, 5 ).equals( "<?xml" ) )
105             {
106                 // try to extract encoding from XML prolog
107                 int xmlPrologEnd = xmlProlog.indexOf( "?>" );
108                 if ( xmlPrologEnd > 0 )
109                 {
110                     // ok, full XML prolog written: let's extract encoding
111                     Matcher m = ENCODING_PATTERN.matcher( xmlProlog.substring( 0, xmlPrologEnd ) );
112                     if ( m.find() )
113                     {
114                         encoding = m.group( 1 ).toUpperCase( Locale.ENGLISH );
115                         encoding = encoding.substring( 1, encoding.length() - 1 );
116                     }
117                     else
118                     {
119                         // no encoding found in XML prolog: using default encoding
120                         encoding = "UTF-8";
121                     }
122                 }
123                 else
124                 {
125                     if ( xmlProlog.length() >= BUFFER_SIZE )
126                     {
127                         // no encoding found in first characters: using default encoding
128                         encoding = "UTF-8";
129                     }
130                 }
131             }
132             else
133             {
134                 // no XML prolog: using default encoding
135                 encoding = "UTF-8";
136             }
137             if ( encoding != null )
138             {
139                 // encoding has been chosen: let's do it
140                 xmlPrologWriter = null;
141                 writer = new OutputStreamWriter( out, encoding );
142                 writer.write( xmlProlog.toString() );
143                 if ( len > size )
144                 {
145                     writer.write( cbuf, off + size, len - size );
146                 }
147             }
148         }
149     }
150 
151     @Override
152     public void write( char[] cbuf, int off, int len )
153         throws IOException
154     {
155         if ( xmlPrologWriter != null )
156         {
157             detectEncoding( cbuf, off, len );
158         }
159         else
160         {
161             writer.write( cbuf, off, len );
162         }
163     }
164 
165     static final Pattern ENCODING_PATTERN = XmlReader.ENCODING_PATTERN;
166 }