2009/05/20 - Apache Shale has been retired.

For more information, please explore the Attic.

Coverage Report - org.apache.shale.clay.parser.AttributeTokenizer
 
Classes in this File Line Coverage Branch Coverage Complexity
AttributeTokenizer
100%
74/74
N/A
2
AttributeTokenizer$AttributeEntry
100%
8/8
N/A
2
AttributeTokenizer$TokenIterator
100%
10/10
N/A
2
AttributeTokenizer$TokenOffset
100%
10/10
N/A
2
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to you under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 /*
 19  
  * $Id: AttributeTokenizer.java 464373 2006-10-16 04:21:54Z rahul $
 20  
  */
 21  
 package org.apache.shale.clay.parser;
 22  
 
 23  
 import java.util.ArrayList;
 24  
 import java.util.Iterator;
 25  
 import java.util.Map;
 26  
 
 27  
 import org.apache.commons.logging.Log;
 28  
 import org.apache.commons.logging.LogFactory;
 29  
 import org.apache.shale.util.Messages;
 30  
 
 31  
 /**
 32  
  * <p>
 33  
  * Tokenizes a portion of the document for attributes. The complete document is
 34  
  * passed by reference and new {@link Token} offsets are created for the name
 35  
  * and value of the discovered attributes.
 36  
  * </p>
 37  
  */
 38  
 
 39  2338
 public class AttributeTokenizer {
 40  
 
 41  
     /**
 42  
      * <p>
 43  
      * Message resources for this class.
 44  
      * </p>
 45  
      */
 46  1
     private static Messages messages = new Messages(
 47  1
             "org.apache.shale.clay.Bundle", AttributeTokenizer.class
 48  
             .getClassLoader());
 49  
 
 50  
     /**
 51  
      * <p>
 52  
      * Common logging utility.
 53  
      * </p>
 54  
      */
 55  
     private static Log log;
 56  
     static {
 57  1
         log = LogFactory.getLog(AttributeTokenizer.class);
 58  1
     }
 59  
 
 60  
     /**
 61  
      * <p>
 62  
      * Internal document buffer.
 63  
      * </p>
 64  
      */
 65  484
     private StringBuffer buffer = null;
 66  
 
 67  
     /**
 68  
      * <p>
 69  
      * Beginning offset of the starting node.
 70  
      * </p>
 71  
      */
 72  484
     private int beginOffset = 0;
 73  
 
 74  
     /**
 75  
      * <p>
 76  
      * Ending offset of the starting node.
 77  
      * </p>
 78  
      */
 79  484
     private int endOffset = 0;
 80  
 
 81  
     /**
 82  
      * <p>Line number the target node is located in.</p>
 83  
      */
 84  484
     private int lineNumber = 0;
 85  
 
 86  
     /**
 87  
      * <p>Line begining document offset where the target node is located.</p>
 88  
      */
 89  484
     private int lineBeginOffset = 0;
 90  
 
 91  
 
 92  
     /**
 93  
      * <p>
 94  
      * Overloaded constructor that is passed the complete document and the
 95  
      * starting and ending offset of the node body within the document.
 96  
      * </p>
 97  
      *
 98  
      * @param buffer document
 99  
      * @param beginOffset start index of node body in the document
 100  
      * @param endOffset end index of node body in the document
 101  
      * @param lineNumber line number of the node within the document
 102  
      * @param lineBeginOffset index in the document that the line begins
 103  
      */
 104  
     public AttributeTokenizer(StringBuffer buffer, int beginOffset,
 105  484
             int endOffset, int lineNumber, int lineBeginOffset) {
 106  484
         this.buffer = buffer;
 107  484
         this.beginOffset = beginOffset;
 108  484
         this.endOffset = endOffset;
 109  484
         this.lineBeginOffset = lineBeginOffset;
 110  484
         this.lineNumber = lineNumber;
 111  
 
 112  484
     }
 113  
 
 114  
     /**
 115  
      * <p>
 116  
      * Inner class implementing the {@link Token} interface. This class will
 117  
      * define an attribute's key and value offsets
 118  
      * </p>
 119  
      */
 120  
     private class TokenOffset implements Token {
 121  
         /**
 122  
          * <p>Starting offset of the token.</p>
 123  
          */
 124  2136
         private int beginOffset = 0;
 125  
 
 126  
         /**
 127  
          * <p>Ending offset of the token.</p>
 128  
          */
 129  2136
         private int endOffset = 0;
 130  
 
 131  
         /**
 132  
          * @param beginOffset token start index
 133  
          * @param endOffset token end index
 134  
          */
 135  2136
         public TokenOffset(int beginOffset, int endOffset) {
 136  2136
             this.beginOffset = beginOffset;
 137  2136
             this.endOffset = endOffset;
 138  2136
         }
 139  
 
 140  
         /**
 141  
          * @return starting offset of the token in the document
 142  
          */
 143  
         public int getBeginOffset() {
 144  
             return beginOffset;
 145  
         }
 146  
 
 147  
         /**
 148  
          * @return ending offset of the token in the document
 149  
          */
 150  
         public int getEndOffset() {
 151  
             return endOffset;
 152  
         }
 153  
 
 154  
         /**
 155  
          * @return parsed document
 156  
          */
 157  
         public StringBuffer getDocument() {
 158  
             return buffer;
 159  
         }
 160  
 
 161  
         /**
 162  
          * @return token text between the beginOffset and endOffset
 163  
          */
 164  
         public String getRawText() {
 165  2338
             String pickel = null;
 166  
             try {
 167  2338
                 pickel = buffer.substring(beginOffset, endOffset);
 168  
             } catch (RuntimeException e) {
 169  
                 log.error(toString(), e);
 170  
                 throw e;
 171  2338
             }
 172  2338
             return pickel;
 173  
         }
 174  
 
 175  
         /**
 176  
          * @return line number the token is found on within the document
 177  
          */
 178  
         public int getLineNumber() {
 179  
            return lineNumber;
 180  
         }
 181  
 
 182  
         /**
 183  
          * @return offset within the document that the token line is found
 184  
          */
 185  
         public int getLineBeginOffset() {
 186  
            return lineBeginOffset;
 187  
         }
 188  
 
 189  
         /**
 190  
          * @return description of the token
 191  
          */
 192  
         public String toString() {
 193  
             return messages.getMessage("node.token.range",
 194  
                     new Object[] {
 195  
                 new Integer(beginOffset),
 196  
                         new Integer(endOffset),
 197  
                         new Integer(lineNumber),
 198  
                         new Integer(lineBeginOffset)});
 199  
         }
 200  
 
 201  
     }
 202  
 
 203  
     /**
 204  
      * <p>
 205  
      * This inner class implements the <code>Map.Entry</code> interfaces. It
 206  
      * holds a reference to the key and value parts of an attribute. Both the
 207  
      * key and value attributes are {@link Token} instances.
 208  
      * </p>
 209  
      */
 210  
     private class AttributeEntry implements Map.Entry {
 211  
         /**
 212  
          * <p>Token offset of the attribute key.</p>
 213  
          */
 214  1104
         private TokenOffset key = null;
 215  
 
 216  
         /**
 217  
          * <p>Token offset of the attribute value.</p>
 218  
          */
 219  1104
         private TokenOffset value = null;
 220  
 
 221  
         /**
 222  
          * <p>
 223  
          * Overloaded constructor is passed a {@link Token} for the key and
 224  
          * value attributes.
 225  
          * </p>
 226  
          *
 227  
          * @param key token key offset
 228  
          * @param value token value offset
 229  
          */
 230  1104
         public AttributeEntry(TokenOffset key, TokenOffset value) {
 231  1104
             this.key = key;
 232  1104
             this.value = value;
 233  1104
         }
 234  
 
 235  
         /**
 236  
          * <p>
 237  
          * Returns the attribute name {@link Token} offset.
 238  
          * </p>
 239  
          *
 240  
          * @return TokenOffset for the attribute key
 241  
          */
 242  
         public Object getKey() {
 243  1104
             return key;
 244  
         }
 245  
 
 246  
         /**
 247  
          * <p>
 248  
          * Returns the attribute value {@link Token} offset.
 249  
          * </p>
 250  
          *
 251  
          * @return TokenOffset of the attribute value
 252  
          */
 253  
         public Object getValue() {
 254  1050
             return value;
 255  
         }
 256  
 
 257  
         /**
 258  
          * <p>
 259  
          * Sets the attribute value {@link Token} offset.
 260  
          * </p>
 261  
          *
 262  
          * @param value TokenOffset value
 263  
          * @return value token offset
 264  
          */
 265  
         public Object setValue(Object value) {
 266  
             this.value = (TokenOffset) value;
 267  
             return value;
 268  
         }
 269  
 
 270  
         /**
 271  
          * @return description of the attribute
 272  
          */
 273  
         public String toString() {
 274  
             StringBuffer buff = new StringBuffer();
 275  
             TokenOffset key = (TokenOffset) getKey();
 276  
             TokenOffset value = (TokenOffset) getValue();
 277  
 
 278  
             buff.append("key: [").append((key != null ? key.getRawText() : null))
 279  
             .append("]\n").append("value: [")
 280  
             .append((value != null ? value.getRawText() : null))
 281  
             .append("]");
 282  
 
 283  
             return buff.toString();
 284  
         }
 285  
     }
 286  
 
 287  
     /**
 288  
      * <p>
 289  
      * The current offset within the <code>beginOffset</code> and
 290  
      * <code>endOffset</code> of the Node within the document.
 291  
      */
 292  484
     private int currOffset = 0;
 293  
 
 294  
     /**
 295  
      * <p>
 296  
      * Builds an <code>ArrayList</code> of
 297  
      * {@link AttributeTokenizer.AttributeEntry} instances identifying
 298  
      * name and value pairs.
 299  
      * </p>
 300  
      *
 301  
      * @param tokenIndex populated attribute offset of a beging node body
 302  
      */
 303  
     protected synchronized void parse(ArrayList tokenIndex) {
 304  484
         currOffset = beginOffset;
 305  
 
 306  484
         if (log.isDebugEnabled()) {
 307  
             log.debug(messages.getMessage("attribute.range", new Object[] {
 308  
                 new Integer(beginOffset), new Integer(endOffset) }));
 309  
         }
 310  
 
 311  1588
         while (currOffset < endOffset) {
 312  
             // skip leading spaces
 313  1141
             int startOffset = currOffset;
 314  2164
             while (Character.isWhitespace(buffer.charAt(currOffset))) {
 315  1023
                 currOffset++;
 316  1023
             }
 317  
 
 318  1141
             if (log.isDebugEnabled()) {
 319  
                 if (currOffset > startOffset) {
 320  
                     log.debug(messages
 321  
                             .getMessage("attribute.skip.space",
 322  
                             new Object[] { new Integer(currOffset
 323  
                                     - startOffset) }));
 324  
                 }
 325  
             }
 326  
 
 327  
             // looks for the key value delimiter
 328  1141
             TokenOffset key = nextToken(currOffset, " ", "=", true);
 329  1141
             if (key == null) {
 330  37
                 break;
 331  
             }
 332  
 
 333  1104
             boolean skipValue = false;
 334  1104
             currOffset++;
 335  1104
             String delim = " "; // old school html color=red
 336  1104
             String otherDelim = "\"";
 337  1104
             if (currOffset < buffer.length()
 338  
                 && buffer.charAt(currOffset) == '"') {
 339  
 
 340  
                 // xmlish attribute
 341  799
                 delim = "\"";
 342  799
                 otherDelim = " ";
 343  799
                 currOffset++;
 344  799
             } else if (currOffset < buffer.length() && currOffset > 0
 345  
                     && buffer.charAt(currOffset - 1) == ' ') {
 346  
 
 347  
                 //attribute without value
 348  67
                 currOffset--;        //back up <option selected value=
 349  67
                 skipValue = true;
 350  
             }
 351  
 
 352  1104
             TokenOffset value = null;
 353  1104
             if (!skipValue) {   // no value part <option selected value=xxx>
 354  1037
                value = nextToken(currOffset, delim, otherDelim, false);
 355  
             }
 356  
 
 357  1104
             tokenIndex.add(new AttributeEntry(key, value));
 358  
 
 359  1104
             currOffset++;
 360  1104
             key = null;
 361  1104
             value = null;
 362  1104
         }
 363  
 
 364  484
         if (log.isDebugEnabled()) {
 365  
             log.debug(messages.getMessage("attributes.total.found",
 366  
                     new Object[] { new Integer(tokenIndex.size()) }));
 367  
         }
 368  
 
 369  484
     }
 370  
 
 371  
     /**
 372  
      * <p>
 373  
      * Returns the next {@link Token} given an <code>startOffset</code> and a
 374  
      * <code>endDelim</code>.
 375  
      * </p>
 376  
      *
 377  
      * @param startOffset begining offset in the document
 378  
      * @param endDelim primary token delimiter
 379  
      * @param otherDelim secondary token delimiter
 380  
      * @param isKey looking for an attribute name not a value
 381  
      * @return next token offset
 382  
      */
 383  
     protected TokenOffset nextToken(int startOffset, String endDelim, String otherDelim, boolean isKey) {
 384  
         //If isKey is true, we are looking for an attribute name with a endDelim or otherDelim.
 385  
         //Pick the one that comes first.
 386  
 
 387  
         //If isKey is false we are looking for an attribute value.  The endDelim is the best guess
 388  
         //and the otherDelim is the next best guess.
 389  2178
         if (isKey) {
 390  1141
             int offsetEnd = Math.min(buffer.indexOf(endDelim, startOffset), endOffset);
 391  1141
             int offsetOther = Math.min(buffer.indexOf(otherDelim, startOffset), endOffset);
 392  1141
             if (offsetEnd == -1) {
 393  14
                currOffset = offsetOther;
 394  14
             } else if (offsetOther == -1) {
 395  26
                currOffset = offsetOther;
 396  26
             } else {
 397  1101
                currOffset = Math.min(offsetEnd, offsetOther);
 398  
             }
 399  1141
         } else {
 400  1037
            currOffset = Math.min(buffer.indexOf(endDelim, startOffset), endOffset);
 401  
            // try another delimiter
 402  1037
            if (currOffset == -1) {
 403  12
               currOffset = Math.min(buffer.indexOf(otherDelim, startOffset), endOffset);
 404  
            }
 405  
         }
 406  
 
 407  
 
 408  2178
         if (currOffset == -1) {
 409  38
             currOffset = endOffset;
 410  
         }
 411  
 
 412  
         // look for the value delimiter or the end of the parse fragment,
 413  
         // whichever comes first
 414  2178
         if (currOffset > -1 && currOffset <= endOffset && startOffset < currOffset) {
 415  
 
 416  2136
             int e = currOffset;
 417  
             //forgive an attribute with Inconsistent delimiters, color=red"
 418  2136
             if (buffer.charAt(e - 1) == '"'
 419  
                 || (Character.isWhitespace(buffer.charAt(e - 1))
 420  
                 && buffer.charAt(e - 1) != ' ')) {
 421  3
               --e;
 422  
             }
 423  
 
 424  
 
 425  2136
             TokenOffset value = new TokenOffset(startOffset, e);
 426  
 
 427  2136
             if (log.isDebugEnabled()) {
 428  
                 log.debug(messages.getMessage("attribute.token.range",
 429  
                         new Object[] { new Integer(startOffset),
 430  
                                 new Integer(e) }));
 431  
             }
 432  
 
 433  2136
             return value;
 434  
         }
 435  
 
 436  42
         return null;
 437  
     }
 438  
 
 439  
     /**
 440  
      * <p>Inner class implementing the <code>Iterator</code>
 441  
      * interface. This class is a decorator of a <code>ArrayList</code>
 442  
      * of nodes.
 443  
      * </p>
 444  
      */
 445  
     private class TokenIterator implements Iterator {
 446  
 
 447  
         /**
 448  
          * <p>All the attribute entry tokens in the node body.</p>
 449  
          */
 450  484
         private ArrayList tokenIndex = null;
 451  
 
 452  
         /**
 453  
          * <p>Internal <code>tokenIndex</code> iterator.</p>
 454  
          */
 455  484
         private Iterator ti = null;
 456  
 
 457  
         /**
 458  
          * <p>Constructor parses the node body into a collection of
 459  
          * {@link AttributeTokenizer.AttributeEntry}.
 460  
          * </p>
 461  
          */
 462  484
         public TokenIterator() {
 463  484
             tokenIndex = new ArrayList();
 464  484
             parse(tokenIndex);
 465  484
             ti = tokenIndex.iterator();
 466  484
         }
 467  
 
 468  
         /**
 469  
          * <p>Retuns <code>true</code> if there are more
 470  
          * {@link AttributeTokenizer.AttributeEntry} in the collection.
 471  
          * </p>
 472  
          *
 473  
          * @return <code>true</code> if there are more tokens
 474  
          */
 475  
         public boolean hasNext() {
 476  1588
             return ti.hasNext();
 477  
         }
 478  
 
 479  
         /**
 480  
          * <p>Retuns the next {@link AttributeTokenizer.AttributeEntry}
 481  
          * in the collection.
 482  
          * </p>
 483  
          *
 484  
          * @return returns the next token
 485  
          */
 486  
         public Object next() {
 487  1104
             Map.Entry attribute = (Map.Entry) ti.next();
 488  1104
             return attribute;
 489  
         }
 490  
 
 491  
         /**
 492  
          * <p>This method is not implemented.</p>
 493  
          *
 494  
          * @deprecated
 495  
          */
 496  
         public void remove() {
 497  
             // NA
 498  
         }
 499  
     }
 500  
 
 501  
     /**
 502  
      * <p>Returns an instance of an <code>Iterator</code> that
 503  
      * will enumerate attributes in the document where the attributes
 504  
      * are represented by a {@link AttributeTokenizer.AttributeEntry} instance.
 505  
      * </p>
 506  
      *
 507  
      * @return returns a {@link AttributeTokenizer.TokenIterator} iterator.
 508  
      */
 509  
     public Iterator iterator() {
 510  484
         return new TokenIterator();
 511  
     }
 512  
 
 513  
 }