001    /**
002    
003     * ========================================
004    
005     * JFreeReport : a free Java report library
006    
007     * ========================================
008    
009     *
010    
011     * Project Info:  http://reporting.pentaho.org/
012    
013     *
014    
015     * (C) Copyright 2000-2007, by Object Refinery Limited, Pentaho Corporation and Contributors.
016    
017     *
018    
019     * This library is free software; you can redistribute it and/or modify it under the terms
020    
021     * of the GNU Lesser General Public License as published by the Free Software Foundation;
022    
023     * either version 2.1 of the License, or (at your option) any later version.
024    
025     *
026    
027     * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
028    
029     * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
030    
031     * See the GNU Lesser General Public License for more details.
032    
033     *
034    
035     * You should have received a copy of the GNU Lesser General Public License along with this
036    
037     * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
038    
039     * Boston, MA 02111-1307, USA.
040    
041     *
042    
043     * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
044    
045     * in the United States and other countries.]
046    
047     *
048    
049     * ------------
050    
051     * $Id: CharacterEntityParser.java,v 1.12 2007/04/01 18:49:33 taqua Exp $
052    
053     * ------------
054    
055     * (C) Copyright 2000-2005, by Object Refinery Limited.
056    
057     * (C) Copyright 2005-2007, by Pentaho Corporation.
058    
059     */
060    
061    package org.jfree.report.util;
062    
063    
064    
065    import java.util.Enumeration;
066    
067    import java.util.Properties;
068    
069    
070    
/**
 * Translates between plain characters and their entity notation
 * <code>&amp;entityname;</code>.
 * <p>
 * Encoding replaces every character that has a known entity by the entity reference;
 * decoding replaces every known named entity and every decimal character reference
 * (<code>&amp;#NNN;</code>) by the character it stands for. Anything that cannot be
 * resolved is passed through unchanged.
 *
 * @author Thomas Morgner
 */
public class CharacterEntityParser
{
  /**
   * The highest character code that fits into a single <code>char</code>. Numeric
   * references above this value cannot be represented and are left undecoded.
   */
  private static final int MAX_CHAR_CODE = 0xFFFF;

  /**
   * The entities, keyed by entity name; the value is the replacement text.
   */
  private final Properties entities;

  /**
   * The reverse lookup table, keyed by replacement text; the value is the entity name.
   */
  private final Properties reverse;

  /**
   * Creates a new CharacterEntityParser and initializes the parser with the given set of
   * entities.
   * <p>
   * The given properties object is stored by reference (it is not copied). The reverse
   * lookup table is built once, here, from the entries present at construction time.
   * If several entity names share the same replacement text, only one of them ends up
   * in the reverse table; which one depends on the enumeration order of the properties.
   *
   * @param characterEntities the entities used for the parser, mapping entity name to
   *                          replacement text.
   * @throws NullPointerException if <code>characterEntities</code> is null.
   */
  public CharacterEntityParser (final Properties characterEntities)
  {
    if (characterEntities == null)
    {
      throw new NullPointerException("characterEntities must not be null");
    }
    entities = characterEntities;
    reverse = new Properties();

    final Enumeration names = entities.keys();
    while (names.hasMoreElements())
    {
      final String name = (String) names.nextElement();
      reverse.setProperty(entities.getProperty(name), name);
    }
  }

  /**
   * Creates a new CharacterEntityParser that knows the five entities
   * <code>amp</code>, <code>quot</code>, <code>lt</code>, <code>gt</code> and
   * <code>apos</code>.
   *
   * @return the CharacterEntityParser initialized with the XML entities.
   */
  public static CharacterEntityParser createXMLEntityParser ()
  {
    final Properties xmlEntities = new Properties();
    xmlEntities.setProperty("amp", "&");
    xmlEntities.setProperty("quot", "\"");
    xmlEntities.setProperty("lt", "<");
    xmlEntities.setProperty("gt", ">");
    xmlEntities.setProperty("apos", "\u0027");
    return new CharacterEntityParser(xmlEntities);
  }

  /**
   * Looks up the replacement text for the entity name specified in <code>key</code>.
   *
   * @param key the entity name (without the leading '&amp;' and the trailing ';').
   * @return the replacement text, or null if the entity name is not known.
   */
  private String lookupCharacter (final String key)
  {
    return entities.getProperty(key);
  }

  /**
   * Performs a reverse lookup, to retrieve the entity reference for a given character.
   *
   * @param character the character (as string) that should be translated into the entity.
   * @return the complete entity reference including '&amp;' and ';', or null if there
   *         is no entity for the character.
   */
  private String lookupEntity (final String character)
  {
    final String name = reverse.getProperty(character);
    if (name == null)
    {
      return null;
    }
    return "&" + name + ";";
  }

  /**
   * Encodes the given string, so that every character for which an entity is known is
   * replaced by its entity reference. All other characters are copied unchanged.
   * <p>
   * The lookup is done character by character, therefore entities whose replacement
   * text is longer than one character are never produced by this method.
   *
   * @param value the original string.
   * @return the encoded string.
   * @throws NullPointerException if <code>value</code> is null.
   */
  public String encodeEntities (final String value)
  {
    final int length = value.length();
    final StringBuffer encoded = new StringBuffer(length);
    for (int i = 0; i < length; i++)
    {
      final String character = String.valueOf(value.charAt(i));
      final String reference = lookupEntity(character);
      encoded.append(reference == null ? character : reference);
    }
    return encoded.toString();
  }

  /**
   * Decodes the string; all known entities and all valid decimal character references
   * are replaced by their resolved characters.
   * <p>
   * References that cannot be resolved (unknown names, malformed or out-of-range
   * numbers, an ampersand without a terminating semicolon) are copied to the result
   * exactly as they appear in the input. An ampersand that is followed by another
   * ampersand before the next semicolon is treated as plain text, so that a stray
   * '&amp;' does not hide a valid entity that follows it.
   *
   * @param value the string that should be decoded.
   * @return the decoded string; the given string itself if nothing had to be replaced.
   * @throws NullPointerException if <code>value</code> is null.
   */
  public String decodeEntities (final String value)
  {
    int ampersand = value.indexOf('&');
    if (ampersand == -1)
    {
      return value;
    }

    // created lazily, so that input without resolvable references is returned as is
    StringBuffer decoded = null;
    // index of the first input character that has not been copied to the result yet
    int copied = 0;
    // position of the first ';' at or behind 'ampersand'; cached to keep the scan linear
    int semicolon = -1;

    while (ampersand != -1)
    {
      if (semicolon < ampersand)
      {
        semicolon = value.indexOf(';', ampersand);
        if (semicolon == -1)
        {
          // no terminator left, the remaining text cannot contain a reference
          break;
        }
      }

      final int nextAmpersand = value.indexOf('&', ampersand + 1);
      if (nextAmpersand == -1 || nextAmpersand > semicolon)
      {
        final String replacement =
                resolveReference(value.substring(ampersand + 1, semicolon));
        if (replacement != null)
        {
          if (decoded == null)
          {
            decoded = new StringBuffer(value.length());
          }
          decoded.append(value.substring(copied, ampersand));
          decoded.append(replacement);
          copied = semicolon + 1;
        }
        // unresolved references stay in the uncopied region and are emitted verbatim
      }
      // else: a second '&' precedes the ';', so this one is a stray ampersand
      ampersand = nextAmpersand;
    }

    if (decoded == null)
    {
      return value;
    }
    decoded.append(value.substring(copied));
    return decoded.toString();
  }

  /**
   * Resolves the text found between '&amp;' and ';'.
   * <p>
   * A text starting with '#' is read as decimal character code and accepted if it lies
   * between 1 and {@link #MAX_CHAR_CODE}. Any other text is looked up as entity name;
   * the replacement text of a named entity is decoded again, so that an entity may be
   * defined in terms of other entities.
   *
   * @param reference the reference text without the leading '&amp;' and trailing ';'.
   * @return the replacement text, or null if the reference cannot be resolved.
   */
  private String resolveReference (final String reference)
  {
    if (reference.length() > 0 && reference.charAt(0) == '#')
    {
      final int code = parseDecimal(reference.substring(1));
      if (code >= 1 && code <= MAX_CHAR_CODE)
      {
        return String.valueOf((char) code);
      }
      return null;
    }

    final String replacement = lookupCharacter(reference);
    if (replacement == null)
    {
      return null;
    }
    return decodeEntities(replacement);
  }

  /**
   * Parses the given text as decimal integer.
   *
   * @param digits the text to parse.
   * @return the parsed number, or 0 (which callers treat as invalid character code)
   *         if the text is not a valid integer.
   */
  private static int parseDecimal (final String digits)
  {
    try
    {
      return Integer.parseInt(digits);
    }
    catch (NumberFormatException ignored)
    {
      // not a number: report the "invalid" marker instead of failing the whole decode
      return 0;
    }
  }
}
488    
489    
490