001 /** 002 003 * ======================================== 004 005 * JFreeReport : a free Java report library 006 007 * ======================================== 008 009 * 010 011 * Project Info: http://reporting.pentaho.org/ 012 013 * 014 015 * (C) Copyright 2000-2007, by Object Refinery Limited, Pentaho Corporation and Contributors. 016 017 * 018 019 * This library is free software; you can redistribute it and/or modify it under the terms 020 021 * of the GNU Lesser General Public License as published by the Free Software Foundation; 022 023 * either version 2.1 of the License, or (at your option) any later version. 024 025 * 026 027 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 028 029 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 030 031 * See the GNU Lesser General Public License for more details. 032 033 * 034 035 * You should have received a copy of the GNU Lesser General Public License along with this 036 037 * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, 038 039 * Boston, MA 02111-1307, USA. 040 041 * 042 043 * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 044 045 * in the United States and other countries.] 046 047 * 048 049 * ------------ 050 051 * $Id: CharacterEntityParser.java,v 1.12 2007/04/01 18:49:33 taqua Exp $ 052 053 * ------------ 054 055 * (C) Copyright 2000-2005, by Object Refinery Limited. 056 057 * (C) Copyright 2005-2007, by Pentaho Corporation. 058 059 */ 060 061 package org.jfree.report.util; 062 063 064 065 import java.util.Enumeration; 066 067 import java.util.Properties; 068 069 070 071 /** 072 073 * The character entity parser replaces all known occurrences of an entity in the format 074 075 * &entityname;. 076 077 * 078 079 * @author Thomas Morgner 080 081 */ 082 083 public class CharacterEntityParser 084 085 { 086 087 /** 088 089 * the entities, keyed by entity name. 090 091 */ 092 093 private final Properties entities; 094 095 096 097 /** 098 099 * the reverse lookup entities, keyed by character. 100 101 */ 102 103 private final Properties reverse; 104 105 106 107 /** 108 109 * Creates a new CharacterEntityParser and initializes the parser with the given set of 110 111 * entities. 112 113 * 114 115 * @param characterEntities the entities used for the parser 116 117 */ 118 119 public CharacterEntityParser (final Properties characterEntities) 120 121 { 122 123 entities = characterEntities; 124 125 reverse = new Properties(); 126 127 final Enumeration keys = entities.keys(); 128 129 while (keys.hasMoreElements()) 130 131 { 132 133 final String key = (String) keys.nextElement(); 134 135 final String value = entities.getProperty(key); 136 137 reverse.setProperty(value, key); 138 139 } 140 141 } 142 143 144 145 /** 146 147 * create a new Character entity parser and initializes the parser with the entities 148 149 * defined in the XML standard. 150 151 * 152 153 * @return the CharacterEntityParser initialized with XML entities. 154 155 */ 156 157 public static CharacterEntityParser createXMLEntityParser () 158 159 { 160 161 final Properties entities = new Properties(); 162 163 entities.setProperty("amp", "&"); 164 165 entities.setProperty("quot", "\""); 166 167 entities.setProperty("lt", "<"); 168 169 entities.setProperty("gt", ">"); 170 171 entities.setProperty("apos", "\u0027"); 172 173 return new CharacterEntityParser(entities); 174 175 } 176 177 178 179 /** 180 181 * returns the entities used in the parser. 182 183 * 184 185 * @return the properties for this parser. 186 187 */ 188 189 private Properties getEntities () 190 191 { 192 193 return entities; 194 195 } 196 197 198 199 /** 200 201 * returns the reverse-lookup table for the entities. 202 203 * 204 205 * @return the reverse-lookup properties for this parsers. 206 207 */ 208 209 private Properties getReverse () 210 211 { 212 213 return reverse; 214 215 } 216 217 218 219 /** 220 221 * Looks up the character for the entity name specified in <code>key</code>. 222 223 * 224 225 * @param key the entity name 226 227 * @return the character as string with a length of 1 228 229 */ 230 231 private String lookupCharacter (final String key) 232 233 { 234 235 return getEntities().getProperty(key); 236 237 } 238 239 240 241 /** 242 243 * Performs a reverse lookup, to retrieve the entity name for a given character. 244 245 * 246 247 * @param character the character that should be translated into the entity 248 249 * @return the entity name for the character or the untranslated character. 250 251 */ 252 253 private String lookupEntity (final String character) 254 255 { 256 257 final String val = getReverse().getProperty(character); 258 259 if (val == null) 260 261 { 262 263 return null; 264 265 } 266 267 else 268 269 { 270 271 return "&" + val + ";"; 272 273 } 274 275 } 276 277 278 279 /** 280 281 * Encode the given String, so that all known entites are encoded. All characters 282 283 * represented by these entites are now removed from the string. 284 285 * 286 287 * @param value the original string 288 289 * @return the encoded string. 290 291 */ 292 293 public String encodeEntities (final String value) 294 295 { 296 297 final StringBuffer writer = new StringBuffer(); 298 299 for (int i = 0; i < value.length(); i++) 300 301 { 302 303 final String character = String.valueOf(value.charAt(i)); 304 305 final String lookup = lookupEntity(character); 306 307 if (lookup == null) 308 309 { 310 311 writer.append(character); 312 313 } 314 315 else 316 317 { 318 319 writer.append(lookup); 320 321 } 322 323 } 324 325 return writer.toString(); 326 327 } 328 329 330 331 /** 332 333 * Decode the string, all known entities are replaced by their resolved characters. 334 335 * 336 337 * @param value the string that should be decoded. 338 339 * @return the decoded string. 340 341 */ 342 343 public String decodeEntities (final String value) 344 345 { 346 347 int parserIndex = 0; 348 349 int subStart = value.indexOf("&", parserIndex); 350 351 if (subStart == -1) 352 353 { 354 355 return value; 356 357 } 358 359 int subEnd = value.indexOf(";", subStart); 360 361 if (subEnd == -1) 362 363 { 364 365 return value; 366 367 } 368 369 370 371 final StringBuffer bufValue = new StringBuffer(value.substring(0, subStart)); 372 373 do 374 375 { 376 377 // at this point we know, that there is at least one entity .. 378 379 if (value.charAt(subStart + 1) == '#') 380 381 { 382 383 final int subValue = TextUtilities.parseInt(value.substring(subStart + 2, subEnd), 0); 384 385 if ((subValue >= 1) && (subValue <= 65536)) 386 387 { 388 389 final char[] chr = new char[1]; 390 391 chr[0] = (char) subValue; 392 393 bufValue.append(chr); 394 395 } 396 397 else 398 399 { 400 401 // invalid entity, do not decode .. 402 403 bufValue.append(value.substring(subStart, subEnd)); 404 405 } 406 407 } 408 409 else 410 411 { 412 413 final String entity = value.substring(subStart + 1, subEnd); 414 415 final String replaceString = lookupCharacter(entity); 416 417 if (replaceString != null) 418 419 { 420 421 bufValue.append(decodeEntities(replaceString)); 422 423 } 424 425 else 426 427 { 428 429 bufValue.append("&"); 430 431 bufValue.append(entity); 432 433 bufValue.append(";"); 434 435 } 436 437 } 438 439 parserIndex = subEnd + 1; 440 441 subStart = value.indexOf("&", parserIndex); 442 443 if (subStart == -1) 444 445 { 446 447 bufValue.append(value.substring(parserIndex)); 448 449 subEnd = -1; 450 451 } 452 453 else 454 455 { 456 457 subEnd = value.indexOf(";", subStart); 458 459 if (subEnd == -1) 460 461 { 462 463 bufValue.append(value.substring(parserIndex)); 464 465 } 466 467 else 468 469 { 470 471 bufValue.append(value.substring(parserIndex, subStart)); 472 473 } 474 475 } 476 477 } 478 479 while (subStart != -1 && subEnd != -1); 480 481 482 483 return bufValue.toString(); 484 485 } 486 487 } 488 489 490