SimplePatternParser.java |
/* * $Id: SimplePatternParser.java,v 1.23 2010/09/29 17:21:48 agoubard Exp $ * * See the COPYRIGHT file for redistribution and use restrictions. */ package org.xins.common.text; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.xins.common.MandatoryArgumentChecker; /** * Simple pattern parser. * * <h3>Format</h3> * * <p>A simple pattern is a text string that may contain letters, digits, * underscores, hyphens, dots and the wildcard characters <code>'*'</code> * (asterisk) and <code>'?'</code> (question mark). * * <p>The location of an asterisk indicates any number of characters is * allowed. The location of a question mark indicates exactly one character is * expected. * * <p>To allow matching of simple patterns, a simple pattern is first compiled * into a Perl 5 regular expression. Every asterisk is converted to * <code>".*"</code>, while every question mark is converted to * <code>"."</code>. * * <h3>Examples</h3> * * <p>Examples of conversions from a simple pattern to a Perl 4 regular * expression: * * <table> * <tr><th>Simple pattern</th><th>Perl 5 regex equivalent</th></tr> * <tr><td></td> <td></td> </tr> * <tr><td>*</td> <td>.*</td> </tr> * <tr><td>?</td> <td>.</td> </tr> * <tr><td>_Get*</td> <td>_Get.*</td> </tr> * <tr><td>_Get*i?n</td> <td>_Get.*i.n</td> </tr> * <tr><td>*on</td> <td>.*on</td> </tr> * <tr><td>_Get*,_Dis*</td> <td>_Get.*|_Dis.*</td> </tr> * </table> * * @version $Revision: 1.23 $ $Date: 2010/09/29 17:21:48 $ * @author <a href="mailto:ernst@ernstdehaan.com">Ernst de Haan</a> * @author Peter Troon * * @since XINS 1.0.0 */ public class SimplePatternParser { /** * Creates a new <code>SimplePatternParser</code> object. */ public SimplePatternParser() { // empty } /** * Converts the specified simple pattern to a Perl 5 regular expression. * * @param simplePattern * the simple pattern, cannot be <code>null</code>. * * @return * the Java pattern, never <code>null</code>. * * @throws IllegalArgumentException * if <code>simplePattern == null</code>. * * @throws ParseException * if provided simplePattern is invalid or could not be parsed. */ public Pattern parseSimplePattern(String simplePattern) throws IllegalArgumentException, ParseException { MandatoryArgumentChecker.check("simplePattern", simplePattern); simplePattern = convertToPerl5RegularExpression(simplePattern); try { return Pattern.compile(simplePattern); } catch (PatternSyntaxException pse) { throw new ParseException("An error occurred while parsing the pattern '" + simplePattern + "'."); } } /** * Converts the pattern to a Perl 5 Regular Expression. This means that * every asterisk is replaced by a dot and an asterisk, every question mark * is replaced by a dot, an accent circunflex is prepended to the pattern * and a dollar sign is appended to the pattern. * * @param pattern * the pattern to be converted, may not be <code>null</code>. * * @return * the converted pattern, not <code>null</code>. * * @throws NullPointerException * if <code>pattern == null</code>. * * @throws ParseException * if provided simplePattern is invalid or could not be parsed. */ private String convertToPerl5RegularExpression(String pattern) throws NullPointerException, ParseException { // Short-circuit if the pattern is empty int length = pattern.length(); if (length < 1) { return ""; } // Convert to char array and construct buffer char[] contents = pattern.toCharArray(); StringBuffer buffer = new StringBuffer(length * 2); // Loop through all characters char prevChar = (char) 0; for (int i= 0; i < length; i++) { char currChar = contents[i]; if (currChar >= 'a' && currChar <= 'z') { buffer.append(currChar); } else if (currChar >= 'A' && currChar <= 'Z') { buffer.append(currChar); } else if (currChar >= '0' && currChar <= '9') { buffer.append(currChar); } else if (currChar == '_') { buffer.append(currChar); } else if (currChar == '-') { buffer.append(currChar); } else if (currChar == '.') { buffer.append("\\."); } else if ((currChar == '*' || currChar == '?') && (prevChar == '*' || prevChar == '?')) { String detail = "The pattern \"" + pattern + "\" is invalid since it contains two subsequent wildcard characters ('" + prevChar + "' and '" + currChar + "') at positions " + (i - 1) + " and " + i + '.'; throw new ParseException(detail); } else if (currChar == '*') { buffer.append(".*"); } else if (currChar == '?') { buffer.append('.'); } else if (currChar == ',') { buffer.append('|'); } else { throw new ParseException("The pattern \"" + pattern + "\" is invalid. The character '" + currChar + "' is not allowed."); } prevChar = currChar; } return buffer.toString(); } }