| SimplePatternParser.java |
/*
* $Id: SimplePatternParser.java,v 1.23 2010/09/29 17:21:48 agoubard Exp $
*
* See the COPYRIGHT file for redistribution and use restrictions.
*/
package org.xins.common.text;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.xins.common.MandatoryArgumentChecker;
/**
* Simple pattern parser.
*
* <h3>Format</h3>
*
* <p>A simple pattern is a text string that may contain letters, digits,
* underscores, hyphens, dots, commas and the wildcard characters
* <code>'*'</code> (asterisk) and <code>'?'</code> (question mark).
*
* <p>The location of an asterisk indicates any number of characters is
* allowed. The location of a question mark indicates exactly one character is
* expected.
*
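* <p>To allow matching of simple patterns, a simple pattern is first compiled
* into a Perl 5 regular expression. Every asterisk is converted to
* <code>".*"</code>, every question mark is converted to
* <code>"."</code>, every dot is escaped as <code>"\."</code> and every
* comma is converted to <code>"|"</code>, which separates alternatives.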
*
* <h3>Examples</h3>
*
* <p>Examples of conversions from a simple pattern to a Perl 5 regular
* expression:
*
* <table>
* <tr><th>Simple pattern</th><th>Perl 5 regex equivalent</th></tr>
* <tr><td></td> <td></td> </tr>
* <tr><td>*</td> <td>.*</td> </tr>
* <tr><td>?</td> <td>.</td> </tr>
* <tr><td>_Get*</td> <td>_Get.*</td> </tr>
* <tr><td>_Get*i?n</td> <td>_Get.*i.n</td> </tr>
* <tr><td>*on</td> <td>.*on</td> </tr>
* <tr><td>_Get*,_Dis*</td> <td>_Get.*|_Dis.*</td> </tr>
* </table>
*
* @version $Revision: 1.23 $ $Date: 2010/09/29 17:21:48 $
* @author <a href="mailto:ernst@ernstdehaan.com">Ernst de Haan</a>
* @author Peter Troon
*
* @since XINS 1.0.0
*/
public class SimplePatternParser {

    /**
     * Creates a new <code>SimplePatternParser</code> object.
     */
    public SimplePatternParser() {
        // empty
    }

    /**
     * Converts the specified simple pattern to a compiled regular expression.
     *
     * <p>The simple pattern is first translated to regular expression syntax
     * and the result is then compiled. No anchors are added to the generated
     * expression.
     *
     * @param simplePattern
     *    the simple pattern, cannot be <code>null</code>.
     *
     * @return
     *    the Java pattern, never <code>null</code>.
     *
     * @throws IllegalArgumentException
     *    if <code>simplePattern == null</code>.
     *
     * @throws ParseException
     *    if provided simplePattern is invalid or could not be parsed.
     */
    public Pattern parseSimplePattern(String simplePattern)
    throws IllegalArgumentException, ParseException {
        MandatoryArgumentChecker.check("simplePattern", simplePattern);

        // Keep the generated regex in its own variable so that the original
        // simple pattern remains available for error reporting.
        String regex = convertToPerl5RegularExpression(simplePattern);
        try {
            return Pattern.compile(regex);
        } catch (PatternSyntaxException pse) {
            // Report the pattern as the caller supplied it, like the other
            // error messages in this class do.
            // NOTE(review): the PatternSyntaxException is not attached as the
            // cause; consider passing it on if ParseException supports that.
            throw new ParseException("An error occurred while parsing the pattern '" + simplePattern + "'.");
        }
    }

    /**
     * Converts the pattern to a Perl 5 regular expression. Letters, digits,
     * underscores and hyphens are copied unchanged, every dot is escaped
     * with a backslash, every asterisk is replaced by a dot and an asterisk,
     * every question mark is replaced by a dot and every comma is replaced
     * by a vertical bar. No caret or dollar sign is added, and an empty
     * pattern is converted to an empty string.
     *
     * @param pattern
     *    the pattern to be converted, may not be <code>null</code>.
     *
     * @return
     *    the converted pattern, not <code>null</code>.
     *
     * @throws NullPointerException
     *    if <code>pattern == null</code>.
     *
     * @throws ParseException
     *    if the pattern contains two subsequent wildcard characters or a
     *    character that is not allowed.
     */
    private String convertToPerl5RegularExpression(String pattern)
    throws NullPointerException, ParseException {

        // Short-circuit if the pattern is empty
        int length = pattern.length();
        if (length < 1) {
            return "";
        }

        // Every input character yields at most two output characters
        StringBuilder buffer = new StringBuilder(length * 2);

        // Loop through all characters, remembering the previous one so that
        // two subsequent wildcards can be detected
        char prevChar = (char) 0;
        for (int i = 0; i < length; i++) {
            char currChar = pattern.charAt(i);

            if (isLiteral(currChar)) {
                buffer.append(currChar);
            } else if (currChar == '.') {
                // A dot must match a literal dot, not any character
                buffer.append("\\.");
            } else if (isWildcard(currChar)) {
                if (isWildcard(prevChar)) {
                    String detail = "The pattern \"" + pattern
                          + "\" is invalid since it contains two subsequent wildcard characters ('"
                          + prevChar + "' and '" + currChar + "') at positions " + (i - 1)
                          + " and " + i + '.';
                    throw new ParseException(detail);
                }
                buffer.append(currChar == '*' ? ".*" : ".");
            } else if (currChar == ',') {
                // A comma separates alternatives
                buffer.append('|');
            } else {
                throw new ParseException("The pattern \"" + pattern + "\" is invalid. The character '" + currChar + "' is not allowed.");
            }

            prevChar = currChar;
        }

        return buffer.toString();
    }

    /**
     * Determines whether the character is copied unchanged into the regular
     * expression: an ASCII letter, an ASCII digit, an underscore or a hyphen.
     *
     * @param c
     *    the character to check.
     *
     * @return
     *    <code>true</code> if the character needs no conversion.
     */
    private static boolean isLiteral(char c) {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || c == '_'
            || c == '-';
    }

    /**
     * Determines whether the character is one of the wildcard characters
     * <code>'*'</code> and <code>'?'</code>.
     *
     * @param c
     *    the character to check.
     *
     * @return
     *    <code>true</code> if the character is a wildcard.
     */
    private static boolean isWildcard(char c) {
        return c == '*' || c == '?';
    }
}