Is there an equivalent of java.util.regex for "glob" type patterns?
Solution 1:
Globbing is also planned for implemented in Java 7.
See FileSystem.getPathMatcher(String)
and the "Finding Files" tutorial.
Solution 2:
There's nothing built-in, but it's pretty simple to convert something glob-like to a regex:
public static String createRegexFromGlob(String glob)
{
String out = "^";
for(int i = 0; i < glob.length(); ++i)
{
final char c = glob.charAt(i);
switch(c)
{
case '*': out += ".*"; break;
case '?': out += '.'; break;
case '.': out += "\\."; break;
case '\\': out += "\\\\"; break;
default: out += c;
}
}
out += '$';
return out;
}
this works for me, but I'm not sure if it covers the glob "standard", if there is one :)
Update by Paul Tomblin: I found a perl program that does glob conversion, and adapting it to Java I end up with:
private String convertGlobToRegEx(String line)
{
LOG.info("got line [" + line + "]");
line = line.trim();
int strLen = line.length();
StringBuilder sb = new StringBuilder(strLen);
// Remove beginning and ending * globs because they're useless
if (line.startsWith("*"))
{
line = line.substring(1);
strLen--;
}
if (line.endsWith("*"))
{
line = line.substring(0, strLen-1);
strLen--;
}
boolean escaping = false;
int inCurlies = 0;
for (char currentChar : line.toCharArray())
{
switch (currentChar)
{
case '*':
if (escaping)
sb.append("\\*");
else
sb.append(".*");
escaping = false;
break;
case '?':
if (escaping)
sb.append("\\?");
else
sb.append('.');
escaping = false;
break;
case '.':
case '(':
case ')':
case '+':
case '|':
case '^':
case '$':
case '@':
case '%':
sb.append('\\');
sb.append(currentChar);
escaping = false;
break;
case '\\':
if (escaping)
{
sb.append("\\\\");
escaping = false;
}
else
escaping = true;
break;
case '{':
if (escaping)
{
sb.append("\\{");
}
else
{
sb.append('(');
inCurlies++;
}
escaping = false;
break;
case '}':
if (inCurlies > 0 && !escaping)
{
sb.append(')');
inCurlies--;
}
else if (escaping)
sb.append("\\}");
else
sb.append("}");
escaping = false;
break;
case ',':
if (inCurlies > 0 && !escaping)
{
sb.append('|');
}
else if (escaping)
sb.append("\\,");
else
sb.append(",");
break;
default:
escaping = false;
sb.append(currentChar);
}
}
return sb.toString();
}
I'm editing into this answer rather than making my own because this answer put me on the right track.
Solution 3:
Thanks to everyone here for their contributions. I wrote a more comprehensive conversion than any of the previous answers:
/**
* Converts a standard POSIX Shell globbing pattern into a regular expression
* pattern. The result can be used with the standard {@link java.util.regex} API to
* recognize strings which match the glob pattern.
* <p/>
* See also, the POSIX Shell language:
* http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_01
*
* @param pattern A glob pattern.
* @return A regex pattern to recognize the given glob pattern.
*/
public static final String convertGlobToRegex(String pattern) {
StringBuilder sb = new StringBuilder(pattern.length());
int inGroup = 0;
int inClass = 0;
int firstIndexInClass = -1;
char[] arr = pattern.toCharArray();
for (int i = 0; i < arr.length; i++) {
char ch = arr[i];
switch (ch) {
case '\\':
if (++i >= arr.length) {
sb.append('\\');
} else {
char next = arr[i];
switch (next) {
case ',':
// escape not needed
break;
case 'Q':
case 'E':
// extra escape needed
sb.append('\\');
default:
sb.append('\\');
}
sb.append(next);
}
break;
case '*':
if (inClass == 0)
sb.append(".*");
else
sb.append('*');
break;
case '?':
if (inClass == 0)
sb.append('.');
else
sb.append('?');
break;
case '[':
inClass++;
firstIndexInClass = i+1;
sb.append('[');
break;
case ']':
inClass--;
sb.append(']');
break;
case '.':
case '(':
case ')':
case '+':
case '|':
case '^':
case '$':
case '@':
case '%':
if (inClass == 0 || (firstIndexInClass == i && ch == '^'))
sb.append('\\');
sb.append(ch);
break;
case '!':
if (firstIndexInClass == i)
sb.append('^');
else
sb.append('!');
break;
case '{':
inGroup++;
sb.append('(');
break;
case '}':
inGroup--;
sb.append(')');
break;
case ',':
if (inGroup > 0)
sb.append('|');
else
sb.append(',');
break;
default:
sb.append(ch);
}
}
return sb.toString();
}
And the unit tests to prove it works:
/**
* @author Neil Traft
*/
public class StringUtils_ConvertGlobToRegex_Test {
@Test
public void star_becomes_dot_star() throws Exception {
assertEquals("gl.*b", StringUtils.convertGlobToRegex("gl*b"));
}
@Test
public void escaped_star_is_unchanged() throws Exception {
assertEquals("gl\\*b", StringUtils.convertGlobToRegex("gl\\*b"));
}
@Test
public void question_mark_becomes_dot() throws Exception {
assertEquals("gl.b", StringUtils.convertGlobToRegex("gl?b"));
}
@Test
public void escaped_question_mark_is_unchanged() throws Exception {
assertEquals("gl\\?b", StringUtils.convertGlobToRegex("gl\\?b"));
}
@Test
public void character_classes_dont_need_conversion() throws Exception {
assertEquals("gl[-o]b", StringUtils.convertGlobToRegex("gl[-o]b"));
}
@Test
public void escaped_classes_are_unchanged() throws Exception {
assertEquals("gl\\[-o\\]b", StringUtils.convertGlobToRegex("gl\\[-o\\]b"));
}
@Test
public void negation_in_character_classes() throws Exception {
assertEquals("gl[^a-n!p-z]b", StringUtils.convertGlobToRegex("gl[!a-n!p-z]b"));
}
@Test
public void nested_negation_in_character_classes() throws Exception {
assertEquals("gl[[^a-n]!p-z]b", StringUtils.convertGlobToRegex("gl[[!a-n]!p-z]b"));
}
@Test
public void escape_carat_if_it_is_the_first_char_in_a_character_class() throws Exception {
assertEquals("gl[\\^o]b", StringUtils.convertGlobToRegex("gl[^o]b"));
}
@Test
public void metachars_are_escaped() throws Exception {
assertEquals("gl..*\\.\\(\\)\\+\\|\\^\\$\\@\\%b", StringUtils.convertGlobToRegex("gl?*.()+|^$@%b"));
}
@Test
public void metachars_in_character_classes_dont_need_escaping() throws Exception {
assertEquals("gl[?*.()+|^$@%]b", StringUtils.convertGlobToRegex("gl[?*.()+|^$@%]b"));
}
@Test
public void escaped_backslash_is_unchanged() throws Exception {
assertEquals("gl\\\\b", StringUtils.convertGlobToRegex("gl\\\\b"));
}
@Test
public void slashQ_and_slashE_are_escaped() throws Exception {
assertEquals("\\\\Qglob\\\\E", StringUtils.convertGlobToRegex("\\Qglob\\E"));
}
@Test
public void braces_are_turned_into_groups() throws Exception {
assertEquals("(glob|regex)", StringUtils.convertGlobToRegex("{glob,regex}"));
}
@Test
public void escaped_braces_are_unchanged() throws Exception {
assertEquals("\\{glob\\}", StringUtils.convertGlobToRegex("\\{glob\\}"));
}
@Test
public void commas_dont_need_escaping() throws Exception {
assertEquals("(glob,regex),", StringUtils.convertGlobToRegex("{glob\\,regex},"));
}
}