diff --git a/src/main/java/org/gcube/socialnetworking/tokenization/GCubeStringTokenizer.java b/src/main/java/org/gcube/socialnetworking/tokenization/GCubeStringTokenizer.java index 76baa7a..10230ac 100644 --- a/src/main/java/org/gcube/socialnetworking/tokenization/GCubeStringTokenizer.java +++ b/src/main/java/org/gcube/socialnetworking/tokenization/GCubeStringTokenizer.java @@ -7,7 +7,13 @@ import java.util.regex.Pattern; public class GCubeStringTokenizer { + /** + * Default delimiter regex, used by the single-argument constructor: '\s' matches any single whitespace character. + */ + public static final String DEFAULT_DELIMITER_REGEX = "\\s"; + private final String originalString; + private final String delimiterRegex; private Pattern pattern; private Matcher matcher; @@ -15,8 +21,13 @@ public class GCubeStringTokenizer { private List tokens; public GCubeStringTokenizer(String string) { + this(string, DEFAULT_DELIMITER_REGEX); + } + + public GCubeStringTokenizer(String string, String delimiterRegex) { this.originalString = string; - this.pattern = Pattern.compile("\\s"); + this.delimiterRegex = delimiterRegex; + this.pattern = Pattern.compile(delimiterRegex); this.matcher = pattern.matcher(originalString); } @@ -49,4 +60,12 @@ public class GCubeStringTokenizer { return tokens; } + public String getOriginalString() { + return originalString; + } + + public String getDelimiterRegex() { + return delimiterRegex; + } + }