social-util-library/src/main/java/org/gcube/socialnetworking/tokenization/SocialStringTokenizer.java

53 lines
1.4 KiB
Java

package org.gcube.socialnetworking.tokenization;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits a string into {@link Token}s using single whitespace characters
 * (the regex {@code \s}) as delimiters.
 *
 * <p>Every whitespace character is treated as its own delimiter, so two
 * consecutive whitespace characters produce a token whose text is the empty
 * string between them. Text following the last delimiter (or the whole input
 * when it contains no whitespace) becomes a final token with an empty
 * delimiter. An empty input produces no tokens.
 *
 * <p>The token list is computed on the first call to {@link #getTokens()} and
 * cached. This class performs no synchronization.
 */
public class SocialStringTokenizer {

    /**
     * Delimiter pattern: exactly one whitespace character. Compiled once and
     * shared, since compiled patterns are immutable and reusable.
     */
    private static final Pattern DELIMITER_PATTERN = Pattern.compile("\\s");

    /** The string being tokenized; never {@code null}. */
    private final String originalString;

    /** Matcher over {@link #originalString}; its current match is read by {@link #getToken(int)}. */
    private final Matcher matcher;

    /** Cached result of {@link #getTokens()}; {@code null} until first computed. */
    private List<Token> tokens;

    /**
     * Creates a tokenizer for the given string.
     *
     * @param string the text to tokenize
     * @throws NullPointerException if {@code string} is {@code null}
     */
    public SocialStringTokenizer(String string) {
        // Fail with an explicit message instead of an anonymous NPE raised
        // from inside the regex engine.
        if (string == null) {
            throw new NullPointerException("string must not be null");
        }
        this.originalString = string;
        this.matcher = DELIMITER_PATTERN.matcher(string);
    }

    /**
     * Builds the token that ends at the matcher's current delimiter match.
     *
     * <p>The token text is the substring from {@code tokenStart} up to the
     * start of the current match; the delimiter is the matched text itself.
     * The token is constructed with the text, the delimiter, {@code tokenStart}
     * and the index where the delimiter starts.
     *
     * @param tokenStart index in the original string where the token text begins
     * @return the token preceding the current delimiter match
     * @throws IllegalStateException if the matcher has no current match, i.e.
     *         this is called outside the scan performed by {@link #getTokens()}
     */
    protected Token getToken(int tokenStart) {
        // The token ends exactly where the delimiter begins.
        int tokenEnd = matcher.start();
        int delimiterEnd = matcher.end();
        String tokenString = originalString.substring(tokenStart, tokenEnd);
        String delimiter = originalString.substring(tokenEnd, delimiterEnd);
        return new Token(tokenString, delimiter, tokenStart, tokenEnd);
    }

    /**
     * Returns the tokens of the original string, computing them on first use.
     *
     * <p>The same list instance is returned on every call, so modifications
     * made by a caller are visible to subsequent callers.
     *
     * @return the list of tokens in order of appearance; empty if the original
     *         string is empty
     */
    public List<Token> getTokens() {
        if (tokens == null) {
            // Build into a local list and publish only when complete, so a
            // failure part-way through never leaves a truncated cached result.
            List<Token> result = new ArrayList<>();

            // Start from the beginning even if a previous attempt was aborted.
            matcher.reset();

            int tokenStart = 0;
            while (matcher.find()) {
                result.add(getToken(tokenStart));
                // The next token begins right after the delimiter just consumed.
                tokenStart = matcher.end();
            }

            // Trailing text after the last delimiter has no delimiter of its own.
            int length = originalString.length();
            if (tokenStart != length) {
                String tokenString = originalString.substring(tokenStart, length);
                result.add(new Token(tokenString, "", tokenStart, length));
            }

            tokens = result;
        }
        return tokens;
    }
}