diff --git a/src/main/java/org/gcube/socialnetworking/socialtoken/HashTagToken.java b/src/main/java/org/gcube/socialnetworking/socialtoken/HashTagToken.java index 7e28ce1..4b60a44 100644 --- a/src/main/java/org/gcube/socialnetworking/socialtoken/HashTagToken.java +++ b/src/main/java/org/gcube/socialnetworking/socialtoken/HashTagToken.java @@ -3,9 +3,12 @@ package org.gcube.socialnetworking.socialtoken; import org.gcube.portal.databook.client.GCubeSocialNetworking; import org.gcube.socialnetworking.tokenization.Token; +/** + * @author Luca Frosini (ISTI - CNR) + */ public class HashTagToken extends ReplaceableToken { - protected SanitizedHashTag sanitizedTag; + protected SanitizedHashTag sanitizedHashTag; public HashTagToken(String token, String delimiter, int start, int end) { super(token, delimiter, start, end); @@ -18,23 +21,32 @@ public class HashTagToken extends ReplaceableToken { public String getTokenReplacement() { if(!replaced) { try { - String tag = getTag(); - String linkTarget = ReplaceableToken.createHref("", GCubeSocialNetworking.HASHTAG_OID, tag); - tokenReplacement = ReplaceableToken.createLink(linkTarget, tag, null) + sanitizedTag.getPostfix(); + String hashTag = getHashTag(); + String linkTarget = ReplaceableToken.createHref("", GCubeSocialNetworking.HASHTAG_OID, hashTag); + tokenReplacement = sanitizedHashTag.getPrefix() + ReplaceableToken.createLink(linkTarget, hashTag, null) + sanitizedHashTag.getPostfix(); } catch(Exception e) { tokenReplacement = token; } - replaced = true; } return tokenReplacement; } - public String getTag() throws Exception { - if(sanitizedTag==null) { - sanitizedTag = new SanitizedHashTag(token); + public String getHashTag() throws Exception { + if(sanitizedHashTag==null) { + sanitizedHashTag = new SanitizedHashTag(token); } - return sanitizedTag.getTag(); + return sanitizedHashTag.getHashTag(); } + + public static SanitizedHashTag isHashTag(String hastag) { + try { + return new SanitizedHashTag(hastag); + } catch(IllegalArgumentException e) { + // not an HashTag + return null; + } + + } } diff --git a/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedHashTag.java b/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedHashTag.java index 6762b18..e2928c4 100644 --- a/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedHashTag.java +++ b/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedHashTag.java @@ -1,45 +1,74 @@ package org.gcube.socialnetworking.socialtoken; +import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; public class SanitizedHashTag { - private static final String TAG_REGEX = "^#[\\w-_]*"; + private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w-]*[\\W]{0,3}"; + private static final Pattern RECOGNIZE_HASHTAG_PATTERN; - private static final Pattern pattern; + private static final String HASHTAG_REGEX = "#[\\w-]*"; + private static final Pattern HASHTAG_PATTERN; static { - pattern = Pattern.compile(TAG_REGEX); + HASHTAG_PATTERN = Pattern.compile(HASHTAG_REGEX); + RECOGNIZE_HASHTAG_PATTERN = Pattern.compile(RECOGNIZE_HASHTAG_REGEX); } - protected String tag; + protected String prefix; + protected String hashTag; protected String postfix; - public SanitizedHashTag(String string) throws Exception { - if(string==null || string.compareTo("")==0 || !string.startsWith("#")) { - throw new Exception(string + "is not a valid TAG"); + public SanitizedHashTag(String string) throws IllegalArgumentException { + if(Objects.isNull(string) || string.isEmpty()) { + throw new IllegalArgumentException(string + " is not a valid TAG"); } + Matcher recognizeMatcher = SanitizedHashTag.RECOGNIZE_HASHTAG_PATTERN.matcher(string); + if(!recognizeMatcher.find()) { + throw new IllegalArgumentException(string + " is not a valid TAG"); + }else { + if(recognizeMatcher.end()!=(string.length())) { + throw new IllegalArgumentException(string + " is not a valid TAG"); + } + } - Matcher matcher = SanitizedHashTag.pattern.matcher(string); + Matcher matcher = SanitizedHashTag.HASHTAG_PATTERN.matcher(string); if(matcher.find()) { - tag = string.substring(matcher.start(), matcher.end()); + prefix = string.substring(0,matcher.start()); + hashTag = string.substring(matcher.start(), matcher.end()); postfix = string.substring(matcher.end()); }else { - throw new Exception(string + "is not a valid TAG"); + throw new IllegalArgumentException(string + " is not a valid TAG"); } } - - public String getTag() { - return tag; + + public String getPrefix() { + return prefix; + } + + public String getHashTag() { + return hashTag; } public String getPostfix() { return postfix; } + public String toString() { + StringBuffer stringBuffer = new StringBuffer(); + stringBuffer.append("Prefix : '"); + stringBuffer.append(prefix); + stringBuffer.append("' - Hashtag : '"); + stringBuffer.append(hashTag); + stringBuffer.append("' - Postfix : '"); + stringBuffer.append(postfix); + stringBuffer.append("'"); + return stringBuffer.toString(); + } } \ No newline at end of file diff --git a/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedURL.java b/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedURL.java index 97a082f..cf05200 100644 --- a/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedURL.java +++ b/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedURL.java @@ -2,6 +2,7 @@ package org.gcube.socialnetworking.socialtoken; import java.net.MalformedURLException; import java.net.URL; +import java.util.Objects; public class SanitizedURL { @@ -12,7 +13,7 @@ public class SanitizedURL { protected final URL url; public SanitizedURL(String urlString) throws MalformedURLException { - if(urlString==null || urlString.compareTo("")==0) { + if(Objects.isNull(urlString) || urlString.isEmpty()) { throw new MalformedURLException(); } diff --git a/src/main/java/org/gcube/socialnetworking/socialtoken/SocialMessageParser.java b/src/main/java/org/gcube/socialnetworking/socialtoken/SocialMessageParser.java index 68ffa82..0de4085 100644 --- a/src/main/java/org/gcube/socialnetworking/socialtoken/SocialMessageParser.java +++ b/src/main/java/org/gcube/socialnetworking/socialtoken/SocialMessageParser.java @@ -1,7 +1,6 @@ package org.gcube.socialnetworking.socialtoken; import java.io.StringWriter; -import java.net.URL; import java.util.ArrayList; import java.util.List; @@ -44,26 +43,28 @@ public class SocialMessageParser { GCubeStringTokenizer socialStringTokenizer = new GCubeStringTokenizer(originalMessage); for(Token token : socialStringTokenizer.getTokens()) { String tokenString = token.getToken(); - if(tokenString.startsWith("#")) { - HashTagToken tagToken = new HashTagToken(token); - try { - hashtags.add(tagToken.getTag()); - tokens.add(tagToken); - tagTokens.add(tagToken); - continue; - }catch (Exception e) { - // Not a valid tag - } - } - URL url = URLToken.isURL(tokenString); - if(url!=null) { + SanitizedURL sanitizedURL = URLToken.isURL(tokenString); + if(sanitizedURL!=null) { URLToken urlToken = new URLToken(token); tokens.add(urlToken); urlTokens.add(urlToken); continue; } + SanitizedHashTag sanitizedHashTag = HashTagToken.isHashTag(tokenString); + if(sanitizedHashTag!=null) { + HashTagToken hashTagToken = new HashTagToken(token); + try { + hashtags.add(hashTagToken.getHashTag()); + tokens.add(hashTagToken); + tagTokens.add(hashTagToken); + continue; + }catch (Exception e) { + // Not a valid tag + } + } + ReplaceableToken replaceableToken = new ReplaceableToken(token); tokens.add(replaceableToken); } diff --git a/src/main/java/org/gcube/socialnetworking/socialtoken/URLToken.java b/src/main/java/org/gcube/socialnetworking/socialtoken/URLToken.java index 8e45761..fccde43 100644 --- a/src/main/java/org/gcube/socialnetworking/socialtoken/URLToken.java +++ b/src/main/java/org/gcube/socialnetworking/socialtoken/URLToken.java @@ -19,17 +19,6 @@ public class URLToken extends ReplaceableToken { super(token); } - public static URL isURL(String url) { - try { - SanitizedURL sanitizedURL = new SanitizedURL(url); - return sanitizedURL.getURL(); - } catch(MalformedURLException e) { - // not an URL - return null; - } - - } - public String getTokenReplacement() { if(!replaced) { try { @@ -52,4 +41,13 @@ public class URLToken extends ReplaceableToken { return sanitizedURL.getURL(); } + public static SanitizedURL isURL(String url) { + try { + return new SanitizedURL(url); + } catch(MalformedURLException e) { + // not an URL + return null; + } + + } } diff --git a/src/test/java/org/gcube/social_networking/socialutillibrary/MessageParserTest.java b/src/test/java/org/gcube/social_networking/socialutillibrary/MessageParserTest.java index 889edfd..7056b4e 100644 --- a/src/test/java/org/gcube/social_networking/socialutillibrary/MessageParserTest.java +++ b/src/test/java/org/gcube/social_networking/socialutillibrary/MessageParserTest.java @@ -5,9 +5,11 @@ import java.util.ArrayList; import java.util.List; import org.gcube.portlets.widgets.pickitem.shared.ItemBean; +import org.gcube.socialnetworking.socialtoken.SanitizedHashTag; import org.gcube.socialnetworking.socialtoken.SanitizedURL; import org.gcube.socialnetworking.socialtoken.SocialMessageParser; import org.gcube.socialnetworking.socialtoken.URLToken; +import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +93,7 @@ public class MessageParserTest { SocialMessageParser messageParser = new SocialMessageParser(TEST_LUCA_6); logger.debug(messageParser.getParsedMessage()); messageParser = new SocialMessageParser(TEST_LUCA_7); - logger.debug(messageParser.getParsedMessage()); + logger.debug("{}", messageParser.getParsedMessage()); } @@ -118,5 +120,51 @@ public class MessageParserTest { text = "post \"a text with #hashtag);\""; SocialMessageParser messageParser = new SocialMessageParser(text); logger.debug(messageParser.getParsedMessage()); + + text = "\"#hashtag and #hashtag repeated."; + messageParser = new SocialMessageParser(text); + logger.debug(messageParser.getParsedMessage()); + } + + @Test + public void checkHashTag() throws Exception { + String token = "\"#hashtag\""; + SanitizedHashTag sanitizedHashTag = new SanitizedHashTag(token); + logger.trace("{}", sanitizedHashTag); + Assert.assertTrue(sanitizedHashTag.getPrefix().compareTo("\"")==0); + Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0); + Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\"")==0); + } + + @Test(expected=IllegalArgumentException.class) + public void hasTagwithURL() throws Exception { + String token = "https://wiki.gcube-system.org/gcube/GCat_Background#GeoSpatial_search_for_datasets:_via_API_or_Search_Widget"; + new SanitizedHashTag(token); + } + + @Test + public void checkHashTag2() throws Exception { + String token = "\"#hashtag\");"; + SanitizedHashTag sanitizedHashTag = new SanitizedHashTag(token); + logger.trace("{}", sanitizedHashTag); + Assert.assertTrue(sanitizedHashTag.getPrefix().compareTo("\"")==0); + Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0); + Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\");")==0); + } + + @Test + public void checkHashTag3() throws Exception { + String token = ";(\"#hashtag\");"; + SanitizedHashTag sanitizedHashTag = new SanitizedHashTag(token); + logger.trace("{}", sanitizedHashTag); + Assert.assertTrue(sanitizedHashTag.getPrefix().compareTo(";(\"")==0); + Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0); + Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\");")==0); + } + + @Test(expected=IllegalArgumentException.class) + public void checkHashTag4() throws Exception { + String token = ";(\"#hashtag\");]"; + new SanitizedHashTag(token); } }