Fixes Bug #17844 Hashtag does not support numbers e.g. #4.15.0

Fixes Bug #17811 Avoid # being considered a hashtagged topic
This commit is contained in:
Massimiliano Assante 2019-12-06 15:17:29 +01:00
parent 2c0614a7d0
commit 004db5a034
3 changed files with 68 additions and 46 deletions

View File

@ -10,7 +10,7 @@
<groupId>org.gcube.socialnetworking</groupId>
<artifactId>social-util-library</artifactId>
<version>1.7.1</version>
<version>1.7.2-SNAPSHOT</version>
<name>social-util-library</name>
<description>
The social-util-library contains utility functions that can be used by the social-networking portlets.

View File

@ -6,10 +6,10 @@ import java.util.regex.Pattern;
public class SanitizedHashTag {
private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w-]*[\\W]{0,3}";
private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w.-]{1,}[\\W]{0,3}";
private static final Pattern RECOGNIZE_HASHTAG_PATTERN;
private static final String HASHTAG_REGEX = "#[\\w-]*";
private static final String HASHTAG_REGEX = "#[\\w.-]{1,}";
private static final Pattern HASHTAG_PATTERN;
static {
@ -41,6 +41,11 @@ public class SanitizedHashTag {
prefix = string.substring(0,matcher.start());
hashTag = string.substring(matcher.start(), matcher.end());
postfix = string.substring(matcher.end());
if (hashTag.endsWith(".")) {
hashTag = hashTag.substring(0, hashTag.length()-1);
postfix += ".";
}
}else {
throw new IllegalArgumentException(string + " is not a valid TAG");
}

View File

@ -15,80 +15,80 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MessageParserTest {
/** Logger used by the tests in this class to print parser output at debug level. */
private static final Logger logger = LoggerFactory.getLogger(MessageParserTest.class);
/**
 * Sample post mixing plain text, four hashtags (#Tcom, #Apple, #Cupertino, #Doodle),
 * one http link and embedded newlines.
 */
public static final String TEST_11 = "Dear all, this is a test to ignore, to select a week for the upcoming 194th #Tcom event, "
+ "hosted by #Apple in #Cupertino, please use this #Doodle: http://Doodle.com/poll/not-existing-poll \n\n"
+ "We're closing the poll next Thursday 16th March.";
/**
 * Sample post containing two long https links whose query strings are
 * percent-encoded SPARQL queries; it contains no hashtag.
 */
public static final String TEST_12 = "Just because I am so happy to have the SPARQL-endpoint available, \n"
+ "sharing some sample SPARQL queries: \n\n" + "* Classes & usage counts: \n"
+ "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on\n"
+ "\n" + "* properties and usage counts: \n"
+ "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=SELECT+%3Ftype+%28COUNT%28%3Ftype%29+as+%3FtypeCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+a+%3Ftype%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Ftype&format=text%2Fhtml&timeout=0&debug=on\n";
/**
 * Sample catalogue notification: a quoted item title, an http link containing
 * underscores, and two upper-case hashtags on the last line.
 */
public static final String TEST_13 = "Dear members,\n"
+ "The item 'Webinar on Ontology Management using VOCBENCH in the context of AGINFRAPLUS Project' has been just published by Leonardo Candela .\n"
+ "You can find it here: http://data.d4science.org/ctlg/AGINFRAplus/webinar_on_ontology_management_using_vocbench_in_the_context_of_aginfraplus_project \n"
+ "#AGINFRAPLUS #VOCBENCH";
/**
 * Sample catalogue notification mentioning "Francesco Mangiacrapa" by name, with an
 * http link and three hashtags that contain underscores and digits.
 */
public static final String TEST_LUCA_1 = "Dear members,\n"
+ "The item 'just a test with time fields' has been just published by Francesco Mangiacrapa.\n"
+ "You can find it here: http://data-d.d4science.org/ctlg/NextNext/just_a_test_with_time_fields\n"
+ "#Text_mining #Field_1 #B3";
/** Text followed by HTML-escaped anchor markup whose href is a bare "#". */
public static final String TEST_LUCA_2 = "Francesco Mangiacrapa prova &lt;a href=\"#\"&gt;Francesco Mangiacrapa&lt;/a&gt;";
/** Text ending with "&nbsp" written without the closing semicolon. */
public static final String TEST_LUCA_3 = "test &nbsp";
/** Text containing a bare ampersand next to escaped "&lt;" entities. */
public static final String TEST_LUCA_4 = "test &&lt;nbsp &lt;";
/** Multi-line Italian text with an https link placed alone on the second line. */
public static final String TEST_LUCA_5 = "Accedete a questo link che vi porta ad un post su linkedin. \n" +
"https://www.linkedin.com/feed/update/urn:li:activity:6488779074213801984/\n" +
"I numeri riportati sono veri ed è motivo di orgoglio per tutti noi aver contribuito alla realizzazione della d4s infra che ha questo utilizzo via i diversi gateway. ";
/** A scheme-less "www." address wrapped in parentheses. */
public static final String TEST_LUCA_6 = "(www.google.it)";
/** An https link wrapped in parentheses after a word. */
public static final String TEST_LUCA_7 = "Hello (https://doodle.com/poll/not-existing-poll)";
/** An https link with a long percent-encoded query string, wrapped in double quotes. */
public static final String TEST_LUCA_8 = "Hello this link \"https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=SELECT+%3Ftype+%28COUNT%28%3Ftype%29+as+%3FtypeCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+a+%3Ftype%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Ftype&format=text%2Fhtml&timeout=0&debug=on\" is a SPARQL query ";
/**
 * Parses {@link #TEST_13} and writes the parsed message to the debug log.
 * The test makes no assertion; it only fails if parsing throws.
 */
@Test
public void test13() {
    logger.debug(new SocialMessageParser(TEST_13).getParsedMessage());
}
/**
 * Runs the parser over several sample messages and logs each parsed result.
 * The first group is parsed without context; the second group is parsed with a
 * single mentioned user and the "/group/nextnext" argument. No assertions are made.
 */
@Test
public void test() {
    String message = "Prova #Pippo Pollo http://google) <a href=\"/test\">Luca</a> https://www.linkedin.com/feed/update/urn:li:activity:6488779074213801984 :) ";

    // Samples parsed with the no-argument getParsedMessage().
    String[] plainSamples = { message, TEST_11, TEST_12, TEST_13 };
    for (String sample : plainSamples) {
        logger.debug(new SocialMessageParser(sample).getParsedMessage());
    }

    // One non-group mention shared by the remaining samples.
    ItemBean mention = new ItemBean("21150", "francesco.mangiacrapa", "Francesco Mangiacrapa", "");
    mention.setItemGroup(false);
    List<ItemBean> mentionedUsers = new ArrayList<>();
    mentionedUsers.add(mention);

    String[] mentionSamples = { TEST_LUCA_1, TEST_LUCA_2 };
    for (String sample : mentionSamples) {
        logger.debug(new SocialMessageParser(sample).getParsedMessage(mentionedUsers, "/group/nextnext"));
    }
}
@Test
public void anotherTest() {
SocialMessageParser messageParser = new SocialMessageParser(TEST_LUCA_4);
@ -98,21 +98,21 @@ public class MessageParserTest {
messageParser = new SocialMessageParser(TEST_LUCA_8);
logger.debug(messageParser.getParsedMessage());
}
/**
 * Parses the two samples whose link is wrapped in parentheses
 * ({@link #TEST_LUCA_6} and {@link #TEST_LUCA_7}) and logs each parsed message.
 */
@Test
public void urlWithParentesisTest() {
    String parsedWwwSample = new SocialMessageParser(TEST_LUCA_6).getParsedMessage();
    logger.debug(parsedWwwSample);

    String parsedHttpsSample = new SocialMessageParser(TEST_LUCA_7).getParsedMessage();
    logger.debug("{}", parsedHttpsSample);
}
/**
 * Constructs a {@code SanitizedURL} from {@code null}; the test passes only if
 * that constructor call throws {@link MalformedURLException}.
 */
@Test(expected=MalformedURLException.class)
public void auxTest() throws MalformedURLException {
new SanitizedURL(null);
}
protected String findFirstLink(String message) {
try {
SocialMessageParser messageParser = new SocialMessageParser(message);
@ -122,21 +122,37 @@ public class MessageParserTest {
return null;
}
}
/**
 * Logs the result of {@code findFirstLink} for a message with a parenthesised link,
 * then logs the parsed form of two messages where a hashtag touches quotes or
 * other punctuation. No assertions are made.
 */
// NOTE(review): the eight statements below appear twice, including the declarations of
// `text` and `messageParser` — presumably the old and new sides of a whitespace-only
// diff rendered together; confirm against the real file and keep a single copy.
@Test
public void testTest() {
String text = "Hello (https://doodle.com/poll/not-existing-poll)";
logger.debug(findFirstLink(text));
text = "post \"a text with #hashtag);\"";
SocialMessageParser messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
text = "\"#hashtag and #hashtag repeated.";
messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
String text = "Hello (https://doodle.com/poll/not-existing-poll)";
logger.debug(findFirstLink(text));
text = "post \"a text with #hashtag);\"";
SocialMessageParser messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
text = "\"#hashtag and #hashtag repeated.";
messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
}
/**
 * Checks hashtag extraction on a message where hashtags are followed by punctuation,
 * wrapped in quotes or parentheses, or made of digits and dots (e.g. {@code #4.12}).
 * The message also ends with a bare {@code #}; exactly seven hashtags are expected.
 *
 * @throws Exception if parsing the message fails
 */
@Test
public void checkHashTag7() throws Exception {
    String text = "Checking hashtag between quotes #hashtag1; #hashtag1, #hashtag3. \"#hashtag5\" is recognized (#anotherHashtag) #4. #4.12 # ";
    SocialMessageParser messageParser = new SocialMessageParser(text);
    logger.debug(messageParser.getParsedMessage());
    List<String> htags = messageParser.getHashtags();
    for (String hTag : htags) {
        // Parameterized form: no string concatenation when debug logging is disabled.
        logger.debug("found hashtag:{}", hTag);
    }
    logger.debug("messageParser.getHashtags().size() should be 7, is: {}", htags.size());
    // assertEquals reports expected vs. actual on failure, unlike assertTrue(size == 7).
    Assert.assertEquals("unexpected number of hashtags", 7, htags.size());
}
@Test
public void checkHashTag() throws Exception {
String token = "\"#hashtag\"";
@ -146,13 +162,13 @@ public class MessageParserTest {
Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0);
Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\"")==0);
}
/**
 * Builds a {@code SanitizedHashTag} from a URL whose fragment starts with {@code #};
 * the test passes only if construction throws {@link IllegalArgumentException}.
 */
@Test(expected=IllegalArgumentException.class)
public void hasTagwithURL() throws Exception {
    final String urlWithFragment = "https://wiki.gcube-system.org/gcube/GCat_Background#GeoSpatial_search_for_datasets:_via_API_or_Search_Widget";
    new SanitizedHashTag(urlWithFragment);
}
@Test
public void checkHashTag2() throws Exception {
String token = "\"#hashtag\");";
@ -162,7 +178,7 @@ public class MessageParserTest {
Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0);
Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\");")==0);
}
@Test
public void checkHashTag3() throws Exception {
String token = ";(\"#hashtag\");";
@ -172,7 +188,8 @@ public class MessageParserTest {
Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0);
Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\");")==0);
}
@Test(expected=IllegalArgumentException.class)
public void checkHashTag4() throws Exception {
String token = ";(\"#hashtag\");]";