Merge remote-tracking branch 'origin/feature/18356'

This commit is contained in:
Massimiliano Assante 2021-01-15 11:01:16 +01:00
commit 2e0abbe260
13 changed files with 104 additions and 15 deletions

View File

@ -22,7 +22,6 @@
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">

View File

@ -3,7 +3,9 @@ org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8

View File

@ -1,4 +1,4 @@
# gCube System - Social Util Library
# gCube Social Util Library
## Structure of the project
@ -19,13 +19,31 @@ See [Releases](https://code-repo.d4science.org/gCubeSystem/social-util-library/r
## Authors
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Costantino Perciante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
## Maintainers
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
## How to Cite this Software
Tell people how to cite this software.
* Cite an associated paper?
* Use a specific BibTeX entry for the software?
@software{,
title = {gCube Social Util Library},
author = {Frosini, Luca and Assante, Massimiliano and Perciante, Costantino},
organization = {ISTI - CNR},
address = {Pisa, Italy},
year = 2021,
url = {http://www.gcube-system.org/}
}
## License
This project is licensed under the EUPL V.1.1 License - see the [LICENSE.md](LICENSE.md) file for details.
@ -40,9 +58,23 @@ by favouring the realisation of reuse oriented policies.
The projects leading to this software have received funding from a series of European Union programmes including:
- the Sixth Framework Programme for Research and Technological Development
- DILIGENT (grant no. 004260);
- DILIGENT (grant no. 004260).
- the Seventh Framework Programme for research, technological development and demonstration
- D4Science (grant no. 212488), D4Science-II (grant no.239019), ENVRI (grant no. 283465), EUBrazilOpenBio (grant no. 288754), iMarine(grant no. 283644);
- D4Science (grant no. 212488);
- D4Science-II (grant no. 239019);
- ENVRI (grant no. 283465);
- iMarine (grant no. 283644);
- EUBrazilOpenBio (grant no. 288754).
- the H2020 research and innovation programme
- BlueBRIDGE (grant no. 675680), EGIEngage (grant no. 654142), ENVRIplus (grant no. 654182), Parthenos (grant no. 654119), SoBigData (grant no. 654024),DESIRA (grant no. 818194), ARIADNEplus (grant no. 823914), RISIS2 (grant no. 824091), PerformFish (grant no. 727610), AGINFRAplus (grant no. 731001);
- SoBigData (grant no. 654024);
- PARTHENOS (grant no. 654119);
- EGIEngage (grant no. 654142);
- ENVRIplus (grant no. 654182);
- BlueBRIDGE (grant no. 675680);
- PerformFish (grant no. 727610);
- AGINFRAplus (grant no. 731001);
- DESIRA (grant no. 818194);
- ARIADNEplus (grant no. 823914);
- RISIS2 (grant no. 824091).

View File

@ -31,6 +31,11 @@ import org.htmlparser.beans.StringBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Costantino Perciante (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
* @author Luca Frosini (ISTI - CNR)
*/
public class Utils {
/**

View File

@ -5,6 +5,7 @@ import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class HashTagToken extends ReplaceableToken {

View File

@ -6,6 +6,9 @@ import java.util.Map;
import org.apache.commons.codec.binary.Base64;
import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class ReplaceableToken extends Token {
protected boolean replaced;

View File

@ -4,6 +4,10 @@ import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class SanitizedHashTag {
private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w.-]{1,}[\\W]{0,3}";

View File

@ -4,6 +4,10 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.util.Objects;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class SanitizedURL {
private static String CHARACTERS_TO_REMOVE = "[\\.\\,\\;\\(\\)\\:\\\"\\'\\\\\\\\\\«\\»]";
@ -17,8 +21,6 @@ public class SanitizedURL {
throw new MalformedURLException();
}
prefix = urlString.substring(0,1);
if(prefix.matches(CHARACTERS_TO_REMOVE)) {
prefix = urlString.substring(0, 1);
@ -31,16 +33,28 @@ public class SanitizedURL {
urlString = "http://" + urlString;
}
postfix = urlString.substring(urlString.length()-1);
if(postfix.matches(CHARACTERS_TO_REMOVE)) {
urlString = urlString.substring(0, urlString.length()-1);
}else {
postfix = "";
}
urlString = stripPostfix(urlString);
url = new URL(urlString);
}
/**
 * Strips every trailing character matching {@code CHARACTERS_TO_REMOVE} from the given
 * string and prepends the stripped run, in its original order, to the {@code postfix} field.
 * <p>
 * A URL found in free text can terminate with more than one character to be removed,
 * e.g. 'https://data.d4science.net/tfXA),' terminates with '),'.
 * The scan is iterative, so its depth does not grow with the number of trailing
 * characters, and an empty string is returned unchanged instead of failing on the
 * last-character lookup.
 *
 * @param urlString the candidate URL, possibly followed by trailing characters to remove
 * @return {@code urlString} without the trailing characters to remove
 */
private String stripPostfix(String urlString) {
	int end = urlString.length();
	// Walk backwards over the trailing run; 'end > 0' also covers the empty string.
	while(end > 0 && urlString.substring(end-1, end).matches(CHARACTERS_TO_REMOVE)) {
		end--;
	}
	if(end < urlString.length()) {
		// The newly stripped run precedes whatever postfix already holds.
		postfix = urlString.substring(end) + postfix;
		urlString = urlString.substring(0, end);
	}
	return urlString;
}
/**
 * @return the current value of the {@code prefix} field
 */
public String getPrefix() {
	return this.prefix;
}

View File

@ -15,6 +15,9 @@ import org.gcube.vomanagement.usermanagement.model.GCubeTeam;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class SocialMessageParser {
private static final Logger logger = LoggerFactory.getLogger(SocialMessageParser.class);

View File

@ -7,6 +7,9 @@ import java.util.Map;
import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class URLToken extends ReplaceableToken {
protected SanitizedURL sanitizedURL;

View File

@ -5,6 +5,9 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class GCubeStringTokenizer {
/**

View File

@ -1,5 +1,8 @@
package org.gcube.socialnetworking.tokenization;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class Token{
protected final String token;

View File

@ -56,6 +56,22 @@ public class MessageParserTest {
public static final String TEST_LUCA_8 = "Hello this link \"https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=SELECT+%3Ftype+%28COUNT%28%3Ftype%29+as+%3FtypeCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+a+%3Ftype%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Ftype&format=text%2Fhtml&timeout=0&debug=on\" is a SPARQL query ";
/**
 * Multi-line message used for bug 18356. It contains two URLs that are immediately
 * followed by more than one punctuation character:
 * "(https://data.d4science.net/tfXA)," and "https://data.d4science.net/JEm7).".
 */
public static final String TEST_BUG_18356 = "Dear B-C colleagues,\n"
+ "\n"
+ "Just a kind reminder that we look forward to welcoming all of you during the Projects upcoming Blue-Cloud “Service Exploitation & Sustainability Plan (SE&SP) and Roadmap to 2030” Workshop next Thursday, January 21st (9.30am to 1.30pm). Everyone's welcome for an interactive and productive discussion.\n"
+ "\n"
+ "Please find here a link to the workshop Agenda (https://data.d4science.net/tfXA), including connection details.\n"
+ "\n"
+ "As discussed, the workshop will provide an opportunity to share the high-level results of the initial phase of stakeholder consultations towards the B-C Roadmap and to work within the Partnership to discuss and align some of the underlying, strategic concepts and ideas that will set the direction for the B-C SE&SP and B-C Roadmap to 2030.\n"
+ "\n"
+ "To inform the discussions to be held during the workshop, we have produced a report bringing together all the feedback, messages and recommendations gathered during the initial phase of stakeholder consultations (available here - https://data.d4science.net/JEm7). It is a long document, but we have included a one-page Executive Summary with high-level, key messages & recommendations, in case useful.\n"
+ "\n"
+ "We look forward to a productive discussion with you next Thursday!\n"
+ "\n"
+ "Kind regards,\n"
+ "Julia";
@Test
public void test13() {
SocialMessageParser messageParser = new SocialMessageParser(TEST_13);
@ -105,7 +121,8 @@ public class MessageParserTest {
logger.debug(messageParser.getParsedMessage());
messageParser = new SocialMessageParser(TEST_LUCA_7);
logger.debug("{}", messageParser.getParsedMessage());
messageParser = new SocialMessageParser(TEST_BUG_18356);
logger.debug("{}", messageParser.getParsedMessage());
}
@Test(expected=MalformedURLException.class)