Merge remote-tracking branch 'origin/feature/18356'

master
Massimiliano Assante 3 years ago
commit 2e0abbe260

@ -22,7 +22,6 @@
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">

@ -3,7 +3,9 @@ org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8

@ -1,4 +1,4 @@
# gCube System - Social Util Library
# gCube Social Util Library
## Structure of the project
@ -19,13 +19,31 @@ See [Releases](https://code-repo.d4science.org/gCubeSystem/social-util-library/r
## Authors
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Costantino Perciante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
## Maintainers
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
## How to Cite this Software
Tell people how to cite this software.
* Cite an associated paper?
* Use a specific BibTeX entry for the software?
@software{,
title = {gCube Social Util Library},
author = {Frosini, Luca and Assante, Massimiliano and Perciante, Costantino},
organization = {ISTI - CNR},
address = {Pisa, Italy},
year = 2021,
url = {http://www.gcube-system.org/}
}
## License
This project is licensed under the EUPL V.1.1 License - see the [LICENSE.md](LICENSE.md) file for details.
@ -40,9 +58,23 @@ by favouring the realisation of reuse oriented policies.
The projects leading to this software have received funding from a series of European Union programmes including:
- the Sixth Framework Programme for Research and Technological Development
- DILIGENT (grant no. 004260);
- DILIGENT (grant no. 004260).
- the Seventh Framework Programme for research, technological development and demonstration
- D4Science (grant no. 212488), D4Science-II (grant no.239019), ENVRI (grant no. 283465), EUBrazilOpenBio (grant no. 288754), iMarine(grant no. 283644);
- D4Science (grant no. 212488);
- D4Science-II (grant no. 239019);
- ENVRI (grant no. 283465);
- iMarine (grant no. 283644);
- EUBrazilOpenBio (grant no. 288754).
- the H2020 research and innovation programme
- BlueBRIDGE (grant no. 675680), EGIEngage (grant no. 654142), ENVRIplus (grant no. 654182), Parthenos (grant no. 654119), SoBigData (grant no. 654024),DESIRA (grant no. 818194), ARIADNEplus (grant no. 823914), RISIS2 (grant no. 824091), PerformFish (grant no. 727610), AGINFRAplus (grant no. 731001);
- SoBigData (grant no. 654024);
- PARTHENOS (grant no. 654119);
- EGIEngage (grant no. 654142);
- ENVRIplus (grant no. 654182);
- BlueBRIDGE (grant no. 675680);
- PerformFish (grant no. 727610);
- AGINFRAplus (grant no. 731001);
- DESIRA (grant no. 818194);
- ARIADNEplus (grant no. 823914);
- RISIS2 (grant no. 824091).

@ -31,6 +31,11 @@ import org.htmlparser.beans.StringBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Costantino Perciante (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
* @author Luca Frosini (ISTI - CNR)
*/
public class Utils {
/**

@ -5,6 +5,7 @@ import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class HashTagToken extends ReplaceableToken {

@ -6,6 +6,9 @@ import java.util.Map;
import org.apache.commons.codec.binary.Base64;
import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class ReplaceableToken extends Token {
protected boolean replaced;

@ -4,6 +4,10 @@ import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class SanitizedHashTag {
private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w.-]{1,}[\\W]{0,3}";

@ -4,6 +4,10 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.util.Objects;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class SanitizedURL {
private static String CHARACTERS_TO_REMOVE = "[\\.\\,\\;\\(\\)\\:\\\"\\'\\“\\”\\\\\\«\\»]";
@ -17,8 +21,6 @@ public class SanitizedURL {
throw new MalformedURLException();
}
prefix = urlString.substring(0,1);
if(prefix.matches(CHARACTERS_TO_REMOVE)) {
prefix = urlString.substring(0, 1);
@ -31,16 +33,28 @@ public class SanitizedURL {
urlString = "http://" + urlString;
}
postfix = urlString.substring(urlString.length()-1);
if(postfix.matches(CHARACTERS_TO_REMOVE)) {
urlString = urlString.substring(0, urlString.length()-1);
}else {
postfix = "";
}
postfix = "";
urlString = stripPostfix(urlString);
url = new URL(urlString);
}
/**
 * Removes every trailing character of the given string that matches
 * CHARACTERS_TO_REMOVE and prepends each removed character to the
 * postfix member, so that postfix ends up holding the removed characters
 * in the same order they had in the original string.
 *
 * The string can terminate with more than one character to be removed,
 * e.g. 'https://data.d4science.net/tfXA),' terminates with '),'.
 *
 * @param urlString the string to strip; must not be null
 * @return the string without the trailing characters to remove; it is empty
 *         when every character of the input has been removed
 */
private String stripPostfix(String urlString) {
	// Stop on an empty string: substring(length - 1) would otherwise throw
	// StringIndexOutOfBoundsException when nothing is left to inspect.
	while(!urlString.isEmpty()) {
		int lastIndex = urlString.length() - 1;
		String lastCharacter = urlString.substring(lastIndex);
		if(!lastCharacter.matches(CHARACTERS_TO_REMOVE)) {
			break;
		}
		// Prepend, because characters are collected from right to left
		postfix = lastCharacter + postfix;
		urlString = urlString.substring(0, lastIndex);
	}
	return urlString;
}
/**
 * @return the current value of the prefix member
 */
public String getPrefix() {
	return this.prefix;
}

@ -15,6 +15,9 @@ import org.gcube.vomanagement.usermanagement.model.GCubeTeam;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class SocialMessageParser {
private static final Logger logger = LoggerFactory.getLogger(SocialMessageParser.class);

@ -7,6 +7,9 @@ import java.util.Map;
import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class URLToken extends ReplaceableToken {
protected SanitizedURL sanitizedURL;

@ -5,6 +5,9 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class GCubeStringTokenizer {
/**

@ -1,5 +1,8 @@
package org.gcube.socialnetworking.tokenization;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class Token{
protected final String token;

@ -56,6 +56,22 @@ public class MessageParserTest {
public static final String TEST_LUCA_8 = "Hello this link \"https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=SELECT+%3Ftype+%28COUNT%28%3Ftype%29+as+%3FtypeCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+a+%3Ftype%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Ftype&format=text%2Fhtml&timeout=0&debug=on\" is a SPARQL query ";
public static final String TEST_BUG_18356 = "Dear B-C colleagues,\n"
+ "\n"
+ "Just a kind reminder that we look forward to welcoming all of you during the Projects upcoming Blue-Cloud “Service Exploitation & Sustainability Plan (SE&SP) and Roadmap to 2030” Workshop next Thursday, January 21st (9.30am to 1.30pm). Everyone's welcome for an interactive and productive discussion.\n"
+ "\n"
+ "Please find here a link to the workshop Agenda (https://data.d4science.net/tfXA), including connection details.\n"
+ "\n"
+ "As discussed, the workshop will provide an opportunity to share the high-level results of the initial phase of stakeholder consultations towards the B-C Roadmap and to work within the Partnership to discuss and align some of the underlying, strategic concepts and ideas that will set the direction for the B-C SE&SP and B-C Roadmap to 2030.\n"
+ "\n"
+ "To inform the discussions to be held during the workshop, we have produced a report bringing together all the feedback, messages and recommendations gathered during the initial phase of stakeholder consultations (available here - https://data.d4science.net/JEm7). It is a long document, but we have included a one-page Executive Summary with high-level, key messages & recommendations, in case useful.\n"
+ "\n"
+ "We look forward to a productive discussion with you next Thursday!\n"
+ "\n"
+ "Kind regards,\n"
+ "Julia";
@Test
public void test13() {
SocialMessageParser messageParser = new SocialMessageParser(TEST_13);
@ -105,7 +121,8 @@ public class MessageParserTest {
logger.debug(messageParser.getParsedMessage());
messageParser = new SocialMessageParser(TEST_LUCA_7);
logger.debug("{}", messageParser.getParsedMessage());
messageParser = new SocialMessageParser(TEST_BUG_18356);
logger.debug("{}", messageParser.getParsedMessage());
}
@Test(expected=MalformedURLException.class)

Loading…
Cancel
Save