Merge remote-tracking branch 'origin/feature/18356'
This commit is contained in:
commit
2e0abbe260
|
@ -22,7 +22,6 @@
|
|||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
|
||||
|
|
|
@ -3,7 +3,9 @@ org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
|
|||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
|
||||
org.eclipse.jdt.core.compiler.compliance=1.8
|
||||
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
|
||||
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
|
||||
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
|
||||
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
|
||||
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
|
||||
org.eclipse.jdt.core.compiler.release=disabled
|
||||
org.eclipse.jdt.core.compiler.source=1.8
|
||||
|
|
40
README.md
40
README.md
|
@ -1,4 +1,4 @@
|
|||
# gCube System - Social Util Library
|
||||
# gCube Social Util Library
|
||||
|
||||
## Structure of the project
|
||||
|
||||
|
@ -19,13 +19,31 @@ See [Releases](https://code-repo.d4science.org/gCubeSystem/social-util-library/r
|
|||
|
||||
## Authors
|
||||
|
||||
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
|
||||
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
|
||||
* **Costantino Perciante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
|
||||
|
||||
|
||||
## Maintainers
|
||||
|
||||
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
|
||||
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
|
||||
|
||||
## How to Cite this Software
|
||||
|
||||
Tell people how to cite this software.
|
||||
* Cite an associated paper?
|
||||
* Use a specific BibTeX entry for the software?
|
||||
|
||||
@software{gcube_social_util_library,
|
||||
title = {gCube Social Util Library},
|
||||
author = {Frosini, Luca and Assante, Massimiliano and Perciante, Costantino},
|
||||
organization = {ISTI - CNR},
|
||||
address = {Pisa, Italy},
|
||||
year = 2021,
|
||||
url = {http://www.gcube-system.org/}
|
||||
}
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the EUPL V.1.1 License - see the [LICENSE.md](LICENSE.md) file for details.
|
||||
|
@ -40,9 +58,23 @@ by favouring the realisation of reuse oriented policies.
|
|||
The projects leading to this software have received funding from a series of European Union programmes including:
|
||||
|
||||
- the Sixth Framework Programme for Research and Technological Development
|
||||
- DILIGENT (grant no. 004260);
|
||||
- DILIGENT (grant no. 004260).
|
||||
- the Seventh Framework Programme for research, technological development and demonstration
|
||||
- D4Science (grant no. 212488), D4Science-II (grant no.239019), ENVRI (grant no. 283465), EUBrazilOpenBio (grant no. 288754), iMarine(grant no. 283644);
|
||||
- D4Science (grant no. 212488);
|
||||
- D4Science-II (grant no. 239019);
|
||||
- ENVRI (grant no. 283465);
|
||||
- iMarine (grant no. 283644);
|
||||
- EUBrazilOpenBio (grant no. 288754).
|
||||
- the H2020 research and innovation programme
|
||||
- BlueBRIDGE (grant no. 675680), EGIEngage (grant no. 654142), ENVRIplus (grant no. 654182), Parthenos (grant no. 654119), SoBigData (grant no. 654024),DESIRA (grant no. 818194), ARIADNEplus (grant no. 823914), RISIS2 (grant no. 824091), PerformFish (grant no. 727610), AGINFRAplus (grant no. 731001);
|
||||
- SoBigData (grant no. 654024);
|
||||
- PARTHENOS (grant no. 654119);
|
||||
- EGIEngage (grant no. 654142);
|
||||
- ENVRIplus (grant no. 654182);
|
||||
- BlueBRIDGE (grant no. 675680);
|
||||
- PerformFish (grant no. 727610);
|
||||
- AGINFRAplus (grant no. 731001);
|
||||
- DESIRA (grant no. 818194);
|
||||
- ARIADNEplus (grant no. 823914);
|
||||
- RISIS2 (grant no. 824091).
|
||||
|
||||
|
|
@ -31,6 +31,11 @@ import org.htmlparser.beans.StringBean;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author Costantino Perciante (ISTI - CNR)
|
||||
* @author Massimiliano Assante (ISTI - CNR)
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
*/
|
||||
public class Utils {
|
||||
|
||||
/**
|
||||
|
|
|
@ -5,6 +5,7 @@ import org.gcube.socialnetworking.tokenization.Token;
|
|||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
* @author Massimiliano Assante (ISTI - CNR)
|
||||
*/
|
||||
public class HashTagToken extends ReplaceableToken {
|
||||
|
||||
|
|
|
@ -6,6 +6,9 @@ import java.util.Map;
|
|||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.gcube.socialnetworking.tokenization.Token;
|
||||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
*/
|
||||
public class ReplaceableToken extends Token {
|
||||
|
||||
protected boolean replaced;
|
||||
|
|
|
@ -4,6 +4,10 @@ import java.util.Objects;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
* @author Massimiliano Assante (ISTI - CNR)
|
||||
*/
|
||||
public class SanitizedHashTag {
|
||||
|
||||
private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w.-]{1,}[\\W]{0,3}";
|
||||
|
|
|
@ -4,6 +4,10 @@ import java.net.MalformedURLException;
|
|||
import java.net.URL;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
* @author Massimiliano Assante (ISTI - CNR)
|
||||
*/
|
||||
public class SanitizedURL {
|
||||
|
||||
private static String CHARACTERS_TO_REMOVE = "[\\.\\,\\;\\(\\)\\:\\\"\\'\\“\\”\\‘\\’\\«\\»]";
|
||||
|
@ -17,8 +21,6 @@ public class SanitizedURL {
|
|||
throw new MalformedURLException();
|
||||
}
|
||||
|
||||
|
||||
|
||||
prefix = urlString.substring(0,1);
|
||||
if(prefix.matches(CHARACTERS_TO_REMOVE)) {
|
||||
prefix = urlString.substring(0, 1);
|
||||
|
@ -31,16 +33,28 @@ public class SanitizedURL {
|
|||
urlString = "http://" + urlString;
|
||||
}
|
||||
|
||||
postfix = urlString.substring(urlString.length()-1);
|
||||
if(postfix.matches(CHARACTERS_TO_REMOVE)) {
|
||||
urlString = urlString.substring(0, urlString.length()-1);
|
||||
}else {
|
||||
postfix = "";
|
||||
}
|
||||
postfix = "";
|
||||
urlString = stripPostfix(urlString);
|
||||
|
||||
url = new URL(urlString);
|
||||
}
|
||||
|
||||
private String stripPostfix(String urlString) {
|
||||
String testPostFix = urlString.substring(urlString.length()-1);
|
||||
if(testPostFix.matches(CHARACTERS_TO_REMOVE)) {
|
||||
postfix = urlString.substring(urlString.length()-1) + postfix;
|
||||
urlString = urlString.substring(0, urlString.length()-1);
|
||||
|
||||
/*
|
||||
* The urlString could terminate with more than one character to be escaped
|
||||
* So we need to further analyse the sanitised URL
|
||||
* e.g. 'https://data.d4science.net/tfXA),' terminates with '),'
|
||||
*/
|
||||
urlString = stripPostfix(urlString);
|
||||
}
|
||||
return urlString;
|
||||
}
|
||||
|
||||
/**
 * Returns the current value of the {@code prefix} field.
 * NOTE(review): the visible constructor lines assign it the first character
 * of the input and test it against CHARACTERS_TO_REMOVE; the rest of that
 * logic is outside this view, so confirm what it holds otherwise.
 *
 * @return the {@code prefix} field
 */
public String getPrefix() {
|
||||
return prefix;
|
||||
}
|
||||
|
|
|
@ -15,6 +15,9 @@ import org.gcube.vomanagement.usermanagement.model.GCubeTeam;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
*/
|
||||
public class SocialMessageParser {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(SocialMessageParser.class);
|
||||
|
|
|
@ -7,6 +7,9 @@ import java.util.Map;
|
|||
|
||||
import org.gcube.socialnetworking.tokenization.Token;
|
||||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
*/
|
||||
public class URLToken extends ReplaceableToken {
|
||||
|
||||
protected SanitizedURL sanitizedURL;
|
||||
|
|
|
@ -5,6 +5,9 @@ import java.util.List;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
*/
|
||||
public class GCubeStringTokenizer {
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
package org.gcube.socialnetworking.tokenization;
|
||||
|
||||
/**
|
||||
* @author Luca Frosini (ISTI - CNR)
|
||||
*/
|
||||
public class Token{
|
||||
|
||||
protected final String token;
|
||||
|
|
|
@ -56,6 +56,22 @@ public class MessageParserTest {
|
|||
public static final String TEST_LUCA_8 = "Hello this link \"https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=SELECT+%3Ftype+%28COUNT%28%3Ftype%29+as+%3FtypeCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+a+%3Ftype%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Ftype&format=text%2Fhtml&timeout=0&debug=on\" is a SPARQL query ";
|
||||
|
||||
|
||||
public static final String TEST_BUG_18356 = "Dear B-C colleagues,\n"
|
||||
+ "\n"
|
||||
+ "Just a kind reminder that we look forward to welcoming all of you during the Project’s upcoming Blue-Cloud “Service Exploitation & Sustainability Plan (SE&SP) and Roadmap to 2030” Workshop next Thursday, January 21st (9.30am to 1.30pm). Everyone's welcome for an interactive and productive discussion.\n"
|
||||
+ "\n"
|
||||
+ "Please find here a link to the workshop Agenda (https://data.d4science.net/tfXA), including connection details.\n"
|
||||
+ "\n"
|
||||
+ "As discussed, the workshop will provide an opportunity to share the high-level results of the initial phase of stakeholder consultations towards the B-C Roadmap and to work within the Partnership to discuss and align some of the underlying, strategic concepts and ideas that will set the direction for the B-C SE&SP and B-C Roadmap to 2030.\n"
|
||||
+ "\n"
|
||||
+ "To inform the discussions to be held during the workshop, we have produced a report bringing together all the feedback, messages and recommendations gathered during the initial phase of stakeholder consultations (available here - https://data.d4science.net/JEm7). It is a long document, but we have included a one-page Executive Summary with high-level, key messages & recommendations, in case useful.\n"
|
||||
+ "\n"
|
||||
+ "We look forward to a productive discussion with you next Thursday!\n"
|
||||
+ "\n"
|
||||
+ "Kind regards,\n"
|
||||
+ "Julia";
|
||||
|
||||
|
||||
@Test
|
||||
public void test13() {
|
||||
SocialMessageParser messageParser = new SocialMessageParser(TEST_13);
|
||||
|
@ -105,7 +121,8 @@ public class MessageParserTest {
|
|||
logger.debug(messageParser.getParsedMessage());
|
||||
messageParser = new SocialMessageParser(TEST_LUCA_7);
|
||||
logger.debug("{}", messageParser.getParsedMessage());
|
||||
|
||||
messageParser = new SocialMessageParser(TEST_BUG_18356);
|
||||
logger.debug("{}", messageParser.getParsedMessage());
|
||||
}
|
||||
|
||||
@Test(expected=MalformedURLException.class)
|
||||
|
|
Loading…
Reference in New Issue