Compare commits

...

17 Commits

Author SHA1 Message Date
Roberto Cirillo dfa541264e Update 'CHANGELOG.md'
escaped parentheses in changelog
2021-01-27 16:44:59 +01:00
Massimiliano Assante 56b8bb7598 removed SA plugin 2021-01-15 12:49:55 +01:00
Massimiliano Assante c5581b1147 Bug Fixed: Social networking error in parsing URLs ending with ")"
Added changelog.md
2021-01-15 12:48:03 +01:00
Massimiliano Assante 2bdbb47719 Merge remote-tracking branch 'origin/feature/18356' 2021-01-15 11:37:02 +01:00
Luca Frosini 5169601bf1 Fixed url parsing 2021-01-15 11:31:39 +01:00
Massimiliano Assante 2e0abbe260 Merge remote-tracking branch 'origin/feature/18356' 2021-01-15 11:01:16 +01:00
Massimiliano Assante 8dd0a540aa just eclipse settings 2021-01-15 11:01:01 +01:00
Luca Frosini 3735db9ded Added comment 2021-01-15 10:50:29 +01:00
Luca Frosini 81c7b28c62 Fixed bug #18356 2021-01-15 10:47:39 +01:00
Luca Frosini 2692ae036e Fixed authorship 2021-01-15 10:08:33 +01:00
Massimiliano Assante 30ae7642fd fixed pom 2020-03-26 10:44:34 +01:00
Massimiliano Assante 8c742db131 Updated pom for release 2019-12-06 15:20:48 +01:00
Massimiliano Assante 004db5a034 Fixes Bug #17844 Hashtag does not support numbers e.g. #4.15.0
Fixes Bug #17811 Avoid # being considered an hastagged topic
2019-12-06 15:17:29 +01:00
Massimiliano Assante 2c0614a7d0 updated pom and test file 2019-12-03 18:27:14 +01:00
Massimiliano Assante 2ce495afb2 merged for bug fix #18122 2019-12-03 18:04:07 +01:00
Massimiliano Assante 48e514898d Updated pom version 2019-12-03 18:01:40 +01:00
Massimiliano Assante dafe9e8599 Fixes Bug #18122 Social Util Library parsing exceptions 2019-12-03 18:00:48 +01:00
22 changed files with 315 additions and 277 deletions

View File

@ -25,9 +25,9 @@
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="owner.project.facets" value="java"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>

View File

@ -1,9 +1,11 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.7
org.eclipse.jdt.core.compiler.source=1.8

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<faceted-project>
<installed facet="java" version="1.7"/>
<installed facet="jst.utility" version="1.0"/>
<installed facet="java" version="1.8"/>
</faceted-project>

51
CHANGELOG.md Normal file
View File

@ -0,0 +1,51 @@
# Changelog for social util library
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [v1.7.3] - 2019-12-19
[#18356] Bug Fixed: Social networking ... error in parsing URLs ending with "\)"
## [v1.7.2] - 2019-12-19
[#18122] Bug Fixed: Social Util Library parsing exceptions
[#17811] Bug Fixed: Avoid # being considered a hashtagged topic
[#17844] Bug Fixed: Hashtag does not support numbers e.g. #4.15.0
## [v1.6.0] - 2019-03-29
[#13207] Revised social networking library mechanism for http links, mentions and hashtags recognition
## [v1.3.0] - 2017-12-31
[#10700] Fixed regex for hashtags
[#10984] Fixed regex for urls
## [v1.2.1] - 2017-11-13
[#10234] Fixed regex for urls
## [v1.2.0] - 2017-04-13
[#19709] Email Templates library to support a new template or invitation to join VREs
Added method to remove html tags from a text (useful for #247)
## [v1.0.1] - 2016-10-01
[#4937] Hashtag regular expression updated
URL regular expression updated
## [v1.0.0] - 2016-06-01
First release

26
FUNDING.md Normal file
View File

@ -0,0 +1,26 @@
# Acknowledgments
The projects leading to this software have received funding from a series of European Union programmes including:
- the Sixth Framework Programme for Research and Technological Development
- [DILIGENT](https://cordis.europa.eu/project/id/004260) (grant no. 004260).
- the Seventh Framework Programme for research, technological development and demonstration
- [D4Science](https://cordis.europa.eu/project/id/212488) (grant no. 212488);
- [D4Science-II](https://cordis.europa.eu/project/id/239019) (grant no. 239019);
- [ENVRI](https://cordis.europa.eu/project/id/283465) (grant no. 283465);
- [iMarine](https://cordis.europa.eu/project/id/283644) (grant no. 283644);
- [EUBrazilOpenBio](https://cordis.europa.eu/project/id/288754) (grant no. 288754).
- the H2020 research and innovation programme
- [SoBigData](https://cordis.europa.eu/project/id/654024) (grant no. 654024);
- [PARTHENOS](https://cordis.europa.eu/project/id/654119) (grant no. 654119);
- [EGI-Engage](https://cordis.europa.eu/project/id/654142) (grant no. 654142);
- [ENVRI PLUS](https://cordis.europa.eu/project/id/654182) (grant no. 654182);
- [BlueBRIDGE](https://cordis.europa.eu/project/id/675680) (grant no. 675680);
- [PerformFISH](https://cordis.europa.eu/project/id/727610) (grant no. 727610);
- [AGINFRA PLUS](https://cordis.europa.eu/project/id/731001) (grant no. 731001);
- [DESIRA](https://cordis.europa.eu/project/id/818194) (grant no. 818194);
- [ARIADNEplus](https://cordis.europa.eu/project/id/823914) (grant no. 823914);
- [RISIS 2](https://cordis.europa.eu/project/id/824091) (grant no. 824091);
- [EOSC-Pillar](https://cordis.europa.eu/project/id/857650) (grant no. 857650);
- [Blue Cloud](https://cordis.europa.eu/project/id/862409) (grant no. 862409);
- [SoBigData-PlusPlus](https://cordis.europa.eu/project/id/871042) (grant no. 871042).

View File

@ -1,4 +1,4 @@
# gCube System - Social Util Library
# gCube Social Util Library
## Structure of the project
@ -19,13 +19,31 @@ See [Releases](https://code-repo.d4science.org/gCubeSystem/social-util-library/r
## Authors
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Costantino Perciante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
## Maintainers
* **Luca Frosini** ([ORCID](https://orcid.org/0000-0003-3183-2291)) - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
* **Massimiliano Assante** - [ISTI-CNR Infrascience Group](http://nemis.isti.cnr.it/groups/infrascience)
## How to Cite this Software
Tell people how to cite this software.
* Cite an associated paper?
* Use a specific BibTeX entry for the software?
@software{,
title = {gCube Social Util Library},
author = {Frosini, Luca and Assante, Massimiliano and Perciante, Costantino},
organization = {ISTI - CNR},
address = {Pisa, Italy},
year = 2021,
url = {http://www.gcube-system.org/}
}
## License
This project is licensed under the EUPL V.1.1 License - see the [LICENSE.md](LICENSE.md) file for details.
@ -40,9 +58,23 @@ by favouring the realisation of reuse oriented policies.
The projects leading to this software have received funding from a series of European Union programmes including:
- the Sixth Framework Programme for Research and Technological Development
- DILIGENT (grant no. 004260);
- DILIGENT (grant no. 004260).
- the Seventh Framework Programme for research, technological development and demonstration
- D4Science (grant no. 212488), D4Science-II (grant no.239019), ENVRI (grant no. 283465), EUBrazilOpenBio (grant no. 288754), iMarine(grant no. 283644);
- D4Science (grant no. 212488);
- D4Science-II (grant no. 239019);
- ENVRI (grant no. 283465);
- iMarine (grant no. 283644);
- EUBrazilOpenBio (grant no. 288754).
- the H2020 research and innovation programme
- BlueBRIDGE (grant no. 675680), EGIEngage (grant no. 654142), ENVRIplus (grant no. 654182), Parthenos (grant no. 654119), SoBigData (grant no. 654024),DESIRA (grant no. 818194), ARIADNEplus (grant no. 823914), RISIS2 (grant no. 824091), PerformFish (grant no. 727610), AGINFRAplus (grant no. 731001);
- SoBigData (grant no. 654024);
- PARTHENOS (grant no. 654119);
- EGIEngage (grant no. 654142);
- ENVRIplus (grant no. 654182);
- BlueBRIDGE (grant no. 675680);
- PerformFish (grant no. 727610);
- AGINFRAplus (grant no. 731001);
- DESIRA (grant no. 818194);
- ARIADNEplus (grant no. 823914);
- RISIS2 (grant no. 824091).

View File

@ -1,40 +0,0 @@
<ReleaseNotes>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-6-0"
date="2019-03-29">
<Change>Revised social networking library mechanism for http links, mentions and hashtags recognition, #13207</Change>
</Changeset>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-4-0"
date="2018-06-19">
<Change>Removed deprecated dependency ws-mail widget</Change>
</Changeset>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-3-0"
date="2017-12-31">
<Change>Fixed regex for hashtags (#10700)
</Change>
<Change>Fixed regex for urls (#10984)</Change>
</Changeset>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-1"
date="2017-11-13">
<Change>Fixed regex for urls (#10234)
</Change>
</Changeset>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-0"
date="2017-04-13">
<Change>Added method to remove html tags from a text (useful for #247)
</Change>
</Changeset>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-1-0"
date="2017-04-13">
<Change>Bug #8063, Share Updates links are truncated in some cases
</Change>
</Changeset>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-0-1"
date="2016-10-01">
<Change>Hashtag regular expression updated (See ticket #4937)</Change>
<Change>Url regular expression updated</Change>
</Changeset>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-0-0"
date="2016-06-01">
<Change>First Release</Change>
</Changeset>
</ReleaseNotes>

View File

@ -1,42 +0,0 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>servicearchive</id>
<formats>
<format>tar.gz</format>
</formats>
<baseDirectory>/</baseDirectory>
<fileSets>
<fileSet>
<directory>${distroDirectory}</directory>
<outputDirectory>/</outputDirectory>
<useDefaultExcludes>true</useDefaultExcludes>
<includes>
<include>README</include>
<include>LICENSE</include>
<include>changelog.xml</include>
<include>profile.xml</include>
</includes>
<fileMode>755</fileMode>
<filtered>true</filtered>
</fileSet>
<fileSet>
<directory>target/apidocs</directory>
<outputDirectory>/${artifactId}/doc/api</outputDirectory>
<useDefaultExcludes>true</useDefaultExcludes>
<fileMode>755</fileMode>
</fileSet>
</fileSets>
<files>
<file>
<source>${distroDirectory}/profile.xml</source>
<outputDirectory>./</outputDirectory>
<filtered>true</filtered>
</file>
<file>
<source>target/${build.finalName}.jar</source>
<outputDirectory>/${artifactId}</outputDirectory>
</file>
</files>
</assembly>

View File

@ -1 +0,0 @@
${gcube.license}

View File

@ -1,67 +0,0 @@
The gCube System - ${name}
--------------------------------------------------
${description}
${gcube.description}
${gcube.funding}
Version
--------------------------------------------------
${version} (${buildDate})
Please see the file named "changelog.xml" in this directory for the release notes.
Authors
--------------------------------------------------
* Massimiliano Assante (massimiliano.assante-AT-isti.cnr.it), Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo" - CNR, Pisa (Italy).
* Costantino Perciante (costantino.perciante-AT-isti.cnr.it), Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo" - CNR, Pisa (Italy).
Maintainers
-----------
* Massimiliano Assante (massimiliano.assante-AT-isti.cnr.it), Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo" - CNR, Pisa (Italy).
* Costantino Perciante (costantino.perciante-AT-isti.cnr.it), Istituto di Scienza e Tecnologie dell'Informazione "A. Faedo" - CNR, Pisa (Italy).
Download information
--------------------------------------------------
Source code is available from SVN:
${scm.url}
Binaries can be downloaded from the gCube website:
${gcube.website}
Installation
--------------------------------------------------
Installation documentation is available on-line in the gCube Wiki:
${gcube.wikiRoot}/
Documentation
--------------------------------------------------
Documentation is available on-line in the gCube Wiki:
${gcube.wikiRoot}/
${gcube.wikiRoot}/
Support
--------------------------------------------------
Bugs and support requests can be reported in the gCube issue tracking tool:
${gcube.issueTracking}
Licensing
--------------------------------------------------
This software is licensed under the terms you may find in the file named "LICENSE" in this directory.

View File

@ -1,25 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<Resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ID></ID>
<Type>Library</Type>
<Profile>
<Description>${description}</Description>
<Class>Portal</Class>
<Name>${artifactId}</Name>
<Version>${version}</Version>
<Packages>
<Software>
<Name>${artifactId}</Name>
<Version>${version}</Version>
<MavenCoordinates>
<groupId>${groupId}</groupId>
<artifactId>${artifactId}</artifactId>
<version>${version}</version>
</MavenCoordinates>
<Files>
<File>${build.finalName}.jar</File>
</Files>
</Software>
</Packages>
</Profile>
</Resource>

43
pom.xml
View File

@ -1,4 +1,5 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@ -10,7 +11,7 @@
<groupId>org.gcube.socialnetworking</groupId>
<artifactId>social-util-library</artifactId>
<version>1.7.0</version>
<version>1.7.3</version>
<name>social-util-library</name>
<description>
The social-util-library contains utility functions that can be used by the social-networking portlets.
@ -35,7 +36,7 @@
<dependency>
<groupId>org.gcube.distribution</groupId>
<artifactId>maven-portal-bom</artifactId>
<version>LATEST</version>
<version>3.6.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
@ -76,11 +77,8 @@
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
<!-- dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<scope>provided</scope>
</dependency -->
<!-- dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId>
<scope>provided</scope> </dependency -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
@ -124,15 +122,14 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.7</source>
<target>1.8</target>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.2</version>
<executions>
<execution>
<goals>
@ -145,34 +142,10 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.12</version>
<configuration>
<skipTests>true</skipTests>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>descriptor.xml</descriptor>
</descriptors>
<archive>
<manifest>
<mainClass>fully.qualified.MainClass</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>servicearchive</id>
<phase>install</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>

View File

@ -31,6 +31,11 @@ import org.htmlparser.beans.StringBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Costantino Perciante (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
* @author Luca Frosini (ISTI - CNR)
*/
public class Utils {
/**

View File

@ -5,6 +5,7 @@ import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class HashTagToken extends ReplaceableToken {

View File

@ -6,6 +6,9 @@ import java.util.Map;
import org.apache.commons.codec.binary.Base64;
import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class ReplaceableToken extends Token {
protected boolean replaced;

View File

@ -4,12 +4,16 @@ import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class SanitizedHashTag {
private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w-]*[\\W]{0,3}";
private static final String RECOGNIZE_HASHTAG_REGEX = "^.{0,3}#[\\w.-]{1,}[\\W]{0,3}";
private static final Pattern RECOGNIZE_HASHTAG_PATTERN;
private static final String HASHTAG_REGEX = "#[\\w-]*";
private static final String HASHTAG_REGEX = "#[\\w.-]{1,}";
private static final Pattern HASHTAG_PATTERN;
static {
@ -41,6 +45,11 @@ public class SanitizedHashTag {
prefix = string.substring(0,matcher.start());
hashTag = string.substring(matcher.start(), matcher.end());
postfix = string.substring(matcher.end());
if (hashTag.endsWith(".")) {
hashTag = hashTag.substring(0, hashTag.length()-1);
postfix += ".";
}
}else {
throw new IllegalArgumentException(string + " is not a valid TAG");
}

View File

@ -4,41 +4,75 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.util.Objects;
/**
* @author Luca Frosini (ISTI - CNR)
* @author Massimiliano Assante (ISTI - CNR)
*/
public class SanitizedURL {
private static String CHARACTERS_TO_REMOVE = "[\\.\\,\\;\\(\\)\\:\\\"\\'\\\\\\\\\\«\\»]";
private static String CHARACTERS_TO_REMOVE = "[\\{\\}\\[\\]\\.\\,\\;\\(\\)\\:\\\"\\'\\\\\\\\\\«\\»]";
protected String prefix;
protected String postfix;
protected final URL url;
public SanitizedURL(String urlString) throws MalformedURLException {
if(Objects.isNull(urlString) || urlString.isEmpty()) {
if(Objects.isNull(urlString) || urlString.isEmpty() || urlString.length()<2) {
throw new MalformedURLException();
}
prefix = urlString.substring(0,1);;
if(prefix.matches(CHARACTERS_TO_REMOVE)) {
prefix = urlString.substring(0, 1);
urlString = urlString.substring(1);
}else {
prefix = "";
}
prefix = "";
postfix = "";
urlString = stripPrefix(urlString);
if(urlString.startsWith("www.")) {
urlString = "http://" + urlString;
}
postfix = urlString.substring(urlString.length()-1);
if(postfix.matches(CHARACTERS_TO_REMOVE)) {
urlString = urlString.substring(0, urlString.length()-1);
}else {
postfix = "";
}
postfix = "";
urlString = stripPostfix(urlString);
url = new URL(urlString);
}
/**
 * Strips every leading character that matches {@code CHARACTERS_TO_REMOVE} from the
 * given text, appending each stripped character to the {@code prefix} field in the
 * order in which it was found.
 *
 * @param urlString the candidate URL text
 * @return the text without the leading characters to remove
 * @throws MalformedURLException if the text is null or shorter than two characters,
 *         either on entry or after some characters have been stripped
 */
private String stripPrefix(String urlString) throws MalformedURLException {
	// Implemented as a loop rather than recursion: the input comes from user-written
	// text, and one stack frame per stripped character could overflow the stack on a
	// long run of punctuation.
	while(true) {
		if(Objects.isNull(urlString) || urlString.length()<2) {
			throw new MalformedURLException();
		}
		String testPrefix = urlString.substring(0,1);
		if(!testPrefix.matches(CHARACTERS_TO_REMOVE)) {
			return urlString;
		}
		prefix = prefix + testPrefix;
		urlString = urlString.substring(1);
	}
}
/**
 * Strips every trailing character that matches {@code CHARACTERS_TO_REMOVE} from the
 * given text, prepending each stripped character to the {@code postfix} field so that
 * the field keeps the characters in their original left-to-right order.
 *
 * @param urlString the candidate URL text
 * @return the text without the trailing characters to remove
 * @throws MalformedURLException if the text is null or shorter than two characters,
 *         either on entry or after some characters have been stripped
 */
private String stripPostfix(String urlString) throws MalformedURLException {
	/*
	 * The urlString could terminate with more than one character to be removed,
	 * so the sanitised URL is analysed again after every removal,
	 * e.g. 'https://data.d4science.net/tfXA),' terminates with '),'
	 *
	 * Implemented as a loop rather than recursion: the input comes from user-written
	 * text, and one stack frame per stripped character could overflow the stack on a
	 * long run of punctuation.
	 */
	while(true) {
		if(Objects.isNull(urlString) || urlString.length()<2) {
			throw new MalformedURLException();
		}
		int lastIndex = urlString.length()-1;
		String testPostfix = urlString.substring(lastIndex);
		if(!testPostfix.matches(CHARACTERS_TO_REMOVE)) {
			return urlString;
		}
		postfix = testPostfix + postfix;
		urlString = urlString.substring(0, lastIndex);
	}
}
public String getPrefix() {
return prefix;
}

View File

@ -15,6 +15,9 @@ import org.gcube.vomanagement.usermanagement.model.GCubeTeam;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class SocialMessageParser {
private static final Logger logger = LoggerFactory.getLogger(SocialMessageParser.class);

View File

@ -7,6 +7,9 @@ import java.util.Map;
import org.gcube.socialnetworking.tokenization.Token;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class URLToken extends ReplaceableToken {
protected SanitizedURL sanitizedURL;

View File

@ -5,6 +5,9 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class GCubeStringTokenizer {
/**

View File

@ -1,5 +1,8 @@
package org.gcube.socialnetworking.tokenization;
/**
* @author Luca Frosini (ISTI - CNR)
*/
public class Token{
protected final String token;

View File

@ -15,73 +15,110 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MessageParserTest {
private static final Logger logger = LoggerFactory.getLogger(MessageParserTest.class);
public static final String TEST_11 = "Dear all, this is a test to ignore, to select a week for the upcoming 194th #Tcom event, "
+ "hosted by #Apple in #Cupertino, please use this #Doodle: http://Doodle.com/poll/not-existing-poll \n\n"
+ "We're closing the poll next Thursday 16th March.";
public static final String TEST_12 = "Just because I am so happy to have the SPARQL-endpoint available, \n"
+ "sharing some sample SPARQL queries: \n\n" + "* Classes & usage counts: \n"
+ "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on\n"
+ "\n" + "* properties and usage counts: \n"
+ "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=SELECT+%3Ftype+%28COUNT%28%3Ftype%29+as+%3FtypeCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+a+%3Ftype%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Ftype&format=text%2Fhtml&timeout=0&debug=on\n";
public static final String TEST_13 = "Dear members,\n"
+ "The item 'Webinar on Ontology Management using VOCBENCH in the context of AGINFRAPLUS Project' has been just published by Leonardo Candela .\n"
+ "You can find it here: http://data.d4science.org/ctlg/AGINFRAplus/webinar_on_ontology_management_using_vocbench_in_the_context_of_aginfraplus_project \n"
+ "#AGINFRAPLUS #VOCBENCH";
public static final String TEST_LUCA_1 = "Dear members,\n"
+ "The item 'just a test with time fields' has been just published by Francesco Mangiacrapa.\n"
+ "You can find it here: http://data-d.d4science.org/ctlg/NextNext/just_a_test_with_time_fields\n"
+ "#Text_mining #Field_1 #B3";
public static final String TEST_LUCA_2 = "Francesco Mangiacrapa prova &lt;a href=\"#\"&gt;Francesco Mangiacrapa&lt;/a&gt;";
public static final String TEST_LUCA_3 = "test &nbsp";
public static final String TEST_LUCA_4 = "test &&lt;nbsp &lt;";
public static final String TEST_LUCA_5 = "Accedete a questo link che vi porta ad un post su linkedin. \n" +
"https://www.linkedin.com/feed/update/urn:li:activity:6488779074213801984/\n" +
"I numeri riportati sono veri ed è motivo di orgoglio per tutti noi aver contribuito alla realizzazione della d4s infra che ha questo utilizzo via i diversi gateway. ";
public static final String TEST_LUCA_6 = "(www.google.it)";
public static final String TEST_LUCA_7 = "Hello (https://doodle.com/poll/not-existing-poll)";
public static final String TEST_LUCA_8 = "Hello this link \"https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=SELECT+%3Ftype+%28COUNT%28%3Ftype%29+as+%3FtypeCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+a+%3Ftype%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Ftype&format=text%2Fhtml&timeout=0&debug=on\" is a SPARQL query ";
public static final String TEST_BUG_18356 = "Dear B-C colleagues,\n"
+ "\n"
+ "Just a kind reminder that we look forward to welcoming all of you during the Projects upcoming Blue-Cloud “Service Exploitation & Sustainability Plan (SE&SP) and Roadmap to 2030” Workshop next Thursday, January 21st (9.30am to 1.30pm). Everyone's welcome for an interactive and productive discussion.\n"
+ "\n"
+ "Please find here a link to the workshop Agenda, (https://data.d4science.net/tfXA), including connection details.\n"
+ "\n"
+ "As discussed, the workshop will provide an opportunity to share the high-level results of the initial phase of stakeholder consultations towards the B-C Roadmap and to work within the Partnership to discuss and align some of the underlying, strategic concepts and ideas that will set the direction for the B-C SE&SP and B-C Roadmap to 2030.\n"
+ "\n"
+ "To inform the discussions to be held during the workshop, we have produced a report bringing together all the feedback, messages and recommendations gathered during the initial phase of stakeholder consultations (available here - https://data.d4science.net/JEm7). It is a long document, but we have included a one-page Executive Summary with high-level, key messages & recommendations, in case useful.\n"
+ "\n"
+ "We look forward to a productive discussion with you next Thursday!\n"
+ "\n"
+ "Kind regards,\n"
+ "Julia";
public static final String TEST_BUG_18356_2 = "Dear B-C colleagues,\n"
+ "\n"
+ "Just a kind reminder that we look forward to welcoming all of you during the Projects upcoming Blue-Cloud “Service Exploitation & Sustainability Plan (SE&SP) and Roadmap to 2030” Workshop next Thursday, January 21st (9.30am to 1.30pm). Everyone's welcome for an interactive and productive discussion.\n"
+ "\n"
+ "Please find here a link to the workshop Agenda [(https://data.d4science.net/tfXA)], including connection details.\n"
+ "\n"
+ "As discussed, the workshop will provide an opportunity to share the high-level results of the initial phase of stakeholder consultations towards the B-C Roadmap and to work within the Partnership to discuss and align some of the underlying, strategic concepts and ideas that will set the direction for the B-C SE&SP and B-C Roadmap to 2030.\n"
+ "\n"
+ "To inform the discussions to be held during the workshop, we have produced a report bringing together all the feedback, messages and recommendations gathered during the initial phase of stakeholder consultations (available here - https://data.d4science.net/JEm7). It is a long document, but we have included a one-page Executive Summary with high-level, key messages & recommendations, in case useful.\n"
+ "\n"
+ "We look forward to a productive discussion with you next Thursday!\n"
+ "\n"
+ "Kind regards,\n"
+ "Julia";
/**
 * Parses {@code TEST_13} and logs the parsed message at debug level.
 * No assertion is made on the result.
 */
@Test
public void test13() {
	String parsed = new SocialMessageParser(TEST_13).getParsedMessage();
	logger.debug(parsed);
}
/**
 * Parses a series of sample messages and logs each parsed result at debug level.
 * The first group is parsed without mentions; the second group is parsed with one
 * mentioned user and the "/group/nextnext" argument. No assertion is made on the results.
 */
@Test
public void test() {
	String[] plainMessages = {
		"Prova #Pippo Pollo http://google) <a href=\"/test\">Luca</a> https://www.linkedin.com/feed/update/urn:li:activity:6488779074213801984 :) ",
		TEST_11,
		TEST_12,
		TEST_13
	};
	for (String plainMessage : plainMessages) {
		logger.debug(new SocialMessageParser(plainMessage).getParsedMessage());
	}

	ItemBean mentioned = new ItemBean("21150", "francesco.mangiacrapa", "Francesco Mangiacrapa", "");
	mentioned.setItemGroup(false);
	List<ItemBean> mentionedUsers = new ArrayList<>();
	mentionedUsers.add(mentioned);

	for (String messageWithMention : new String[] {TEST_LUCA_1, TEST_LUCA_2}) {
		SocialMessageParser parser = new SocialMessageParser(messageWithMention);
		logger.debug(parser.getParsedMessage(mentionedUsers, "/group/nextnext"));
	}
}
@Test
public void anotherTest() {
SocialMessageParser messageParser = new SocialMessageParser(TEST_LUCA_4);
@ -91,21 +128,32 @@ public class MessageParserTest {
messageParser = new SocialMessageParser(TEST_LUCA_8);
logger.debug(messageParser.getParsedMessage());
}
/**
 * Parses messages in which URLs are wrapped in, or followed by, parentheses and other
 * punctuation, plus the bare ":)" text, and logs each parsed result at debug level.
 * No assertion is made on the results.
 */
@Test
public void urlWithParentesisTest() {
	logger.debug(new SocialMessageParser(TEST_LUCA_6).getParsedMessage());

	String[] remainingMessages = {TEST_LUCA_7, TEST_BUG_18356, TEST_BUG_18356_2, ":)"};
	for (String message : remainingMessages) {
		SocialMessageParser parser = new SocialMessageParser(message);
		logger.debug("{}", parser.getParsedMessage());
	}
}
/**
 * Parses the text ":)" and logs the parsed message at debug level.
 * No assertion is made on the result.
 */
@Test
public void parseNonURL() {
	String parsed = new SocialMessageParser(":)").getParsedMessage();
	logger.debug("{}", parsed);
}
/**
 * Expects the {@code SanitizedURL} constructor to throw
 * {@link MalformedURLException} when given a null string.
 */
@Test(expected=MalformedURLException.class)
public void auxTest() throws MalformedURLException {
	String missingUrl = null;
	new SanitizedURL(missingUrl);
}
protected String findFirstLink(String message) {
try {
SocialMessageParser messageParser = new SocialMessageParser(message);
@ -115,21 +163,37 @@ public class MessageParserTest {
return null;
}
}
@Test
public void testTest() {
String text = "Hello (https://doodle.com/poll/not-existing-poll)";
logger.debug(findFirstLink(text));
text = "post \"a text with #hashtag);\"";
SocialMessageParser messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
text = "\"#hashtag and #hashtag repeated.";
messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
String text = "Hello (https://doodle.com/poll/not-existing-poll)";
logger.debug(findFirstLink(text));
text = "post \"a text with #hashtag);\"";
SocialMessageParser messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
text = "\"#hashtag and #hashtag repeated.";
messageParser = new SocialMessageParser(text);
logger.debug(messageParser.getParsedMessage());
}
/**
 * Parses a sentence containing hashtags surrounded by punctuation, quotes and
 * parentheses, numeric hashtags and a lone '#', then asserts that the parser
 * reports exactly seven hashtags.
 *
 * @throws Exception if parsing fails unexpectedly
 */
@Test
public void checkHashTag7() throws Exception {
	String text = "Checking hashtag between quotes #hashtag1; #hashtag1, #hashtag3. \"#hashtag5\" is recognized (#anotherHashtag) #4. #4.12 # ";
	SocialMessageParser messageParser = new SocialMessageParser(text);
	logger.debug(messageParser.getParsedMessage());
	List<String> htags = messageParser.getHashtags();
	for (String hTag : htags) {
		logger.debug("found hashtag:{}", hTag);
	}
	logger.debug("messageParser.getHashtags().size() should be 7, is: {}", htags.size());
	// assertEquals reports the actual count on failure, unlike assertTrue(size == 7)
	Assert.assertEquals(7, htags.size());
}
@Test
public void checkHashTag() throws Exception {
String token = "\"#hashtag\"";
@ -139,13 +203,13 @@ public class MessageParserTest {
Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0);
Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\"")==0);
}
/**
 * Expects {@code SanitizedHashTag} to throw {@link IllegalArgumentException} for a
 * token that is a URL whose fragment contains a '#'.
 */
@Test(expected=IllegalArgumentException.class)
public void hasTagwithURL() throws Exception {
	new SanitizedHashTag("https://wiki.gcube-system.org/gcube/GCat_Background#GeoSpatial_search_for_datasets:_via_API_or_Search_Widget");
}
@Test
public void checkHashTag2() throws Exception {
String token = "\"#hashtag\");";
@ -155,7 +219,7 @@ public class MessageParserTest {
Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0);
Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\");")==0);
}
@Test
public void checkHashTag3() throws Exception {
String token = ";(\"#hashtag\");";
@ -165,7 +229,8 @@ public class MessageParserTest {
Assert.assertTrue(sanitizedHashTag.getHashTag().compareTo("#hashtag")==0);
Assert.assertTrue(sanitizedHashTag.getPostfix().compareTo("\");")==0);
}
@Test(expected=IllegalArgumentException.class)
public void checkHashTag4() throws Exception {
String token = ";(\"#hashtag\");]";