Fixing find URL

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@176813 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Luca Frosini 2019-01-25 10:01:29 +00:00
parent c8387db64c
commit 9e7bd7e00b
4 changed files with 61 additions and 8 deletions

View File

@ -39,21 +39,25 @@ public class Utils {
/**
* Hashtag regex enhanced for ticket #4937
*/
@Deprecated
private static final String HASHTAG_REGEX = "^#\\w+([-_.]?\\w+)*|\\s#\\w+([-_.]?\\w+)*|(?<=[\\[({])#\\w+([-_.]?\\w+)";
/**
* IPv4 regex
*/
@Deprecated
public static final String IPV4_REGEX = "\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b";
/**
* IPV6 regex needs to be put in square brackets, see below
*/
@Deprecated
public static final String IPV6_REGEX = "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b";
/**
* Pattern for URLS
*/
@Deprecated
private static final Pattern URL_PATTERN = Pattern.compile(
"\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
"(\\w+:\\w+@)?((([-\\w]+\\.)+(com|org|net|gov" +
@ -174,6 +178,7 @@ public class Utils {
* @param feedText
* @return the text with the clickable url in it
*/
@Deprecated
public static String extractURL(String feedText) {
// separate input by spaces ( URLs have no spaces )
feedText = feedText.replaceAll("(\r\n|\n)"," <br/> ");
@ -271,6 +276,7 @@ public class Utils {
* @param item a text token
* @return the actual http link
*/
@Deprecated
public static String getHttpToken(String originalItem) {
// apply pattern
String item = null;
@ -374,6 +380,7 @@ public class Utils {
* @param text
* @return a String without html tags
*/
@Deprecated
public static String removeHTMLFromText(String text){
if(text == null)

View File

@ -23,7 +23,10 @@ public class SocialMessageParser {
private final String originalMessage;
private StringWriter stringWriter;
private List<ReplaceableToken> tokens;
private List<TagToken> tagTokens;
private List<URLToken> urlTokens;
private List<String> hashtags;
@ -34,12 +37,18 @@ public class SocialMessageParser {
public List<ReplaceableToken> getTokens() {
if(tokens==null){
tokens = new ArrayList<>();
tagTokens = new ArrayList<>();
urlTokens = new ArrayList<>();
hashtags = new ArrayList<>();
GCubeStringTokenizer socialStringTokenizer = new GCubeStringTokenizer(originalMessage);
for(Token token : socialStringTokenizer.getTokens()) {
String tokenString = token.getToken();
if(tokenString.startsWith("#")) {
TagToken tagToken = new TagToken(token);
tokens.add(tagToken);
tagTokens.add(tagToken);
hashtags.add(tagToken.getToken());
continue;
}
@ -47,6 +56,7 @@ public class SocialMessageParser {
if(url!=null) {
URLToken urlToken = new URLToken(token);
tokens.add(urlToken);
urlTokens.add(urlToken);
continue;
}
@ -60,13 +70,9 @@ public class SocialMessageParser {
public String getParsedMessage() {
if(stringWriter==null) {
stringWriter = new StringWriter();
hashtags = new ArrayList<>();
for(ReplaceableToken token : getTokens()) {
stringWriter.append(token.getTokenReplacement());
stringWriter.append(token.getDelimiter());
if(token instanceof TagToken) {
hashtags.add(token.getToken());
}
}
}
return stringWriter.toString();
@ -108,9 +114,23 @@ public class SocialMessageParser {
}
public List<String> getHashtags() {
if(stringWriter==null) {
getParsedMessage();
if(tokens==null){
getTokens();
}
return hashtags;
}
/**
 * Returns the hashtag tokens collected while tokenizing the message.
 * The message is tokenized on first access, via {@link #getTokens()}.
 *
 * @return the list of {@link TagToken} built by {@link #getTokens()}
 */
public List<TagToken> getTagTokens() {
    // Tokenization is lazy: "tokens" stays null until getTokens() has run once.
    boolean alreadyTokenized = (tokens != null);
    if (!alreadyTokenized) {
        getTokens();
    }
    return tagTokens;
}
/**
 * Returns the URL tokens collected while tokenizing the message.
 * The message is tokenized on first access, via {@link #getTokens()}.
 *
 * @return the list of {@link URLToken} built by {@link #getTokens()}
 */
public List<URLToken> getURLTokens() {
    // Tokenization is lazy: "tokens" stays null until getTokens() has run once.
    boolean alreadyTokenized = (tokens != null);
    if (!alreadyTokenized) {
        getTokens();
    }
    return urlTokens;
}
}

View File

@ -9,6 +9,8 @@ import org.gcube.socialnetworking.tokenization.Token;
public class URLToken extends ReplaceableToken {
protected SanitizedURL sanitizedURL;
/**
 * Creates a URL token from its raw components; all arguments are passed
 * unchanged to the {@link ReplaceableToken} constructor.
 *
 * @param token the token text
 * @param delimiter the delimiter associated with the token
 * @param start presumably the start offset of the token in the original message - TODO confirm against ReplaceableToken
 * @param end presumably the end offset of the token in the original message - TODO confirm against ReplaceableToken
 */
public URLToken(String token, String delimiter, int start, int end) {
super(token, delimiter, start, end);
}
@ -33,8 +35,7 @@ public class URLToken extends ReplaceableToken {
try {
Map<String,String> anchorAttibutes = new HashMap<>(1);
anchorAttibutes.put("target", "_blank");
SanitizedURL sanitizedURL = new SanitizedURL(token);
String url = sanitizedURL.getURL().toString();
String url = getExtractedURL().toString();
tokenReplacement = sanitizedURL.getPrefix() + ReplaceableToken.createLink(url, url, anchorAttibutes) + sanitizedURL.getPostfix();
}catch(MalformedURLException e) {
tokenReplacement = token;
@ -43,5 +44,12 @@ public class URLToken extends ReplaceableToken {
}
return tokenReplacement;
}
/**
 * Returns the URL extracted from this token.
 * The {@link SanitizedURL} wrapper is created from the token on first use
 * and kept in the {@code sanitizedURL} field for later calls.
 *
 * @return the URL exposed by the sanitized wrapper
 * @throws MalformedURLException propagated from {@link SanitizedURL}
 */
public URL getExtractedURL() throws MalformedURLException {
    if (sanitizedURL != null) {
        // Already built by a previous call: reuse it.
        return sanitizedURL.getURL();
    }
    sanitizedURL = new SanitizedURL(token);
    return sanitizedURL.getURL();
}
}

View File

@ -2,6 +2,7 @@ package org.gcube.social_networking.socialutillibrary;
import java.util.List;
import org.gcube.socialnetworking.socialtoken.SocialMessageParser;
import org.gcube.socialnetworking.socialtoken.URLToken;
import org.junit.Test;
import org.slf4j.Logger;
@ -38,5 +39,22 @@ public class TestUnit {
logger.debug(urlToken.getTokenReplacement());
}
}
/**
 * Returns the first URL found in the given message, as a string.
 *
 * @param message the text to scan for URLs
 * @return the first extracted URL, or {@code null} when the message contains
 *         no URL token or when parsing/extraction fails
 */
protected String findFirstLink(String message) {
    try {
        SocialMessageParser messageParser = new SocialMessageParser(message);
        List<URLToken> urlTokens = messageParser.getURLTokens();
        // "No URL in the message" is an expected outcome, not an error:
        // answer null directly instead of letting get(0) throw and be swallowed below.
        if (urlTokens == null || urlTokens.isEmpty()) {
            return null;
        }
        return urlTokens.get(0).getExtractedURL().toString();
    } catch (Exception ignored) {
        // Deliberate best-effort: any failure while parsing or extracting
        // the URL is reported to the caller as "no link found".
        return null;
    }
}
/**
 * Smoke test: logs the first link extracted from a message whose URL is
 * wrapped in parentheses. Nothing is asserted; the result is only logged.
 */
@Test
public void testTest() {
    final String message = "Hello (https://doodle.com/poll/not-existing-poll)";
    final String firstLink = findFirstLink(message);
    logger.debug(firstLink);
}
}