fixed url recognition in text
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@132831 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
483c585f14
commit
af643cdc15
|
@ -2,6 +2,7 @@
|
||||||
<Changeset component="org.gcube.socialnetworking.social-util-library.1-1-0"
|
<Changeset component="org.gcube.socialnetworking.social-util-library.1-1-0"
|
||||||
date="2016-10-01">
|
date="2016-10-01">
|
||||||
<Change>Hashtag regular expression updated (See ticket #4937)</Change>
|
<Change>Hashtag regular expression updated (See ticket #4937)</Change>
|
||||||
|
<Change>Url regular expression updated</Change>
|
||||||
</Changeset>
|
</Changeset>
|
||||||
<Changeset component="org.gcube.socialnetworking.social-util-library.1-0-0"
|
<Changeset component="org.gcube.socialnetworking.social-util-library.1-0-0"
|
||||||
date="2016-06-01">
|
date="2016-06-01">
|
||||||
|
|
|
@ -41,6 +41,21 @@ public class Utils {
|
||||||
*/
|
*/
|
||||||
private static final String HASHTAG_REGEX = "^#\\w+([.]?\\w+)*|\\s#\\w+([.]?\\w+)*";
|
private static final String HASHTAG_REGEX = "^#\\w+([.]?\\w+)*|\\s#\\w+([.]?\\w+)*";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pattern for URLS
|
||||||
|
*/
|
||||||
|
private static final Pattern URL_PATTERN = Pattern.compile(
|
||||||
|
"\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
|
||||||
|
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
|
||||||
|
"|mil|biz|info|mobi|name|aero|jobs|museum" +
|
||||||
|
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
|
||||||
|
"(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
|
||||||
|
"((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
|
||||||
|
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
|
||||||
|
"(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
|
||||||
|
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
|
||||||
|
"(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param preview
|
* @param preview
|
||||||
|
@ -143,7 +158,8 @@ public class Utils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility method that extracts a URL from a text when you paste a link
|
* Utility method that extracts a URL from a text when you paste a link.
|
||||||
|
* It returns the first (if any) meaningful url among the ones available.
|
||||||
* @param feedText
|
* @param feedText
|
||||||
* @return the text with the clickable url in it
|
* @return the text with the clickable url in it
|
||||||
*/
|
*/
|
||||||
|
@ -154,14 +170,14 @@ public class Utils {
|
||||||
// Attempt to convert each item into an URL.
|
// Attempt to convert each item into an URL.
|
||||||
for( String item : parts ) {
|
for( String item : parts ) {
|
||||||
String toCheck = getHttpToken(item);
|
String toCheck = getHttpToken(item);
|
||||||
|
logger.debug("To check is " + toCheck);
|
||||||
if (toCheck != null) {
|
if (toCheck != null) {
|
||||||
try {
|
try {
|
||||||
new URL(toCheck);
|
new URL(toCheck);
|
||||||
return toCheck;
|
return toCheck;
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
// If there was an URL then it's not valid
|
// If there was an URL then it's not valid
|
||||||
logger.error("MalformedURLException returning... ");
|
logger.error("MalformedURLException skipping token " + toCheck);
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -224,8 +240,10 @@ public class Utils {
|
||||||
sb.append("<a class=\"link\" style=\"font-size:14px;\" href=\"").append(url).append("\" target=\"_blank\">").append(url).append("</a> ");
|
sb.append("<a class=\"link\" style=\"font-size:14px;\" href=\"").append(url).append("\" target=\"_blank\">").append(url).append("</a> ");
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
// If there was an URL then it's not valid
|
// If there was an URL then it's not valid
|
||||||
logger.error("MalformedURLException returning... ");
|
logger.error("MalformedURLException not converting token = " + toCheck);
|
||||||
return feedText;
|
sb.append(parts[i]);
|
||||||
|
sb.append(" ");
|
||||||
|
//return feedText;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
sb.append(parts[i]);
|
sb.append(parts[i]);
|
||||||
|
@ -239,18 +257,31 @@ public class Utils {
|
||||||
* @param item a text token
|
* @param item a text token
|
||||||
* @return the actual http link
|
* @return the actual http link
|
||||||
*/
|
*/
|
||||||
public static String getHttpToken(String item) {
|
public static String getHttpToken(String originalItem) {
|
||||||
if (item.startsWith("http") || item.startsWith("www") || item.startsWith("(www") || item.startsWith("(http")) {
|
|
||||||
if (item.startsWith("("))
|
// apply pattern
|
||||||
item = item.substring(1, item.length());
|
String item = null;
|
||||||
if (item.endsWith(".") || item.endsWith(")")) { //sometimes people write the url and close the phrase with a .
|
Matcher matcher = URL_PATTERN.matcher(originalItem);
|
||||||
item = item.substring(0, item.length()-1);
|
if(matcher.find()){
|
||||||
}
|
logger.debug("Found match url " + matcher.group());
|
||||||
item = item.startsWith("www") ? "http://"+item : item;
|
item = matcher.group();
|
||||||
logger.debug("getHttpToken returns -> " + item);
|
}else
|
||||||
return item;
|
return null;
|
||||||
}
|
|
||||||
return null;
|
item = item.startsWith("www") ? "http://"+item : item;
|
||||||
|
logger.debug("getHttpToken returns -> " + item);
|
||||||
|
return item;
|
||||||
|
|
||||||
|
// if (item.startsWith("http") || item.startsWith("www") || item.startsWith("(www") || item.startsWith("(http")) {
|
||||||
|
// if (item.startsWith("("))
|
||||||
|
// item = item.substring(1, item.length());
|
||||||
|
// if (item.endsWith(".") || item.endsWith(")")) { //sometimes people write the url and close the phrase with a .
|
||||||
|
// item = item.substring(0, item.length()-1);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return item;
|
||||||
|
// }
|
||||||
|
// return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -5,7 +5,6 @@ import java.util.List;
|
||||||
|
|
||||||
public class TestUnit {
|
public class TestUnit {
|
||||||
|
|
||||||
|
|
||||||
//@Test
|
//@Test
|
||||||
public void testHashtag() {
|
public void testHashtag() {
|
||||||
String text = "This is a test with hashtag #T6 and #T6.1 but also #T6. that has '.' that is useless and #T43.43 and #gcube4.1.0gcore #gcube4.1.0";
|
String text = "This is a test with hashtag #T6 and #T6.1 but also #T6. that has '.' that is useless and #T43.43 and #gcube4.1.0gcore #gcube4.1.0";
|
||||||
|
@ -13,4 +12,13 @@ public class TestUnit {
|
||||||
System.out.println("Hashtags are " + hashtags);
|
System.out.println("Hashtags are " + hashtags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//@Test
|
||||||
|
public void extractUrl(){
|
||||||
|
|
||||||
|
String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it";
|
||||||
|
String result = Utils.transformUrls(url);
|
||||||
|
System.out.println("urls are " + result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue