diff --git a/distro/changelog.xml b/distro/changelog.xml
index 28ced40..3d8e4f9 100644
--- a/distro/changelog.xml
+++ b/distro/changelog.xml
@@ -2,6 +2,7 @@
Hashtag regular expression updated (See ticket #4937)
+ Url regular expression updated
diff --git a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java
index 5346fb0..bc3d260 100644
--- a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java
+++ b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java
@@ -41,6 +41,21 @@ public class Utils {
*/
private static final String HASHTAG_REGEX = "^#\\w+([.]?\\w+)*|\\s#\\w+([.]?\\w+)*";
+	/**
+	 * Pattern for URLs: a scheme (http, https, ftp, ftps), "/", "~/" or "www." prefix, then optional user:password, host, port, path, query string and fragment; percent-escapes are exactly two hex digits.
+	 */
+	private static final Pattern URL_PATTERN = Pattern.compile(
+			"\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www\\.)" +
+			"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
+			"|mil|biz|info|mobi|name|aero|jobs|museum" +
+			"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
+			"(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" +
+			"((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d]{2})+=?" +
+			"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" +
+			"(&(?:[-\\w~!$+|.,*:]|%[a-fA-F\\d]{2})+=?" +
+			"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)*)*" +
+			"(#([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)?\\b");
+
/**
*
* @param preview
@@ -143,7 +158,8 @@ public class Utils {
}
/**
- * utility method that extract an url ina text when you paste a link
+ * utility method that extracts an url in a text when you paste a link.
+ * It returns the first (if any) meaningful url among the ones available.
* @param feedText
* @return the text with the clickable url in it
*/
@@ -154,14 +170,14 @@ public class Utils {
// Attempt to convert each item into an URL.
for( String item : parts ) {
String toCheck = getHttpToken(item);
+ logger.debug("To check is " + toCheck);
if (toCheck != null) {
try {
new URL(toCheck);
return toCheck;
} catch (MalformedURLException e) {
// If there was an URL then it's not valid
- logger.error("MalformedURLException returning... ");
- return null;
+ logger.error("MalformedURLException skipping token " + toCheck);
}
}
}
@@ -224,8 +240,10 @@ public class Utils {
sb.append("").append(url).append(" ");
} catch (MalformedURLException e) {
// If there was an URL then it's not valid
- logger.error("MalformedURLException returning... ");
- return feedText;
+ logger.error("MalformedURLException not converting token = " + toCheck);
+ sb.append(parts[i]);
+ sb.append(" ");
+ //return feedText;
}
} else {
sb.append(parts[i]);
@@ -239,18 +257,24 @@ public class Utils {
* @param item a text token
* @return the actual http link
*/
- public static String getHttpToken(String item) {
- if (item.startsWith("http") || item.startsWith("www") || item.startsWith("(www") || item.startsWith("(http")) {
- if (item.startsWith("("))
- item = item.substring(1, item.length());
- if (item.endsWith(".") || item.endsWith(")")) { //sometimes people write the url and close the phrase with a .
- item = item.substring(0, item.length()-1);
- }
- item = item.startsWith("www") ? "http://"+item : item;
- logger.debug("getHttpToken returns -> " + item);
- return item;
- }
- return null;
+	public static String getHttpToken(String originalItem) {
+		// a missing token cannot carry a link (creating the matcher on null would throw)
+		if (originalItem == null) {
+			return null;
+		}
+		// only the first url found inside the token is taken into account
+		Matcher matcher = URL_PATTERN.matcher(originalItem);
+		if (!matcher.find()) {
+			return null;
+		}
+		String item = matcher.group();
+		logger.debug("Found match url " + item);
+		// links typed without a protocol get the http one prepended
+		if (item.startsWith("www")) {
+			item = "http://" + item;
+		}
+		logger.debug("getHttpToken returns -> " + item);
+		return item;
}
/**
diff --git a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java
index 926e8cc..6e6c364 100644
--- a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java
+++ b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java
@@ -5,7 +5,6 @@ import java.util.List;
public class TestUnit {
-
//@Test
public void testHashtag() {
String text = "This is a test with hashtag #T6 and #T6.1 but also #T6. that has '.' that is useless and #T43.43 and #gcube4.1.0gcore #gcube4.1.0";
@@ -13,4 +12,13 @@ public class TestUnit {
System.out.println("Hashtags are " + hashtags);
}
+	//@Test
+	public void extractUrl() {
+		// sample text mixing bare words, a scheme glued to another one, www links and a link wrapped in punctuation
+		String text = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it";
+		String transformed = Utils.transformUrls(text);
+		System.out.println("urls are " + transformed);
+	}
+
+
}