diff --git a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java index bc3d260..e8894b0 100644 --- a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java +++ b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java @@ -51,11 +51,25 @@ public class Utils { "|travel|[a-z]{2,5}))(:[\\d]{1,5})?" + "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" + "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + - "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" + + "([-\\w~!$+|.,*:=/]|%[a-f\\d]{2})*)" + "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" + "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b"); + // The old pattern (kept below, commented out) did not work for URLs like https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility + // because it cut off the part of the string after "path" + // private static final Pattern URL_PATTERN = Pattern.compile( + // "\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" + + // "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" + + // "|mil|biz|info|mobi|name|aero|jobs|museum" + + // "|travel|[a-z]{2,5}))(:[\\d]{1,5})?" + + // "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" + + // "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + + // "([-\\w~!$+|.,*:=/]|%[a-f\\d]{2}/)*)" + + // "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" 
+ + // "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" + + // "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b"); + /** * * @param preview diff --git a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java index 6e6c364..10c8be4 100644 --- a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java +++ b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java @@ -12,10 +12,10 @@ public class TestUnit { System.out.println("Hashtags are " + hashtags); } - //@Test + // @Test public void extractUrl(){ - String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it"; + String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility"; String result = Utils.transformUrls(url); System.out.println("urls are " + result); }