diff --git a/distro/changelog.xml b/distro/changelog.xml index fe4ce8a..65bddb3 100644 --- a/distro/changelog.xml +++ b/distro/changelog.xml @@ -1,4 +1,8 @@ + + Fixed regex for urls (#10234) + Added method to remove html tags from a text (useful for #247) diff --git a/pom.xml b/pom.xml index bac13eb..6a61147 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.gcube.socialnetworking social-util-library - 1.2.0-SNAPSHOT + 1.2.1-SNAPSHOT social-util-library The social-util-library contains utility functions that can be used by the social-networking portlets. diff --git a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java index 196e1fd..012cc98 100644 --- a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java +++ b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java @@ -49,12 +49,12 @@ public class Utils { "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" + "|mil|biz|info|mobi|name|aero|jobs|museum" + "|travel|[a-z]{2,5}))(:[\\d]{1,5})?" + - "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" + - "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + - "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" + - "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + - "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" + - "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b"); + "(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" + + "((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" + + "([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" + + "(&(?:[-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" + + "([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)*)*" + + "(#([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)?\\b"); /** * @@ -267,6 +267,7 @@ public class Utils { } Matcher matcher = URL_PATTERN.matcher(originalItem); + System.out.println(URL_PATTERN.pattern()); if(matcher.find()){ logger.debug("Found match url " + matcher.group()); item = matcher.group(); diff --git a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java index 099421d..99d132f 100644 --- a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java +++ b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java @@ -12,11 +12,11 @@ public class TestUnit { System.out.println("Hashtags are " + hashtags); } - // @Test + //@Test public void extractUrl(){ - String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility"; - String result = Utils.transformUrls(url); + String url = "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on"; + String result = Utils.extractURL(url); System.out.println("urls are " + result); }