Minor fix: handle URLs whose query-string value is itself a path (e.g. ?path=/path/path)

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@133720 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2016-10-28 13:17:24 +00:00
parent af643cdc15
commit 3d57eabdfd
2 changed files with 17 additions and 3 deletions

View File

@@ -51,11 +51,25 @@ public class Utils {
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
"(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
"((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
"([-\\w~!$+|.,*:=/]|%[a-f\\d]{2})*)" +
"(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
"(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
// The previous pattern (kept below, commented out) did not work for URLs like https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility
// because it dropped the part of the query string after "path".
// private static final Pattern URL_PATTERN = Pattern.compile(
// "\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
// "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
// "|mil|biz|info|mobi|name|aero|jobs|museum" +
// "|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
// "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
// "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
// "([-\\w~!$+|.,*:=/]|%[a-f\\d]{2}/)*)" +
// "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
// "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
// "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
/**
*
* @param preview

View File

@@ -12,10 +12,10 @@ public class TestUnit {
System.out.println("Hashtags are " + hashtags);
}
//@Test
// @Test
public void extractUrl(){
String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it";
String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility";
String result = Utils.transformUrls(url);
System.out.println("urls are " + result);
}