Fixed regex for URLs (#10234): the sub-regex for percent-encoded sequences (e.g. %3F) didn't recognize uppercase hex digits

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@158302 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2017-11-08 15:30:10 +00:00
parent df13c3264b
commit 87aa47eae1
4 changed files with 15 additions and 10 deletions

View File

@ -1,4 +1,8 @@
<ReleaseNotes>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-1"
date="2017-11-13">
<Change>Fixed regex for urls (#10234)
</Change>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-0"
date="2017-04-13">
<Change>Added method to remove html tags from a text (useful for #247)

View File

@ -10,7 +10,7 @@
<groupId>org.gcube.socialnetworking</groupId>
<artifactId>social-util-library</artifactId>
<version>1.2.0-SNAPSHOT</version>
<version>1.2.1-SNAPSHOT</version>
<name>social-util-library</name>
<description>
The social-util-library contains utility functions that can be used by the social-networking portlets.

View File

@ -49,12 +49,12 @@ public class Utils {
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
"|mil|biz|info|mobi|name|aero|jobs|museum" +
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
"(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
"((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
"(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
"(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
"(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" +
"((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" +
"(&(?:[-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)*)*" +
"(#([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)?\\b");
/**
*
@ -267,6 +267,7 @@ public class Utils {
}
Matcher matcher = URL_PATTERN.matcher(originalItem);
System.out.println(URL_PATTERN.pattern());
if(matcher.find()){
logger.debug("Found match url " + matcher.group());
item = matcher.group();

View File

@ -12,11 +12,11 @@ public class TestUnit {
System.out.println("Hashtags are " + hashtags);
}
// @Test
//@Test
public void extractUrl(){
String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility";
String result = Utils.transformUrls(url);
String url = "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on";
String result = Utils.extractURL(url);
System.out.println("urls are " + result);
}