Fixed regex for URLs (#10234): the sub-regex for percent-encoding (e.g. %3F) didn't recognize uppercase characters

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@158302 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2017-11-08 15:30:10 +00:00
parent df13c3264b
commit 87aa47eae1
4 changed files with 15 additions and 10 deletions

View File

@ -1,4 +1,8 @@
<ReleaseNotes> <ReleaseNotes>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-1"
date="2017-11-13">
<Change>Fixed regex for urls (#10234)
</Change>
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-0" <Changeset component="org.gcube.socialnetworking.social-util-library.1-2-0"
date="2017-04-13"> date="2017-04-13">
<Change>Added method to remove html tags from a text (useful for #247) <Change>Added method to remove html tags from a text (useful for #247)

View File

@ -10,7 +10,7 @@
<groupId>org.gcube.socialnetworking</groupId> <groupId>org.gcube.socialnetworking</groupId>
<artifactId>social-util-library</artifactId> <artifactId>social-util-library</artifactId>
<version>1.2.0-SNAPSHOT</version> <version>1.2.1-SNAPSHOT</version>
<name>social-util-library</name> <name>social-util-library</name>
<description> <description>
The social-util-library contains utility functions that can be used by the social-networking portlets. The social-util-library contains utility functions that can be used by the social-networking portlets.

View File

@ -49,12 +49,12 @@ public class Utils {
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" + "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
"|mil|biz|info|mobi|name|aero|jobs|museum" + "|mil|biz|info|mobi|name|aero|jobs|museum" +
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" + "|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
"(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" + "(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" +
"((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + "((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" + "([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" +
"(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + "(&(?:[-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" + "([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)*)*" +
"(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b"); "(#([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)?\\b");
/** /**
* *
@ -267,6 +267,7 @@ public class Utils {
} }
Matcher matcher = URL_PATTERN.matcher(originalItem); Matcher matcher = URL_PATTERN.matcher(originalItem);
System.out.println(URL_PATTERN.pattern());
if(matcher.find()){ if(matcher.find()){
logger.debug("Found match url " + matcher.group()); logger.debug("Found match url " + matcher.group());
item = matcher.group(); item = matcher.group();

View File

@ -12,11 +12,11 @@ public class TestUnit {
System.out.println("Hashtags are " + hashtags); System.out.println("Hashtags are " + hashtags);
} }
// @Test //@Test
public void extractUrl(){ public void extractUrl(){
String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility"; String url = "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on";
String result = Utils.transformUrls(url); String result = Utils.extractURL(url);
System.out.println("urls are " + result); System.out.println("urls are " + result);
} }