Fixed regex for URLs (#10234): the sub-regex for percent-encoding (e.g. %3F) didn't recognize uppercase hex characters
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@158302 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
df13c3264b
commit
87aa47eae1
|
@ -1,4 +1,8 @@
|
||||||
<ReleaseNotes>
|
<ReleaseNotes>
|
||||||
|
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-1"
|
||||||
|
date="2017-11-13">
|
||||||
|
<Change>Fixed regex for urls (#10234)
|
||||||
|
</Change>
|
||||||
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-0"
|
<Changeset component="org.gcube.socialnetworking.social-util-library.1-2-0"
|
||||||
date="2017-04-13">
|
date="2017-04-13">
|
||||||
<Change>Added method to remove html tags from a text (useful for #247)
|
<Change>Added method to remove html tags from a text (useful for #247)
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -10,7 +10,7 @@
|
||||||
|
|
||||||
<groupId>org.gcube.socialnetworking</groupId>
|
<groupId>org.gcube.socialnetworking</groupId>
|
||||||
<artifactId>social-util-library</artifactId>
|
<artifactId>social-util-library</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1-SNAPSHOT</version>
|
||||||
<name>social-util-library</name>
|
<name>social-util-library</name>
|
||||||
<description>
|
<description>
|
||||||
The social-util-library contains utility functions that can be used by the social-networking portlets.
|
The social-util-library contains utility functions that can be used by the social-networking portlets.
|
||||||
|
|
|
@ -49,12 +49,12 @@ public class Utils {
|
||||||
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
|
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
|
||||||
"|mil|biz|info|mobi|name|aero|jobs|museum" +
|
"|mil|biz|info|mobi|name|aero|jobs|museum" +
|
||||||
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
|
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
|
||||||
"(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
|
"(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" +
|
||||||
"((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
|
"((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
|
||||||
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
|
"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" +
|
||||||
"(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
|
"(&(?:[-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
|
||||||
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
|
"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)*)*" +
|
||||||
"(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
|
"(#([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)?\\b");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -267,6 +267,7 @@ public class Utils {
|
||||||
}
|
}
|
||||||
|
|
||||||
Matcher matcher = URL_PATTERN.matcher(originalItem);
|
Matcher matcher = URL_PATTERN.matcher(originalItem);
|
||||||
|
System.out.println(URL_PATTERN.pattern());
|
||||||
if(matcher.find()){
|
if(matcher.find()){
|
||||||
logger.debug("Found match url " + matcher.group());
|
logger.debug("Found match url " + matcher.group());
|
||||||
item = matcher.group();
|
item = matcher.group();
|
||||||
|
|
|
@ -12,11 +12,11 @@ public class TestUnit {
|
||||||
System.out.println("Hashtags are " + hashtags);
|
System.out.println("Hashtags are " + hashtags);
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Test
|
//@Test
|
||||||
public void extractUrl(){
|
public void extractUrl(){
|
||||||
|
|
||||||
String url = "http tosajndjsa :httphttps://www.google.tv www.google.cloud www https http (http://digirolamo.com: www.google.it https://next.d4science.org/group/nextnext/data-catalogue?path=/dataset/test_for_visibility";
|
String url = "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on";
|
||||||
String result = Utils.transformUrls(url);
|
String result = Utils.extractURL(url);
|
||||||
System.out.println("urls are " + result);
|
System.out.println("urls are " + result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue