URLs with IPv4 and IPv6 hosts are now supported

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@162343 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2018-01-19 10:55:54 +00:00
parent 93243fdba2
commit 77eec1a78c
2 changed files with 18 additions and 8 deletions

View File

@ -41,14 +41,24 @@ public class Utils {
*/
// Three alternatives: hashtag at the start of the text, hashtag preceded by whitespace,
// and hashtag directly preceded by an opening bracket ('[', '(' or '{').
// Each alternative ends with "([-_.]?\\w+)*" so that single-character tags and tags with
// several '-', '_' or '.' separated parts are matched in full in all three positions.
private static final String HASHTAG_REGEX = "^#\\w+([-_.]?\\w+)*|\\s#\\w+([-_.]?\\w+)*|(?<=[\\[({])#\\w+([-_.]?\\w+)*";
/**
* IPv4 regex: four dot-separated decimal octets, each in the range 0-255
* (leading zeros such as "01" or "001" are accepted), delimited by word
* boundaries on both sides.
*/
public static final String IPV4_REGEX = "\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b";
/**
* IPv6 regex: matches only the full form, i.e. eight groups of one to four
* hexadecimal digits separated by colons, delimited by word boundaries.
* Compressed addresses using "::" are not matched.
* The square brackets required around an IPv6 host inside a URL are not part
* of this expression; the URL pattern below wraps it in escaped brackets.
*/
public static final String IPV6_REGEX = "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b";
/**
* Pattern for URLS
*/
private static final Pattern URL_PATTERN = Pattern.compile(
"\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
"(\\w+:\\w+@)?((([-\\w]+\\.)+(com|org|net|gov" +
"|mil|biz|info|mobi|name|aero|jobs|museum" +
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
"|travel|[a-z]{2,5}))|("+ IPV4_REGEX +")|(\\["+ IPV6_REGEX +"\\]))(:[\\d]{1,5})?" +
"(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" +
"((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" +
@ -260,12 +270,12 @@ public class Utils {
public static String getHttpToken(String originalItem) {
// apply pattern
String item = null;
// needed because the text is escaped: a URL containing "&" would arrive as "&amp;" and the matcher below would stop at the ";"
if (originalItem.startsWith("http") || originalItem.startsWith("www") ) {
originalItem = originalItem.replaceAll("amp;", "");
}
Matcher matcher = URL_PATTERN.matcher(originalItem);
if(matcher.find()){
logger.debug("Found match url " + matcher.group());
@ -352,18 +362,18 @@ public class Utils {
description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
return description;
}
/**
 * Strips HTML tags from the given text. Every substring that starts with
 * '&lt;' and runs up to the next '&gt;' is deleted; everything else is kept.
 * @param text the text to clean, may be null
 * @return the text without HTML tags, or null when the input is null
 */
public static String removeHTMLFromText(String text){
    return (text == null) ? null : text.replaceAll("\\<[^>]*>", "");
}
}

View File

@ -17,7 +17,7 @@ public class TestUnit {
// Manual check: passes a sample text to Utils.extractURL and prints the returned value.
// The @Test annotation below is commented out.
//@Test
public void extractUrl(){
// sample: HTTPS URL whose query string carries a percent-encoded SPARQL query
String url = "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on";
// sample: free text with surrounding spaces containing an HTTP URL with a bracketed IPv6 host, a port and a query string
String url = " test http://[2001:db8:0:1:1:1:1:1]:8080/group/preeco/what-if?p_p_id=simul_WAR_simulfishgrowthportlet&p_p_lifecycle=0 ";
String result = Utils.extractURL(url);
System.out.println("urls are " + result);
}