URLs with IPv4 and IPv6 addresses are now supported
git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@162343 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
93243fdba2
commit
77eec1a78c
|
@ -41,14 +41,24 @@ public class Utils {
|
|||
*/
|
||||
private static final String HASHTAG_REGEX = "^#\\w+([-_.]?\\w+)*|\\s#\\w+([-_.]?\\w+)*|(?<=[\\[({])#\\w+([-_.]?\\w+)";
|
||||
|
||||
/**
|
||||
* IPv4 regex: four dot-separated decimal octets, each in the range 0-255
|
||||
*/
|
||||
public static final String IPV4_REGEX = "\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b";
|
||||
|
||||
/**
|
||||
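* IPv6 regex (full eight-group form only); inside a URL the address must be enclosed in square brackets, which are added where this regex is embedded in the URL pattern below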
|
||||
*/
|
||||
public static final String IPV6_REGEX = "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b";
|
||||
|
||||
/**
|
||||
* Pattern for URLs
|
||||
*/
|
||||
private static final Pattern URL_PATTERN = Pattern.compile(
|
||||
"\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
|
||||
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
|
||||
"(\\w+:\\w+@)?((([-\\w]+\\.)+(com|org|net|gov" +
|
||||
"|mil|biz|info|mobi|name|aero|jobs|museum" +
|
||||
"|travel|[a-z]{2,5}))(:[\\d]{1,5})?" +
|
||||
"|travel|[a-z]{2,5}))|("+ IPV4_REGEX +")|(\\["+ IPV6_REGEX +"\\]))(:[\\d]{1,5})?" +
|
||||
"(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" +
|
||||
"((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" +
|
||||
"([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" +
|
||||
|
@ -260,12 +270,12 @@ public class Utils {
|
|||
public static String getHttpToken(String originalItem) {
|
||||
// apply pattern
|
||||
String item = null;
|
||||
|
||||
|
||||
// needed because we escape the text: a URL containing "&" would arrive as "&amp;" and the matcher below would stop at the ";"
|
||||
if (originalItem.startsWith("http") || originalItem.startsWith("www") ) {
|
||||
originalItem = originalItem.replaceAll("amp;", "");
|
||||
}
|
||||
|
||||
|
||||
Matcher matcher = URL_PATTERN.matcher(originalItem);
|
||||
if(matcher.find()){
|
||||
logger.debug("Found match url " + matcher.group());
|
||||
|
@ -352,18 +362,18 @@ public class Utils {
|
|||
description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
|
||||
return description;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes HTML tags from text by deleting every match of the regex "\\<[^>]*>"
|
||||
* @param text
|
||||
* @return a String without html tags
|
||||
*/
|
||||
public static String removeHTMLFromText(String text){
|
||||
|
||||
|
||||
if(text == null)
|
||||
return null;
|
||||
else
|
||||
return text.replaceAll("\\<[^>]*>","");
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ public class TestUnit {
|
|||
//@Test
|
||||
public void extractUrl(){
|
||||
|
||||
String url = "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on";
|
||||
String url = " test http://[2001:db8:0:1:1:1:1:1]:8080/group/preeco/what-if?p_p_id=simul_WAR_simulfishgrowthportlet&p_p_lifecycle=0 ";
|
||||
String result = Utils.extractURL(url);
|
||||
System.out.println("urls are " + result);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue