From 77eec1a78c73c9716c47dbe74ef11800713d2622 Mon Sep 17 00:00:00 2001 From: Costantino Perciante Date: Fri, 19 Jan 2018 10:55:54 +0000 Subject: [PATCH] urls with ipv4 and 6 are now supported git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/social-networking/social-util-library@162343 82a268e6-3cf1-43bd-a215-b396298e98cf --- .../socialutillibrary/Utils.java | 24 +++++++++++++------ .../socialutillibrary/TestUnit.java | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java index b328ffc..92c6545 100644 --- a/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java +++ b/src/main/java/org/gcube/social_networking/socialutillibrary/Utils.java @@ -41,14 +41,24 @@ public class Utils { */ private static final String HASHTAG_REGEX = "^#\\w+([-_.]?\\w+)*|\\s#\\w+([-_.]?\\w+)*|(?<=[\\[({])#\\w+([-_.]?\\w+)"; + /** + * IPv4 regex + */ + public static final String IPV4_REGEX = "\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b"; + + /** + * IPV6 regex needs to be put in square brackets, see below + */ + public static final String IPV6_REGEX = "\\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\\b"; + /** * Pattern for URLS */ private static final Pattern URL_PATTERN = Pattern.compile( "\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" + - "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" + + "(\\w+:\\w+@)?((([-\\w]+\\.)+(com|org|net|gov" + "|mil|biz|info|mobi|name|aero|jobs|museum" + - "|travel|[a-z]{2,5}))(:[\\d]{1,5})?" + + "|travel|[a-z]{2,5}))|("+ IPV4_REGEX +")|(\\["+ IPV6_REGEX +"\\]))(:[\\d]{1,5})?" + "(((\\/([-\\w~!$+|.,=]|%[a-fA-F\\d]{2})+)+|\\/)+|\\?|#)?" + "((\\?([-\\w~!$+|.,*:]|%[a-fA-F\\d{2}])+=?" + "([-\\w~!$+|.,*:=]|%[a-fA-F\\d]{2})*)" + @@ -260,12 +270,12 @@ public class Utils { public static String getHttpToken(String originalItem) { // apply pattern String item = null; - + //needed because we escape the text and a URL containing the "&" would arrive ad & and the matcher below would stop at ; if (originalItem.startsWith("http") || originalItem.startsWith("www") ) { originalItem = originalItem.replaceAll("amp;", ""); } - + Matcher matcher = URL_PATTERN.matcher(originalItem); if(matcher.find()){ logger.debug("Found match url " + matcher.group()); @@ -352,18 +362,18 @@ public class Utils { description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description); return description; } - + /** * Remove HTML tags from text by using the following '\\<[^>]*>"' * @param text * @return a String without html tags */ public static String removeHTMLFromText(String text){ - + if(text == null) return null; else return text.replaceAll("\\<[^>]*>",""); - + } } diff --git a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java index 00d7e01..df9acd1 100644 --- a/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java +++ b/src/test/java/org/gcube/social_networking/socialutillibrary/TestUnit.java @@ -17,7 +17,7 @@ public class TestUnit { //@Test public void extractUrl(){ - String url = "https://virtuoso.parthenos.d4science.org/sparql?default-graph-uri=&query=%09SELECT+%3Fp+%28COUNT%28%3Fp%29+as+%3FpCount%29++%0D%0A%09%09%09%09%09WHERE+%7B%5B%5D+%3Fp+%5B%5D%7D%0D%0A%09%09%09%09%09GROUP+BY+%3Fp&format=text%2Fhtml&timeout=0&debug=on"; + String url = " test http://[2001:db8:0:1:1:1:1:1]:8080/group/preeco/what-if?p_p_id=simul_WAR_simulfishgrowthportlet&p_p_lifecycle=0 "; String result = Utils.extractURL(url); System.out.println("urls are " + result); }