social-util-library/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedURL.java

88 lines
2.2 KiB
Java
Raw Normal View History

package org.gcube.socialnetworking.socialtoken;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Objects;
2021-01-15 10:08:33 +01:00
/**
 * A URL candidate taken from free text, separated from the punctuation that
 * surrounds it.
 * <p>
 * Leading and trailing characters belonging to the removable set (brackets,
 * braces, parentheses, dot, comma, semicolon, colon, quotes, backslash and
 * guillemets) are peeled off and kept in {@link #getPrefix()} and
 * {@link #getPostfix()}; what remains is parsed into {@link #getURL()}.
 * A candidate starting with {@code www.} is given an {@code http://} scheme
 * before being parsed.
 *
 * @author Luca Frosini (ISTI - CNR)
 * @author Massimiliano Assante (ISTI - CNR)
 */
public class SanitizedURL {

	/**
	 * Characters removed from both ends of the candidate. The guillemets are
	 * written as Unicode escapes so that matching does not depend on the
	 * encoding used to compile this file.
	 */
	private static final String CHARACTERS_TO_REMOVE = "{}[].,;():\"'\u00AB\u00BB\\";

	/** Fewest characters that must be left at every step for the candidate to be accepted. */
	private static final int MINIMUM_LENGTH = 2;

	protected String prefix;
	protected String postfix;
	protected final URL url;

	/**
	 * Strips the surrounding punctuation from {@code urlString} and parses the rest.
	 *
	 * @param urlString the raw candidate, possibly wrapped in punctuation
	 * @throws MalformedURLException if {@code urlString} is {@code null}, if fewer
	 *         than two characters are available before or after any stripping
	 *         step, or if the remaining text is rejected by {@link URL#URL(String)}
	 */
	public SanitizedURL(String urlString) throws MalformedURLException {
		prefix = "";
		postfix = "";

		urlString = stripPrefix(urlString);
		// The scheme is added after the prefix is removed (so that "(www.…" is
		// recognised) and before the postfix is removed, as in the original flow.
		if (urlString.startsWith("www.")) {
			urlString = "http://" + urlString;
		}
		urlString = stripPostfix(urlString);

		url = new URL(urlString);
	}

	/**
	 * Removes the leading run of removable characters and appends it to {@link #prefix}.
	 *
	 * @param urlString the candidate to trim
	 * @return {@code urlString} without its leading removable characters
	 * @throws MalformedURLException if the candidate is {@code null} or shrinks
	 *         below two characters
	 */
	private String stripPrefix(String urlString) throws MalformedURLException {
		requireNonNull(urlString);
		int start = 0;
		requireMinimumLength(urlString, urlString.length());
		// Single linear scan: no recursion and no per-character substring copies.
		while (isRemovable(urlString.charAt(start))) {
			start++;
			requireMinimumLength(urlString, urlString.length() - start);
		}
		prefix = prefix + urlString.substring(0, start);
		return urlString.substring(start);
	}

	/**
	 * Removes the trailing run of removable characters and prepends it to
	 * {@link #postfix}. The candidate may end with more than one such character,
	 * e.g. {@code https://data.d4science.net/tfXA),} ends with {@code ),}.
	 *
	 * @param urlString the candidate to trim
	 * @return {@code urlString} without its trailing removable characters
	 * @throws MalformedURLException if the candidate is {@code null} or shrinks
	 *         below two characters
	 */
	private String stripPostfix(String urlString) throws MalformedURLException {
		requireNonNull(urlString);
		int end = urlString.length();
		requireMinimumLength(urlString, end);
		while (isRemovable(urlString.charAt(end - 1))) {
			end--;
			requireMinimumLength(urlString, end);
		}
		postfix = urlString.substring(end) + postfix;
		return urlString.substring(0, end);
	}

	/** Tells whether {@code c} belongs to the set of characters to strip. */
	private static boolean isRemovable(char c) {
		return CHARACTERS_TO_REMOVE.indexOf(c) >= 0;
	}

	/** Rejects a {@code null} candidate with the exception type callers already handle. */
	private static void requireNonNull(String urlString) throws MalformedURLException {
		if (Objects.isNull(urlString)) {
			throw new MalformedURLException("URL string is null");
		}
	}

	/** Rejects a candidate of which fewer than {@link #MINIMUM_LENGTH} characters remain. */
	private static void requireMinimumLength(String urlString, int remaining) throws MalformedURLException {
		if (remaining < MINIMUM_LENGTH) {
			throw new MalformedURLException("Too few characters left to form a URL in: '" + urlString + "'");
		}
	}

	/**
	 * @return the characters stripped from the beginning of the candidate, in
	 *         their original order; empty if none were stripped
	 */
	public String getPrefix() {
		return prefix;
	}

	/**
	 * @return the characters stripped from the end of the candidate, in their
	 *         original order; empty if none were stripped
	 */
	public String getPostfix() {
		return postfix;
	}

	/**
	 * @return the URL parsed from the candidate once prefix and postfix were removed
	 */
	public URL getURL() {
		return url;
	}
}