social-util-library/src/main/java/org/gcube/socialnetworking/socialtoken/SanitizedURL.java

70 lines
1.7 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package org.gcube.socialnetworking.socialtoken;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Objects;
import java.util.regex.Pattern;
/**
 * A URL found inside free text, separated from the punctuation that surrounded it.
 *
 * <p>At most one leading character and any number of trailing characters are split off
 * when they belong to the removable set (dot, comma, semicolon, round brackets, colon,
 * double quote, single quote, backslash and the two guillemets). The removed characters
 * are kept in {@link #getPrefix()} and {@link #getPostfix()}, while the remaining text is
 * parsed into the {@link URL} returned by {@link #getURL()}.
 *
 * @author Luca Frosini (ISTI - CNR)
 * @author Massimiliano Assante (ISTI - CNR)
 */
public class SanitizedURL {

    /** Regex character class listing the punctuation that may be stripped around a URL. */
    private static final String CHARACTERS_TO_REMOVE = "[\\.\\,\\;\\(\\)\\:\\\"\\'\\\\\\\\\\«\\»]";

    /** Compiled once: {@code String.matches} would recompile the expression on every call. */
    private static final Pattern REMOVABLE_CHARACTER = Pattern.compile(CHARACTERS_TO_REMOVE);

    /** The single leading character that was stripped, or the empty string if none was. */
    protected String prefix;

    /** The trailing characters that were stripped, in their original order; possibly empty. */
    protected String postfix;

    /** The URL parsed from the text left after stripping prefix and postfix. */
    protected final URL url;

    /**
     * Splits the surrounding punctuation from {@code urlString} and parses what is left.
     *
     * <p>A remaining text starting with {@code "www."} gets {@code "http://"} prepended
     * before parsing.
     *
     * @param urlString the raw text containing the URL; must hold at least two characters
     * @throws MalformedURLException if {@code urlString} is {@code null} or shorter than two
     *         characters, if nothing is left once the punctuation has been stripped, or if
     *         the remaining text cannot be parsed by {@link URL#URL(String)}
     */
    public SanitizedURL(String urlString) throws MalformedURLException {
        if (Objects.isNull(urlString) || urlString.length() < 2) {
            throw new MalformedURLException("The URL string must contain at least two characters");
        }

        String candidate = urlString;

        // Only ONE leading character is ever treated as prefix.
        String leading = candidate.substring(0, 1);
        if (isRemovable(leading)) {
            prefix = leading;
            candidate = candidate.substring(1);
        } else {
            prefix = "";
        }

        if (candidate.startsWith("www.")) {
            candidate = "http://" + candidate;
        }

        postfix = "";
        candidate = stripPostfix(candidate);

        // Inputs made only of punctuation (e.g. "()" or "..") leave nothing to parse.
        if (candidate.isEmpty()) {
            throw new MalformedURLException("No URL left after removing the surrounding punctuation");
        }

        url = new URL(candidate);
    }

    /**
     * Tells whether the given one-character string belongs to the removable set.
     *
     * @param character a string holding exactly one character
     * @return {@code true} if the character matches {@link #CHARACTERS_TO_REMOVE}
     */
    private static boolean isRemovable(String character) {
        return REMOVABLE_CHARACTER.matcher(character).matches();
    }

    /**
     * Removes every trailing removable character from {@code urlString} and stores the
     * removed run, in its original order, at the front of {@link #postfix}.
     *
     * <p>The text can end with more than one character to strip, e.g.
     * {@code 'https://data.d4science.net/tfXA),'} ends with {@code '),'}. The scan stops
     * at index 0, so a text made only of removable characters yields the empty string
     * instead of running past the start of the string.
     *
     * @param urlString the text to trim
     * @return {@code urlString} without its trailing removable characters
     */
    private String stripPostfix(String urlString) {
        int end = urlString.length();
        while (end > 0 && isRemovable(urlString.substring(end - 1, end))) {
            end--;
        }
        postfix = urlString.substring(end) + postfix;
        return urlString.substring(0, end);
    }

    /**
     * @return the stripped leading character, or the empty string if none was removed
     */
    public String getPrefix() {
        return prefix;
    }

    /**
     * @return the stripped trailing characters, or the empty string if none were removed
     */
    public String getPostfix() {
        return postfix;
    }

    /**
     * @return the URL parsed from the sanitised text
     */
    public URL getURL() {
        return url;
    }
}