package eu.dnetlib.pace.clustering;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.config.Config;

/**
 * Clustering function registered under the name {@code "urlclustering"}.
 *
 * <p>Each non-empty input value is parsed as a {@link URL} and replaced by the host component of
 * that URL; the distinct hosts form the result.
 */
@ClusteringClass("urlclustering")
public class UrlClustering extends AbstractPaceFunctions implements ClusteringFunction {

	/**
	 * Parameters handed to the constructor. They are stored and exposed through
	 * {@link #getParams()} but are not consulted by {@link #apply(Config, List)}.
	 */
	// NOTE(review): the Map type arguments are not visible from this file; the type is left raw so
	// it stays compatible with whatever ClusteringFunction declares - confirm and parameterize.
	protected Map params;

	/**
	 * Creates the clustering function.
	 *
	 * @param params configuration parameters; kept as-is and returned by {@link #getParams()}
	 */
	public UrlClustering(final Map params) {
		this.params = params;
	}

	/**
	 * Computes the set of distinct URL hosts found in {@code fields}.
	 *
	 * <p>{@code null} and empty values are skipped. If any remaining value is not a well-formed
	 * URL, the whole result is an empty set (not just that value being dropped).
	 *
	 * @param conf   the configuration; not used by this implementation
	 * @param fields the values to parse as URLs
	 * @return a mutable set holding the host of every parsed URL, or an empty set when at least
	 *         one value is malformed
	 */
	@Override
	public Collection<String> apply(final Config conf, List<String> fields) {
		try {
			return fields
				.stream()
				// null entries are treated like empty ones instead of raising an NPE
				.filter(f -> f != null && !f.isEmpty())
				.map(this::asUrl)
				.map(URL::getHost)
				.collect(Collectors.toCollection(HashSet::new));
		} catch (IllegalStateException e) {
			// asUrl reports a malformed value with IllegalStateException; a single bad value
			// discards every key collected so far.
			return new HashSet<>();
		}
	}

	/**
	 * Returns the parameters this function was constructed with.
	 *
	 * @return the map passed to the constructor ({@code null} if {@code null} was passed)
	 */
	@Override
	public Map getParams() {
		return params;
	}

	/**
	 * Parses {@code value} as a URL, converting the checked parse failure into an unchecked one so
	 * the method can be used inside a stream pipeline.
	 *
	 * @param value the text to parse
	 * @return the parsed URL
	 * @throws IllegalStateException if {@code value} is not a well-formed URL; the original
	 *                               {@link MalformedURLException} is attached as the cause
	 */
	private URL asUrl(String value) {
		try {
			return new URL(value);
		} catch (MalformedURLException e) {
			// should not happen as checked by pace typing
			throw new IllegalStateException("invalid URL: " + value, e);
		}
	}
}