dnet-hadoop/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java

35 lines
806 B
Java
Raw Normal View History

2023-07-06 10:28:53 +02:00
package eu.dnetlib.pace.clustering;
import java.util.Collection;
import java.util.List;
import java.util.Map;
2020-02-10 12:38:40 +01:00
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists;
2023-07-06 10:28:53 +02:00
import eu.dnetlib.pace.config.Config;
@ClusteringClass("spacetrimmingfieldvalue")
public class SpaceTrimmingFieldValue extends AbstractClusteringFunction {
public SpaceTrimmingFieldValue(final Map<String, Integer> params) {
super(params);
}
@Override
protected Collection<String> doApply(final Config conf, final String s) {
final List<String> res = Lists.newArrayList();
2023-07-06 10:28:53 +02:00
res
.add(
StringUtils.isBlank(s) ? RandomStringUtils.random(getParams().get("randomLength"))
: s.toLowerCase().replaceAll("\\s+", ""));
return res;
}
}