2023-07-06 10:28:53 +02:00
|
|
|
|
2018-10-02 10:37:54 +02:00
|
|
|
package eu.dnetlib.pace.clustering;
|
|
|
|
|
|
|
|
import java.util.Collection;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
|
2020-02-10 12:38:40 +01:00
|
|
|
import org.apache.commons.lang3.RandomStringUtils;
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
2018-10-02 10:37:54 +02:00
|
|
|
|
|
|
|
import com.google.common.collect.Lists;
|
|
|
|
|
2023-07-06 10:28:53 +02:00
|
|
|
import eu.dnetlib.pace.config.Config;
|
|
|
|
|
2018-10-24 12:09:41 +02:00
|
|
|
@ClusteringClass("spacetrimmingfieldvalue")
|
2018-10-02 10:37:54 +02:00
|
|
|
public class SpaceTrimmingFieldValue extends AbstractClusteringFunction {
|
|
|
|
|
2023-10-02 09:25:12 +02:00
|
|
|
public SpaceTrimmingFieldValue(final Map<String, Object> params) {
|
2018-10-02 10:37:54 +02:00
|
|
|
super(params);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2019-10-08 14:53:52 +02:00
|
|
|
protected Collection<String> doApply(final Config conf, final String s) {
|
2018-10-02 10:37:54 +02:00
|
|
|
final List<String> res = Lists.newArrayList();
|
|
|
|
|
2023-07-06 10:28:53 +02:00
|
|
|
res
|
|
|
|
.add(
|
2023-10-02 09:25:12 +02:00
|
|
|
StringUtils.isBlank(s) ? RandomStringUtils.random(param("randomLength"))
|
2023-07-06 10:28:53 +02:00
|
|
|
: s.toLowerCase().replaceAll("\\s+", ""));
|
2018-10-02 10:37:54 +02:00
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|