dnet-hadoop/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java

40 lines
1010 B
Java
Raw Normal View History

2020-05-08 16:49:47 +02:00
package eu.dnetlib.dhp.broker.oa.matchers.simple;
2020-05-13 12:00:27 +02:00
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
2020-06-22 08:51:31 +02:00
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
2020-05-13 12:00:27 +02:00
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
2020-06-11 11:25:18 +02:00
public class EnrichMissingAbstract extends UpdateMatcher<String> {
private static final int MIN_LENGTH = 200;
2020-05-13 12:00:27 +02:00
public EnrichMissingAbstract() {
2020-06-26 11:20:45 +02:00
super(1,
2020-06-12 09:47:55 +02:00
s -> Topic.ENRICH_MISSING_ABSTRACT,
(p, s) -> p.getAbstracts().add(s),
s -> s);
}
@Override
2020-06-22 08:51:31 +02:00
protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) {
2020-06-16 12:34:13 +02:00
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
return source
.getAbstracts()
.stream()
.filter(s -> StringUtils.normalizeSpace(s).length() >= MIN_LENGTH)
.map(Arrays::asList)
.findFirst()
.orElse(new ArrayList<>());
2020-05-13 12:00:27 +02:00
}
return new ArrayList<>();
}
}