2020-05-08 16:49:47 +02:00
|
|
|
|
2020-05-22 17:17:41 +02:00
|
|
|
package eu.dnetlib.dhp.broker.oa.matchers.simple;
|
2020-05-07 12:31:26 +02:00
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
import java.util.ArrayList;
|
2020-05-07 12:31:26 +02:00
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.List;
|
|
|
|
|
2020-11-19 10:42:10 +01:00
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
2020-06-22 08:51:31 +02:00
|
|
|
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
2020-05-13 12:00:27 +02:00
|
|
|
import eu.dnetlib.dhp.broker.model.Topic;
|
2020-05-22 17:17:41 +02:00
|
|
|
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
2020-05-07 12:31:26 +02:00
|
|
|
|
2020-06-11 11:25:18 +02:00
|
|
|
public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
2020-05-07 12:31:26 +02:00
|
|
|
|
2020-11-19 10:42:10 +01:00
|
|
|
private static final int MIN_LENGTH = 200;
|
|
|
|
|
2020-05-13 12:00:27 +02:00
|
|
|
public EnrichMissingAbstract() {
|
2020-06-26 11:20:45 +02:00
|
|
|
super(1,
|
2020-06-12 09:47:55 +02:00
|
|
|
s -> Topic.ENRICH_MISSING_ABSTRACT,
|
|
|
|
(p, s) -> p.getAbstracts().add(s),
|
|
|
|
s -> s);
|
2020-05-07 12:31:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2020-06-22 08:51:31 +02:00
|
|
|
protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) {
|
2020-06-16 12:34:13 +02:00
|
|
|
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
|
2020-11-19 10:42:10 +01:00
|
|
|
return source
|
|
|
|
.getAbstracts()
|
|
|
|
.stream()
|
|
|
|
.filter(s -> StringUtils.normalizeSpace(s).length() >= MIN_LENGTH)
|
|
|
|
.map(Arrays::asList)
|
|
|
|
.findFirst()
|
|
|
|
.orElse(new ArrayList<>());
|
2020-05-13 12:00:27 +02:00
|
|
|
}
|
2020-11-19 10:42:10 +01:00
|
|
|
return new ArrayList<>();
|
2020-05-07 12:31:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|