considering abstract > MIN_LENGTH in ENRICH_MISSING_ABSTRACT

This commit is contained in:
Michele Artini 2020-11-19 10:42:10 +01:00
parent 33da2e3d6c
commit ab08d12c46
1 changed files with 12 additions and 3 deletions

View File

@ -5,12 +5,16 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
public class EnrichMissingAbstract extends UpdateMatcher<String> {
private static final int MIN_LENGTH = 200;
public EnrichMissingAbstract() {
super(1,
s -> Topic.ENRICH_MISSING_ABSTRACT,
@ -21,10 +25,15 @@ public class EnrichMissingAbstract extends UpdateMatcher<String> {
@Override
protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) {
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
return Arrays.asList(source.getAbstracts().get(0));
} else {
return new ArrayList<>();
return source
.getAbstracts()
.stream()
.filter(s -> StringUtils.normalizeSpace(s).length() >= MIN_LENGTH)
.map(Arrays::asList)
.findFirst()
.orElse(new ArrayList<>());
}
return new ArrayList<>();
}
}