Consider only abstracts with length >= MIN_LENGTH in ENRICH_MISSING_ABSTRACT

This commit is contained in:
Michele Artini 2020-11-19 10:42:10 +01:00
parent 33da2e3d6c
commit ab08d12c46
1 changed file with 12 additions and 3 deletions

View File

@@ -5,12 +5,16 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerMainEntity;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
public class EnrichMissingAbstract extends UpdateMatcher<String> { public class EnrichMissingAbstract extends UpdateMatcher<String> {
private static final int MIN_LENGTH = 200;
public EnrichMissingAbstract() { public EnrichMissingAbstract() {
super(1, super(1,
s -> Topic.ENRICH_MISSING_ABSTRACT, s -> Topic.ENRICH_MISSING_ABSTRACT,
@@ -21,10 +25,15 @@ public class EnrichMissingAbstract extends UpdateMatcher<String> {
@Override @Override
protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) { protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) {
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) { if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
return Arrays.asList(source.getAbstracts().get(0)); return source
} else { .getAbstracts()
return new ArrayList<>(); .stream()
.filter(s -> StringUtils.normalizeSpace(s).length() >= MIN_LENGTH)
.map(Arrays::asList)
.findFirst()
.orElse(new ArrayList<>());
} }
return new ArrayList<>();
} }
} }