forked from D-Net/dnet-hadoop
Consider only abstracts whose whitespace-normalized length is >= MIN_LENGTH in ENRICH_MISSING_ABSTRACT
This commit is contained in:
parent
33da2e3d6c
commit
ab08d12c46
|
@ -5,12 +5,16 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
||||||
import eu.dnetlib.dhp.broker.model.Topic;
|
import eu.dnetlib.dhp.broker.model.Topic;
|
||||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||||
|
|
||||||
public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
||||||
|
|
||||||
|
private static final int MIN_LENGTH = 200;
|
||||||
|
|
||||||
public EnrichMissingAbstract() {
|
public EnrichMissingAbstract() {
|
||||||
super(1,
|
super(1,
|
||||||
s -> Topic.ENRICH_MISSING_ABSTRACT,
|
s -> Topic.ENRICH_MISSING_ABSTRACT,
|
||||||
|
@ -21,10 +25,15 @@ public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
||||||
@Override
|
@Override
|
||||||
protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) {
|
protected List<String> findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) {
|
||||||
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
|
if (isMissing(target.getAbstracts()) && !isMissing(source.getAbstracts())) {
|
||||||
return Arrays.asList(source.getAbstracts().get(0));
|
return source
|
||||||
} else {
|
.getAbstracts()
|
||||||
return new ArrayList<>();
|
.stream()
|
||||||
|
.filter(s -> StringUtils.normalizeSpace(s).length() >= MIN_LENGTH)
|
||||||
|
.map(Arrays::asList)
|
||||||
|
.findFirst()
|
||||||
|
.orElse(new ArrayList<>());
|
||||||
}
|
}
|
||||||
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue