1
0
Fork 0

refactoring

This commit is contained in:
Michele Artini 2020-06-12 09:47:55 +02:00
parent 472cf77639
commit c22cb5a3c6
16 changed files with 130 additions and 309 deletions

View File

@ -6,10 +6,14 @@ import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.broker.objects.Publication;
import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Field;
@ -18,9 +22,17 @@ import eu.dnetlib.pace.config.DedupConfig;
public abstract class UpdateMatcher<T> { public abstract class UpdateMatcher<T> {
private final boolean multipleUpdate; private final boolean multipleUpdate;
private final Function<T, Topic> topicFunction;
private final BiConsumer<Publication, T> compileHighlightFunction;
private final Function<T, String> highlightToStringFunction;
public UpdateMatcher(final boolean multipleUpdate) { public UpdateMatcher(final boolean multipleUpdate, final Function<T, Topic> topicFunction,
final BiConsumer<Publication, T> compileHighlightFunction,
final Function<T, String> highlightToStringFunction) {
this.multipleUpdate = multipleUpdate; this.multipleUpdate = multipleUpdate;
this.topicFunction = topicFunction;
this.compileHighlightFunction = compileHighlightFunction;
this.highlightToStringFunction = highlightToStringFunction;
} }
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final ResultWithRelations res, public Collection<UpdateInfo<T>> searchUpdatesForRecord(final ResultWithRelations res,
@ -31,7 +43,11 @@ public abstract class UpdateMatcher<T> {
for (final ResultWithRelations source : others) { for (final ResultWithRelations source : others) {
if (source != res) { if (source != res) {
for (final UpdateInfo<T> info : findUpdates(source, res, dedupConfig)) { for (final T hl : findDifferences(source, res)) {
final Topic topic = getTopicFunction().apply(hl);
final UpdateInfo<T> info = new UpdateInfo<>(topic, hl, source, res, getCompileHighlightFunction(),
getHighlightToStringFunction(),
dedupConfig);
final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
} else { } else {
@ -55,8 +71,7 @@ public abstract class UpdateMatcher<T> {
} }
} }
protected abstract List<UpdateInfo<T>> findUpdates(ResultWithRelations source, ResultWithRelations target, protected abstract List<T> findDifferences(ResultWithRelations source, ResultWithRelations target);
DedupConfig dedupConfig);
protected static boolean isMissing(final List<Field<String>> list) { protected static boolean isMissing(final List<Field<String>> list) {
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue()); return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue());
@ -66,4 +81,20 @@ public abstract class UpdateMatcher<T> {
return field == null || StringUtils.isBlank(field.getValue()); return field == null || StringUtils.isBlank(field.getValue());
} }
public boolean isMultipleUpdate() {
return multipleUpdate;
}
public Function<T, Topic> getTopicFunction() {
return topicFunction;
}
public BiConsumer<Publication, T> getCompileHighlightFunction() {
return compileHighlightFunction;
}
public Function<T, String> getHighlightToStringFunction() {
return highlightToStringFunction;
}
} }

View File

@ -8,29 +8,26 @@ import java.util.stream.Collectors;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedDataset; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedDataset;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.pace.config.DedupConfig;
public abstract class AbstractEnrichMissingDataset public abstract class AbstractEnrichMissingDataset
extends UpdateMatcher<eu.dnetlib.broker.objects.Dataset> { extends UpdateMatcher<eu.dnetlib.broker.objects.Dataset> {
private final Topic topic;
public AbstractEnrichMissingDataset(final Topic topic) { public AbstractEnrichMissingDataset(final Topic topic) {
super(true); super(true,
this.topic = topic; rel -> topic,
(p, rel) -> p.getDatasets().add(rel),
rel -> rel.getInstances().get(0).getUrl());
} }
protected abstract boolean filterByType(String relType); protected abstract boolean filterByType(String relType);
@Override @Override
protected final List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates( protected final List<eu.dnetlib.broker.objects.Dataset> findDifferences(
final ResultWithRelations source, final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingDatasets = target final Set<String> existingDatasets = target
.getDatasets() .getDatasets()
@ -47,26 +44,8 @@ public abstract class AbstractEnrichMissingDataset
.map(RelatedDataset::getRelDataset) .map(RelatedDataset::getRelDataset)
.filter(d -> !existingDatasets.contains(d.getId())) .filter(d -> !existingDatasets.contains(d.getId()))
.map(ConversionUtils::oafDatasetToBrokerDataset) .map(ConversionUtils::oafDatasetToBrokerDataset)
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
protected final UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
final eu.dnetlib.broker.objects.Dataset highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
getTopic(),
highlightValue, source, target,
(p, rel) -> p.getDatasets().add(rel),
rel -> rel.getInstances().get(0).getUrl(),
dedupConfig);
}
public Topic getTopic() {
return topic;
}
} }

View File

@ -5,26 +5,25 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.broker.objects.Project;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMissingProject public class EnrichMissingProject
extends UpdateMatcher<eu.dnetlib.broker.objects.Project> { extends UpdateMatcher<eu.dnetlib.broker.objects.Project> {
public EnrichMissingProject() { public EnrichMissingProject() {
super(true); super(true,
prj -> Topic.ENRICH_MISSING_PROJECT,
(p, prj) -> p.getProjects().add(prj),
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
} }
@Override @Override
protected List<UpdateInfo<eu.dnetlib.broker.objects.Project>> findUpdates(final ResultWithRelations source, protected List<Project> findDifferences(final ResultWithRelations source, final ResultWithRelations target) {
final ResultWithRelations target,
final DedupConfig dedupConfig) {
if (source.getProjects().isEmpty()) { if (source.getProjects().isEmpty()) {
return Arrays.asList(); return Arrays.asList();
} else { } else {
@ -33,21 +32,7 @@ public class EnrichMissingProject
.stream() .stream()
.map(RelatedProject::getRelProject) .map(RelatedProject::getRelProject)
.map(ConversionUtils::oafProjectToBrokerProject) .map(ConversionUtils::oafProjectToBrokerProject)
.map(p -> generateUpdateInfo(p, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
} }
public UpdateInfo<eu.dnetlib.broker.objects.Project> generateUpdateInfo(
final eu.dnetlib.broker.objects.Project highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_PROJECT,
highlightValue, source, target,
(p, prj) -> p.getProjects().add(prj),
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode(), dedupConfig);
}
} }

View File

@ -8,22 +8,22 @@ import java.util.stream.Collectors;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMoreProject extends UpdateMatcher<eu.dnetlib.broker.objects.Project> { public class EnrichMoreProject extends UpdateMatcher<eu.dnetlib.broker.objects.Project> {
public EnrichMoreProject() { public EnrichMoreProject() {
super(true); super(true,
prj -> Topic.ENRICH_MORE_PROJECT,
(p, prj) -> p.getProjects().add(prj),
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
} }
@Override @Override
protected List<UpdateInfo<eu.dnetlib.broker.objects.Project>> findUpdates(final ResultWithRelations source, protected List<eu.dnetlib.broker.objects.Project> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingProjects = source final Set<String> existingProjects = source
.getProjects() .getProjects()
@ -38,20 +38,7 @@ public class EnrichMoreProject extends UpdateMatcher<eu.dnetlib.broker.objects.P
.map(RelatedProject::getRelProject) .map(RelatedProject::getRelProject)
.filter(p -> !existingProjects.contains(p.getId())) .filter(p -> !existingProjects.contains(p.getId()))
.map(ConversionUtils::oafProjectToBrokerProject) .map(ConversionUtils::oafProjectToBrokerProject)
.map(p -> generateUpdateInfo(p, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public UpdateInfo<eu.dnetlib.broker.objects.Project> generateUpdateInfo(
final eu.dnetlib.broker.objects.Project highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MORE_PROJECT,
highlightValue, source, target,
(p, prj) -> p.getProjects().add(prj),
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode(), dedupConfig);
}
} }

View File

@ -8,29 +8,27 @@ import java.util.stream.Collectors;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedPublication; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedPublication;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.pace.config.DedupConfig;
public abstract class AbstractEnrichMissingPublication public abstract class AbstractEnrichMissingPublication
extends UpdateMatcher<eu.dnetlib.broker.objects.Publication> { extends UpdateMatcher<eu.dnetlib.broker.objects.Publication> {
private final Topic topic;
public AbstractEnrichMissingPublication(final Topic topic) { public AbstractEnrichMissingPublication(final Topic topic) {
super(true); super(true,
this.topic = topic; rel -> topic,
(p, rel) -> p.getPublications().add(rel),
rel -> rel.getInstances().get(0).getUrl());
} }
protected abstract boolean filterByType(String relType); protected abstract boolean filterByType(String relType);
@Override @Override
protected final List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates( protected final List<eu.dnetlib.broker.objects.Publication> findDifferences(
final ResultWithRelations source, final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingPublications = target final Set<String> existingPublications = target
.getPublications() .getPublications()
@ -47,24 +45,7 @@ public abstract class AbstractEnrichMissingPublication
.map(RelatedPublication::getRelPublication) .map(RelatedPublication::getRelPublication)
.filter(d -> !existingPublications.contains(d.getId())) .filter(d -> !existingPublications.contains(d.getId()))
.map(ConversionUtils::oafResultToBrokerPublication) .map(ConversionUtils::oafResultToBrokerPublication)
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
protected final UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
final eu.dnetlib.broker.objects.Publication highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
getTopic(),
highlightValue, source, target,
(p, rel) -> p.getPublications().add(rel),
rel -> rel.getInstances().get(0).getUrl(), dedupConfig);
}
public Topic getTopic() {
return topic;
}
} }

View File

@ -8,23 +8,23 @@ import java.util.stream.Collectors;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMissingSoftware public class EnrichMissingSoftware
extends UpdateMatcher<eu.dnetlib.broker.objects.Software> { extends UpdateMatcher<eu.dnetlib.broker.objects.Software> {
public EnrichMissingSoftware() { public EnrichMissingSoftware() {
super(true); super(true,
s -> Topic.ENRICH_MISSING_SOFTWARE,
(p, s) -> p.getSoftwares().add(s),
s -> s.getName());
} }
@Override @Override
protected List<UpdateInfo<eu.dnetlib.broker.objects.Software>> findUpdates( protected List<eu.dnetlib.broker.objects.Software> findDifferences(
final ResultWithRelations source, final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
if (source.getSoftwares().isEmpty()) { if (source.getSoftwares().isEmpty()) {
return Arrays.asList(); return Arrays.asList();
@ -34,21 +34,8 @@ public class EnrichMissingSoftware
.stream() .stream()
.map(RelatedSoftware::getRelSoftware) .map(RelatedSoftware::getRelSoftware)
.map(ConversionUtils::oafSoftwareToBrokerSoftware) .map(ConversionUtils::oafSoftwareToBrokerSoftware)
.map(p -> generateUpdateInfo(p, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
} }
public UpdateInfo<eu.dnetlib.broker.objects.Software> generateUpdateInfo(
final eu.dnetlib.broker.objects.Software highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_SOFTWARE,
highlightValue, source, target,
(p, s) -> p.getSoftwares().add(s),
s -> s.getName(), dedupConfig);
}
} }

View File

@ -8,24 +8,24 @@ import java.util.stream.Collectors;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedSoftware;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMoreSoftware public class EnrichMoreSoftware
extends UpdateMatcher<eu.dnetlib.broker.objects.Software> { extends UpdateMatcher<eu.dnetlib.broker.objects.Software> {
public EnrichMoreSoftware() { public EnrichMoreSoftware() {
super(true); super(true,
s -> Topic.ENRICH_MORE_SOFTWARE,
(p, s) -> p.getSoftwares().add(s),
s -> s.getName());
} }
@Override @Override
protected List<UpdateInfo<eu.dnetlib.broker.objects.Software>> findUpdates( protected List<eu.dnetlib.broker.objects.Software> findDifferences(
final ResultWithRelations source, final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingSoftwares = source final Set<String> existingSoftwares = source
.getSoftwares() .getSoftwares()
@ -40,20 +40,7 @@ public class EnrichMoreSoftware
.map(RelatedSoftware::getRelSoftware) .map(RelatedSoftware::getRelSoftware)
.filter(p -> !existingSoftwares.contains(p.getId())) .filter(p -> !existingSoftwares.contains(p.getId()))
.map(ConversionUtils::oafSoftwareToBrokerSoftware) .map(ConversionUtils::oafSoftwareToBrokerSoftware)
.map(p -> generateUpdateInfo(p, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public UpdateInfo<eu.dnetlib.broker.objects.Software> generateUpdateInfo(
final eu.dnetlib.broker.objects.Software highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MORE_SOFTWARE,
highlightValue, source, target,
(p, s) -> p.getSoftwares().add(s),
s -> s.getName(), dedupConfig);
}
} }

View File

@ -7,38 +7,25 @@ import java.util.List;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMissingAbstract extends UpdateMatcher<String> { public class EnrichMissingAbstract extends UpdateMatcher<String> {
public EnrichMissingAbstract() { public EnrichMissingAbstract() {
super(false); super(false,
s -> Topic.ENRICH_MISSING_ABSTRACT,
(p, s) -> p.getAbstracts().add(s),
s -> s);
} }
@Override @Override
protected List<UpdateInfo<String>> findUpdates(final ResultWithRelations source, protected List<String> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
if (isMissing(target.getResult().getDescription()) && !isMissing(source.getResult().getDescription())) { if (isMissing(target.getResult().getDescription()) && !isMissing(source.getResult().getDescription())) {
return Arrays return Arrays
.asList( .asList(source.getResult().getDescription().get(0).getValue());
generateUpdateInfo(
source.getResult().getDescription().get(0).getValue(), source, target, dedupConfig));
} }
return new ArrayList<>(); return new ArrayList<>();
} }
public UpdateInfo<String> generateUpdateInfo(final String highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_ABSTRACT,
highlightValue, source, target,
(p, s) -> p.getAbstracts().add(s),
s -> s, dedupConfig);
}
} }

View File

@ -8,22 +8,22 @@ import java.util.stream.Collectors;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> { public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
public EnrichMissingAuthorOrcid() { public EnrichMissingAuthorOrcid() {
super(true); super(true,
aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID,
(p, aut) -> p.getCreators().add(aut),
aut -> aut);
} }
@Override @Override
protected List<UpdateInfo<String>> findUpdates(final ResultWithRelations source, protected List<String> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingOrcids = target final Set<String> existingOrcids = target
.getResult() .getResult()
@ -35,7 +35,7 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
.map(pid -> pid.getValue()) .map(pid -> pid.getValue())
.collect(Collectors.toSet()); .collect(Collectors.toSet());
final List<UpdateInfo<String>> list = new ArrayList<>(); final List<String> list = new ArrayList<>();
for (final Author author : source.getResult().getAuthor()) { for (final Author author : source.getResult().getAuthor()) {
final String name = author.getFullname(); final String name = author.getFullname();
@ -43,26 +43,11 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher<String> {
for (final StructuredProperty pid : author.getPid()) { for (final StructuredProperty pid : author.getPid()) {
if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid") if (pid.getQualifier().getClassid().equalsIgnoreCase("orcid")
&& !existingOrcids.contains(pid.getValue())) { && !existingOrcids.contains(pid.getValue())) {
list list.add(name + " [ORCID: " + pid.getValue() + "]");
.add(
generateUpdateInfo(name + " [ORCID: " + pid.getValue() + "]", source, target, dedupConfig));
;
} }
} }
} }
return list; return list;
} }
public UpdateInfo<String> generateUpdateInfo(final String highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_AUTHOR_ORCID,
highlightValue, source, target,
(p, aut) -> p.getCreators().add(aut),
aut -> aut,
dedupConfig);
}
} }

View File

@ -10,20 +10,20 @@ import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> { public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
public EnrichMissingOpenAccess() { public EnrichMissingOpenAccess() {
super(true); super(true,
i -> Topic.ENRICH_MISSING_OA_VERSION,
(p, i) -> p.getInstances().add(i),
Instance::getUrl);
} }
@Override @Override
protected List<UpdateInfo<Instance>> findUpdates(final ResultWithRelations source, protected List<Instance> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final long count = target final long count = target
.getResult() .getResult()
.getInstance() .getInstance()
@ -43,19 +43,7 @@ public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
.filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS)) .filter(i -> i.getAccessright().getClassid().equals(BrokerConstants.OPEN_ACCESS))
.map(ConversionUtils::oafInstanceToBrokerInstances) .map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(List::stream) .flatMap(List::stream)
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_OA_VERSION,
highlightValue, source, target,
(p, i) -> p.getInstances().add(i),
Instance::getUrl, dedupConfig);
}
} }

View File

@ -9,20 +9,20 @@ import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMissingPid extends UpdateMatcher<Pid> { public class EnrichMissingPid extends UpdateMatcher<Pid> {
public EnrichMissingPid() { public EnrichMissingPid() {
super(true); super(true,
pid -> Topic.ENRICH_MISSING_PID,
(p, pid) -> p.getPids().add(pid),
pid -> pid.getType() + "::" + pid.getValue());
} }
@Override @Override
protected List<UpdateInfo<Pid>> findUpdates(final ResultWithRelations source, protected List<Pid> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final long count = target.getResult().getPid().size(); final long count = target.getResult().getPid().size();
if (count > 0) { if (count > 0) {
@ -34,19 +34,7 @@ public class EnrichMissingPid extends UpdateMatcher<Pid> {
.getPid() .getPid()
.stream() .stream()
.map(ConversionUtils::oafPidToBrokerPid) .map(ConversionUtils::oafPidToBrokerPid)
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_PID,
highlightValue, source, target,
(p, pid) -> p.getPids().add(pid),
pid -> pid.getType() + "::" + pid.getValue(), dedupConfig);
}
} }

View File

@ -7,39 +7,25 @@ import java.util.List;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
public class EnrichMissingPublicationDate extends UpdateMatcher<String> { public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
public EnrichMissingPublicationDate() { public EnrichMissingPublicationDate() {
super(false); super(false,
date -> Topic.ENRICH_MISSING_PUBLICATION_DATE,
(p, date) -> p.setPublicationdate(date),
s -> s);
} }
@Override @Override
protected List<UpdateInfo<String>> findUpdates(final ResultWithRelations source, protected List<String> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
if (isMissing(target.getResult().getDateofacceptance()) if (isMissing(target.getResult().getDateofacceptance())
&& !isMissing(source.getResult().getDateofacceptance())) { && !isMissing(source.getResult().getDateofacceptance())) {
return Arrays return Arrays.asList(source.getResult().getDateofacceptance().getValue());
.asList(
generateUpdateInfo(
source.getResult().getDateofacceptance().getValue(), source, target, dedupConfig));
} }
return new ArrayList<>(); return new ArrayList<>();
} }
public UpdateInfo<String> generateUpdateInfo(final String highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MISSING_PUBLICATION_DATE,
highlightValue, source, target,
(p, date) -> p.setPublicationdate(date),
s -> s, dedupConfig);
}
} }

View File

@ -10,22 +10,22 @@ import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.config.DedupConfig;
/**
 * {@code UpdateMatcher} over {@code Pair<String, String>} highlights produced from subjects by
 * {@code ConversionUtils.oafSubjectToPair}.
 *
 * <p>NOTE(review): most lines below hold two renderings of the same statement (the
 * {@code findUpdates}/{@code UpdateInfo} form and the {@code findDifferences} form), and the
 * {@code @ -40,20 +40,7 @@} marker inside the method means part of its body is not shown here.
 */
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> { public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
/**
 * Passes {@code true} as the first superclass argument. The longer form of the call also passes
 * three lambdas:
 * <ul>
 * <li>topic: {@code Topic.fromPath} of {@code "ENRICH/MISSING/SUBJECT/"} plus the pair's left
 * element;</li>
 * <li>highlight compiler: adds the pair's right element to {@code p.getSubjects()};</li>
 * <li>string form: left element, {@code "::"}, right element.</li>
 * </ul>
 */
public EnrichMissingSubject() { public EnrichMissingSubject() {
super(true); super(true,
pair -> Topic.fromPath("ENRICH/MISSING/SUBJECT/" + pair.getLeft()),
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
} }
/**
 * Collects the subject pairs whose qualifier class id is not contained in
 * {@code existingTypes}, a set derived from {@code target.getResult().getSubject()}.
 *
 * <p>Surviving entries are converted with {@code ConversionUtils::oafSubjectToPair}; the
 * {@code findUpdates} form additionally maps each pair through {@code generateUpdateInfo}.
 *
 * <p>NOTE(review): how {@code existingTypes} is finished and which collection is streamed are in
 * the omitted lines; presumably the stream is over the source's subjects; confirm.
 */
@Override @Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final ResultWithRelations source, protected List<Pair<String, String>> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingTypes = target final Set<String> existingTypes = target
.getResult() .getResult()
.getSubject() .getSubject()
@ -40,20 +40,7 @@ public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
.stream() .stream()
.filter(pid -> !existingTypes.contains(pid.getQualifier().getClassid())) .filter(pid -> !existingTypes.contains(pid.getQualifier().getClassid()))
.map(ConversionUtils::oafSubjectToPair) .map(ConversionUtils::oafSubjectToPair)
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
/**
 * Builds an {@code UpdateInfo} for one subject pair, using the same topic path, highlight
 * compiler and string form that the longer constructor call passes to the superclass.
 *
 * <p>NOTE(review): present in one column only, so apparently removed by the refactoring.
 */
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.fromPath("ENRICH/MISSING/SUBJECT/" + highlightValue.getLeft()),
highlightValue, source, target,
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight(), dedupConfig);
}
} }

View File

@ -10,20 +10,20 @@ import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
/**
 * {@code UpdateMatcher} over {@code Instance} highlights, associated with
 * {@code Topic.ENRICH_MORE_OA_VERSION}.
 *
 * <p>NOTE(review): most lines below hold two renderings of the same statement (the
 * {@code findUpdates}/{@code UpdateInfo} form and the {@code findDifferences} form), and the
 * {@code @ -41,19 +41,7 @@} marker inside the method means part of its body is not shown here.
 */
public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> { public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
/**
 * Passes {@code true} as the first superclass argument. The longer form of the call also passes
 * a constant topic ({@code Topic.ENRICH_MORE_OA_VERSION}), a highlight compiler that adds the
 * instance to {@code p.getInstances()}, and {@code Instance::getUrl} as the string form.
 */
public EnrichMoreOpenAccess() { public EnrichMoreOpenAccess() {
super(true); super(true,
i -> Topic.ENRICH_MORE_OA_VERSION,
(p, i) -> p.getInstances().add(i),
Instance::getUrl);
} }
/**
 * Collects broker instances whose URL is not contained in {@code urls}, a set derived from
 * {@code target.getResult().getInstance()}.
 *
 * <p>Each streamed element is expanded with {@code ConversionUtils::oafInstanceToBrokerInstances}
 * and flattened before the URL filter is applied; the {@code findUpdates} form additionally maps
 * each instance through {@code generateUpdateInfo}.
 *
 * <p>NOTE(review): how {@code urls} is finished and which collection is streamed are in the
 * omitted lines; presumably the stream is over the source's instances; confirm.
 */
@Override @Override
protected List<UpdateInfo<Instance>> findUpdates(final ResultWithRelations source, protected List<Instance> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> urls = target final Set<String> urls = target
.getResult() .getResult()
.getInstance() .getInstance()
@ -41,19 +41,7 @@ public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
.map(ConversionUtils::oafInstanceToBrokerInstances) .map(ConversionUtils::oafInstanceToBrokerInstances)
.flatMap(List::stream) .flatMap(List::stream)
.filter(i -> !urls.contains(i.getUrl())) .filter(i -> !urls.contains(i.getUrl()))
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
/**
 * Builds an {@code UpdateInfo} for one instance, using the same topic, highlight compiler and
 * string form that the longer constructor call passes to the superclass.
 *
 * <p>NOTE(review): present in one column only, so apparently removed by the refactoring.
 */
public UpdateInfo<Instance> generateUpdateInfo(final Instance highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MORE_OA_VERSION,
highlightValue, source, target,
(p, i) -> p.getInstances().add(i),
Instance::getUrl, dedupConfig);
}
} }

View File

@ -9,20 +9,20 @@ import eu.dnetlib.broker.objects.Pid;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
/**
 * {@code UpdateMatcher} over {@code Pid} highlights, associated with
 * {@code Topic.ENRICH_MORE_PID}.
 *
 * <p>NOTE(review): most lines below hold two renderings of the same statement (the
 * {@code findUpdates}/{@code UpdateInfo} form and the {@code findDifferences} form), and the
 * {@code @ -36,19 +36,7 @@} marker inside the method means part of its body is not shown here.
 */
public class EnrichMorePid extends UpdateMatcher<Pid> { public class EnrichMorePid extends UpdateMatcher<Pid> {
/**
 * Passes {@code true} as the first superclass argument. The longer form of the call also passes
 * a constant topic ({@code Topic.ENRICH_MORE_PID}), a highlight compiler that adds the pid to
 * {@code p.getPids()}, and a string form of type, {@code "::"}, value.
 */
public EnrichMorePid() { public EnrichMorePid() {
super(true); super(true,
pid -> Topic.ENRICH_MORE_PID,
(p, pid) -> p.getPids().add(pid),
pid -> pid.getType() + "::" + pid.getValue());
} }
/**
 * Collects pids whose key (qualifier class id, {@code "::"}, value) is not contained in
 * {@code existingPids}, a set derived from {@code target.getResult().getPid()}.
 *
 * <p>Surviving entries are converted with {@code ConversionUtils::oafPidToBrokerPid}; the
 * {@code findUpdates} form additionally maps each pid through {@code generateUpdateInfo}.
 *
 * <p>NOTE(review): how {@code existingPids} is finished and which collection is streamed are in
 * the omitted lines; presumably the stream is over the source's pids; confirm.
 */
@Override @Override
protected List<UpdateInfo<Pid>> findUpdates(final ResultWithRelations source, protected List<Pid> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingPids = target final Set<String> existingPids = target
.getResult() .getResult()
.getPid() .getPid()
@ -36,19 +36,7 @@ public class EnrichMorePid extends UpdateMatcher<Pid> {
.stream() .stream()
.filter(pid -> !existingPids.contains(pid.getQualifier().getClassid() + "::" + pid.getValue())) .filter(pid -> !existingPids.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
.map(ConversionUtils::oafPidToBrokerPid) .map(ConversionUtils::oafPidToBrokerPid)
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
/**
 * Builds an {@code UpdateInfo} for one pid, using the same topic, highlight compiler and string
 * form that the longer constructor call passes to the superclass.
 *
 * <p>NOTE(review): present in one column only, so apparently removed by the refactoring.
 */
public UpdateInfo<Pid> generateUpdateInfo(final Pid highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.ENRICH_MORE_PID,
highlightValue, source, target,
(p, pid) -> p.getPids().add(pid),
pid -> pid.getType() + "::" + pid.getValue(), dedupConfig);
}
} }

View File

@ -10,20 +10,20 @@ import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.broker.model.Topic; import eu.dnetlib.dhp.broker.model.Topic;
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher; import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
import eu.dnetlib.dhp.broker.oa.util.ConversionUtils; import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations; import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.ResultWithRelations;
import eu.dnetlib.pace.config.DedupConfig;
/**
 * {@code UpdateMatcher} over {@code Pair<String, String>} highlights produced from subjects by
 * {@code ConversionUtils.oafSubjectToPair}, with topics under {@code "ENRICH/MORE/SUBJECT/"}.
 *
 * <p>NOTE(review): most lines below hold two renderings of the same statement (the
 * {@code findUpdates}/{@code UpdateInfo} form and the {@code findDifferences} form), and the
 * {@code @ -37,20 +37,7 @@} marker inside the method means part of its body is not shown here.
 */
public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> { public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
/**
 * Passes {@code true} as the first superclass argument. The longer form of the call also passes
 * three lambdas:
 * <ul>
 * <li>topic: {@code Topic.fromPath} of {@code "ENRICH/MORE/SUBJECT/"} plus the pair's left
 * element;</li>
 * <li>highlight compiler: adds the pair's right element to {@code p.getSubjects()};</li>
 * <li>string form: left element, {@code "::"}, right element.</li>
 * </ul>
 */
public EnrichMoreSubject() { public EnrichMoreSubject() {
super(true); super(true,
pair -> Topic.fromPath("ENRICH/MORE/SUBJECT/" + pair.getLeft()),
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight());
} }
/**
 * Collects subject pairs whose key (qualifier class id, {@code "::"}, value) is not contained
 * in {@code existingSubjects}, a set derived from {@code target.getResult().getSubject()}.
 *
 * <p>Surviving entries are converted with {@code ConversionUtils::oafSubjectToPair}; the
 * {@code findUpdates} form additionally maps each pair through {@code generateUpdateInfo}.
 *
 * <p>NOTE(review): how {@code existingSubjects} is finished and which collection is streamed are
 * in the omitted lines; presumably the stream is over the source's subjects; confirm.
 */
@Override @Override
protected List<UpdateInfo<Pair<String, String>>> findUpdates(final ResultWithRelations source, protected List<Pair<String, String>> findDifferences(final ResultWithRelations source,
final ResultWithRelations target, final ResultWithRelations target) {
final DedupConfig dedupConfig) {
final Set<String> existingSubjects = target final Set<String> existingSubjects = target
.getResult() .getResult()
.getSubject() .getSubject()
@ -37,20 +37,7 @@ public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
.stream() .stream()
.filter(pid -> !existingSubjects.contains(pid.getQualifier().getClassid() + "::" + pid.getValue())) .filter(pid -> !existingSubjects.contains(pid.getQualifier().getClassid() + "::" + pid.getValue()))
.map(ConversionUtils::oafSubjectToPair) .map(ConversionUtils::oafSubjectToPair)
.map(i -> generateUpdateInfo(i, source, target, dedupConfig))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
/**
 * Builds an {@code UpdateInfo} for one subject pair, using the same topic path, highlight
 * compiler and string form that the longer constructor call passes to the superclass.
 *
 * <p>NOTE(review): present in one column only, so apparently removed by the refactoring.
 */
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
final ResultWithRelations source,
final ResultWithRelations target,
final DedupConfig dedupConfig) {
return new UpdateInfo<>(
Topic.fromPath("ENRICH/MORE/SUBJECT/" + highlightValue.getLeft()),
highlightValue, source, target,
(p, pair) -> p.getSubjects().add(pair.getRight()),
pair -> pair.getLeft() + "::" + pair.getRight(), dedupConfig);
}
} }