forked from D-Net/dnet-hadoop
partial implementation of events with rels
This commit is contained in:
parent
e43d4d7778
commit
3e34517479
|
@ -4,48 +4,45 @@ package eu.dnetlib.dhp.broker.model;
|
|||
public enum Topic {
|
||||
|
||||
// ENRICHMENT MISSING
|
||||
ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"),
|
||||
ENRICH_MISSING_ABSTRACT("ENRICH/MISSING/ABSTRACT"),
|
||||
ENRICH_MISSING_PUBLICATION_DATE("ENRICH/MISSING/PUBLICATION_DATE"),
|
||||
ENRICH_MISSING_PID("ENRICH/MISSING/PID"),
|
||||
ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"),
|
||||
ENRICH_MISSING_SOFTWARE("ENRICH/MISSING/SOFTWARE"),
|
||||
ENRICH_MISSING_SUBJECT_MESHEUROPMC("ENRICH/MISSING/SUBJECT/MESHEUROPMC"),
|
||||
ENRICH_MISSING_SUBJECT_ARXIV("ENRICH/MISSING/SUBJECT/ARXIV"),
|
||||
ENRICH_MISSING_SUBJECT_JEL("ENRICH/MISSING/SUBJECT/JEL"),
|
||||
ENRICH_MISSING_SUBJECT_DDC("ENRICH/MISSING/SUBJECT/DDC"),
|
||||
ENRICH_MISSING_SUBJECT_ACM("ENRICH/MISSING/SUBJECT/ACM"),
|
||||
ENRICH_MISSING_SUBJECT_RVK("ENRICH/MISSING/SUBJECT/RVK"),
|
||||
ENRICH_MISSING_AUTHOR_ORCID("ENRICH/MISSING/AUTHOR/ORCID"),
|
||||
ENRICH_MISSING_OA_VERSION("ENRICH/MISSING/OPENACCESS_VERSION"), ENRICH_MISSING_ABSTRACT(
|
||||
"ENRICH/MISSING/ABSTRACT"), ENRICH_MISSING_PUBLICATION_DATE(
|
||||
"ENRICH/MISSING/PUBLICATION_DATE"), ENRICH_MISSING_PID(
|
||||
"ENRICH/MISSING/PID"), ENRICH_MISSING_PROJECT("ENRICH/MISSING/PROJECT"), ENRICH_MISSING_SOFTWARE(
|
||||
"ENRICH/MISSING/SOFTWARE"), ENRICH_MISSING_SUBJECT_MESHEUROPMC(
|
||||
"ENRICH/MISSING/SUBJECT/MESHEUROPMC"), ENRICH_MISSING_SUBJECT_ARXIV(
|
||||
"ENRICH/MISSING/SUBJECT/ARXIV"), ENRICH_MISSING_SUBJECT_JEL(
|
||||
"ENRICH/MISSING/SUBJECT/JEL"), ENRICH_MISSING_SUBJECT_DDC(
|
||||
"ENRICH/MISSING/SUBJECT/DDC"), ENRICH_MISSING_SUBJECT_ACM(
|
||||
"ENRICH/MISSING/SUBJECT/ACM"), ENRICH_MISSING_SUBJECT_RVK(
|
||||
"ENRICH/MISSING/SUBJECT/RVK"), ENRICH_MISSING_AUTHOR_ORCID(
|
||||
"ENRICH/MISSING/AUTHOR/ORCID"),
|
||||
|
||||
// ENRICHMENT MORE
|
||||
ENRICH_MORE_PID("ENRICH/MORE/PID"),
|
||||
ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"),
|
||||
ENRICH_MORE_ABSTRACT("ENRICH/MORE/ABSTRACT"),
|
||||
ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"),
|
||||
ENRICH_MORE_PROJECT("ENRICH/MORE/PROJECT"),
|
||||
ENRICH_MORE_SUBJECT_MESHEUROPMC("ENRICH/MORE/SUBJECT/MESHEUROPMC"),
|
||||
ENRICH_MORE_SUBJECT_ARXIV("ENRICH/MORE/SUBJECT/ARXIV"),
|
||||
ENRICH_MORE_SUBJECT_JEL("ENRICH/MORE/SUBJECT/JEL"),
|
||||
ENRICH_MORE_SUBJECT_DDC("ENRICH/MORE/SUBJECT/DDC"),
|
||||
ENRICH_MORE_SUBJECT_ACM("ENRICH/MORE/SUBJECT/ACM"),
|
||||
ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"),
|
||||
ENRICH_MORE_PID("ENRICH/MORE/PID"), ENRICH_MORE_OA_VERSION("ENRICH/MORE/OPENACCESS_VERSION"), ENRICH_MORE_ABSTRACT(
|
||||
"ENRICH/MORE/ABSTRACT"), ENRICH_MORE_PUBLICATION_DATE("ENRICH/MORE/PUBLICATION_DATE"), ENRICH_MORE_PROJECT(
|
||||
"ENRICH/MORE/PROJECT"), ENRICH_MORE_SOFTWARE("ENRICH/MORE/SOFTWARE"), ENRICH_MORE_SUBJECT_MESHEUROPMC(
|
||||
"ENRICH/MORE/SUBJECT/MESHEUROPMC"), ENRICH_MORE_SUBJECT_ARXIV(
|
||||
"ENRICH/MORE/SUBJECT/ARXIV"), ENRICH_MORE_SUBJECT_JEL(
|
||||
"ENRICH/MORE/SUBJECT/JEL"), ENRICH_MORE_SUBJECT_DDC(
|
||||
"ENRICH/MORE/SUBJECT/DDC"), ENRICH_MORE_SUBJECT_ACM(
|
||||
"ENRICH/MORE/SUBJECT/ACM"), ENRICH_MORE_SUBJECT_RVK("ENRICH/MORE/SUBJECT/RVK"),
|
||||
|
||||
// ADDITION
|
||||
ADD_BY_PROJECT("ADD/BY_PROJECT"),
|
||||
|
||||
// OTHER RELS
|
||||
ENRICH_MISSING_PUBLICATION_IS_RELATED_TO("ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"),
|
||||
ENRICH_MISSING_PUBLICATION_REFERENCES("ENRICH/MISSING/PUBLICATION/REFERENCES"),
|
||||
ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY("ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"),
|
||||
ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"),
|
||||
ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY("ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"),
|
||||
ENRICH_MISSING_PUBLICATION_IS_RELATED_TO(
|
||||
"ENRICH/MISSING/PUBLICATION/IS_RELATED_TO"), ENRICH_MISSING_PUBLICATION_REFERENCES(
|
||||
"ENRICH/MISSING/PUBLICATION/REFERENCES"), ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY(
|
||||
"ENRICH/MISSING/PUBLICATION/IS_REFERENCED_BY"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO(
|
||||
"ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY(
|
||||
"ENRICH/MISSING/PUBLICATION/IS_SUPPLEMENTED_BY"),
|
||||
|
||||
ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"),
|
||||
ENRICH_MISSING_DATASET_REFERENCES("ENRICH/MISSING/DATASET/REFERENCES"),
|
||||
ENRICH_MISSING_DATASET_IS_REFERENCED_BY("ENRICH/MISSING/DATASET/IS_REFERENCED_BY"),
|
||||
ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"),
|
||||
ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY("ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY");
|
||||
ENRICH_MISSING_DATASET_IS_RELATED_TO("ENRICH/MISSING/DATASET/IS_RELATED_TO"), ENRICH_MISSING_DATASET_REFERENCES(
|
||||
"ENRICH/MISSING/DATASET/REFERENCES"), ENRICH_MISSING_DATASET_IS_REFERENCED_BY(
|
||||
"ENRICH/MISSING/DATASET/IS_REFERENCED_BY"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO(
|
||||
"ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_TO"), ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY(
|
||||
"ENRICH/MISSING/DATASET/IS_SUPPLEMENTED_BY"),;
|
||||
|
||||
Topic(final String path) {
|
||||
this.path = path;
|
||||
|
@ -59,7 +56,9 @@ public enum Topic {
|
|||
|
||||
public static Topic fromPath(final String path) {
|
||||
for (final Topic t : Topic.values()) {
|
||||
if (t.getPath().equals(path)) { return t; }
|
||||
if (t.getPath().equals(path)) {
|
||||
return t;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -4,11 +4,14 @@ package eu.dnetlib.dhp.broker.oa;
|
|||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -29,18 +32,33 @@ import eu.dnetlib.dhp.broker.model.Event;
|
|||
import eu.dnetlib.dhp.broker.model.EventFactory;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAbstract;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingAuthorOrcid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsReferencedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsRelatedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetIsSupplementedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingDatasetReferences;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingProject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationDate;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsReferencedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsRelatedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedBy;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationIsSupplementedTo;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingPublicationReferences;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMissingSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreOpenAccess;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMorePid;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreProject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSoftware;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.EnrichMoreSubject;
|
||||
import eu.dnetlib.dhp.broker.oa.matchers.UpdateMatcher;
|
||||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
@ -50,23 +68,43 @@ public class GenerateEventsApplication {
|
|||
|
||||
private static final Logger log = LoggerFactory.getLogger(GenerateEventsApplication.class);
|
||||
|
||||
private static final UpdateMatcher<?> enrichMissingAbstract = new EnrichMissingAbstract();
|
||||
private static final UpdateMatcher<?> enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid();
|
||||
private static final UpdateMatcher<?> enrichMissingOpenAccess = new EnrichMissingOpenAccess();
|
||||
private static final UpdateMatcher<?> enrichMissingPid = new EnrichMissingPid();
|
||||
private static final UpdateMatcher<?> enrichMissingProject = new EnrichMissingProject();
|
||||
private static final UpdateMatcher<?> enrichMissingPublicationDate = new EnrichMissingPublicationDate();
|
||||
private static final UpdateMatcher<?> enrichMissingSubject = new EnrichMissingSubject();
|
||||
private static final UpdateMatcher<?> enrichMoreOpenAccess = new EnrichMoreOpenAccess();
|
||||
private static final UpdateMatcher<?> enrichMorePid = new EnrichMorePid();
|
||||
private static final UpdateMatcher<?> enrichMoreSubject = new EnrichMoreSubject();
|
||||
// Simple Matchers
|
||||
private static final UpdateMatcher<Result, ?> enrichMissingAbstract = new EnrichMissingAbstract();
|
||||
private static final UpdateMatcher<Result, ?> enrichMissingAuthorOrcid = new EnrichMissingAuthorOrcid();
|
||||
private static final UpdateMatcher<Result, ?> enrichMissingOpenAccess = new EnrichMissingOpenAccess();
|
||||
private static final UpdateMatcher<Result, ?> enrichMissingPid = new EnrichMissingPid();
|
||||
private static final UpdateMatcher<Result, ?> enrichMissingPublicationDate = new EnrichMissingPublicationDate();
|
||||
private static final UpdateMatcher<Result, ?> enrichMissingSubject = new EnrichMissingSubject();
|
||||
private static final UpdateMatcher<Result, ?> enrichMoreOpenAccess = new EnrichMoreOpenAccess();
|
||||
private static final UpdateMatcher<Result, ?> enrichMorePid = new EnrichMorePid();
|
||||
private static final UpdateMatcher<Result, ?> enrichMoreSubject = new EnrichMoreSubject();
|
||||
|
||||
// Advanced matchers
|
||||
private static final UpdateMatcher<Pair<Result, List<Project>>, ?> enrichMissingProject = new EnrichMissingProject();
|
||||
private static final UpdateMatcher<Pair<Result, List<Project>>, ?> enrichMoreProject = new EnrichMoreProject();
|
||||
|
||||
private static final UpdateMatcher<Pair<Result, List<Software>>, ?> enrichMissingSoftware = new EnrichMissingSoftware();
|
||||
private static final UpdateMatcher<Pair<Result, List<Software>>, ?> enrichMoreSoftware = new EnrichMoreSoftware();
|
||||
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy();
|
||||
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy();
|
||||
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(GenerateEventsApplication.class
|
||||
.toString(
|
||||
GenerateEventsApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
|
@ -82,9 +120,6 @@ public class GenerateEventsApplication {
|
|||
final String eventsPath = parser.get("eventsPath");
|
||||
log.info("eventsPath: {}", eventsPath);
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
|
@ -111,17 +146,17 @@ public class GenerateEventsApplication {
|
|||
final String graphPath,
|
||||
final Class<R> resultClazz) {
|
||||
|
||||
final Dataset<R> results =
|
||||
readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz)
|
||||
final Dataset<R> results = readPath(
|
||||
spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz)
|
||||
.filter(r -> r.getDataInfo().getDeletedbyinference());
|
||||
|
||||
final Dataset<Relation> rels =
|
||||
readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> r.getRelClass().equals("TODO")); // TODO mergedIN
|
||||
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
|
||||
|
||||
final Column c = null; // TODO
|
||||
|
||||
final Dataset<Row> aa = results.joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner")
|
||||
final Dataset<Row> aa = results
|
||||
.joinWith(rels, results.col("id").equalTo(rels.col("source")), "inner")
|
||||
.groupBy(rels.col("target"))
|
||||
.agg(c)
|
||||
.filter(x -> x.size() > 1)
|
||||
|
@ -134,7 +169,7 @@ public class GenerateEventsApplication {
|
|||
|
||||
}
|
||||
|
||||
private List<Event> generateSimpleEvents(final Result... children) {
|
||||
private List<Event> generateSimpleEvents(final Collection<Result> children) {
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
for (final Result target : children) {
|
||||
|
@ -142,7 +177,6 @@ public class GenerateEventsApplication {
|
|||
list.addAll(enrichMissingAuthorOrcid.searchUpdatesForRecord(target, children));
|
||||
list.addAll(enrichMissingOpenAccess.searchUpdatesForRecord(target, children));
|
||||
list.addAll(enrichMissingPid.searchUpdatesForRecord(target, children));
|
||||
list.addAll(enrichMissingProject.searchUpdatesForRecord(target, children));
|
||||
list.addAll(enrichMissingPublicationDate.searchUpdatesForRecord(target, children));
|
||||
list.addAll(enrichMissingSubject.searchUpdatesForRecord(target, children));
|
||||
list.addAll(enrichMoreOpenAccess.searchUpdatesForRecord(target, children));
|
||||
|
@ -153,6 +187,87 @@ public class GenerateEventsApplication {
|
|||
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private List<Event> generateProjectsEvents(final Collection<Pair<Result, List<Project>>> childrenWithProjects) {
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
for (final Pair<Result, List<Project>> target : childrenWithProjects) {
|
||||
list.addAll(enrichMissingProject.searchUpdatesForRecord(target, childrenWithProjects));
|
||||
list.addAll(enrichMoreProject.searchUpdatesForRecord(target, childrenWithProjects));
|
||||
}
|
||||
|
||||
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private List<Event> generateSoftwareEvents(final Collection<Pair<Result, List<Software>>> childrenWithSoftwares) {
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
for (final Pair<Result, List<Software>> target : childrenWithSoftwares) {
|
||||
list.addAll(enrichMissingSoftware.searchUpdatesForRecord(target, childrenWithSoftwares));
|
||||
list.addAll(enrichMoreSoftware.searchUpdatesForRecord(target, childrenWithSoftwares));
|
||||
}
|
||||
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private List<Event> generatePublicationRelatedEvents(final String relType,
|
||||
final Collection<Pair<Result, Map<String, List<Publication>>>> childrenWithRels) {
|
||||
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
final List<Pair<Result, List<Publication>>> cleanedChildrens = childrenWithRels
|
||||
.stream()
|
||||
.filter(p -> p.getRight().containsKey(relType))
|
||||
.map(p -> Pair.of(p.getLeft(), p.getRight().get(relType)))
|
||||
.filter(p -> p.getRight().size() > 0)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
for (final Pair<Result, List<Publication>> target : cleanedChildrens) {
|
||||
if (relType.equals("isRelatedTo")) {
|
||||
list.addAll(enrichMisissingPublicationIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("references")) {
|
||||
list.addAll(enrichMissingPublicationReferences.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("isReferencedBy")) {
|
||||
list.addAll(enrichMissingPublicationIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("isSupplementedTo")) {
|
||||
list.addAll(enrichMissingPublicationIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("isSupplementedBy")) {
|
||||
list.addAll(enrichMissingPublicationIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
}
|
||||
}
|
||||
|
||||
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
||||
private List<Event> generateDatasetRelatedEvents(final String relType,
|
||||
final Collection<Pair<Result, Map<String, List<eu.dnetlib.dhp.schema.oaf.Dataset>>>> childrenWithRels) {
|
||||
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
final List<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>> cleanedChildrens = childrenWithRels
|
||||
.stream()
|
||||
.filter(p -> p.getRight().containsKey(relType))
|
||||
.map(p -> Pair.of(p.getLeft(), p.getRight().get(relType)))
|
||||
.filter(p -> p.getRight().size() > 0)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
for (final Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>> target : cleanedChildrens) {
|
||||
if (relType.equals("isRelatedTo")) {
|
||||
list.addAll(enrichMisissingDatasetIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("references")) {
|
||||
list.addAll(enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("isReferencedBy")) {
|
||||
list.addAll(enrichMissingDatasetIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("isSupplementedTo")) {
|
||||
list.addAll(enrichMissingDatasetIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
} else if (relType.equals("isSupplementedBy")) {
|
||||
list.addAll(enrichMissingDatasetIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens));
|
||||
}
|
||||
}
|
||||
|
||||
return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList());
|
||||
|
||||
}
|
||||
|
||||
public static <R> Dataset<R> readPath(
|
||||
final SparkSession spark,
|
||||
final String inputPath,
|
||||
|
|
|
@ -9,7 +9,7 @@ import eu.dnetlib.dhp.broker.model.Topic;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
||||
public class EnrichMissingAbstract extends UpdateMatcher<Result, String> {
|
||||
|
||||
public EnrichMissingAbstract() {
|
||||
super(false);
|
||||
|
@ -24,7 +24,8 @@ public class EnrichMissingAbstract extends UpdateMatcher<String> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public UpdateInfo<String> generateUpdateInfo(final String highlightValue, final Result source,
|
||||
public UpdateInfo<String> generateUpdateInfo(final String highlightValue,
|
||||
final Result source,
|
||||
final Result target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_ABSTRACT,
|
||||
|
|
|
@ -10,7 +10,7 @@ import eu.dnetlib.dhp.broker.model.Topic;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>> {
|
||||
public class EnrichMissingAuthorOrcid extends UpdateMatcher<Result, Pair<String, String>> {
|
||||
|
||||
public EnrichMissingAuthorOrcid() {
|
||||
super(true);
|
||||
|
@ -24,7 +24,8 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher<Pair<String, String>
|
|||
|
||||
@Override
|
||||
public UpdateInfo<Pair<String, String>> generateUpdateInfo(final Pair<String, String> highlightValue,
|
||||
final Result source, final Result target) {
|
||||
final Result source,
|
||||
final Result target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_AUTHOR_ORCID,
|
||||
highlightValue, source, target,
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsReferencedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsReferencedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_REFERENCED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsRelatedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsRelatedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_RELATED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsSupplementedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsSupplementedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetIsSupplementedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetIsSupplementedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_IS_SUPPLEMENTED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingDatasetReferences
|
||||
extends UpdateMatcher<Pair<Result, List<Dataset>>, eu.dnetlib.broker.objects.Dataset> {
|
||||
|
||||
public EnrichMissingDatasetReferences() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Dataset>> findUpdates(final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Dataset> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Dataset highlightValue,
|
||||
final Pair<Result, List<Dataset>> source,
|
||||
final Pair<Result, List<Dataset>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_DATASET_REFERENCES,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> p.getDatasets().add(rel),
|
||||
rel -> rel.getInstances().get(0).getUrl());
|
||||
}
|
||||
}
|
|
@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingOpenAccess extends UpdateMatcher<Instance> {
|
||||
public class EnrichMissingOpenAccess extends UpdateMatcher<Result, Instance> {
|
||||
|
||||
public EnrichMissingOpenAccess() {
|
||||
super(true);
|
||||
|
|
|
@ -11,7 +11,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPid extends UpdateMatcher<Pid> {
|
||||
public class EnrichMissingPid extends UpdateMatcher<Result, Pid> {
|
||||
|
||||
public EnrichMissingPid() {
|
||||
super(true);
|
||||
|
|
|
@ -4,30 +4,35 @@ package eu.dnetlib.dhp.broker.oa.matchers;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.broker.objects.Project;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingProject extends UpdateMatcher<Project> {
|
||||
public class EnrichMissingProject
|
||||
extends UpdateMatcher<Pair<Result, List<Project>>, eu.dnetlib.broker.objects.Project> {
|
||||
|
||||
public EnrichMissingProject() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<Project>> findUpdates(final Result source, final Result target) {
|
||||
// return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f));
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Project>> findUpdates(final Pair<Result, List<Project>> source,
|
||||
final Pair<Result, List<Project>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateInfo<Project> generateUpdateInfo(final Project highlightValue,
|
||||
final Result source,
|
||||
final Result target) {
|
||||
public UpdateInfo<eu.dnetlib.broker.objects.Project> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Project highlightValue,
|
||||
final Pair<Result, List<Project>> source,
|
||||
final Pair<Result, List<Project>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PROJECT,
|
||||
highlightValue, source, target,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, prj) -> p.getProjects().add(prj),
|
||||
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@ import eu.dnetlib.dhp.broker.model.Topic;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationDate extends UpdateMatcher<String> {
|
||||
public class EnrichMissingPublicationDate extends UpdateMatcher<Result, String> {
|
||||
|
||||
public EnrichMissingPublicationDate() {
|
||||
super(false);
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsReferencedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsReferencedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_REFERENCED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsRelatedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsRelatedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_RELATED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsSupplementedBy
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsSupplementedBy() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_BY,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationIsSupplementedTo
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationIsSupplementedTo() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_IS_SUPPLEMENTED_TO,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMissingPublicationReferences
|
||||
extends UpdateMatcher<Pair<Result, List<Publication>>, eu.dnetlib.broker.objects.Publication> {
|
||||
|
||||
public EnrichMissingPublicationReferences() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Publication>> findUpdates(
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
// TODO Auto-generated method stub
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected UpdateInfo<eu.dnetlib.broker.objects.Publication> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Publication highlightValue,
|
||||
final Pair<Result, List<Publication>> source,
|
||||
final Pair<Result, List<Publication>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_PUBLICATION_REFERENCES,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, rel) -> {
|
||||
}, // p.getPublications().add(rel), //TODO available in the future release of dnet-openaire-broker-common
|
||||
rel -> rel.getOriginalId());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
||||
public class EnrichMissingSoftware
|
||||
extends UpdateMatcher<Pair<Result, List<Software>>, eu.dnetlib.broker.objects.Software> {
|
||||
|
||||
public EnrichMissingSoftware() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Software>> findUpdates(
|
||||
final Pair<Result, List<Software>> source,
|
||||
final Pair<Result, List<Software>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateInfo<eu.dnetlib.broker.objects.Software> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Software highlightValue,
|
||||
final Pair<Result, List<Software>> source,
|
||||
final Pair<Result, List<Software>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MISSING_SOFTWARE,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, s) -> p.getSoftwares().add(s),
|
||||
s -> s.getName());
|
||||
}
|
||||
|
||||
}
|
|
@ -14,7 +14,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class EnrichMissingSubject extends UpdateMatcher<Pair<String, String>> {
|
||||
public class EnrichMissingSubject extends UpdateMatcher<Result, Pair<String, String>> {
|
||||
|
||||
public EnrichMissingSubject() {
|
||||
super(true);
|
||||
|
|
|
@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMoreOpenAccess extends UpdateMatcher<Instance> {
|
||||
public class EnrichMoreOpenAccess extends UpdateMatcher<Result, Instance> {
|
||||
|
||||
public EnrichMoreOpenAccess() {
|
||||
super(true);
|
||||
|
|
|
@ -11,7 +11,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMorePid extends UpdateMatcher<Pid> {
|
||||
public class EnrichMorePid extends UpdateMatcher<Result, Pid> {
|
||||
|
||||
public EnrichMorePid() {
|
||||
super(true);
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMoreProject extends UpdateMatcher<Pair<Result, List<Project>>, eu.dnetlib.broker.objects.Project> {
|
||||
|
||||
public EnrichMoreProject() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Project>> findUpdates(final Pair<Result, List<Project>> source,
|
||||
final Pair<Result, List<Project>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateInfo<eu.dnetlib.broker.objects.Project> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Project highlightValue,
|
||||
final Pair<Result, List<Project>> source,
|
||||
final Pair<Result, List<Project>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MORE_PROJECT,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, prj) -> p.getProjects().add(prj),
|
||||
prj -> prj.getFunder() + "::" + prj.getFundingProgram() + prj.getCode());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.matchers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import eu.dnetlib.dhp.broker.model.Topic;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
||||
public class EnrichMoreSoftware
|
||||
extends UpdateMatcher<Pair<Result, List<Software>>, eu.dnetlib.broker.objects.Software> {
|
||||
|
||||
public EnrichMoreSoftware() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<UpdateInfo<eu.dnetlib.broker.objects.Software>> findUpdates(
|
||||
final Pair<Result, List<Software>> source,
|
||||
final Pair<Result, List<Software>> target) {
|
||||
// TODO
|
||||
return Arrays.asList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateInfo<eu.dnetlib.broker.objects.Software> generateUpdateInfo(
|
||||
final eu.dnetlib.broker.objects.Software highlightValue,
|
||||
final Pair<Result, List<Software>> source,
|
||||
final Pair<Result, List<Software>> target) {
|
||||
return new UpdateInfo<>(
|
||||
Topic.ENRICH_MORE_SOFTWARE,
|
||||
highlightValue, source.getLeft(), target.getLeft(),
|
||||
(p, s) -> p.getSoftwares().add(s),
|
||||
s -> s.getName());
|
||||
}
|
||||
|
||||
}
|
|
@ -12,7 +12,7 @@ import eu.dnetlib.dhp.broker.oa.util.ConversionUtils;
|
|||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class EnrichMoreSubject extends UpdateMatcher<Pair<String, String>> {
|
||||
public class EnrichMoreSubject extends UpdateMatcher<Result, Pair<String, String>> {
|
||||
|
||||
public EnrichMoreSubject() {
|
||||
super(true);
|
||||
|
|
|
@ -12,9 +12,8 @@ import org.apache.commons.lang3.StringUtils;
|
|||
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public abstract class UpdateMatcher<T> {
|
||||
public abstract class UpdateMatcher<K, T> {
|
||||
|
||||
private final boolean multipleUpdate;
|
||||
|
||||
|
@ -22,15 +21,16 @@ public abstract class UpdateMatcher<T> {
|
|||
this.multipleUpdate = multipleUpdate;
|
||||
}
|
||||
|
||||
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final Result res, final Result... others) {
|
||||
public Collection<UpdateInfo<T>> searchUpdatesForRecord(final K res, final Collection<K> others) {
|
||||
|
||||
final Map<String, UpdateInfo<T>> infoMap = new HashMap<>();
|
||||
|
||||
for (final Result source : others) {
|
||||
for (final K source : others) {
|
||||
if (source != res) {
|
||||
for (final UpdateInfo<T> info : findUpdates(source, res)) {
|
||||
final String s = DigestUtils.md5Hex(info.getHighlightValueAsString());
|
||||
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {} else {
|
||||
if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {
|
||||
} else {
|
||||
infoMap.put(s, info);
|
||||
}
|
||||
}
|
||||
|
@ -51,11 +51,11 @@ public abstract class UpdateMatcher<T> {
|
|||
}
|
||||
}
|
||||
|
||||
protected abstract List<UpdateInfo<T>> findUpdates(Result source, Result target);
|
||||
protected abstract List<UpdateInfo<T>> findUpdates(K source, K target);
|
||||
|
||||
protected abstract UpdateInfo<T> generateUpdateInfo(final T highlightValue,
|
||||
final Result source,
|
||||
final Result target);
|
||||
final K source,
|
||||
final K target);
|
||||
|
||||
protected static boolean isMissing(final List<Field<String>> list) {
|
||||
return list == null || list.isEmpty() || StringUtils.isBlank(list.get(0).getValue());
|
||||
|
|
|
@ -4,4 +4,6 @@ package eu.dnetlib.dhp.broker.oa.util;
|
|||
/**
 * String constants shared by the broker code.
 */
public class BrokerConstants {

	/** Literal value {@code "OPEN"}. */
	public static final String OPEN_ACCESS = "OPEN";

	/** Literal value {@code "isMergedIn"}. */
	public static final String IS_MERGED_IN_CLASS = "isMergedIn";

}
|
||||
|
|
Loading…
Reference in New Issue