From 1d4275acc44957fe412c79a66b9d84e51c044f30 Mon Sep 17 00:00:00 2001
From: Sandro La Bruzzo
Date: Wed, 17 Jun 2020 09:10:38 +0200
Subject: [PATCH] implemented first version of the export of Scholexplorer content into an ActionSet

---
 .../dhp/schema/scholexplorer/DLIRelation.java |  14 +
 .../broker/oa/GenerateEventsApplication.java  | 122 ++++--
 .../dhp/broker/oa/matchers/UpdateMatcher.java |   6 +-
 .../simple/EnrichMissingAbstract.java         |   6 +-
 .../simple/EnrichMissingAuthorOrcid.java      |   3 +-
 .../simple/EnrichMissingOpenAccess.java       |   7 +-
 .../oa/matchers/simple/EnrichMissingPid.java  |  10 +-
 .../simple/EnrichMissingPublicationDate.java  |   6 +-
 .../matchers/simple/EnrichMissingSubject.java |   3 +-
 .../matchers/simple/EnrichMoreOpenAccess.java |   3 +-
 .../oa/matchers/simple/EnrichMorePid.java     |   6 +-
 .../oa/matchers/simple/EnrichMoreSubject.java |   3 +-
 .../dhp/broker/oa/util/EventGroup.java        |   1 +
 .../dhp/broker/oa/util/ResultAggregator.java  |   1 +
 .../dhp/broker/oa/util/ResultGroup.java       |   1 +
 .../dhp/broker/oa/util/TrustUtils.java        |  16 +-
 .../dhp/broker/oa/util/UpdateInfo.java        |   9 +-
 .../dhp/broker/oa/util/TrustUtilsTest.java    |   1 +
 .../orcid/oozie_app/config-default.xml        |  30 +-
 .../parser/DatasetScholexplorerParser.java    |   2 +-
 .../PublicationScholexplorerParser.java       |   8 +-
 .../dhp/sx/graph/step1/oozie_app/workflow.xml |   4 +-
 .../java/eu/dnetlib/dhp/export/DLIToOAF.scala | 376 ++++++++++++++++++
 .../SparkExportContentForOpenAire.scala       | 118 ++++++
 .../input_export_content_parameters.json      |  14 +
 .../sx/export/oozie_app/config-default.xml    |  42 ++
 .../dhp/sx/export/oozie_app/workflow.xml      |  49 +++
 .../dhp/export/ExportDLITOOAFTest.scala       |  75 ++++
 .../eu/dnetlib/dhp/export/dataset.json        | 101 +++++
 .../eu/dnetlib/dhp/export/publication.json    | 128 ++++++
 .../eu/dnetlib/dhp/export/relation.json       |  23 ++
 31 files changed, 1110 insertions(+), 78 deletions(-)
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/SparkExportContentForOpenAire.scala
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/dataset.json
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json
 create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json

diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java
index d2d2089c0..ca85fa14f 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/scholexplorer/DLIRelation.java
@@ -1,11 +1,25 @@
 package eu.dnetlib.dhp.schema.scholexplorer;
 
+import java.util.List;
+
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
 import eu.dnetlib.dhp.schema.oaf.Relation;
 
 public class DLIRelation extends Relation {
+
 	private String dateOfCollection;
+	private List<KeyValue> collectedFrom;
+
+	public List<KeyValue> getCollectedFrom() {
+		return collectedFrom;
+	}
+
+	public void setCollectedFrom(List<KeyValue> collectedFrom) {
+		this.collectedFrom = collectedFrom;
+	}
+
 	public String getDateOfCollection() {
 		return dateOfCollection;
 	}

diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java
index 44bc5cb6e..ecf4e3eff 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java
@@ -91,35 +91,29 @@ public class GenerateEventsApplication {
 
 	private static final UpdateMatcher<Pair<Result, List<Software>>, ?> enrichMoreSoftware = new EnrichMoreSoftware();
 
 	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo();
-	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy =
-		new EnrichMissingPublicationIsReferencedBy();
+	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy();
 	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences();
-	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo =
-		new EnrichMissingPublicationIsSupplementedTo();
-	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy =
-		new EnrichMissingPublicationIsSupplementedBy();
+	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo();
+	private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy();
 
-	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo =
-		new EnrichMissingDatasetIsRelatedTo();
-	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy =
-		new EnrichMissingDatasetIsReferencedBy();
-	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences =
-		new EnrichMissingDatasetReferences();
-	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo =
-		new EnrichMissingDatasetIsSupplementedTo();
-	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy =
-		new EnrichMissingDatasetIsSupplementedBy();
+	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo();
+	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy();
+	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences();
+	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo();
+	private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy();
 
 	// Aggregators
-	private static final TypedColumn<Tuple2<Result, Relation>, ResultGroup> resultAggrTypedColumn = new ResultAggregator().toColumn();
+	private static final TypedColumn<Tuple2<Result, Relation>, ResultGroup> resultAggrTypedColumn = new ResultAggregator()
+		.toColumn();
 
 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 
	public
static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString(GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString( + GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -172,18 +166,23 @@ public class GenerateEventsApplication { final Class resultClazz, final DedupConfig dedupConfig) { - final Dataset results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), Result.class) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset results = readPath( + spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), Result.class) + .filter(r -> r.getDataInfo().getDeletedbyinference()); final Dataset mergedRels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); - return results.joinWith(mergedRels, results.col("id").equalTo(mergedRels.col("source")), "inner") + return results + .joinWith(mergedRels, results.col("id").equalTo(mergedRels.col("source")), "inner") .groupByKey((MapFunction, String>) t -> t._2.getTarget(), Encoders.STRING()) .agg(resultAggrTypedColumn) .map((MapFunction, ResultGroup>) t -> t._2, Encoders.kryo(ResultGroup.class)) .filter(ResultGroup::isValid) - .map((MapFunction) g -> GenerateEventsApplication.generateSimpleEvents(g, dedupConfig), Encoders.kryo(EventGroup.class)) + .map( + (MapFunction) g -> GenerateEventsApplication + .generateSimpleEvents(g, dedupConfig), + Encoders.kryo(EventGroup.class)) .flatMap(group -> group.getData().iterator(), Encoders.kryo(Event.class)); } @@ -207,16 +206,19 @@ public class GenerateEventsApplication { return events; } - private static Dataset generateRelationEvents(final SparkSession spark, + private static Dataset generateRelationEvents( + final SparkSession spark, final String graphPath, final Class sourceClass, final Class targetClass, final DedupConfig dedupConfig) { - final Dataset sources = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), Result.class) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset sources = readPath( + spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), Result.class) + .filter(r -> r.getDataInfo().getDeletedbyinference()); - final Dataset targets = readPath(spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass); + final Dataset targets = readPath( + spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), targetClass); final Dataset mergedRels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); @@ -224,7 +226,8 @@ public class GenerateEventsApplication { final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); - final Dataset duplicates = sources.joinWith(mergedRels, sources.col("id").equalTo(rels.col("source")), "inner") + final Dataset duplicates = sources + .joinWith(mergedRels, sources.col("id").equalTo(rels.col("source")), "inner") .groupByKey((MapFunction, String>) t -> t._2.getTarget(), Encoders.STRING()) .agg(resultAggrTypedColumn) .map((MapFunction, ResultGroup>) t -> t._2, 
Encoders.kryo(ResultGroup.class)) @@ -243,7 +246,8 @@ public class GenerateEventsApplication { return null; } - private List generateProjectsEvents(final Collection>> childrenWithProjects, final DedupConfig dedupConfig) { + private List generateProjectsEvents(final Collection>> childrenWithProjects, + final DedupConfig dedupConfig) { final List> list = new ArrayList<>(); for (final Pair> target : childrenWithProjects) { @@ -254,7 +258,8 @@ public class GenerateEventsApplication { return list.stream().map(EventFactory::newBrokerEvent).collect(Collectors.toList()); } - private List generateSoftwareEvents(final Collection>> childrenWithSoftwares, final DedupConfig dedupConfig) { + private List generateSoftwareEvents(final Collection>> childrenWithSoftwares, + final DedupConfig dedupConfig) { final List> list = new ArrayList<>(); for (final Pair> target : childrenWithSoftwares) { @@ -279,15 +284,30 @@ public class GenerateEventsApplication { for (final Pair> target : cleanedChildrens) { if (relType.equals("isRelatedTo")) { - list.addAll(enrichMisissingPublicationIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMisissingPublicationIsRelatedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("references")) { - list.addAll(enrichMissingPublicationReferences.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationReferences + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isReferencedBy")) { - list.addAll(enrichMissingPublicationIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationIsReferencedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedTo")) { - list.addAll(enrichMissingPublicationIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationIsSupplementedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedBy")) { - list.addAll(enrichMissingPublicationIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingPublicationIsSupplementedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } } @@ -310,15 +330,29 @@ public class GenerateEventsApplication { for (final Pair> target : cleanedChildrens) { if (relType.equals("isRelatedTo")) { - list.addAll(enrichMisissingDatasetIsRelatedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMisissingDatasetIsRelatedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("references")) { - list.addAll(enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingDatasetReferences.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isReferencedBy")) { - list.addAll(enrichMissingDatasetIsReferencedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingDatasetIsReferencedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedTo")) { - list.addAll(enrichMissingDatasetIsSupplementedTo.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + 
enrichMissingDatasetIsSupplementedTo + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } else if (relType.equals("isSupplementedBy")) { - list.addAll(enrichMissingDatasetIsSupplementedBy.searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); + list + .addAll( + enrichMissingDatasetIsSupplementedBy + .searchUpdatesForRecord(target, cleanedChildrens, dedupConfig)); } } @@ -339,8 +373,12 @@ public class GenerateEventsApplication { private static DedupConfig loadDedupConfig(final String isLookupUrl, final String profId) throws Exception { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); - final String conf = isLookUpService.getResourceProfileByQuery(String - .format("for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()", profId)); + final String conf = isLookUpService + .getResourceProfileByQuery( + String + .format( + "for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()", + profId)); final DedupConfig dedupConfig = new ObjectMapper().readValue(conf, DedupConfig.class); dedupConfig.getPace().initModel(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index 286b40ad5..95d43ae68 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -22,7 +22,8 @@ public abstract class UpdateMatcher { this.multipleUpdate = multipleUpdate; } - public Collection> searchUpdatesForRecord(final K res, final Collection others, final DedupConfig dedupConfig) { + public Collection> searchUpdatesForRecord(final K res, final Collection others, + final DedupConfig dedupConfig) { final Map> infoMap = new HashMap<>(); @@ -30,7 +31,8 @@ public abstract class UpdateMatcher { if (source != res) { for (final UpdateInfo info : findUpdates(source, res, dedupConfig)) { final String s = DigestUtils.md5Hex(info.getHighlightValueAsString()); - if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) {} else { + if (!infoMap.containsKey(s) || infoMap.get(s).getTrust() < info.getTrust()) { + } else { infoMap.put(s, info); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java index c3b6bda66..7dc340b3c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAbstract.java @@ -18,9 +18,11 @@ public class EnrichMissingAbstract extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { if (isMissing(target.getDescription()) && !isMissing(source.getDescription())) { - return Arrays.asList(generateUpdateInfo(source.getDescription().get(0).getValue(), source, target, dedupConfig)); + return Arrays + .asList(generateUpdateInfo(source.getDescription().get(0).getValue(), source, target, 
dedupConfig)); } return new ArrayList<>(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index 89292d3da..7a1677ae2 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -19,7 +19,8 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher>> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List>> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { // TODO // return Arrays.asList(new EnrichMissingAbstract("xxxxxxx", 0.9f)); return Arrays.asList(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java index 7f5a595cc..d14490ba8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingOpenAccess.java @@ -21,7 +21,8 @@ public class EnrichMissingOpenAccess extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final long count = target .getInstance() .stream() @@ -29,7 +30,9 @@ public class EnrichMissingOpenAccess extends UpdateMatcher { .filter(right -> right.equals(BrokerConstants.OPEN_ACCESS)) .count(); - if (count > 0) { return Arrays.asList(); } + if (count > 0) { + return Arrays.asList(); + } return source .getInstance() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 6e106e669..20303ec1b 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -20,10 +20,13 @@ public class EnrichMissingPid extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final long count = target.getPid().size(); - if (count > 0) { return Arrays.asList(); } + if (count > 0) { + return Arrays.asList(); + } return source .getPid() @@ -33,7 +36,8 @@ public class EnrichMissingPid extends UpdateMatcher { .collect(Collectors.toList()); } - public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, final DedupConfig dedupConfig) { + public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, + final DedupConfig dedupConfig) { return new UpdateInfo<>( Topic.ENRICH_MISSING_PID, highlightValue, source, target, diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java index d2b28d65d..e1de8ce4d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDate.java @@ -18,9 +18,11 @@ public class EnrichMissingPublicationDate extends UpdateMatcher } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { if (isMissing(target.getDateofacceptance()) && !isMissing(source.getDateofacceptance())) { - return Arrays.asList(generateUpdateInfo(source.getDateofacceptance().getValue(), source, target, dedupConfig)); + return Arrays + .asList(generateUpdateInfo(source.getDateofacceptance().getValue(), source, target, dedupConfig)); } return new ArrayList<>(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index de888ff87..c51f8991c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -23,7 +23,8 @@ public class EnrichMissingSubject extends UpdateMatcher>> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List>> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set existingTypes = target .getSubject() .stream() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 021449797..2ac04fd12 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -21,7 +21,8 @@ public class EnrichMoreOpenAccess extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set urls = target .getInstance() .stream() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index c64ed20ea..e4bf5d2c2 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -20,7 +20,8 @@ public class EnrichMorePid extends UpdateMatcher { } @Override - protected List> findUpdates(final Result source, final Result target, final DedupConfig 
dedupConfig) { + protected List> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set existingPids = target .getPid() .stream() @@ -36,7 +37,8 @@ public class EnrichMorePid extends UpdateMatcher { .collect(Collectors.toList()); } - public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, final DedupConfig dedupConfig) { + public UpdateInfo generateUpdateInfo(final Pid highlightValue, final Result source, final Result target, + final DedupConfig dedupConfig) { return new UpdateInfo<>( Topic.ENRICH_MORE_PID, highlightValue, source, target, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 3f7f5b3d5..d6e607c31 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -21,7 +21,8 @@ public class EnrichMoreSubject extends UpdateMatcher>> findUpdates(final Result source, final Result target, final DedupConfig dedupConfig) { + protected List>> findUpdates(final Result source, final Result target, + final DedupConfig dedupConfig) { final Set existingSubjects = target .getSubject() .stream() diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java index 9c7081c79..25c7698a0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventGroup.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.io.Serializable; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java index 94685eeae..475c76814 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultAggregator.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import org.apache.spark.sql.Encoder; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java index 8fe7a5939..2be673db0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ResultGroup.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.broker.oa.util; import java.io.Serializable; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 6bf59c125..5338d4f3d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -1,14 +1,22 @@ + package eu.dnetlib.dhp.broker.oa.util; public class 
TrustUtils {
 
 	public static float rescale(final double score, final double threshold) {
-		if (score >= BrokerConstants.MAX_TRUST) { return BrokerConstants.MAX_TRUST; }
+		if (score >= BrokerConstants.MAX_TRUST) {
+			return BrokerConstants.MAX_TRUST;
+		}
 
-		final double val = (score - threshold) * (BrokerConstants.MAX_TRUST - BrokerConstants.MIN_TRUST) / (BrokerConstants.MAX_TRUST - threshold);
+		final double val = (score - threshold) * (BrokerConstants.MAX_TRUST - BrokerConstants.MIN_TRUST)
+			/ (BrokerConstants.MAX_TRUST - threshold);
 
-		if (val < BrokerConstants.MIN_TRUST) { return BrokerConstants.MIN_TRUST; }
-		if (val > BrokerConstants.MAX_TRUST) { return BrokerConstants.MAX_TRUST; }
+		if (val < BrokerConstants.MIN_TRUST) {
+			return BrokerConstants.MIN_TRUST;
+		}
+		if (val > BrokerConstants.MAX_TRUST) {
+			return BrokerConstants.MAX_TRUST;
+		}
 
 		return (float) val;
 	}

diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java
index de6a71397..893aa2827 100644
--- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java
@@ -68,8 +68,10 @@ public final class UpdateInfo {
 	private float calculateTrust(final DedupConfig dedupConfig, final Result r1, final Result r2) {
 		try {
 			final ObjectMapper objectMapper = new ObjectMapper();
-			final MapDocument doc1 = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r1));
-			final MapDocument doc2 = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r2));
+			final MapDocument doc1 = MapDocumentUtil
+				.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r1));
+			final MapDocument doc2 = MapDocumentUtil
+				.asMapDocumentWithJPath(dedupConfig, objectMapper.writeValueAsString(r2));
 			final double score = new TreeProcessor(dedupConfig).computeScore(doc1, doc2);
 			final double threshold = dedupConfig.getWf().getThreshold();
@@ -118,7 +120,8 @@ public final class UpdateInfo {
 			.map(Instance::getUrl)
 			.flatMap(List::stream)
 			.findFirst()
-			.orElse(null);;
+			.orElse(null);
+		;
 
 		final Provenance provenance = new Provenance().setId(provId).setRepositoryName(provRepo).setUrl(provUrl);

diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java
index 58f391c24..bb23d6085 100644
--- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java
+++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.broker.oa.util;
 
 import static org.junit.jupiter.api.Assertions.assertTrue;

diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml
index 5621415d9..fe14bb8cb 100644
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml
@@ -1,14 +1,18 @@
 <configuration>
     <property>
         <name>jobTracker</name>
-        <value>yarnRM</value>
+        <value>hadoop-rm3.garr-pa1.d4science.org:8032</value>
     </property>
     <property>
         <name>nameNode</name>
-        <value>hdfs://nameservice1</value>
+        <value>
hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 - oozie.action.sharelib.for.java + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark spark2 @@ -16,7 +20,23 @@ true - oozie.launcher.mapreduce.map.java.opts - -Xmx4g + hive_metastore_uris + thrift://hadoop-edge2.garr-pa1.d4science.org:9083 + + + spark2YarnHistoryServerAddress + http://hadoop-edge1.garr-pa1.d4science.org:18089/ + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java index f49163c87..afba57bb8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java @@ -159,7 +159,7 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser { .setDescription( descs .stream() - .map(it -> it.length() < 10000 ? it : it.substring(0, 10000)) +// .map(it -> it.length() < 10000 ? it : it.substring(0, 10000)) .map( it -> { final Field d = new Field<>(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java index edbb444db..bf59a6f0e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/PublicationScholexplorerParser.java @@ -213,10 +213,10 @@ public class PublicationScholexplorerParser extends AbstractScholexplorerParser .setValue( VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='description']")); - if (StringUtils.isNotBlank(description.getValue()) - && description.getValue().length() > 10000) { - description.setValue(description.getValue().substring(0, 10000)); - } +// if (StringUtils.isNotBlank(description.getValue()) +// && description.getValue().length() > 10000) { +// description.setValue(description.getValue().substring(0, 10000)); +// } parsedObject.setDescription(Collections.singletonList(description)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml index ce00eff7b..d74d68663 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml @@ -47,8 +47,8 @@ - ${wf:conf('reuseContent') eq false} - ${wf:conf('reuseContent') eq true} + ${wf:conf('reuseContent') eq false} + ${wf:conf('reuseContent') eq true} diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala new file mode 100644 index 000000000..5d7c444b2 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/DLIToOAF.scala @@ -0,0 +1,376 @@ +package eu.dnetlib.dhp.export + +import java.time.LocalDateTime +import java.time.format.DateTimeFormatter + +import eu.dnetlib.dhp.common.PacePerson +import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Field, Instance, KeyValue, Publication, Qualifier, Relation, StructuredProperty} +import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIRelation} +import org.apache.commons.lang3.StringUtils + +import scala.collection.JavaConverters._ + + +case class DLIExternalReference(id: String, url: String, sitename: String, label: String, pid: String, classId: String) {} + +object DLIToOAF { + + + val collectedFromMap: Map[String, KeyValue] = Map( + "dli_________::r3d100010527" -> generateKeyValue("10|re3data_____::c2a591f440598b63d854556beaf01591", "European Nucleotide Archive"), + "dli_________::r3d100010255" -> generateKeyValue("10|re3data_____::480d275ed6f9666ee76d6a1215eabf26", "Inter-university Consortium for Political and Social Research"), + "dli_________::r3d100011868" -> generateKeyValue("10|re3data_____::db814dc656a911b556dba42a331cebe9", "Mendeley Data"), + "dli_________::elsevier" -> generateKeyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier"), + "dli_________::openaire" -> generateKeyValue("10|infrastruct_::f66f1bd369679b5b077dcdf006089556", "OpenAIRE"), + "dli_________::thomsonreuters" -> generateKeyValue("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "Crossref"), + "dli_________::r3d100010216" -> generateKeyValue("10|re3data_____::0fd79429de04343dbbec705d9b5f429f", "4TU.Centre for Research Data"), + "dli_________::r3d100010134" -> generateKeyValue("10|re3data_____::9633d1e8c4309c833c2c442abeb0cfeb", "PANGAEA"), + "dli_________::ieee" -> generateKeyValue("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "Crossref"), + "dli_________::r3d100010197" -> generateKeyValue("10|re3data_____::9fd1d79973f7fda60cbe1d82e3819a68", "The Cambridge Structural Database"), + "dli_________::nature" -> generateKeyValue("10|openaire____::6e380d9cf51138baec8480f5a0ce3a2e", "Springer Nature"), + "dli_________::datacite" -> generateKeyValue("10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "Datacite"), + "dli_________::r3d100010578" -> generateKeyValue("10|re3data_____::c4d751f29a7568011a4c80136b30b444", "IEDA"), + "dli_________::r3d100010464" -> generateKeyValue("10|re3data_____::23e2a81591099828f6b83a1c83150666", "Research Data Australia"), + "dli_________::r3d100010327" -> generateKeyValue("10|re3data_____::a644620b81135243dc9acc15d2362246", "Worldwide Protein Data Bank"), + "dli_________::pubmed" -> generateKeyValue("10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357", "PubMed Central"), + "dli_________::europe_pmc__" -> generateKeyValue("10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", "Europe PubMed Central"), + "dli_________::crossref" -> generateKeyValue("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "Crossref") + ) + + + val relationTypeMapping: Map[String, (String, String)] = Map( + "IsReferencedBy" -> ("isRelatedTo", "relationship"), + "References" -> ("isRelatedTo", "relationship"), + "IsRelatedTo" -> ("isRelatedTo", "relationship"), + "IsSupplementedBy" -> ("IsSupplementedBy", "supplement"), + "Cites" -> ("cites", "citation"), + "Unknown" -> ("isRelatedTo", "relationship"), + "IsSourceOf" -> ("isRelatedTo", "relationship"), + "IsCitedBy" -> ("IsCitedBy", "citation"), + "Reviews" -> ("reviews", "review"), + 
"Describes" -> ("isRelatedTo", "relationship"), + "HasAssociationWith" -> ("isRelatedTo", "relationship") + ) + + val expectecdPidType = List("uniprot", "ena", "chembl", "ncbi-n", "ncbi-p", "genbank", "pdb", "url") + + + val filteredURL = List( + "www.ebi.ac.uk", + "www.uniprot.org", + "f1000.com", + "en.wikipedia.org", + "flybase.org", + "www.yeastgenome.org", + "research.bioinformatics.udel.edu", + "cancer.sanger.ac.uk", + "www.iedb.org", + "www.crd.york.ac.uk", + "www.wormbase.org", + "web.expasy.org", + "www.hal.inserm.fr", + "sabiork.h-its.org", + "zfin.org", + "www.pombase.org", + "www.guidetopharmacology.org", + "reactome.org" + ) + + + def filterPid(p: StructuredProperty): Boolean = { + if (expectecdPidType.contains(p.getQualifier.getClassname) && p.getQualifier.getClassname.equalsIgnoreCase("url")) + if (filteredURL.exists(u => p.getValue.contains(u))) + return true + else + return false + expectecdPidType.contains(p.getQualifier.getClassname) + } + + + def extractTitle(titles: java.util.List[StructuredProperty]): String = { + + if (titles == null) + return null + + val label = titles.asScala.map(p => p.getValue).find(p => p.nonEmpty) + label.orNull + } + + def convertDLIDatasetToExternalReference(dataset: DLIDataset): DLIExternalReference = { + val currentId = generateId(dataset.getId) + val pids = dataset.getPid.asScala.filter(filterPid) + + if (pids == null || pids.isEmpty) + return null + + val pid: StructuredProperty = pids.head + + + pid.getQualifier.getClassname match { + case "uniprot" => DLIExternalReference(generateId(dataset.getId), s"https://www.uniprot.org/uniprot/${pid.getValue}", "UniProt", extractTitle(dataset.getTitle), pid.getValue, "accessionNumber") + case "ena" => + if(pid.getValue!= null && pid.getValue.nonEmpty && pid.getValue.length>7) + DLIExternalReference(generateId(dataset.getId), s"https://www.ebi.ac.uk/ena/data/view/${pid.getValue.substring(0, 8)}", "European Nucleotide Archive", extractTitle(dataset.getTitle), pid.getValue, "accessionNumber") + else + null + case "chembl" => DLIExternalReference(generateId(dataset.getId), s"https://www.ebi.ac.uk/chembl/compound_report_card/${pid.getValue}", "ChEMBL", extractTitle(dataset.getTitle), pid.getValue, "accessionNumber") + case "ncbi-n" => DLIExternalReference(generateId(dataset.getId), s"https://www.ncbi.nlm.nih.gov/nuccore/${pid.getValue}", "Nucleotide Database", extractTitle(dataset.getTitle), pid.getValue, "accessionNumber") + case "ncbi-p" => DLIExternalReference(generateId(dataset.getId), s"https://www.ncbi.nlm.nih.gov/nuccore/${pid.getValue}", "Nucleotide Database", extractTitle(dataset.getTitle), pid.getValue, "accessionNumber") + case "genbank" => DLIExternalReference(generateId(dataset.getId), s"https://www.ncbi.nlm.nih.gov/nuccore/${pid.getValue}", "GenBank", extractTitle(dataset.getTitle), pid.getValue, "accessionNumber") + case "pdb" => DLIExternalReference(generateId(dataset.getId), s"https://www.ncbi.nlm.nih.gov/nuccore/${pid.getValue}", "Protein Data Bank", extractTitle(dataset.getTitle), pid.getValue, "accessionNumber") + case "url" => DLIExternalReference(generateId(dataset.getId), pid.getValue, "", extractTitle(dataset.getTitle), pid.getValue, "url") + + } + + + } + + + def convertDLIPublicationToOAF(p: DLIPublication): Publication = { + + val result = new Publication + result.setId(generateId(p.getId)) + result.setDataInfo(generateDataInfo(invisibile = true)) + if (p.getCollectedfrom == null || p.getCollectedfrom.size() == 0 || (p.getCollectedfrom.size() == 1 && p.getCollectedfrom.get(0) 
== null)) + return null + + result.setCollectedfrom(p.getCollectedfrom.asScala.map(c => collectedFromMap.getOrElse(c.getKey, null)).asJava) + result.setPid(p.getPid) + result.setDateofcollection(p.getDateofcollection) + result.setOriginalId(p.getPid.asScala.map(p => p.getValue).asJava) + result.setDateoftransformation(LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))) + if (p.getAuthor == null || p.getAuthor.isEmpty) + return null + result.setAuthor(p.getAuthor.asScala.map(convertAuthor).asJava) + result.setResulttype(createQualifier(p.getResulttype.getClassid, p.getResulttype.getClassname, "dnet:result_typologies", "dnet:result_typologies")) + + if (p.getSubject != null) + result.setSubject(p.getSubject.asScala.map(convertSubject).asJava) + + if (p.getTitle == null || p.getTitle.isEmpty) + return null + + result.setTitle(List(patchTitle(p.getTitle.get(0))).asJava) + + if (p.getRelevantdate == null || p.getRelevantdate.size() == 0) + return null + + result.setRelevantdate(p.getRelevantdate.asScala.map(patchRelevantDate).asJava) + + + result.setDescription(p.getDescription) + + result.setDateofacceptance(asField(p.getRelevantdate.get(0).getValue)) + result.setPublisher(p.getPublisher) + result.setSource(p.getSource) + result.setBestaccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes")) + + val dois = result.getPid.asScala.filter(p => "doi".equalsIgnoreCase(p.getQualifier.getClassname)).map(p => p.getValue) + if (dois.isEmpty) + return null + + + val i: Instance = createInstance(s"https://dx.doi.org/${dois.head}", firstInstanceOrNull(p.getInstance()), result.getDateofacceptance) + + if (i != null) + result.setInstance(List(i).asJava) + + result + } + + + def convertDLIRelation(r: DLIRelation): Relation = { + + val result = new Relation + if (!relationTypeMapping.contains(r.getRelType)) + return null + + if (r.getCollectedFrom == null || r.getCollectedFrom.size() == 0 || (r.getCollectedFrom.size() == 1 && r.getCollectedFrom.get(0) == null)) + return null + val t = relationTypeMapping.get(r.getRelType) + + result.setRelType("resultResult") + result.setRelClass(t.get._1) + result.setSubRelType(t.get._2) + result.setCollectedfrom(r.getCollectedFrom.asScala.map(c => collectedFromMap.getOrElse(c.getKey, null)).filter(p => p != null).asJava) + result.setSource(generateId(r.getSource)) + result.setTarget(generateId(r.getTarget)) + + if (result.getSource.equals(result.getTarget)) + return null + result.setDataInfo(generateDataInfo()) + + result + } + + + def convertDLIDatasetTOOAF(d: DLIDataset): Dataset = { + + if (d.getCollectedfrom == null || d.getCollectedfrom.size() == 0 || (d.getCollectedfrom.size() == 1 && d.getCollectedfrom.get(0) == null)) + return null + val result: Dataset = new Dataset + result.setId(generateId(d.getId)) + result.setDataInfo(generateDataInfo()) + result.setCollectedfrom(d.getCollectedfrom.asScala.map(c => collectedFromMap.getOrElse(c.getKey, null)).asJava) + + + result.setPid(d.getPid) + + val fpids = result.getPid.asScala.filter(p => "doi".equalsIgnoreCase(p.getQualifier.getClassname) || + "pdb".equalsIgnoreCase(p.getQualifier.getClassname) + ).map(p => p.getValue) + + if (fpids == null || fpids.isEmpty) + return null + + + result.setDateofcollection(d.getDateofcollection) + result.setOriginalId(d.getPid.asScala.map(d => d.getValue).asJava) + result.setDateoftransformation(LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))) + if (d.getAuthor == null || 
d.getAuthor.isEmpty)
+      return null
+    result.setAuthor(d.getAuthor.asScala.map(convertAuthor).asJava)
+    result.setResulttype(createQualifier(d.getResulttype.getClassid, d.getResulttype.getClassname, "dnet:result_typologies", "dnet:result_typologies"))
+
+    if (d.getSubject != null)
+      result.setSubject(d.getSubject.asScala.map(convertSubject).asJava)
+
+    if (d.getTitle == null || d.getTitle.isEmpty)
+      return null
+
+    result.setTitle(List(patchTitle(d.getTitle.get(0))).asJava)
+
+    if (d.getRelevantdate == null || d.getRelevantdate.size() == 0)
+      return null
+
+    result.setRelevantdate(d.getRelevantdate.asScala.map(patchRelevantDate).asJava)
+
+    result.setDescription(d.getDescription)
+
+    result.setDateofacceptance(asField(d.getRelevantdate.get(0).getValue))
+    result.setPublisher(d.getPublisher)
+    result.setSource(d.getSource)
+    result.setBestaccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes"))
+
+    val instance_urls = if (fpids.head.length < 5) s"https://www.rcsb.org/structure/${fpids.head}" else s"https://dx.doi.org/${fpids.head}"
+
+    val i: Instance = createInstance(instance_urls, firstInstanceOrNull(d.getInstance()), result.getDateofacceptance, true)
+    if (i != null)
+      result.setInstance(List(i).asJava)
+
+    result
+  }
+
+  def firstInstanceOrNull(instances: java.util.List[Instance]): Instance = {
+    if (instances == null || instances.size() == 0)
+      return null
+    instances.get(0)
+  }
+
+  def createInstance(url: String, originalInstance: Instance, doa: Field[String], dataset: Boolean = false): Instance = {
+    val i = new Instance
+    i.setUrl(List(url).asJava)
+    if (dataset)
+      i.setInstancetype(createQualifier("0021", "Dataset", "dnet:publication_resource", "dnet:publication_resource"))
+    else
+      i.setInstancetype(createQualifier("0000", "UNKNOWN", "dnet:publication_resource", "dnet:publication_resource"))
+    if (originalInstance != null && originalInstance.getHostedby != null)
+      i.setHostedby(originalInstance.getHostedby)
+
+    i.setAccessright(createQualifier("UNKNOWN", "not available", "dnet:access_modes", "dnet:access_modes"))
+    i.setDateofacceptance(doa)
+    i
+  }
+
+  def patchRelevantDate(d: StructuredProperty): StructuredProperty = {
+    d.setQualifier(createQualifier("UNKNOWN", "dnet:dataCite_date"))
+    d
+  }
+
+  def patchTitle(t: StructuredProperty): StructuredProperty = {
+    t.setQualifier(createQualifier("main title", "dnet:dataCite_title"))
+    t
+  }
+
+  def convertSubject(s: StructuredProperty): StructuredProperty = {
+    s.setQualifier(createQualifier("keyword", "dnet:subject_classification_typologies"))
+    s
+  }
+
+  def convertAuthor(a: Author): Author = {
+    if (a == null)
+      return a
+    val p = new PacePerson(a.getFullname, false)
+    if (p.isAccurate) {
+      a.setName(p.getNameString)
+      a.setSurname(p.getSurnameString)
+    }
+    a
+  }
+
+  def generateId(id: String): String = {
+    val md5 = if (id.contains("::")) StringUtils.substringAfter(id, "::") else StringUtils.substringAfter(id, "|")
+    s"50|scholix_____::$md5"
+  }
+
+  def generateKeyValue(key: String, value: String): KeyValue = {
+    val kv: KeyValue = new KeyValue()
+    kv.setKey(key)
+    kv.setValue(value)
+    kv.setDataInfo(generateDataInfo("0.9"))
+    kv
+  }
+
+  def generateDataInfo(trust: String = "0.9", invisibile: Boolean = false): DataInfo = {
+    val di = new DataInfo
+    di.setDeletedbyinference(false)
+    di.setInferred(false)
+    // propagate the caller's flag (was hard-coded to false, which ignored the parameter)
+    di.setInvisible(invisibile)
+    di.setTrust(trust)
+    di.setProvenanceaction(createQualifier("sysimport:actionset", "dnet:provenanceActions"))
+    di
+  }
+
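Editor's note on the helpers above: generateId re-namespaces every DLI identifier under the scholix prefix; despite the local name md5, no hash is computed here, the suffix of the original identifier is reused verbatim. A minimal sketch of the expected behaviour, using identifiers that occur in this patch's own test resources (illustration only, not part of the commit):

    DLIToOAF.generateId("60|719f19e5a996de1b87cddf93871bf2d4")
    // -> "50|scholix_____::719f19e5a996de1b87cddf93871bf2d4"   (no "::", so the part after "|" is kept)
    DLIToOAF.generateId("dli_resolver::9e117414be07bf03cbce8889d22d661a")
    // -> "50|scholix_____::9e117414be07bf03cbce8889d22d661a"   (the part after "::" is kept)
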
def createQualifier(cls: String, sch: String): Qualifier = { + createQualifier(cls, cls, sch, sch) + } + + + def createQualifier(classId: String, className: String, schemeId: String, schemeName: String): Qualifier = { + val q: Qualifier = new Qualifier + q.setClassid(classId) + q.setClassname(className) + q.setSchemeid(schemeId) + q.setSchemename(schemeName) + q + } + + + def asField[T](value: T): Field[T] = { + val tmp = new Field[T] + tmp.setValue(value) + tmp + + + } + +} diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/SparkExportContentForOpenAire.scala b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/SparkExportContentForOpenAire.scala new file mode 100644 index 000000000..f3aa35549 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/SparkExportContentForOpenAire.scala @@ -0,0 +1,118 @@ +package eu.dnetlib.dhp.`export` + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIRelation} +import org.apache.commons.io.IOUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.{SparkConf, SparkContext} +import org.codehaus.jackson.map.ObjectMapper +import scala.collection.mutable.ArrayBuffer + + +object SparkExportContentForOpenAire { + + + def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkExportContentForOpenAire.getClass.getResourceAsStream("input_export_content_parameters.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(SparkExportContentForOpenAire.getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sc:SparkContext = spark.sparkContext + + val workingPath = parser.get("workingDirPath") + + implicit val pubEncoder: Encoder[Publication] = Encoders.bean(classOf[Publication]) + implicit val datEncoder: Encoder[OafDataset] = Encoders.bean(classOf[OafDataset]) + implicit val relEncoder: Encoder[Relation] = Encoders.bean(classOf[Relation]) + implicit val dliRelEncoder: Encoder[DLIRelation] = Encoders.bean(classOf[DLIRelation]) + import spark.implicits._ + +// +// val relRDD:RDD[Relation] = sc.textFile(s"$workingPath/relation_j") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIRelation])) +// .filter(p => p.getDataInfo.getDeletedbyinference == false) +// .map(DLIToOAF.convertDLIRelation).filter(p=>p!= null) +// spark.createDataset(relRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/relationDS") +// +// val datRDD:RDD[OafDataset] = sc.textFile(s"$workingPath/dataset") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIDataset])) +// .filter(p => p.getDataInfo.getDeletedbyinference == false) +// .map(DLIToOAF.convertDLIDatasetTOOAF).filter(p=>p!= null) +// spark.createDataset(datRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/datasetDS") +// +// +// val pubRDD:RDD[Publication] = sc.textFile(s"$workingPath/publication") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIPublication])) +// .filter(p => p.getDataInfo.getDeletedbyinference == false) +// .map(DLIToOAF.convertDLIPublicationToOAF).filter(p=>p!= null) +// 
spark.createDataset(pubRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/publicationDS") +// +// +// +// val pubs:Dataset[Publication] = spark.read.load(s"$workingPath/publicationDS").as[Publication] +// val dats :Dataset[OafDataset] = spark.read.load(s"$workingPath/datasetDS").as[OafDataset] + var relDS :Dataset[Relation] = spark.read.load(s"$workingPath/relationDS").as[Relation] +// +// +// pubs.joinWith(relDS, pubs("id").equalTo(relDS("source"))).map(k => k._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/relationDS_f1") +// +// relDS= spark.read.load(s"$workingPath/relationDS_f1").as[Relation] +// +// relDS.joinWith(dats, relDS("target").equalTo(dats("id"))).map(k => k._1).write.mode(SaveMode.Overwrite).save(s"$workingPath/relationDS_filtered") +// +// +// val r_source = relDS.select(relDS("source")).distinct() +// val r_target = relDS.select(relDS("source")).distinct() +// +// +// pubs.joinWith(r_source, pubs("id").equalTo(r_source("source")), "inner").map(k => k._1).write.mode(SaveMode.Overwrite).save(s"$workingPath/publicationDS_filtered") +// +// dats.joinWith(r_target, dats("id").equalTo(r_target("target")), "inner").map(k => k._1).write.mode(SaveMode.Overwrite).save(s"$workingPath/datasetDS_filtered") +// +// spark.createDataset(sc.textFile(s"$workingPath/dataset") +// .map(s => new ObjectMapper().readValue(s, classOf[DLIDataset])) +// .map(DLIToOAF.convertDLIDatasetToExternalReference) +// .filter(p => p != null)).as[DLIExternalReference].write.mode(SaveMode.Overwrite).save(s"$workingPath/externalReference") +// + + val pf = spark.read.load(s"$workingPath/publicationDS_filtered").select("id") + relDS = spark.read.load(s"$workingPath/relationDS").as[Relation] + val relationTo = pf.joinWith(relDS, pf("id").equalTo(relDS("source")),"inner").map(t =>t._2) + + val extRef = spark.read.load(s"$workingPath/externalReference").as[DLIExternalReference] + + spark.createDataset(relationTo.joinWith(extRef, relationTo("target").equalTo(extRef("id")), "inner").map(d => { + val r = d._1 + val ext = d._2 + (r.getSource, ext) + }).rdd.groupByKey.map(f => { + var dli_ext = ArrayBuffer[DLIExternalReference]() + f._2.foreach(d => if (dli_ext.size < 100) dli_ext += d ) + (f._1, dli_ext) + })).write.mode(SaveMode.Overwrite).save(s"$workingPath/externalReference_grouped") + + + + + + + + + + + + + + + } + +} diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json new file mode 100644 index 000000000..b92f87e08 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/input_export_content_parameters.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "mt", + "paramLongName": "master", + "paramDescription": "should be local or yarn", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingDirPath", + "paramDescription": "the working path where generated files", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml new file mode 100644 index 000000000..59e5c059f --- /dev/null +++ 
b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.wf.rerun.failnodes + false + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml new file mode 100644 index 000000000..181ab80bf --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/sx/export/oozie_app/workflow.xml @@ -0,0 +1,49 @@ + + + + workingDirPath + the source path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + memory for individual executor + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + ExtractOAF + eu.dnetlib.dhp.export.SparkExportContentForOpenAire + dhp-graph-provision-scholexplorer-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + ${sparkExtraOPT} + + --workingDirPath${workingDirPath} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala new file mode 100644 index 000000000..c9d33dbe4 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala @@ -0,0 +1,75 @@ +package eu.dnetlib.dhp.export + +import java.time.LocalDateTime +import java.time.format.DateTimeFormatter + +import eu.dnetlib.dhp.schema.oaf.Relation +import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIRelation} +import org.apache.spark.SparkConf +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession +import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} +import org.junit.jupiter.api.Test + +import scala.io.Source + +class ExportDLITOOAFTest { + + val mapper = new ObjectMapper() + + @Test + def testDate():Unit = { + println(LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"))) + + } + + @Test + def testPublicationMapping():Unit = { + + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + val json = Source.fromInputStream(getClass.getResourceAsStream("publication.json")).mkString + + + val oaf =DLIToOAF.convertDLIPublicationToOAF(mapper.readValue(json, classOf[DLIPublication])) + + println(mapper.writeValueAsString(oaf)) + + + } + + + @Test + def testExternalReferenceMapping():Unit = { + + 
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala
new file mode 100644
index 000000000..c9d33dbe4
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/java/eu/dnetlib/dhp/export/ExportDLITOOAFTest.scala
@@ -0,0 +1,48 @@
+package eu.dnetlib.dhp.export
+
+import java.time.LocalDateTime
+import java.time.format.DateTimeFormatter
+
+import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIRelation}
+import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
+import org.junit.jupiter.api.Test
+
+import scala.io.Source
+
+class ExportDLITOOAFTest {
+
+  val mapper = new ObjectMapper()
+
+  @Test
+  def testDate(): Unit = {
+    println(LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")))
+  }
+
+  @Test
+  def testPublicationMapping(): Unit = {
+    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
+    val json = Source.fromInputStream(getClass.getResourceAsStream("publication.json")).mkString
+
+    val oaf = DLIToOAF.convertDLIPublicationToOAF(mapper.readValue(json, classOf[DLIPublication]))
+    println(mapper.writeValueAsString(oaf))
+  }
+
+  @Test
+  def testExternalReferenceMapping(): Unit = {
+    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
+    val json = Source.fromInputStream(getClass.getResourceAsStream("dataset.json")).mkString
+
+    val oaf = DLIToOAF.convertDLIDatasetToExternalReference(mapper.readValue(json, classOf[DLIDataset]))
+    println(oaf)
+  }
+
+  @Test
+  def testRelationMapping(): Unit = {
+    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
+    val json = Source.fromInputStream(getClass.getResourceAsStream("relation.json")).mkString
+
+    val oaf = DLIToOAF.convertDLIRelation(mapper.readValue(json, classOf[DLIRelation]))
+    println(mapper.writeValueAsString(oaf))
+  }
+
+}
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/dataset.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/dataset.json
new file mode 100644
index 000000000..dae635730
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/dataset.json
@@ -0,0 +1,101 @@
+{
+  "dataInfo": {
+    "invisible": false,
+    "inferred": null,
+    "deletedbyinference": false,
+    "trust": "0.9",
+    "inferenceprovenance": null,
+    "provenanceaction": null
+  },
+  "lastupdatetimestamp": null,
+  "id": "60|719f19e5a996de1b87cddf93871bf2d4",
+  "originalId": [
+    "a0a3p2gws9::uniprot"
+  ],
+  "collectedfrom": [
+    {
+      "key": "dli_________::europe_pmc__",
+      "value": "Europe PMC",
+      "dataInfo": null
+    }
+  ],
+  "pid": [
+    {
+      "value": "acc63471",
+      "qualifier": {
+        "classid": "ena",
+        "classname": "ena",
+        "schemeid": "dnet:pid_types",
+        "schemename": "dnet:pid_types"
+      },
+      "dataInfo": null
+    }
+  ],
+  "dateofcollection": "2019-07-05T12:47:11.545+02:00",
+  "dateoftransformation": null,
+  "extraInfo": null,
+  "oaiprovenance": null,
+  "author": null,
+  "resulttype": {
+    "classid": "dataset",
+    "classname": "dataset",
+    "schemeid": "dataset",
+    "schemename": "dataset"
+  },
+  "language": null,
+  "country": null,
+  "subject": [],
+  "title": [
+    {
+      "value": "CMD domain-containing protein",
+      "qualifier": null,
+      "dataInfo": null
+    }
+  ],
+  "relevantdate": [
+    {
+      "value": "2019-07-15T16:14:28.636",
+      "qualifier": {
+        "classid": "resolvedDate",
+        "classname": "resolvedDate",
+        "schemeid": "dnet::date",
+        "schemename": "dnet::date"
+      },
+      "dataInfo": null
+    }
+  ],
+  "description": null,
+  "dateofacceptance": null,
+  "publisher": {
+    "value": "UniProt",
+    "dataInfo": null
+  },
+  "embargoenddate": null,
+  "source": null,
+  "fulltext": null,
+  "format": null,
+  "contributor": null,
+  "resourcetype": null,
+  "coverage": null,
+  "bestaccessright": null,
+  "context": null,
+  "externalReference": null,
+  "instance": [],
+  "storagedate": null,
+  "device": null,
+  "size": null,
+  "version": null,
+  "lastmetadataupdate": null,
+  "metadataversionnumber": null,
+  "geolocation": null,
+  "originalObjIdentifier": "europe_pmc__::719f19e5a996de1b87cddf93871bf2d4",
+  "dlicollectedfrom": [
+    {
+      "id": "dli_________::europe_pmc__",
+      "name": "Europe PMC",
+      "completionStatus": "complete",
+      "collectionMode": null
+    }
+  ],
+  "completionStatus": "complete"
+}
\ No newline at end of file
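The dataset fixture above is what testExternalReferenceMapping feeds to DLIToOAF.convertDLIDatasetToExternalReference. A hypothetical extra check, written as it would appear inside ExportDLITOOAFTest (reusing its mapper and imports; the accessors follow the OAF/DLI schema), pinning down the fields the conversion draws on:

    @Test
    def testDatasetFixtureFields(): Unit = {
      val json = Source.fromInputStream(getClass.getResourceAsStream("dataset.json")).mkString
      val d = mapper.readValue(json, classOf[DLIDataset])

      assert(d.getId == "60|719f19e5a996de1b87cddf93871bf2d4") // "60|" marks a dataset id
      assert(d.getPid.get(0).getValue == "acc63471")           // ENA accession carried as pid
      assert(d.getPublisher.getValue == "UniProt")
    }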
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json
new file mode 100644
index 000000000..4ab3de2da
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/publication.json
@@ -0,0 +1,128 @@
+{
+  "dataInfo": {
+    "invisible": false,
+    "inferred": null,
+    "deletedbyinference": false,
+    "trust": "0.9",
+    "inferenceprovenance": null,
+    "provenanceaction": null
+  },
+  "lastupdatetimestamp": null,
+  "id": "50|9e117414be07bf03cbce8889d22d661a",
+  "originalId": [
+    "9e117414be07bf03cbce8889d22d661a"
+  ],
+  "collectedfrom": [
+    {
+      "key": "dli_________::crossref",
+      "value": "Crossref",
+      "dataInfo": null
+    }
+  ],
+  "pid": [
+    {
+      "value": "10.1007/978-94-017-3490-5_15",
+      "qualifier": {
+        "classid": "doi",
+        "classname": "doi",
+        "schemeid": "dnet:pid_types",
+        "schemename": "dnet:pid_types"
+      },
+      "dataInfo": null
+    }
+  ],
+  "dateofcollection": "2020-06-08T07:28:55.731Z",
+  "dateoftransformation": null,
+  "extraInfo": null,
+  "oaiprovenance": null,
+  "author": [
+    {
+      "fullname": "Calcaterra Domenico",
+      "name": null,
+      "surname": null,
+      "rank": null,
+      "pid": null,
+      "affiliation": null
+    },
+    {
+      "fullname": "Parise Mario",
+      "name": null,
+      "surname": null,
+      "rank": null,
+      "pid": null,
+      "affiliation": null
+    }
+  ],
+  "resulttype": {
+    "classid": "publication",
+    "classname": "publication",
+    "schemeid": "publication",
+    "schemename": "publication"
+  },
+  "language": null,
+  "country": null,
+  "subject": [
+    {
+      "value": "Strain-linked information about bacterial and archaeal biodiversity",
+      "qualifier": {
+        "classid": "dnet:subject",
+        "classname": "dnet:subject",
+        "schemeid": "",
+        "schemename": ""
+      },
+      "dataInfo": null
+    }
+  ],
+  "title": [
+    {
+      "value": "The Contribution of Historical Information in the Assessment of Landslide Hazard",
+      "qualifier": null,
+      "dataInfo": null
+    }
+  ],
+  "relevantdate": [
+    {
+      "value": "2013-01-29T16:50:44Z",
+      "qualifier": {
+        "classid": "date",
+        "classname": "date",
+        "schemeid": "dnet::date",
+        "schemename": "dnet::date"
+      },
+      "dataInfo": null
+    }
+  ],
+  "description": [
+    {
+      "value": null,
+      "dataInfo": null
+    }
+  ],
+  "dateofacceptance": null,
+  "publisher": {
+    "value": "Springer Netherlands",
+    "dataInfo": null
+  },
+  "embargoenddate": null,
+  "source": null,
+  "fulltext": null,
+  "format": null,
+  "contributor": null,
+  "resourcetype": null,
+  "coverage": null,
+  "bestaccessright": null,
+  "context": null,
+  "externalReference": null,
+  "instance": [],
+  "journal": null,
+  "originalObjIdentifier": "dli_resolver::9e117414be07bf03cbce8889d22d661a",
+  "dlicollectedfrom": [
+    {
+      "id": "dli_________::crossref",
+      "name": "Crossref",
+      "completionStatus": "complete",
+      "collectionMode": "resolved"
+    }
+  ],
+  "completionStatus": "complete"
+}
\ No newline at end of file
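Note two quirks in the publication fixture: the single description entry has a null value, and the subject qualifier carries empty schemeid/schemename. Any code reading these fields has to be defensive; a minimal null-safe accessor, sketched under the assumption that description is a list of OAF Field wrappers exposing getValue (the helper itself is not part of the patch):

    import eu.dnetlib.dhp.schema.oaf.Publication

    import scala.collection.JavaConverters._

    // First non-null description value, tolerating a null list, null entries
    // and null values, as in the fixture above.
    def firstDescription(p: Publication): Option[String] =
      Option(p.getDescription).map(_.asScala).getOrElse(Nil)
        .flatMap(f => Option(f).flatMap(field => Option(field.getValue)))
        .headOption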
diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json
new file mode 100644
index 000000000..cdb0cfa1d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/test/resources/eu/dnetlib/dhp/export/relation.json
@@ -0,0 +1,23 @@
+{
+  "subRelType": null,
+  "relClass": "datacite",
+  "dataInfo": {
+    "deletedbyinference": false,
+    "provenanceaction": null,
+    "inferred": null,
+    "inferenceprovenance": null,
+    "invisible": false,
+    "trust": "0.9"
+  },
+  "target": "50|00062410e2a15322480277d063c181bb",
+  "lastupdatetimestamp": null,
+  "relType": "IsReferencedBy",
+  "source": "60|4ee78ab329b49416b45c3774c132f244",
+  "collectedFrom": [
+    {
+      "dataInfo": null,
+      "value": "Europe PMC",
+      "key": "dli_________::europe_pmc__"
+    }
+  ]
+}
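Across the three fixtures the entity type is encoded in the identifier prefix: "50|" for publications and "60|" for datasets, so the direction of the relation above (a dataset IsReferencedBy a publication) can be read off its endpoints. A small illustrative helper, not part of DLIToOAF:

    object RelationFixtureCheck {
      // Illustrative only: the id prefix convention observed in the fixtures.
      def entityTypeOf(id: String): String = id.takeWhile(_ != '|') match {
        case "50"  => "publication"
        case "60"  => "dataset"
        case other => s"unknown ($other)"
      }

      def main(args: Array[String]): Unit = {
        assert(entityTypeOf("60|4ee78ab329b49416b45c3774c132f244") == "dataset")     // source
        assert(entityTypeOf("50|00062410e2a15322480277d063c181bb") == "publication") // target
      }
    }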