From ec1dac584779fd672bca9dbee7f9a1bba6f15336 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 28 Feb 2023 08:55:38 +0100 Subject: [PATCH] remove all not needed classes for the branch. Here only stuff related to the EOSC --- .../eu/dnetlib/dhp/ExecCreateSchemas.java | 10 +- .../dnetlib/dhp/{oa => eosc}/model/APC.java | 2 +- .../dhp/{oa => eosc}/model/AccessRight.java | 2 +- .../model/AlternateIdentifier.java | 2 +- .../dhp/{oa => eosc}/model/Author.java | 4 +- .../dhp/{oa => eosc}/model/AuthorPid.java | 10 +- .../model/AuthorPidSchemeValue.java | 5 +- .../{oa => eosc}/model/BestAccessRight.java | 2 +- .../model}/CfHbKeyValue.java | 15 +- .../dhp/{oa => eosc}/model/Container.java | 2 +- .../community => eosc/model}/Context.java | 4 +- .../dhp/{oa => eosc}/model/Country.java | 5 +- .../eu/dnetlib/dhp/eosc/model/EoscResult.java | 71 -- .../eu/dnetlib/dhp/eosc/model/Funder.java | 58 ++ .../dhp/{oa => eosc}/model/GeoLocation.java | 2 +- .../eu/dnetlib/dhp/eosc/model/Indicator.java | 25 +- .../dhp/{oa => eosc}/model/Instance.java | 53 +- .../dhp/{oa => eosc}/model/Language.java | 2 +- .../dhp/{oa => eosc}/model/Measure.java | 2 +- .../{oa => eosc}/model/OpenAccessRoute.java | 2 +- .../community => eosc/model}/Project.java | 61 +- .../eu/dnetlib/dhp/eosc/model/Provenance.java | 43 + .../model/graph => eosc/model}/RelType.java | 2 +- .../eu/dnetlib/dhp/eosc/model/Relation.java | 4 - .../dhp/{oa => eosc}/model/Result.java | 189 ++-- .../dhp/{oa => eosc}/model/ResultCountry.java | 3 +- .../dhp/{oa => eosc}/model/ResultPid.java | 2 +- .../eu/dnetlib/dhp/eosc/model/Subject.java | 2 - .../dnetlib/dhp/eosc/model/UsageCounts.java | 11 +- .../community => eosc/model}/Validated.java | 15 +- .../java/eu/dnetlib/dhp/oa/model/Subject.java | 40 - .../dhp/oa/model/SubjectSchemeValue.java | 42 - .../oa/model/community/CommunityInstance.java | 42 - .../oa/model/community/CommunityResult.java | 72 -- .../dhp/oa/model/community/Funder.java | 24 - .../dhp/oa/model/graph/Datasource.java | 2 +- .../dhp/oa/model/graph/GraphResult.java | 28 - .../dhp/oa/model/graph/Organization.java | 2 +- .../dnetlib/dhp/oa/model/graph/Relation.java | 99 -- .../dhp/oa/model/graph/ResearchCommunity.java | 24 - .../oa/model/graph/ResearchInitiative.java | 89 -- .../src/test/java/GenerateJsonSchema.java | 42 +- .../dhp/oa/graph/dump/DumpProducts.java | 55 +- .../dhp/oa/graph/dump/ResultMapper.java | 342 +++---- .../graph/dump/community/CommunitySplit.java | 81 -- .../community/SparkDumpCommunityProducts.java | 67 -- .../community/SparkSplitForCommunity.java | 50 - .../oa/graph/dump/complete/ContextInfo.java | 84 -- .../dump/complete/CreateContextEntities.java | 110 --- .../dump/complete/CreateContextRelation.java | 128 --- .../dump/complete/DumpGraphEntities.java | 520 ----------- .../dhp/oa/graph/dump/complete/Extractor.java | 201 ---- .../graph/dump/complete/OrganizationMap.java | 21 - .../dhp/oa/graph/dump/complete/Process.java | 99 -- .../dump/complete/QueryInformationSystem.java | 198 ---- .../dump/complete/SparkCollectAndSave.java | 122 --- .../dump/complete/SparkDumpEntitiesJob.java | 54 -- .../dump/complete/SparkDumpRelationJob.java | 135 --- .../SparkExtractRelationFromEntities.java | 54 -- .../complete/SparkOrganizationRelation.java | 179 ---- .../SparkSelectValidRelationsJob.java | 136 --- .../{community => eosc}/CommunityMap.java | 2 +- .../dump/{complete => eosc}/Constants.java | 2 +- ...ExtendEoscResultWithOrganizationStep2.java | 19 +- .../dump/eosc/ExtendWithUsageCounts.java | 20 +- .../{ => 
eosc}/QueryInformationSystem.java | 3 +- .../{community => eosc}/ResultProject.java | 4 +- .../dump/{ => eosc}/SaveCommunityMap.java | 2 +- .../dump/eosc/SelectEoscResultsJobStep1.java | 27 +- .../oa/graph/dump/eosc/SparkDumpRelation.java | 11 +- .../eosc/SparkExtendResultWithRelation.java | 96 ++ .../SparkPrepareResultProject.java | 11 +- .../graph/dump/eosc/SparkSelectRelation.java | 60 +- .../SparkUpdateProjectInfo.java | 32 +- .../dhp/oa/graph/dump/{ => eosc}/Utils.java | 4 +- .../funderresults/SparkDumpFunderResults.java | 128 --- .../SparkResultLinkedToProject.java | 111 --- .../ProjectsSubsetSparkJob.java | 82 -- .../dump/eoscdump/oozie_app/workflow.xml | 141 ++- .../input_extendwithrelation_parameters.json | 24 + .../dhp/oa/graph/dump/DumpJobTest.java | 874 +----------------- .../dhp/oa/graph/dump/GenerateJsonSchema.java | 4 +- .../dump/PrepareResultProjectJobTest.java | 4 +- .../dump/QueryInformationSystemTest.java | 1 + .../oa/graph/dump/SplitForCommunityTest.java | 143 --- .../oa/graph/dump/UpdateProjectInfoTest.java | 26 +- .../dhp/oa/graph/dump/ZenodoUploadTest.java | 3 +- .../graph/dump/complete/CreateEntityTest.java | 173 ---- .../dump/complete/CreateRelationTest.java | 723 --------------- ...DumpOrganizationProjectDatasourceTest.java | 157 ---- .../graph/dump/complete/DumpRelationTest.java | 305 ------ .../ExtractRelationFromEntityTest.java | 175 ---- .../dump/complete/FunderParsingTest.java | 70 -- .../complete/QueryInformationSystemTest.java | 810 ---------------- .../RelationFromOrganizationTest.java | 121 --- .../dump/complete/SelectRelationTest.java | 95 -- .../graph/dump/eosc/SelectEoscResultTest.java | 28 +- .../ResultLinkedToProjectTest.java | 139 --- .../dump/funderresult/SplitPerFunderTest.java | 145 --- .../projectssubset/ProjectSubsetTest.java | 124 --- .../addProjectInfo/publication_extendedmodel | 4 +- .../graph/dump/addProjectInfo/software.json | 12 +- .../eosc/input/indicators/publication.json | 1 + 103 files changed, 941 insertions(+), 7727 deletions(-) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/APC.java (94%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/AccessRight.java (95%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/AlternateIdentifier.java (96%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/Author.java (96%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/AuthorPid.java (89%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/AuthorPidSchemeValue.java (91%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/BestAccessRight.java (98%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa/model/community => eosc/model}/CfHbKeyValue.java (91%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/Container.java (99%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa/model/community => eosc/model}/Context.java (96%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/Country.java (95%) delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/GeoLocation.java (97%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/Instance.java (64%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/Language.java (95%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/Measure.java 
(96%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/OpenAccessRoute.java (89%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa/model/community => eosc/model}/Project.java (50%) create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Provenance.java rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa/model/graph => eosc/model}/RelType.java (96%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/Result.java (58%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/ResultCountry.java (97%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa => eosc}/model/ResultPid.java (97%) rename dump-schema/src/main/java/eu/dnetlib/dhp/{oa/model/community => eosc/model}/Validated.java (54%) delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkSplitForCommunity.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/ContextInfo.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/OrganizationMap.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkExtractRelationFromEntities.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{community => eosc}/CommunityMap.java (75%) rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{complete => eosc}/Constants.java (94%) rename 
dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{ => eosc}/QueryInformationSystem.java (95%) rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{community => eosc}/ResultProject.java (82%) rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{ => eosc}/SaveCommunityMap.java (98%) create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{community => eosc}/SparkPrepareResultProject.java (95%) rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{community => eosc}/SparkUpdateProjectInfo.java (72%) rename dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/{ => eosc}/Utils.java (93%) delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java delete mode 100644 dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectSubsetTest.java create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/indicators/publication.json diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java index a9a0c49..4cb18b9 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java @@ -12,7 +12,7 @@ import com.fasterxml.jackson.databind.SerializationFeature; import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.oa.model.graph.*; public class ExecCreateSchemas { @@ -60,14 +60,8 @@ public class ExecCreateSchemas { ExecCreateSchemas ecs = new ExecCreateSchemas(); ecs.init(); - ecs.generate(GraphResult.class, DIRECTORY, "result_schema.json"); - ecs.generate(ResearchCommunity.class, DIRECTORY, 
"community_infrastructure_schema.json"); - ecs.generate(Datasource.class, DIRECTORY, "datasource_schema.json"); - ecs.generate(Project.class, DIRECTORY, "project_schema.json"); - ecs.generate(Relation.class, DIRECTORY, "relation_schema.json"); - ecs.generate(Organization.class, DIRECTORY, "organization_schema.json"); - ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json"); + ecs.generate(Result.class, DIRECTORY, "eosc_result_schema.json"); } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/APC.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/APC.java similarity index 94% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/APC.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/APC.java index 96c7b62..bc48000 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/APC.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/APC.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AccessRight.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AccessRight.java similarity index 95% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AccessRight.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AccessRight.java index 16d0088..dca59bf 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AccessRight.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AccessRight.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; /** * AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.BestAccessRight diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AlternateIdentifier.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AlternateIdentifier.java similarity index 96% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AlternateIdentifier.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AlternateIdentifier.java index db7313d..e8043b9 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AlternateIdentifier.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AlternateIdentifier.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Author.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Author.java similarity index 96% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Author.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Author.java index ed2babb..c921ecf 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Author.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Author.java @@ -1,8 +1,10 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; + import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AuthorPid.java similarity index 89% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPid.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AuthorPid.java index f66e586..1bd6a0d 100644 --- 
a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPid.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AuthorPid.java @@ -1,8 +1,10 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; + import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** @@ -36,15 +38,15 @@ public class AuthorPid implements Serializable { public static AuthorPid newInstance(AuthorPidSchemeValue pid, Provenance provenance) { AuthorPid p = new AuthorPid(); - p.id = pid; - p.provenance = provenance; + p.setId(pid); + p.setProvenance(provenance); return p; } public static AuthorPid newInstance(AuthorPidSchemeValue pid) { AuthorPid p = new AuthorPid(); - p.id = pid; + p.setId(pid); return p; } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPidSchemeValue.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AuthorPidSchemeValue.java similarity index 91% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPidSchemeValue.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AuthorPidSchemeValue.java index 09eaba5..c12b5b0 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPidSchemeValue.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/AuthorPidSchemeValue.java @@ -1,8 +1,10 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; + import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; public class AuthorPidSchemeValue implements Serializable { @@ -37,4 +39,5 @@ public class AuthorPidSchemeValue implements Serializable { return cf; } + } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/BestAccessRight.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/BestAccessRight.java similarity index 98% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/BestAccessRight.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/BestAccessRight.java index abde526..cb1fa7b 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/BestAccessRight.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/BestAccessRight.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CfHbKeyValue.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/CfHbKeyValue.java similarity index 91% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CfHbKeyValue.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/CfHbKeyValue.java index 46ce417..5ae8b09 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CfHbKeyValue.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/CfHbKeyValue.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model.community; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; @@ -32,16 +32,15 @@ public class CfHbKeyValue implements Serializable { this.value = value; } - public static CfHbKeyValue newInstance(String key, String value) { - CfHbKeyValue inst = new CfHbKeyValue(); - inst.key = key; - inst.value = value; - return inst; - } - @JsonIgnore public boolean isBlank() { return StringUtils.isBlank(key) && StringUtils.isBlank(value); } + public static CfHbKeyValue newInstance(String key, String value) { + CfHbKeyValue inst = new 
CfHbKeyValue(); + inst.setKey(key); + inst.setValue(value); + return inst; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Container.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Container.java similarity index 99% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Container.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Container.java index 649a6b9..e574906 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Container.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Container.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Context.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Context.java similarity index 96% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Context.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Context.java index 90dd431..548012a 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Context.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Context.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model.community; +package eu.dnetlib.dhp.eosc.model; import java.util.List; import java.util.Objects; @@ -8,8 +8,6 @@ import java.util.stream.Collectors; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; -import eu.dnetlib.dhp.oa.model.Provenance; - /** * Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with * OpenAIRE. It extend eu.dnetlib.dhp.shema.dump.oaf.Qualifier with a parameter provenance of type diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Country.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Country.java similarity index 95% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Country.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Country.java index 7a69de0..99bd617 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Country.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Country.java @@ -1,8 +1,10 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; + import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** @@ -43,5 +45,4 @@ public class Country implements Serializable { c.setLabel(label); return c; } - } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java deleted file mode 100644 index 65b568e..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java +++ /dev/null @@ -1,71 +0,0 @@ - -package eu.dnetlib.dhp.eosc.model; - -import java.util.List; -import java.util.Map; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.community.CommunityResult; - -/** - * @author miriam.baglioni - * @Date 29/07/22 - */ -public class EoscResult extends CommunityResult { - - @JsonSchema(description = "Describes a reference to the EOSC Interoperability Framework (IF) Guidelines") - private List eoscIF; - - @JsonSchema(description = "The subject dumped by type associated to the result") - private Map> subject; - - @JsonSchema(description = "The list of keywords associated to the result") - private List keywords; - - 
@JsonSchema(description = "The list of organizations the result is affiliated to") - private List affiliation; - - @JsonSchema(description = "The indicators for this result") - private Indicator indicator; - - public Indicator getIndicator() { - return indicator; - } - - public void setIndicator(Indicator indicator) { - this.indicator = indicator; - } - - public List getKeywords() { - return keywords; - } - - public void setKeywords(List keywords) { - this.keywords = keywords; - } - - public List getEoscIF() { - return eoscIF; - } - - public void setEoscIF(List eoscIF) { - this.eoscIF = eoscIF; - } - - public Map> getSubject() { - return subject; - } - - public void setSubject(Map> subject) { - this.subject = subject; - } - - public List getAffiliation() { - return affiliation; - } - - public void setAffiliation(List affiliation) { - this.affiliation = affiliation; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java new file mode 100644 index 0000000..cea8c3e --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Funder.java @@ -0,0 +1,58 @@ + +package eu.dnetlib.dhp.eosc.model; + +import java.io.Serializable; + +import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; + +/** + * @author miriam.baglioni + * @Date 26/01/23 + */ +public class Funder implements Serializable { + + @JsonSchema(description = "The short name of the funder (EC)") + private String shortName; + + @JsonSchema(description = "The name of the funder (European Commission)") + private String name; + + @JsonSchema( + description = "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)") + private String jurisdiction; + + public String getJurisdiction() { + return jurisdiction; + } + + public void setJurisdiction(String jurisdiction) { + this.jurisdiction = jurisdiction; + } + + public String getShortName() { + return shortName; + } + + public void setShortName(String shortName) { + this.shortName = shortName; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + @JsonSchema(description = "Stream of funding (e.g. 
for European Commission can be H2020 or FP7)") + private String fundingStream; + + public String getFundingStream() { + return fundingStream; + } + + public void setFundingStream(String fundingStream) { + this.fundingStream = fundingStream; + } +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/GeoLocation.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/GeoLocation.java similarity index 97% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/GeoLocation.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/GeoLocation.java index d8cbb39..1db8f76 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/GeoLocation.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/GeoLocation.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Indicator.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Indicator.java index 58df9bc..d856294 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Indicator.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Indicator.java @@ -10,18 +10,6 @@ import java.io.Serializable; public class Indicator implements Serializable { private UsageCounts usageCounts; - public static Indicator newInstance(UsageCounts uc) { - Indicator i = new Indicator(); - i.usageCounts = uc; - return i; - } - - public static Indicator newInstance(String downloads, String views) { - Indicator i = new Indicator(); - i.usageCounts = UsageCounts.newInstance(views, downloads); - return i; - } - public UsageCounts getUsageCounts() { return usageCounts; } @@ -29,4 +17,17 @@ public class Indicator implements Serializable { public void setUsageCounts(UsageCounts usageCounts) { this.usageCounts = usageCounts; } + + public static Indicator newInstance(UsageCounts uc) { + Indicator i = new Indicator(); + i.setUsageCounts(uc); + return i; + } + + public static Indicator newInstance(String downloads, String views) { + Indicator i = new Indicator(); + i.setUsageCounts(UsageCounts.newInstance(views, downloads)); + return i; + } + } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Instance.java similarity index 64% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Instance.java index 64e32af..7642a00 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Instance.java @@ -1,30 +1,35 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; import java.util.List; +import com.fasterxml.jackson.annotation.JsonInclude; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** - * Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published - * versions are two manifestations of the same research result. It has the following parameters: - license of type - * String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be - * dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. - - * type of type String to store the type of the instance as defined in the corresponding dnet vocabulary - * (dnet:pubication_resource). 
It corresponds to the instancetype.classname of the instance to be mapped - url of type - * List list of locations where the instance is accessible. It corresponds to url of the instance to be dumped - - * publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type - * String to store information abour the review status of the instance. Possible values are 'Unknown', - * 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped - * - articleprocessingcharge of type APC to store the article processing charges possibly associated to the instance - * -pid of type List that is the list of pids associated to the result coming from authoritative sources for that pid - * -alternateIdentifier of type List that is the list of pids associated to the result coming from NON authoritative - * sources for that pid - * -measure list to represent the measure computed for this instance (for example the Bip!Finder ones). It corresponds to measures in the model + * @author miriam.baglioni + * @Date 02/02/23 + */ +/** + * It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. In the Result dump this + * information is not present because it is dumped as a set of relations between the result and the datasource. - + * hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the + * instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - + * key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type + * eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been + * collected. 
It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to + * collectedfrom.key - value corresponds to collectedfrom.value */ public class Instance implements Serializable { + @JsonSchema(description = "Information about the source from which the instance can be viewed or downloaded.") + private CfHbKeyValue hostedby; + + @JsonSchema(description = "Information about the source from which the record has been collected") + @JsonInclude(JsonInclude.Include.NON_NULL) + private CfHbKeyValue collectedfrom; + @JsonSchema(description = "Measures computed for this instance, for example Bip!Finder ones") private List measures; @@ -138,4 +143,20 @@ public class Instance implements Serializable { public void setMeasures(List measures) { this.measures = measures; } + + public CfHbKeyValue getHostedby() { + return hostedby; + } + + public void setHostedby(CfHbKeyValue hostedby) { + this.hostedby = hostedby; + } + + public CfHbKeyValue getCollectedfrom() { + return collectedfrom; + } + + public void setCollectedfrom(CfHbKeyValue collectedfrom) { + this.collectedfrom = collectedfrom; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Language.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Language.java similarity index 95% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Language.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Language.java index 82a8794..88b8c92 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Language.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Language.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Measure.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Measure.java similarity index 96% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Measure.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Measure.java index edc6949..0c4524e 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Measure.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Measure.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessRoute.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OpenAccessRoute.java similarity index 89% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessRoute.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OpenAccessRoute.java index 09a0236..4757004 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessRoute.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/OpenAccessRoute.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; /** * This Enum models the OpenAccess status, currently including only the values from Unpaywall diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java similarity index 50% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Project.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java index 421f772..0985de1 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Project.java +++ 
b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Project.java @@ -1,19 +1,27 @@ -package eu.dnetlib.dhp.oa.model.community; +package eu.dnetlib.dhp.eosc.model; + +import java.io.Serializable; + +import org.apache.commons.lang3.StringUtils; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; -import eu.dnetlib.dhp.oa.model.Provenance; - /** - * To store information about the project related to the result. This information is not directly mapped from the result - * represented in the internal model because it is not there. The mapped result will be enriched with project - * information derived by relation between results and projects. Project extends eu.dnetlib.dhp.schema.dump.oaf.Project - * with the following parameters: - funder of type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information - * about the funder funding the project - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store - * information about the. provenance of the association between the result and the project + * @author miriam.baglioni + * @Date 26/01/23 */ -public class Project extends eu.dnetlib.dhp.oa.model.Project { +public class Project implements Serializable { + @JsonSchema(description = "The OpenAIRE id for the project") + protected String id;// OpenAIRE id + + @JsonSchema(description = "The grant agreement number") + protected String code; + + @JsonSchema(description = "The acronym of the project") + protected String acronym; + + protected String title; @JsonSchema(description = "Information about the funder funding the project") private Funder funder; @@ -46,6 +54,38 @@ public class Project extends eu.dnetlib.dhp.oa.model.Project { this.funder = funders; } + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getAcronym() { + return acronym; + } + + public void setAcronym(String acronym) { + this.acronym = acronym; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + public static Project newInstance(String id, String code, String acronym, String title, Funder funder) { Project project = new Project(); project.setAcronym(acronym); @@ -55,4 +95,5 @@ public class Project extends eu.dnetlib.dhp.oa.model.Project { project.setTitle(title); return project; } + } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Provenance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Provenance.java new file mode 100644 index 0000000..54d6a44 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Provenance.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.eosc.model; + +import java.io.Serializable; + +import org.apache.commons.lang3.StringUtils; + +/** + * @author miriam.baglioni + * @Date 26/01/23 + */ +public class Provenance implements Serializable { + private String provenance; + private String trust; + + public String getProvenance() { + return provenance; + } + + public void setProvenance(String provenance) { + this.provenance = provenance; + } + + public String getTrust() { + return trust; + } + + public void setTrust(String trust) { + this.trust = trust; + } + + public static Provenance newInstance(String provenance, String trust) { + Provenance p = new Provenance(); + p.setProvenance(provenance); + p.setTrust(trust); + return p; + } + +// public String toStringProvenance(Provenance p) { 
+// return p.getProvenance() + p.getTrust(); +// } + +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/RelType.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/RelType.java similarity index 96% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/RelType.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/RelType.java index ea85187..7b3a82a 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/RelType.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/RelType.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model.graph; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java index 0cfb774..31ee0ac 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Relation.java @@ -6,10 +6,6 @@ import java.util.Objects; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; -import eu.dnetlib.dhp.oa.model.Provenance; -import eu.dnetlib.dhp.oa.model.graph.Node; -import eu.dnetlib.dhp.oa.model.graph.RelType; - /** * To represent the gereric relation between two entities. It has the following parameters: - private Node source to * represent the entity source of the relation - private Node target to represent the entity target of the relation - diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java similarity index 58% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java index 562f3db..a0db8b2 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Result.java @@ -1,76 +1,49 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; import java.util.List; +import java.util.Map; import com.fasterxml.jackson.annotation.JsonInclude; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** - * To represent the dumped result. It will be extended in the dump for Research Communities - Research - * Initiative/Infrastructures. It has the following parameters: - * - author of type - * List to describe the authors of a result. For each author in the result - * represented in the internal model one author in the esternal model is produced. - * - type of type String to represent - * the category of the result. Possible values are publication, dataset, software, other. It corresponds to - * resulttype.classname of the dumped result - * - language of type eu.dnetlib.dhp.schema.dump.oaf.Language to store - * information about the language of the result. It is dumped as - code corresponds to language.classid - value - * corresponds to language.classname - * - country of type List to store the country - * list to which the result is associated. For each country in the result respresented in the internal model one country - * in the external model is produces - subjects of type List to store the subjects for - * the result. For each subject in the result represented in the internal model one subject in the external model is - * produced - maintitle of type String to store the main title of the result. 
It corresponds to the value of the first - * title in the resul to be dumped having classid equals to "main title" - subtitle of type String to store the subtitle - * of the result. It corresponds to the value of the first title in the resul to be dumped having classid equals to - * "subtitle" - description of type List to store the description of the result. It corresponds to the list of - * description.value in the result represented in the internal model - publicationdate of type String to store the - * pubblication date. It corresponds to dateofacceptance.value in the result represented in the internal model - - * publisher of type String to store information about the publisher. It corresponds to publisher.value of the result - * represented in the intrenal model - embargoenddate of type String to store the embargo end date. It corresponds to - * embargoenddate.value of the result represented in the internal model - source of type List See definition of - * Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal - * model - format of type List It corresponds to the list of format.value in the result represented in the - * internal model - contributor of type List to represent contributors for this result. It corresponds to the - * list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds - * to the list of coverage.value in the result represented in the internal model - bestaccessright of type - * eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store informatin about the openest access right associated to the - * manifestations of this research results. It corresponds to the same parameter in the result represented in the - * internal model - container of type eu.dnetlib.dhp.schema/dump.oaf.Container (only for result of type publication). It - * corresponds to the parameter journal of the result represented in the internal model - documentationUrl of type - * List (only for results of type software) to store the URLs to the software documentation. It corresponds to - * the list of documentationUrl.value of the result represented in the internal model - codeRepositoryUrl of type String - * (only for results of type software) to store the URL to the repository with the source code. It corresponds to - * codeRepositoryUrl.value of the result represented in the internal model - programmingLanguage of type String (only - * for results of type software) to store the programming language. It corresponds to programmingLanguaga.classid of the - * result represented in the internal model - contactperson of type List (only for results of type other) to - * store the contact person for this result. It corresponds to the list of contactperson.value of the result represented - * in the internal model - contactgroup of type List (only for results of type other) to store the information - * for the contact group. It corresponds to the list of contactgroup.value of the result represented in the internal - * model - tool of type List (only fro results of type other) to store information about tool useful for the - * interpretation and/or re-used of the research product. It corresponds to the list of tool.value in the result - * represented in the internal modelt - size of type String (only for results of type dataset) to store the size of the - * dataset. It corresponds to size.value in the result represented in the internal model - version of type String (only - * for results of type dataset) to store the version. It corresponds to version.value of the result represented in the - * internal model - geolocation fo type List (only for results of type - * dataset) to store geolocation information. For each geolocation element in the result represented in the internal - * model a GeoLocation in the external model il produced - id of type String to store the OpenAIRE id of the result. It - * corresponds to the id of the result represented in the internal model - originalId of type List to store the - * original ids of the result. It corresponds to the originalId of the result represented in the internal model - pid of - * type List to store the persistent identifiers for the result. For - * each pid in the results represented in the internal model one pid in the external model is produced. The value - * correspondence is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - - * value corresponds to the pid.value of the result represented in the internal model - dateofcollection of type String - * to store information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result - * represented in the internal model - lasteupdatetimestamp of type String to store the timestamp of the last update of - * the record. It corresponds to lastupdatetimestamp of the resord represented in the internal model - * + * @author miriam.baglioni + * @Date 29/07/22 */ public class Result implements Serializable { + @JsonSchema(description = "Describes a reference to the EOSC Interoperability Framework (IF) Guidelines") + private List eoscIF; + + @JsonSchema(description = "The subject dumped by type associated to the result") + private Map> subject; + + @JsonSchema(description = "The list of keywords associated to the result") + private List keywords; + + @JsonSchema(description = "The list of organizations the result is affiliated to") + private List affiliation; + + @JsonSchema(description = "The indicators for this result") + private Indicator indicator; + + @JsonSchema(description = "List of projects (i.e. grants) that (co-)funded the production of the research results") + private List projects; + + @JsonSchema( + description = "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu") + private List context; + + @JsonSchema(description = "Information about the sources from which the record has been collected") + @JsonInclude(JsonInclude.Include.NON_NULL) + protected List collectedfrom; + + @JsonSchema( + description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version") + private List instance; + private List author; // resulttype allows subclassing results into publications | datasets | software @@ -84,9 +57,6 @@ public class Result implements Serializable { @JsonSchema(description = "The list of countries associated to this result") private List country; - @JsonSchema(description = "Keywords associated to the result") - private List subjects; @JsonSchema( description = "A name or title by which a scientific result is known.
May be the title of a publication, of a dataset or the name of a piece of software.") private String maintitle; @@ -169,6 +139,9 @@ public class Result implements Serializable { @JsonSchema(description = "Timestamp of last update of the record in OpenAIRE") private Long lastupdatetimestamp; + @JsonSchema(description = "The set of relations associated to this result") + private List relations; + public Long getLastupdatetimestamp() { return lastupdatetimestamp; } @@ -249,15 +222,6 @@ public class Result implements Serializable { this.country = country; } - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getSubjects() { - return subjects; - } - - public void setSubjects(List subjects) { - this.subjects = subjects; - } - public String getMaintitle() { return maintitle; } @@ -418,4 +382,83 @@ public class Result implements Serializable { this.geolocation = geolocation; } + public List getInstance() { + return instance; + } + + public void setInstance(List instance) { + this.instance = instance; + } + + public List getCollectedfrom() { + return collectedfrom; + } + + public void setCollectedfrom(List collectedfrom) { + this.collectedfrom = collectedfrom; + } + + public List getProjects() { + return projects; + } + + public void setProjects(List projects) { + this.projects = projects; + } + + public List getContext() { + return context; + } + + public void setContext(List context) { + this.context = context; + } + + public List getRelations() { + return relations; + } + + public void setRelations(List relations) { + this.relations = relations; + } + + public Indicator getIndicator() { + return indicator; + } + + public void setIndicator(Indicator indicator) { + this.indicator = indicator; + } + + public List getKeywords() { + return keywords; + } + + public void setKeywords(List keywords) { + this.keywords = keywords; + } + + public List getEoscIF() { + return eoscIF; + } + + public void setEoscIF(List eoscIF) { + this.eoscIF = eoscIF; + } + + public Map> getSubject() { + return subject; + } + + public void setSubject(Map> subject) { + this.subject = subject; + } + + public List getAffiliation() { + return affiliation; + } + + public void setAffiliation(List affiliation) { + this.affiliation = affiliation; + } } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultCountry.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ResultCountry.java similarity index 97% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultCountry.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ResultCountry.java index 324d382..5d59cfd 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultCountry.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ResultCountry.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; @@ -38,4 +38,5 @@ public class ResultCountry extends Country { public static ResultCountry newInstance(String code, String label, String provenance, String trust) { return newInstance(code, label, Provenance.newInstance(provenance, trust)); } + } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultPid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ResultPid.java similarity index 97% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultPid.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ResultPid.java index 991412f..abe6ebc 100644 --- 
a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultPid.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/ResultPid.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.model; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java index 549cf85..88c004e 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Subject.java @@ -5,8 +5,6 @@ import java.io.Serializable; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; -import eu.dnetlib.dhp.oa.model.Provenance; - /** * @author miriam.baglioni * @Date 10/08/22 diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/UsageCounts.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/UsageCounts.java index fabd240..b634151 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/UsageCounts.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/UsageCounts.java @@ -3,6 +3,8 @@ package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; + /** * @author miriam.baglioni * @Date 04/11/22 @@ -29,8 +31,13 @@ public class UsageCounts implements Serializable { public static UsageCounts newInstance(String views, String downloads) { UsageCounts uc = new UsageCounts(); - uc.views = views; - uc.downloads = downloads; + uc.setViews(views); + uc.setDownloads(downloads); return uc; } + + public boolean isEmpty() { + return StringUtils.isEmpty(this.downloads) || StringUtils.isEmpty(this.views); + } + } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Validated.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Validated.java similarity index 54% rename from dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Validated.java rename to dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Validated.java index 015f282..fe104d7 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Validated.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/Validated.java @@ -1,13 +1,13 @@ -package eu.dnetlib.dhp.oa.model.community; +package eu.dnetlib.dhp.eosc.model; import java.io.Serializable; +import org.apache.commons.lang3.StringUtils; + /** - * To store information about the funder funding the project related to the result. It has the following parameters: - - * shortName of type String to store the funder short name (e.c. AKA). - name of type String to store the funder name - * (e.c. 
Akademy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to - * store the jurisdiction of the funder + * @author miriam.baglioni + * @Date 26/01/23 */ public class Validated implements Serializable { @@ -32,8 +32,9 @@ public class Validated implements Serializable { public static Validated newInstance(Boolean validated, String validationDate) { Validated v = new Validated(); - v.validatedByFunder = validated; - v.validationDate = validationDate; + v.setValidatedByFunder(validated); + v.setValidationDate(validationDate); return v; } + } diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java deleted file mode 100644 index b7e1872..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent keywords associated to the result. It has two parameters: - * - subject of type eu.dnetlib.dhp.schema.dump.oaf.SubjectSchemeValue to describe the subject. It mapped as: - * - schema it corresponds to qualifier.classid of the dumped subject - * - value it corresponds to the subject value - * - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo - * is not null. In this case: - * - provenance corresponds to dataInfo.provenanceaction.classname - * - trust corresponds to dataInfo.trust - */ -public class Subject implements Serializable { - private SubjectSchemeValue subject; - - @JsonSchema(description = "Why this subject is associated to the result") - private Provenance provenance; - - public SubjectSchemeValue getSubject() { - return subject; - } - - public void setSubject(SubjectSchemeValue subject) { - this.subject = subject; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java deleted file mode 100644 index a7e665d..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class SubjectSchemeValue implements Serializable { - @JsonSchema( - description = "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies).") - private String scheme; - - @JsonSchema( - description = "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. 
not a term from a controlled vocabulary).") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static SubjectSchemeValue newInstance(String scheme, String value) { - SubjectSchemeValue cf = new SubjectSchemeValue(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java deleted file mode 100644 index 23f7e3a..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Instance; - -/** - * It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. In the Result dump this - * information is not present because it is dumped as a set of relations between the result and the datasource. - - * hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the - * instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - - * key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type - * eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been - * collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to - * collectedfrom.key - value corresponds to collectedfrom.value - */ -public class CommunityInstance extends Instance { - @JsonSchema(description = "Information about the source from which the instance can be viewed or downloaded.") - private CfHbKeyValue hostedby; - - @JsonSchema(description = "Information about the source from which the record has been collected") - @JsonInclude(JsonInclude.Include.NON_NULL) - private CfHbKeyValue collectedfrom; - - public CfHbKeyValue getHostedby() { - return hostedby; - } - - public void setHostedby(CfHbKeyValue hostedby) { - this.hostedby = hostedby; - } - - public CfHbKeyValue getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(CfHbKeyValue collectedfrom) { - this.collectedfrom = collectedfrom; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java deleted file mode 100644 index 0f71495..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java +++ /dev/null @@ -1,72 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Result; - -/** - * extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type - * List to store the list of projects related to the result. 
The - * information is added after the result is mapped to the external model - context of type - * List to store information about the RC RI related to the result. - * For each context in the result represented in the internal model one context in the external model is produced - - * collectedfrom of type List to store information about the sources from which - * the record has been collected. For each collectedfrom in the result represented in the internal model one - * collectedfrom in the external model is produced - instance of type - * List to store all the instances associated to the result. - * It corresponds to the same parameter in the result represented in the internal model - */ -public class CommunityResult extends Result { - - @JsonSchema(description = "List of projects (i.e. grants) that (co-)funded the production of the research results") - private List projects; - - @JsonSchema( - description = "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu") - private List context; - - @JsonSchema(description = "Information about the sources from which the record has been collected") - @JsonInclude(JsonInclude.Include.NON_NULL) - protected List collectedfrom; - - @JsonSchema( - description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version") - private List instance; - - public List getInstance() { - return instance; - } - - public void setInstance(List instance) { - this.instance = instance; - } - - public List getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(List collectedfrom) { - this.collectedfrom = collectedfrom; - } - - public List getProjects() { - return projects; - } - - public void setProjects(List projects) { - this.projects = projects; - } - - public List getContext() { - return context; - } - - public void setContext(List context) { - this.context = context; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java deleted file mode 100644 index 2099429..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java +++ /dev/null @@ -1,24 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To store information about the funder funding the project related to the result. It has the following parameters: - - * shortName of type String to store the funder short name (e.g. AKA). - name of type String to store the funder name - * (e.g. Academy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to - * store the jurisdiction of the funder - */ -public class Funder extends eu.dnetlib.dhp.oa.model.Funder { - - @JsonSchema(description = "Stream of funding (e.g.
for European Commission can be H2020 or FP7)") - private String fundingStream; - - public String getFundingStream() { - return fundingStream; - } - - public void setFundingStream(String fundingStream) { - this.fundingStream = fundingStream; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java index 7984f87..f2a6dd2 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java @@ -6,7 +6,7 @@ import java.util.List; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; -import eu.dnetlib.dhp.oa.model.Container; +import eu.dnetlib.dhp.eosc.model.Container; /** * To store information about the datasource OpenAIRE collects information from. It contains the following parameters: - diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java deleted file mode 100644 index 9b95881..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java +++ /dev/null @@ -1,28 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.util.List; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Instance; -import eu.dnetlib.dhp.oa.model.Result; - -/** - * It extends the eu.dnetlib.dhp.schema.dump.oaf.Result with - instance of type - * List to store all the instances associated to the result. It corresponds to - * the same parameter in the result represented in the internal model - */ -public class GraphResult extends Result { - @JsonSchema( - description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version") - private List instance; - - public List getInstance() { - return instance; - } - - public void setInstance(List instance) { - this.instance = instance; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java index 65211c8..69038ec 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java @@ -6,7 +6,7 @@ import java.util.List; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; -import eu.dnetlib.dhp.oa.model.Country; +import eu.dnetlib.dhp.eosc.model.Country; /** * To represent the generic organization. It has the following parameters: diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java deleted file mode 100644 index 9f3832d..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java +++ /dev/null @@ -1,99 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; -import java.util.Objects; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Provenance; - -/** - * To represent the generic relation between two entities.
It has the following parameters: - private Node source to - * represent the entity source of the relation - private Node target to represent the entity target of the relation - - * private RelType reltype to represent the semantics of the relation - private Provenance provenance to represent the - * provenance of the relation - */ -public class Relation implements Serializable { - @JsonSchema(description = "The node source in the relation") - private Node source; - - @JsonSchema(description = "The node target in the relation") - private Node target; - - @JsonSchema(description = "To represent the semantics of a relation between two entities") - private RelType reltype; - - @JsonSchema(description = "The reason why OpenAIRE holds the relation ") - private Provenance provenance; - - @JsonSchema( - description = "True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)") - private boolean validated; - - @JsonSchema(description = "The date when the relation was collected from OpenAIRE") - private String validationDate; - - public Node getSource() { - return source; - } - - public void setSource(Node source) { - this.source = source; - } - - public Node getTarget() { - return target; - } - - public void setTarget(Node target) { - this.target = target; - } - - public RelType getReltype() { - return reltype; - } - - public void setReltype(RelType reltype) { - this.reltype = reltype; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public void setValidated(boolean validate) { - this.validated = validate; - } - - public boolean getValidated() { - return validated; - } - - public void setValidationDate(String validationDate) { - this.validationDate = validationDate; - } - - public String getValidationDate() { - return validationDate; - } - - @Override - public int hashCode() { - - return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName()); - } - - public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) { - Relation relation = new Relation(); - relation.source = source; - relation.target = target; - relation.reltype = reltype; - relation.provenance = provenance; - return relation; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java deleted file mode 100644 index f40001b..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java +++ /dev/null @@ -1,24 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.util.List; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent RC entities. 
It extends eu.dnetlib.dhp.dump.oaf.graph.ResearchInitiative by adding the parameter subject - * to store the list of subjects related to the community - */ -public class ResearchCommunity extends ResearchInitiative { - @JsonSchema( - description = "Only for research communities: the list of the subjects associated to the research community") - private List subject; - - public List getSubject() { - return subject; - } - - public void setSubject(List subject) { - this.subject = subject; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java deleted file mode 100644 index 7a82401..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java +++ /dev/null @@ -1,89 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent entity of type RC/RI. It has the following parameters, which are mostly derived from the profile - * - private - * String id to store the openaire id for the entity. It has as code 00 and will be created as - * 00|context_____::md5(originalId) private - * String originalId to store the id of the context as provided in the profile - * (i.e. mes) - * - private String name to store the name of the context (got from the label attribute in the context - * definition) - * - private String type to store the type of the context (i.e.: research initiative or research community) - * - private String description to store the description of the context as given in the profile - * -private String - * zenodo_community to store the zenodo community associated to the context (main zenodo community) - */ -public class ResearchInitiative implements Serializable { - @JsonSchema(description = "The OpenAIRE id for the community/research infrastructure") - private String id; // openaireId - - @JsonSchema(description = "The acronym of the community") - private String acronym; // context id - - @JsonSchema(description = "The long name of the community") - private String name; // context name - - @JsonSchema(description = "One of {Research Community, Research infrastructure}") - private String type; // context type: research initiative or research community - - @JsonSchema(description = "Description of the research community/research infrastructure") - private String description; - - @JsonSchema( - description = "The URL of the Zenodo community associated to the Research community/Research infrastructure") - private String zenodo_community; - - public String getZenodo_community() { - return zenodo_community; - } - - public void setZenodo_community(String zenodo_community) { - this.zenodo_community = zenodo_community; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getName() { - return name; - } - - public void setName(String label) { - this.name = label; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } -} diff --git a/dump-schema/src/test/java/GenerateJsonSchema.java
b/dump-schema/src/test/java/GenerateJsonSchema.java index e15c247..ed55448 100644 --- a/dump-schema/src/test/java/GenerateJsonSchema.java +++ b/dump-schema/src/test/java/GenerateJsonSchema.java @@ -1,6 +1,5 @@ import java.io.IOException; -import eu.dnetlib.dhp.eosc.model.Relation; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.core.JsonProcessingException; @@ -10,43 +9,12 @@ import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.ExecCreateSchemas; -import eu.dnetlib.dhp.eosc.model.EoscResult; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; +import eu.dnetlib.dhp.eosc.model.Relation; +import eu.dnetlib.dhp.eosc.model.Result; //@Disabled class GenerateJsonSchema { - @Test - void generateSchema() { - SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7, - OptionPreset.PLAIN_JSON) - .with(Option.SCHEMA_VERSION_INDICATOR) - .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); - configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); - SchemaGeneratorConfig config = configBuilder.build(); - SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(GraphResult.class); - - System.out.println(jsonSchema.toString()); - } - - @Test - void generateSchema2() { - - ObjectMapper objectMapper = new ObjectMapper(); - AddonModule module = new AddonModule(); - SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper, - SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) - .with(module) - .with(Option.SCHEMA_VERSION_INDICATOR) - .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); - SchemaGeneratorConfig config = configBuilder.build(); - SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(GraphResult.class); - - System.out.println(jsonSchema.toString()); - } - @Test void generateSchema3() throws JsonProcessingException { @@ -59,7 +27,7 @@ class GenerateJsonSchema { .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(EoscResult.class); + JsonNode jsonSchema = generator.generateSchema(Result.class); System.out.println(new ObjectMapper().writeValueAsString(jsonSchema)); } @@ -70,13 +38,13 @@ ObjectMapper objectMapper = new ObjectMapper(); AddonModule module = new AddonModule(); SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper, - SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) + SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) .with(module) .with(Option.SCHEMA_VERSION_INDICATOR) .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(Relation.class); + JsonNode jsonSchema = generator.generateSchema(Result.class); System.out.println(new ObjectMapper().writeValueAsString(jsonSchema)); }
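Both surviving tests now share the same generator setup, only the target class changed. As a standalone sketch (hypothetical class name; the victools and jsonschema-module-addon calls are taken verbatim from the test above), this prints the Draft-7 JSON schema for the EOSC Result:

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;

import eu.dnetlib.dhp.eosc.model.Result;

public class EoscSchemaPrinter {
	public static void main(String[] args) throws Exception {
		ObjectMapper objectMapper = new ObjectMapper();
		AddonModule module = new AddonModule();
		// Same configuration as the tests: addon module for @JsonSchema
		// descriptions, Draft-7, plain JSON preset, getter-backed fields only
		SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper,
			SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
				.with(module)
				.with(Option.SCHEMA_VERSION_INDICATOR)
				.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
		SchemaGenerator generator = new SchemaGenerator(configBuilder.build());
		JsonNode jsonSchema = generator.generateSchema(Result.class);
		System.out.println(objectMapper.writeValueAsString(jsonSchema));
	}
}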
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java index a8aa703..f2149bb 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java @@ -4,11 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.List; -import java.util.Objects; import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -17,11 +13,11 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.eosc.model.Result; +import eu.dnetlib.dhp.oa.graph.dump.eosc.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.eosc.Utils; import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException; import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; -import eu.dnetlib.dhp.oa.model.Result; -import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.OafEntity; @@ -32,9 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity; public class DumpProducts implements Serializable { public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath, - Class inputClazz, - Class outputClazz, - String dumpType) { + Class inputClazz) { SparkConf conf = new SparkConf(); @@ -44,25 +38,23 @@ public class DumpProducts implements Serializable { spark -> { Utils.removeOutputDir(spark, outputPath); execDump( - spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType); + spark, inputPath, outputPath, communityMapPath, inputClazz); }); } - public static void execDump( + public static void execDump( SparkSession spark, String inputPath, String outputPath, String communityMapPath, - Class inputClazz, - Class outputClazz, - String dumpType) { + Class inputClazz) { CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); Utils .readPath(spark, inputPath, inputClazz) - .map((MapFunction) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz)) - .filter((FilterFunction) value -> value != null) + .map((MapFunction) value -> execMap(value, communityMap), Encoders.bean(Result.class)) + .filter((FilterFunction) value -> value != null) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") @@ -70,9 +62,8 @@ } - private static O execMap(I value, - CommunityMap communityMap, - String dumpType) throws NoAvailableEntityTypeException, CardinalityTooHighException { + private static Result execMap(I value, + CommunityMap communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException { Optional odInfo = Optional.ofNullable(value.getDataInfo()); if (odInfo.isPresent()) { @@ -83,29 +74,7 @@ return null; } - if (Constants.DUMPTYPE.COMMUNITY.getType().equals(dumpType)) { - Set communities = communityMap.keySet(); - - Optional> inputContext = Optional - .ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext()); - if (!inputContext.isPresent()) { - return null; - } - List toDumpFor = inputContext.get().stream().map(c -> { - if (communities.contains(c.getId())) { - return c.getId(); - } - if (c.getId().contains("::") && communities.contains(c.getId().substring(0,
c.getId().indexOf("::")); - } - return null; - }).filter(Objects::nonNull).collect(Collectors.toList()); - if (toDumpFor.isEmpty()) { - return null; - } - } - - return (O) ResultMapper.map(value, communityMap, dumpType); + return ResultMapper.map(value, communityMap); } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 03d3151..0178fa8 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -4,32 +4,22 @@ package eu.dnetlib.dhp.oa.graph.dump; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import org.apache.spark.api.java.function.FilterFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.eosc.model.EoscInteroperabilityFramework; -import eu.dnetlib.dhp.eosc.model.EoscResult; -import eu.dnetlib.dhp.eosc.model.Indicator; -import eu.dnetlib.dhp.eosc.model.UsageCounts; +import eu.dnetlib.dhp.eosc.model.*; +import eu.dnetlib.dhp.eosc.model.AccessRight; +import eu.dnetlib.dhp.eosc.model.Author; +import eu.dnetlib.dhp.eosc.model.Context; +import eu.dnetlib.dhp.eosc.model.GeoLocation; +import eu.dnetlib.dhp.eosc.model.Measure; +import eu.dnetlib.dhp.eosc.model.OpenAccessRoute; +import eu.dnetlib.dhp.eosc.model.Provenance; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException; import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; -import eu.dnetlib.dhp.oa.model.*; -import eu.dnetlib.dhp.oa.model.AccessRight; -import eu.dnetlib.dhp.oa.model.Author; -import eu.dnetlib.dhp.oa.model.GeoLocation; -import eu.dnetlib.dhp.oa.model.Instance; -import eu.dnetlib.dhp.oa.model.Measure; -import eu.dnetlib.dhp.oa.model.OpenAccessRoute; -import eu.dnetlib.dhp.oa.model.Result; -import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue; -import eu.dnetlib.dhp.oa.model.community.CommunityInstance; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.oa.model.community.Context; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; @@ -37,17 +27,10 @@ public class ResultMapper implements Serializable { private static final Logger log = LoggerFactory.getLogger(ResultMapper.class); public static Result map( - E in, Map communityMap, String dumpType) + E in, Map communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException { - Result out; - if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { - out = new GraphResult(); - } else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - out = new EoscResult(); - } else { - out = new CommunityResult(); - } + Result out = new Result(); eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in; Optional ort = Optional.ofNullable(input.getResulttype()); @@ -126,15 +109,31 @@ public class ResultMapper implements Serializable { .ofNullable(input.getDescription()) .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); out.setDescription(descriptionList); - Optional> oStr = Optional.ofNullable(input.getEmbargoenddate()); - if (oStr.isPresent()) { - out.setEmbargoenddate(oStr.get().getValue()); + + if (Optional.ofNullable(input.getEmbargoenddate()).isPresent()) { + 
out.setEmbargoenddate(input.getEmbargoenddate().getValue()); } + if (Optional.ofNullable(input.getMeasures()).isPresent()) { + Indicator i = new Indicator(); + UsageCounts uc = new UsageCounts(); + input.getMeasures().forEach(m -> { + if (m.getId().equals("downloads")) { + uc.setDownloads(m.getUnit().get(0).getValue()); + } + if (m.getId().equals("views")) { + uc.setViews(m.getUnit().get(0).getValue()); + } + }); + if (!uc.isEmpty()) { + i.setUsageCounts(uc); + out.setIndicator(i); + } + } final List formatList = new ArrayList<>(); Optional .ofNullable(input.getFormat()) - .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue()))); + .ifPresent(value -> value.forEach(f -> formatList.add(f.getValue()))); out.setFormat(formatList); out.setId(input.getId()); out.setOriginalId(new ArrayList<>()); @@ -149,38 +148,31 @@ public class ResultMapper implements Serializable { .filter(s -> !s.startsWith("50|")) .collect(Collectors.toList()))); - Optional> oInst = Optional - .ofNullable(input.getInstance()); - - if (oInst.isPresent()) { - if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { - ((GraphResult) out) - .setInstance( - oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); - } else { - ((CommunityResult) out) - .setInstance( - oInst - .get() - .stream() - .map(i -> getCommunityInstance(i, dumpType)) - .collect(Collectors.toList())); - } + if (Optional + .ofNullable(input.getInstance()) + .isPresent()) { + out + .setInstance( + input + .getInstance() + .stream() + .map(i -> getCommunityInstance(i)) + .collect(Collectors.toList())); } - Optional oL = Optional.ofNullable(input.getLanguage()); - if (oL.isPresent()) { - eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get(); - out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname())); + if (Optional.ofNullable(input.getLanguage()).isPresent()) { + out + .setLanguage( + Language.newInstance(input.getLanguage().getClassid(), input.getLanguage().getClassname())); } - Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp()); - if (oLong.isPresent()) { - out.setLastupdatetimestamp(oLong.get()); + + if (Optional.ofNullable(input.getLastupdatetimestamp()).isPresent()) { + out.setLastupdatetimestamp(input.getLastupdatetimestamp()); } - Optional> otitle = Optional.ofNullable(input.getTitle()); - if (otitle.isPresent()) { - List iTitle = otitle - .get() + + if (Optional.ofNullable(input.getTitle()).isPresent()) { + List iTitle = input + .getTitle() .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .collect(Collectors.toList()); @@ -188,8 +180,8 @@ public class ResultMapper implements Serializable { out.setMaintitle(iTitle.get(0).getValue()); } - iTitle = otitle - .get() + iTitle = input + .getTitle() .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .collect(Collectors.toList()); @@ -211,13 +203,12 @@ public class ResultMapper implements Serializable { .newInstance(p.getQualifier().getClassid(), p.getValue())) .collect(Collectors.toList()))); - oStr = Optional.ofNullable(input.getDateofacceptance()); - if (oStr.isPresent()) { - out.setPublicationdate(oStr.get().getValue()); + if (Optional.ofNullable(input.getDateofacceptance()).isPresent()) { + out.setPublicationdate(input.getDateofacceptance().getValue()); } - oStr = Optional.ofNullable(input.getPublisher()); - if (oStr.isPresent()) { - out.setPublisher(oStr.get().getValue()); + + if 
(Optional.ofNullable(input.getPublisher()).isPresent()) { + out.setPublisher(input.getPublisher().getValue()); } Optional @@ -225,123 +216,102 @@ public class ResultMapper implements Serializable { .ifPresent( value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList()))); - if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - List subjectList = new ArrayList<>(); - Optional - .ofNullable(input.getSubject()) - .ifPresent( - value -> value - .forEach(s -> subjectList.add(getSubject(s)))); + if (Optional.ofNullable(input.getSubject()).isPresent()) { + out.setSubject(createSubjectMap(input)); + out + .setKeywords( + input + .getSubject() + .stream() + .filter( + s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") && + !s.getValue().equalsIgnoreCase("EOSC::RO-crate")) + .map(s -> s.getValue()) + .collect(Collectors.toList())); - out.setSubjects(subjectList); - } else { - if (Optional.ofNullable(input.getSubject()).isPresent()) { - ((EoscResult) out).setSubject(createSubjectMap(input)); - ((EoscResult) out) - .setKeywords( + if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) { + out + .setEoscIF( input - .getSubject() + .getEoscifguidelines() .stream() - .filter( - s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") && - !s.getValue().equalsIgnoreCase("EOSC::RO-crate")) - .map(s -> s.getValue()) + .map( + eig -> EoscInteroperabilityFramework + .newInstance( + eig.getCode(), eig.getLabel(), eig.getUrl(), + eig.getSemanticRelation())) .collect(Collectors.toList())); - - if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) { - ((EoscResult) out) - .setEoscIF( - input - .getEoscifguidelines() - .stream() - .map( - eig -> EoscInteroperabilityFramework - .newInstance( - eig.getCode(), eig.getLabel(), eig.getUrl(), - eig.getSemanticRelation())) - .collect(Collectors.toList())); - } - } + } out.setType(input.getResulttype().getClassid()); - if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { - if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - ((CommunityResult) out) - .setCollectedfrom( - input - .getCollectedfrom() - .stream() - .map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue())) - .collect(Collectors.toList())); - } - Set communities = communityMap.keySet(); - List contextList = Optional - .ofNullable( - input - .getContext()) - .map( - value -> value - .stream() - .map(c -> { - String communityId = c.getId(); - if (communityId.contains("::")) { - communityId = communityId.substring(0, communityId.indexOf("::")); - } - if (communities.contains(communityId)) { - Context context = new Context(); - context.setCode(communityId); - context.setLabel(communityMap.get(communityId)); - Optional> dataInfo = Optional.ofNullable(c.getDataInfo()); - if (dataInfo.isPresent()) { - List provenance = new ArrayList<>(); - provenance - .addAll( - dataInfo - .get() - .stream() - .map( - di -> Optional - .ofNullable(di.getProvenanceaction()) - .map( - provenanceaction -> Provenance - .newInstance( - provenanceaction.getClassname(), - di.getTrust())) - .orElse(null)) - .filter(Objects::nonNull) - .collect(Collectors.toSet())); + Set communities = communityMap.keySet(); + List contextList = Optional + .ofNullable( + input + .getContext()) + .map( + value -> value + .stream() + .map(c -> { + String communityId = c.getId(); + if (communityId.contains("::")) { + communityId = communityId.substring(0, communityId.indexOf("::")); + } + if (communities.contains(communityId)) { + Context context = new Context(); 
+ context.setCode(communityId); + context.setLabel(communityMap.get(communityId)); + Optional> dataInfo = Optional.ofNullable(c.getDataInfo()); + if (dataInfo.isPresent()) { + List provenance = new ArrayList<>(); + provenance + .addAll( + dataInfo + .get() + .stream() + .map( + di -> Optional + .ofNullable(di.getProvenanceaction()) + .map( + provenanceaction -> Provenance + .newInstance( + provenanceaction.getClassname(), + di.getTrust())) + .orElse(null)) + .filter(Objects::nonNull) + .collect(Collectors.toSet())); - try { - context.setProvenance(getUniqueProvenance(provenance)); - } catch (NoAvailableEntityTypeException e) { - e.printStackTrace(); - } + try { + context.setProvenance(getUniqueProvenance(provenance)); + } catch (NoAvailableEntityTypeException e) { + e.printStackTrace(); } - return context; } - return null; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList())) - .orElse(new ArrayList<>()); + return context; + } + return null; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList())) + .orElse(new ArrayList<>()); - if (!contextList.isEmpty()) { - Set hashValue = new HashSet<>(); - List remainigContext = new ArrayList<>(); - contextList.forEach(c -> { - if (!hashValue.contains(c.hashCode())) { - remainigContext.add(c); - hashValue.add(c.hashCode()); - } - }); - ((CommunityResult) out).setContext(remainigContext); - } + if (!contextList.isEmpty()) { + Set hashValue = new HashSet<>(); + List remainigContext = new ArrayList<>(); + contextList.forEach(c -> { + if (!hashValue.contains(c.hashCode())) { + remainigContext.add(c); + hashValue.add(c.hashCode()); + } + }); + out.setContext(remainigContext); } + } catch (ClassCastException cce) { - return out; + return null; } } @@ -476,8 +446,8 @@ public class ResultMapper implements Serializable { } } - private static Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { - Instance instance = new Instance(); + private static eu.dnetlib.dhp.eosc.model.Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { + eu.dnetlib.dhp.eosc.model.Instance instance = new eu.dnetlib.dhp.eosc.model.Instance(); setCommonValue(i, instance); @@ -485,18 +455,11 @@ public class ResultMapper implements Serializable { } - private static CommunityInstance getCommunityInstance(eu.dnetlib.dhp.schema.oaf.Instance i, String dumpType) { - CommunityInstance instance = new CommunityInstance(); + private static eu.dnetlib.dhp.eosc.model.Instance getCommunityInstance(eu.dnetlib.dhp.schema.oaf.Instance i) { + eu.dnetlib.dhp.eosc.model.Instance instance = new eu.dnetlib.dhp.eosc.model.Instance(); setCommonValue(i, instance); - if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - instance - .setCollectedfrom( - CfHbKeyValue - .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue())); - } - instance .setHostedby( CfHbKeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue())); @@ -505,7 +468,8 @@ public class ResultMapper implements Serializable { } - private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { + private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, + eu.dnetlib.dhp.eosc.model.Instance instance) { Optional opAr = Optional.ofNullable(i.getAccessright()); if (opAr.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(opAr.get().getClassid())) { @@ -655,17 +619,6 @@ public class ResultMapper implements Serializable { return null; } - private static Subject getSubject(StructuredProperty s) { - Subject subject = new Subject(); 
- subject.setSubject(SubjectSchemeValue.newInstance(s.getQualifier().getClassid(), s.getValue())); - Provenance p = getProvenance(s); - if (p != null) { - subject.setProvenance(p); - } - - return subject; - } - private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) { Author a = new Author(); a.setFullname(oa.getFullname()); @@ -704,7 +657,8 @@ AuthorPidSchemeValue .newInstance( pid.getQualifier().getClassid(), - pid.getValue()) + pid.getValue()), + null ); }
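Stepping back from the hunks above: the measures-to-indicator logic that this commit adds to ResultMapper reduces to the following self-contained sketch. The helper class and the plain-string Map input are hypothetical stand-ins for eu.dnetlib.dhp.schema.oaf.Measure, whose id ("views"/"downloads") selects the first unit value exactly as in the added code; the setter and guard calls mirror the patch:

import java.util.LinkedHashMap;
import java.util.Map;

import eu.dnetlib.dhp.eosc.model.Indicator;
import eu.dnetlib.dhp.eosc.model.UsageCounts;

public class IndicatorMappingSketch {

	// Maps the "views"/"downloads" measure ids onto a UsageCounts indicator,
	// mirroring the block added to ResultMapper above.
	static Indicator toIndicator(Map<String, String> measures) {
		UsageCounts uc = new UsageCounts();
		uc.setViews(measures.get("views"));
		uc.setDownloads(measures.get("downloads"));
		if (uc.isEmpty()) {
			return null; // the indicator is set only when both metrics are present
		}
		Indicator i = new Indicator();
		i.setUsageCounts(uc);
		return i;
	}

	public static void main(String[] args) {
		Map<String, String> m = new LinkedHashMap<>();
		m.put("views", "42");
		m.put("downloads", "7");
		System.out.println(toIndicator(m) != null); // true: both metrics present
	}
}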
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java deleted file mode 100644 index 647f1fe..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ /dev/null @@ -1,81 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.community; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Optional; -import java.util.stream.Collectors; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; - -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.oa.model.community.Context; - -/** - * This class splits the dumped results according to the research community - research initiative/infrastructure they - * are related to. The information about the community is found in the element "context.id" in the result. Since the - * context that can be found in the result can be associated not only to communities, a community Map is provided. It - * will guide the splitting process. Note the repartition(1) just before writing the results related to a community. - * This is a choice due to uploading constraints (just one file for each community). As soon as a better solution is - * in place, remove the repartition - */ -public class CommunitySplit implements Serializable { - - public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath) { - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); - execSplit(spark, inputPath, outputPath, communityMap); - }); - } - - private static void execSplit(SparkSession spark, String inputPath, String outputPath, - CommunityMap communities) { - - Dataset result = Utils - .readPath(spark, inputPath + "/publication", CommunityResult.class) - .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); - - communities - .keySet() - .stream() - .forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_"))); - - } - - private static void printResult(String c, Dataset result, String outputPath) { - Dataset communityProducts = result - .filter((FilterFunction) r -> containsCommunity(r, c)); - - communityProducts - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); - - } - - private static boolean containsCommunity(CommunityResult r, String c) { - if (Optional.ofNullable(r.getContext()).isPresent()) { - return r - .getContext() - .stream() - .map(Context::getCode) - .collect(Collectors.toList()) - .contains(c); - } - return false; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java deleted file mode 100644 index 052df7b..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java +++ /dev/null @@ -1,67 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.community; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.schema.oaf.Result; - -/** - * Spark action to trigger the dump of results associated to research community - research initiative/infrastructure. The - * actual dump is performed via the class DumpProducts that is used also for the entire graph dump - */ -public class SparkDumpCommunityProducts implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkDumpCommunityProducts.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
- - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - String communityMapPath = parser.get("communityMapPath"); - - final String dumpType = Optional - .ofNullable(parser.get("dumpType")) - .map(String::valueOf) - .orElse("community"); - - Class inputClazz = (Class) Class.forName(resultClassName); - - DumpProducts dump = new DumpProducts(); - - dump - .run( - isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class, - dumpType); - - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkSplitForCommunity.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkSplitForCommunity.java deleted file mode 100644 index b62bf18..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkSplitForCommunity.java +++ /dev/null @@ -1,50 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.community; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -/** - * Spark job to trigger the split of results associated to research community - research initiative/infrastructure. The - * actual split is performed by the class CommunitySplit - */ -public class SparkSplitForCommunity implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkSplitForCommunity.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkSplitForCommunity.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String communityMapPath = parser.get("communityMapPath"); - - CommunitySplit split = new CommunitySplit(); - split.run(isSparkSessionManaged, inputPath, outputPath, communityMapPath); - - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/ContextInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/ContextInfo.java deleted file mode 100644 index 982a69a..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/ContextInfo.java +++ /dev/null @@ -1,84 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.Serializable; -import java.util.List; - -/** - * Deserialization of the information in the context needed to create Context Entities, and relations between context - * entities and datasources and projects - */ -public class ContextInfo implements Serializable { - private String id; - private String description; - private String type; - private String zenodocommunity; - private String name; - private List projectList; - private List datasourceList; - private List subject; - 
public List getSubject() { - return subject; - } - - public void setSubject(List subject) { - this.subject = subject; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getZenodocommunity() { - return zenodocommunity; - } - - public void setZenodocommunity(String zenodocommunity) { - this.zenodocommunity = zenodocommunity; - } - - public List getProjectList() { - return projectList; - } - - public void setProjectList(List projectList) { - this.projectList = projectList; - } - - public List getDatasourceList() { - return datasourceList; - } - - public void setDatasourceList(List datasourceList) { - this.datasourceList = datasourceList; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java deleted file mode 100644 index 0356bd4..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ /dev/null @@ -1,110 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; -import java.util.function.Consumer; -import java.util.function.Function; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; - -/** - * Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and - * collects the general information for contexts of type community or ri. The general information is the id of the - * context, its label, the subjects associated to the context, its zenodo community, description and type.
This - * information is used to create a new Context Entity - */ -public class CreateContextEntities implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final transient Configuration conf; - private final transient BufferedWriter writer; - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - CreateContextEntities.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - final String hdfsPath = parser.get("hdfsPath"); - log.info("hdfsPath: {}", hdfsPath); - - final String hdfsNameNode = parser.get("nameNode"); - log.info("nameNode: {}", hdfsNameNode); - - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); - - final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode); - - log.info("Processing contexts..."); - cce.execute(Process::getEntity, isLookUpUrl); - - cce.close(); - - } - - private void close() throws IOException { - writer.close(); - } - - public CreateContextEntities(String hdfsPath, String hdfsNameNode) throws IOException { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(this.conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fsDataOutputStream = fileSystem.append(hdfsWritePath); - } else { - fsDataOutputStream = fileSystem.create(hdfsWritePath); - } - CompressionCodecFactory factory = new CompressionCodecFactory(conf); - CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec"); - - this.writer = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream), - StandardCharsets.UTF_8)); - - } - - public void execute(final Function producer, String isLookUpUrl) - throws ISLookUpException { - - QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - - final Consumer consumer = ci -> writeEntity(producer.apply(ci)); - - queryInformationSystem.getContextInformation(consumer); - } - - protected void writeEntity(final R r) { - try { - writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); - writer.newLine(); - } catch (final IOException e) { - throw new IllegalArgumentException(e); - } - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java deleted file mode 100644 index 3706529..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ /dev/null @@ -1,128 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.function.Consumer; -import java.util.function.Function; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import 
org.apache.hadoop.fs.Path; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException; -import eu.dnetlib.dhp.oa.model.graph.*; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; - -/** - * Writes the set of new Relation between the context and datasources. At the moment the relation between the context - * and the project is not created because of a low coverage in the profiles of openaire ids related to projects - */ -public class CreateContextRelation implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); - private final transient Configuration conf; - private final transient BufferedWriter writer; - private final transient QueryInformationSystem queryInformationSystem; - - private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - Objects - .requireNonNull( - CreateContextRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"))); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String hdfsPath = parser.get("hdfsPath"); - log.info("hdfsPath: {}", hdfsPath); - - final String hdfsNameNode = parser.get("nameNode"); - log.info("nameNode: {}", hdfsNameNode); - - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); - - final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl); - - log.info("Creating relation for datasource..."); - cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); - - log.info("Creating relations for projects... 
"); - cce - .execute( - Process::getRelation, CONTEX_RELATION_PROJECT, - ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); - - cce.close(); - - } - - private void close() throws IOException { - writer.close(); - } - - public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl) - throws IOException, ISLookUpException { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - queryInformationSystem.execContextRelationQuery(); - - FileSystem fileSystem = FileSystem.get(this.conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fsDataOutputStream = fileSystem.append(hdfsWritePath); - } else { - fsDataOutputStream = fileSystem.create(hdfsWritePath); - } - - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - - } - - public void execute(final Function> producer, String category, String prefix) { - - final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); - - queryInformationSystem.getContextRelation(consumer, category, prefix); - } - - protected void writeEntity(final Relation r) { - try { - writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); - writer.newLine(); - } catch (final Exception e) { - throw new MyRuntimeException(e); - } - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java deleted file mode 100644 index 4e176e7..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ /dev/null @@ -1,520 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.io.StringReader; -import java.util.*; -import java.util.stream.Collectors; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; - -import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.*; -import eu.dnetlib.dhp.oa.model.graph.*; -import eu.dnetlib.dhp.oa.model.graph.Funder; -import eu.dnetlib.dhp.oa.model.graph.Project; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Journal; -import eu.dnetlib.dhp.schema.oaf.OafEntity; - -/** - * Dumps of entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. 
Results are dumped using the same - * Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below - */ -public class DumpGraphEntities implements Serializable { - - public void run(Boolean isSparkSessionManaged, - String inputPath, - String outputPath, - Class inputClazz, - String communityMapPath) { - - SparkConf conf = new SparkConf(); - - switch (ModelSupport.idPrefixMap.get(inputClazz)) { - case "50": - DumpProducts d = new DumpProducts(); - d - .run( - isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, GraphResult.class, - eu.dnetlib.dhp.oa.graph.dump.Constants.DUMPTYPE.COMPLETE.getType()); - break; - case "40": - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - projectMap(spark, inputPath, outputPath, inputClazz); - - }); - break; - case "20": - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - organizationMap(spark, inputPath, outputPath, inputClazz); - - }); - break; - case "10": - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - datasourceMap(spark, inputPath, outputPath, inputClazz); - - }); - break; - } - - } - - private static void datasourceMap(SparkSession spark, String inputPath, String outputPath, - Class inputClazz) { - Utils - .readPath(spark, inputPath, inputClazz) - .map( - (MapFunction) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), - Encoders.bean(Datasource.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - } - - private static void projectMap(SparkSession spark, String inputPath, String outputPath, - Class inputClazz) { - Utils - .readPath(spark, inputPath, inputClazz) - .map( - (MapFunction) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p), - Encoders.bean(Project.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - } - - private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) { - Datasource datasource = new Datasource(); - - datasource.setId(d.getId()); - - Optional - .ofNullable(d.getOriginalId()) - .ifPresent( - oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList()))); - - Optional - .ofNullable(d.getPid()) - .ifPresent( - pids -> datasource - .setPid( - pids - .stream() - .map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList()))); - - Optional - .ofNullable(d.getDatasourcetype()) - .ifPresent( - dsType -> datasource - .setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname()))); - - Optional - .ofNullable(d.getOpenairecompatibility()) - .ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname())); - - Optional - .ofNullable(d.getOfficialname()) - .ifPresent(oname -> datasource.setOfficialname(oname.getValue())); - - Optional - .ofNullable(d.getEnglishname()) - .ifPresent(ename -> datasource.setEnglishname(ename.getValue())); - - Optional - .ofNullable(d.getWebsiteurl()) - .ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue())); - - Optional - .ofNullable(d.getLogourl()) - .ifPresent(lurl -> datasource.setLogourl(lurl.getValue())); - - Optional - .ofNullable(d.getDateofvalidation()) - .ifPresent(dval -> datasource.setDateofvalidation(dval.getValue())); - - Optional - 
.ofNullable(d.getDescription()) - .ifPresent(dex -> datasource.setDescription(dex.getValue())); - - Optional - .ofNullable(d.getSubjects()) - .ifPresent( - sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList()))); - - Optional - .ofNullable(d.getOdpolicies()) - .ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue()))); - - Optional - .ofNullable(d.getOdlanguages()) - .ifPresent( - langs -> datasource - .setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList()))); - - Optional - .ofNullable(d.getOdcontenttypes()) - .ifPresent( - ctypes -> datasource - .setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList()))); - - Optional - .ofNullable(d.getReleasestartdate()) - .ifPresent(rd -> datasource.setReleasestartdate(rd.getValue())); - - Optional - .ofNullable(d.getReleaseenddate()) - .ifPresent(ed -> datasource.setReleaseenddate(ed.getValue())); - - Optional - .ofNullable(d.getMissionstatementurl()) - .ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue())); - - Optional - .ofNullable(d.getDatabaseaccesstype()) - .ifPresent(ar -> datasource.setAccessrights(ar.getValue())); - - Optional - .ofNullable(d.getDatauploadtype()) - .ifPresent(dut -> datasource.setUploadrights(dut.getValue())); - - Optional - .ofNullable(d.getDatabaseaccessrestriction()) - .ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue())); - - Optional - .ofNullable(d.getDatauploadrestriction()) - .ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue())); - - Optional - .ofNullable(d.getVersioning()) - .ifPresent(v -> datasource.setVersioning(v.getValue())); - - Optional - .ofNullable(d.getCitationguidelineurl()) - .ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue())); - - Optional - .ofNullable(d.getPidsystems()) - .ifPresent(ps -> datasource.setPidsystems(ps.getValue())); - - Optional - .ofNullable(d.getCertificates()) - .ifPresent(c -> datasource.setCertificates(c.getValue())); - - Optional - .ofNullable(d.getPolicies()) - .ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList()))); - - Optional - .ofNullable(d.getJournal()) - .ifPresent(j -> datasource.setJournal(getContainer(j))); - - return datasource; - - } - - private static Container getContainer(Journal j) { - Container c = new Container(); - - Optional - .ofNullable(j.getName()) - .ifPresent(n -> c.setName(n)); - - Optional - .ofNullable(j.getIssnPrinted()) - .ifPresent(issnp -> c.setIssnPrinted(issnp)); - - Optional - .ofNullable(j.getIssnOnline()) - .ifPresent(issno -> c.setIssnOnline(issno)); - - Optional - .ofNullable(j.getIssnLinking()) - .ifPresent(isnl -> c.setIssnLinking(isnl)); - - Optional - .ofNullable(j.getEp()) - .ifPresent(ep -> c.setEp(ep)); - - Optional - .ofNullable(j.getIss()) - .ifPresent(iss -> c.setIss(iss)); - - Optional - .ofNullable(j.getSp()) - .ifPresent(sp -> c.setSp(sp)); - - Optional - .ofNullable(j.getVol()) - .ifPresent(vol -> c.setVol(vol)); - - Optional - .ofNullable(j.getEdition()) - .ifPresent(edition -> c.setEdition(edition)); - - Optional - .ofNullable(j.getConferencedate()) - .ifPresent(cdate -> c.setConferencedate(cdate)); - - Optional - .ofNullable(j.getConferenceplace()) - .ifPresent(cplace -> c.setConferenceplace(cplace)); - - return c; - } - - private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException { - Project project = new Project(); - - Optional - 
.ofNullable(p.getId()) - .ifPresent(id -> project.setId(id)); - - Optional - .ofNullable(p.getWebsiteurl()) - .ifPresent(w -> project.setWebsiteurl(w.getValue())); - - Optional - .ofNullable(p.getCode()) - .ifPresent(code -> project.setCode(code.getValue())); - - Optional - .ofNullable(p.getAcronym()) - .ifPresent(acronynim -> project.setAcronym(acronynim.getValue())); - - Optional - .ofNullable(p.getTitle()) - .ifPresent(title -> project.setTitle(title.getValue())); - - Optional - .ofNullable(p.getStartdate()) - .ifPresent(sdate -> project.setStartdate(sdate.getValue())); - - Optional - .ofNullable(p.getEnddate()) - .ifPresent(edate -> project.setEnddate(edate.getValue())); - - Optional - .ofNullable(p.getCallidentifier()) - .ifPresent(cide -> project.setCallidentifier(cide.getValue())); - - Optional - .ofNullable(p.getKeywords()) - .ifPresent(key -> project.setKeywords(key.getValue())); - - Optional> omandate = Optional.ofNullable(p.getOamandatepublications()); - Optional> oecsc39 = Optional.ofNullable(p.getEcsc39()); - boolean mandate = false; - if (omandate.isPresent()) { - if (omandate.get().getValue().equals("true")) { - mandate = true; - } - } - if (oecsc39.isPresent()) { - if (oecsc39.get().getValue().equals("true")) { - mandate = true; - } - } - - project.setOpenaccessmandateforpublications(mandate); - project.setOpenaccessmandatefordataset(false); - - Optional - .ofNullable(p.getEcarticle29_3()) - .ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true"))); - - project - .setSubject( - Optional - .ofNullable(p.getSubjects()) - .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList())) - .orElse(new ArrayList<>())); - - Optional - .ofNullable(p.getSummary()) - .ifPresent(summary -> project.setSummary(summary.getValue())); - - Optional ofundedamount = Optional.ofNullable(p.getFundedamount()); - Optional> ocurrency = Optional.ofNullable(p.getCurrency()); - Optional ototalcost = Optional.ofNullable(p.getTotalcost()); - - if (ocurrency.isPresent()) { - if (ofundedamount.isPresent()) { - if (ototalcost.isPresent()) { - project - .setGranted( - Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get())); - } else { - project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get())); - } - } - } - - project - .setH2020programme( - Optional - .ofNullable(p.getH2020classification()) - .map( - classification -> classification - .stream() - .map( - c -> Programme - .newInstance( - c.getH2020Programme().getCode(), c.getH2020Programme().getDescription())) - .collect(Collectors.toList())) - .orElse(new ArrayList<>())); - - Optional>> ofundTree = Optional - .ofNullable(p.getFundingtree()); - List funList = new ArrayList<>(); - if (ofundTree.isPresent()) { - for (Field fundingtree : ofundTree.get()) { - funList.add(getFunder(fundingtree.getValue())); - } - } - project.setFunding(funList); - - return project; - } - - public static Funder getFunder(String fundingtree) throws DocumentException { - Funder f = new Funder(); - final Document doc; - - doc = new SAXReader().read(new StringReader(fundingtree)); - f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText()); - f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText()); - f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); - // f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText()); - - String id = ""; - 
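
// Aside: a minimal, self-contained sketch of the optional-flag derivation used in
// mapProject above, where two "true"/"false" string fields are ORed into a single
// boolean. The Field class below is a hypothetical stand-in for
// eu.dnetlib.dhp.schema.oaf.Field<String>, so this illustrates the pattern only and
// is not a drop-in replacement.
import java.util.Optional;

public class MandateFlagSketch {
	static class Field {
		private final String value;

		Field(String value) {
			this.value = value;
		}

		String getValue() {
			return value;
		}
	}

	// True when the field is present and its value is exactly "true",
	// mirroring the equals("true") checks in mapProject.
	static boolean isTrue(Optional<Field> f) {
		return f.map(Field::getValue).map("true"::equals).orElse(false);
	}

	public static void main(String[] args) {
		Optional<Field> omandate = Optional.of(new Field("true"));
		Optional<Field> oecsc39 = Optional.empty();
		System.out.println(isTrue(omandate) || isTrue(oecsc39)); // prints: true
	}
}
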
String description = ""; - // List fundings = new ArrayList<>(); - int level = 0; - List nodes = doc.selectNodes("//funding_level_" + level); - while (nodes.size() > 0) { - for (org.dom4j.Node n : nodes) { - - List node = n.selectNodes("./id"); - id = ((org.dom4j.Node) node.get(0)).getText(); - id = id.substring(id.indexOf("::") + 2); - - node = n.selectNodes("./description"); - description += ((Node) node.get(0)).getText() + " - "; - - } - level += 1; - nodes = doc.selectNodes("//funding_level_" + level); - } - - if (!id.equals("")) { - Fundings fundings = new Fundings(); - fundings.setId(id); - fundings.setDescription(description.substring(0, description.length() - 3).trim()); - f.setFunding_stream(fundings); - } - - return f; - - } - - private static void organizationMap(SparkSession spark, String inputPath, String outputPath, - Class inputClazz) { - Utils - .readPath(spark, inputPath, inputClazz) - .map( - (MapFunction) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), - Encoders.bean(Organization.class)) - .filter((FilterFunction) o -> o != null) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - } - - private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization( - eu.dnetlib.dhp.schema.oaf.Organization org) { - if (org.getDataInfo().getDeletedbyinference()) - return null; - Organization organization = new Organization(); - - Optional - .ofNullable(org.getLegalshortname()) - .ifPresent(value -> organization.setLegalshortname(value.getValue())); - - Optional - .ofNullable(org.getLegalname()) - .ifPresent(value -> organization.setLegalname(value.getValue())); - - Optional - .ofNullable(org.getWebsiteurl()) - .ifPresent(value -> organization.setWebsiteurl(value.getValue())); - - Optional - .ofNullable(org.getAlternativeNames()) - .ifPresent( - value -> organization - .setAlternativenames( - value - .stream() - .map(v -> v.getValue()) - .collect(Collectors.toList()))); - - Optional - .ofNullable(org.getCountry()) - .ifPresent( - value -> { - if (!value.getClassid().equals(Constants.UNKNOWN)) { - organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname())); - } - - }); - - Optional - .ofNullable(org.getId()) - .ifPresent(value -> organization.setId(value)); - - Optional - .ofNullable(org.getPid()) - .ifPresent( - value -> organization - .setPid( - value - .stream() - .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList()))); - - return organization; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java deleted file mode 100644 index fa55fd1..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java +++ /dev/null @@ -1,201 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.*; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; - -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.model.Provenance; -import eu.dnetlib.dhp.oa.model.graph.Node; -import eu.dnetlib.dhp.oa.model.graph.RelType; -import 
eu.dnetlib.dhp.oa.model.graph.Relation; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.KeyValue; -import eu.dnetlib.dhp.schema.oaf.Result; - -/** - * Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity. The - * new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context related - * to communities and research initiative/infrastructures. For collectedfrom elements it creates: datasource -> provides - * -> result and result -> isProvidedBy -> datasource For hostedby elements it creates: datasource -> hosts -> result - * and result -> isHostedBy -> datasource For context elements it creates: context <-> isRelatedTo <-> result. Note for - * context: it gets the first provenance in the dataInfo. If more than one is present the others are not dumped - */ -public class Extractor implements Serializable { - - public void run(Boolean isSparkSessionManaged, - String inputPath, - String outputPath, - Class inputClazz, - String communityMapPath) { - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - extractRelationResult( - spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath)); - }); - } - - private void extractRelationResult(SparkSession spark, - String inputPath, - String outputPath, - Class inputClazz, - CommunityMap communityMap) { - - Set hashCodes = new HashSet<>(); - - Utils - .readPath(spark, inputPath, inputClazz) - .flatMap((FlatMapFunction) value -> { - List relationList = new ArrayList<>(); - extractRelationsFromInstance(hashCodes, value, relationList); - Set communities = communityMap.keySet(); - Optional - .ofNullable(value.getContext()) - .ifPresent(contexts -> contexts.forEach(context -> { - String id = context.getId(); - if (id.contains(":")) { - id = id.substring(0, id.indexOf(":")); - } - if (communities.contains(id)) { - String contextId = Utils.getContextId(id); - Provenance provenance = Optional - .ofNullable(context.getDataInfo()) - .map( - dinfo -> Optional - .ofNullable(dinfo.get(0).getProvenanceaction()) - .map( - paction -> Provenance - .newInstance( - paction.getClassid(), - dinfo.get(0).getTrust())) - .orElse(null)) - .orElse(null); - Relation r = getRelation( - value.getId(), contextId, - Constants.RESULT_ENTITY, - Constants.CONTEXT_ENTITY, - ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance); - if (!hashCodes.contains(r.hashCode())) { - relationList - .add(r); - hashCodes.add(r.hashCode()); - } - r = getRelation( - contextId, value.getId(), - Constants.CONTEXT_ENTITY, - Constants.RESULT_ENTITY, - ModelConstants.IS_RELATED_TO, - ModelConstants.RELATIONSHIP, provenance); - if (!hashCodes.contains(r.hashCode())) { - relationList - .add( - r); - hashCodes.add(r.hashCode()); - } - - } - - })); - - return relationList.iterator(); - }, Encoders.bean(Relation.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); - - } - - private void extractRelationsFromInstance(Set hashCodes, R value, - List relationList) { - Optional - .ofNullable(value.getInstance()) - .ifPresent(inst -> inst.forEach(instance -> { - Optional - .ofNullable(instance.getCollectedfrom()) - .ifPresent( - cf -> getRelatioPair( - value, relationList, cf, - ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes)); - Optional - 
.ofNullable(instance.getHostedby()) - .ifPresent( - hb -> getRelatioPair( - value, relationList, hb, - Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes)); - })); - } - - private static void getRelatioPair(R value, List relationList, KeyValue cf, - String resultDatasource, String datasourceResult, - Set hashCodes) { - Provenance provenance = Optional - .ofNullable(cf.getDataInfo()) - .map( - dinfo -> Optional - .ofNullable(dinfo.getProvenanceaction()) - .map( - paction -> Provenance - .newInstance( - paction.getClassname(), - dinfo.getTrust())) - .orElse( - Provenance - .newInstance( - eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED, - eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST))) - .orElse( - Provenance - .newInstance( - eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED, - eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)); - Relation r = getRelation( - value.getId(), - cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY, - resultDatasource, ModelConstants.PROVISION, - provenance); - if (!hashCodes.contains(r.hashCode())) { - relationList - .add(r); - hashCodes.add(r.hashCode()); - } - - r = getRelation( - cf.getKey(), value.getId(), - Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY, - datasourceResult, ModelConstants.PROVISION, - provenance); - - if (!hashCodes.contains(r.hashCode())) { - relationList - .add(r); - hashCodes.add(r.hashCode()); - } - - } - - private static Relation getRelation(String source, String target, String sourceType, String targetType, - String relName, String relType, Provenance provenance) { - Relation r = new Relation(); - r.setSource(Node.newInstance(source, sourceType)); - r.setTarget(Node.newInstance(target, targetType)); - r.setReltype(RelType.newInstance(relName, relType)); - r.setProvenance(provenance); - return r; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/OrganizationMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/OrganizationMap.java deleted file mode 100644 index bf6cdbd..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/OrganizationMap.java +++ /dev/null @@ -1,21 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -public class OrganizationMap extends HashMap> { - - public OrganizationMap() { - super(); - } - - public List get(String key) { - - if (super.get(key) == null) { - return new ArrayList<>(); - } - return super.get(key); - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java deleted file mode 100644 index 07511a9..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ /dev/null @@ -1,99 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.lang3.StringUtils; - -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException; -import eu.dnetlib.dhp.oa.model.Provenance; -import eu.dnetlib.dhp.oa.model.graph.*; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; - -/** - * It processes the ContextInfo to produce a new Context Entity or a set of Relations between the generic - * context entity and the datasources/projects related to the context. 
- */ -public class Process implements Serializable { - - @SuppressWarnings("unchecked") - public static R getEntity(ContextInfo ci) { - try { - ResearchInitiative ri; - if (ci.getType().equals("community")) { - ri = new ResearchCommunity(); - ((ResearchCommunity) ri).setSubject(ci.getSubject()); - ri.setType(Constants.RESEARCH_COMMUNITY); - } else { - ri = new ResearchInitiative(); - ri.setType(Constants.RESEARCH_INFRASTRUCTURE); - } - ri.setId(Utils.getContextId(ci.getId())); - ri.setAcronym(ci.getId()); - - ri.setDescription(ci.getDescription()); - ri.setName(ci.getName()); - if (StringUtils.isNotEmpty(ci.getZenodocommunity())) { - ri.setZenodo_community(Constants.ZENODO_COMMUNITY_PREFIX + ci.getZenodocommunity()); - } - return (R) ri; - - } catch (final Exception e) { - throw new MyRuntimeException(e); - } - } - - public static List getRelation(ContextInfo ci) { - try { - - List relationList = new ArrayList<>(); - ci - .getDatasourceList() - .forEach(ds -> { - - String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2)); - - String contextId = Utils.getContextId(ci.getId()); - relationList - .add( - Relation - .newInstance( - Node - .newInstance( - contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY), - Node.newInstance(ds, nodeType), - RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), - Provenance - .newInstance( - Constants.USER_CLAIM, - Constants.DEFAULT_TRUST))); - - relationList - .add( - Relation - .newInstance( - Node.newInstance(ds, nodeType), - Node - .newInstance( - contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY), - RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), - Provenance - .newInstance( - Constants.USER_CLAIM, - Constants.DEFAULT_TRUST))); - - }); - - return relationList; - - } catch (final Exception e) { - throw new MyRuntimeException(e); - } - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java deleted file mode 100644 index 4fc08e3..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ /dev/null @@ -1,198 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.function.Consumer; - -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; -import org.xml.sax.SAXException; - -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - - private ISLookUpService isLookUp; - private List contextRelationResult; - - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//context/param[./@name = 'status']/text() = 'all' " + - " return " + - "$x//context"; - - private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - "where $x//context[./@type='community' or ./@type = 'ri'] and 
$x//context/param[./@name = 'status']/text() = 'all' return " - + - "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " + - "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " - + - "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)"; - - public void getContextInformation(final Consumer consumer) throws ISLookUpException { - - isLookUp - .quickSearchProfile(XQUERY_ENTITY) - .forEach(c -> { - ContextInfo cinfo = new ContextInfo(); - String[] cSplit = c.split("@@"); - cinfo.setId(cSplit[0]); - cinfo.setName(cSplit[1]); - cinfo.setDescription(cSplit[2]); - if (!cSplit[3].trim().equals("")) { - cinfo.setSubject(Arrays.asList(cSplit[3].split(","))); - } - cinfo.setZenodocommunity(cSplit[4]); - cinfo.setType(cSplit[5]); - consumer.accept(cinfo); - }); - - } - - public List getContextRelationResult() { - return contextRelationResult; - } - - public void setContextRelationResult(List contextRelationResult) { - this.contextRelationResult = contextRelationResult; - } - - public ISLookUpService getIsLookUp() { - return isLookUp; - } - - public void setIsLookUp(ISLookUpService isLookUpService) { - this.isLookUp = isLookUpService; - } - - public void execContextRelationQuery() throws ISLookUpException { - contextRelationResult = isLookUp.quickSearchProfile(XQUERY); - - } - - public void getContextRelation(final Consumer consumer, String category, String prefix) { - - contextRelationResult.forEach(xml -> { - ContextInfo cinfo = new ContextInfo(); - final Document doc; - - try { - final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - doc = reader.read(new StringReader(xml)); - Element root = doc.getRootElement(); - cinfo.setId(root.attributeValue("id")); - - Iterator it = root.elementIterator(); - while (it.hasNext()) { - Element el = it.next(); - if (el.getName().equals("category")) { - String categoryId = el.attributeValue("id"); - categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2); - if (categoryId.equals(category)) { - cinfo.setDatasourceList(getCategoryList(el, prefix)); - } - } - - } - consumer.accept(cinfo); - } catch (DocumentException | SAXException e) { - e.printStackTrace(); - } - - }); - - } - - @NotNull - private List getCategoryList(Element el, String prefix) { - List datasourceList = new ArrayList<>(); - for (Object node : el.selectNodes(".//concept")) { - String oid = getOpenaireId((Node) node, prefix); - if (oid != null) - datasourceList.add(oid); - } - - return datasourceList; - } - - private String getOpenaireId(Node el, String prefix) { - for (Object node : el.selectNodes(".//param")) { - Node n = (Node) node; - if (n.valueOf("./@name").equals("openaireId")) { - return prefix + "|" + n.getText(); - } - } - - return makeOpenaireId(el, prefix); - - } - - private String makeOpenaireId(Node el, String prefix) { - if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) { - return null; - } - String funder = ""; - String grantId = null; - String funding = null; - for (Object node : el.selectNodes(".//param")) { - Node n = (Node) node; - switch (n.valueOf("./@name")) { - case "funding": - funding = n.getText(); - break; - case "funder": - funder = n.getText(); - break; - case "CD_PROJECT_NUMBER": - grantId = n.getText(); - break; - default: - break; - } - } - String nsp = null; - - switch (funder.toLowerCase()) { - case "ec": - if (funding == null) { - return 
null; - } - if (funding.toLowerCase().contains("h2020")) { - nsp = "corda__h2020::"; - } else { - nsp = "corda_______::"; - } - break; - case "tubitak": - nsp = "tubitakf____::"; - break; - case "dfg": - nsp = "dfgf________::"; - break; - default: - StringBuilder bld = new StringBuilder(); - bld.append(funder.toLowerCase()); - for (int i = funder.length(); i < 12; i++) - bld.append("_"); - bld.append("::"); - nsp = bld.toString(); - } - - return prefix + "|" + nsp + DHPUtils.md5(grantId); - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java deleted file mode 100644 index c89d5ca..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java +++ /dev/null @@ -1,122 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; -import eu.dnetlib.dhp.oa.model.graph.Relation; - -/** - * Reads all the entities of the same type (Relation / Results) and saves them in the same folder - */ -public class SparkCollectAndSave implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkCollectAndSave.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkCollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final Boolean aggregateResult = Optional - .ofNullable(parser.get("resultAggregation")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath + "/result"); - run(spark, inputPath, outputPath, aggregateResult); - - }); - - } - - private static void run(SparkSession spark, String inputPath, String outputPath, boolean aggregate) { - if (aggregate) { - Utils - .readPath(spark, inputPath + "/result/publication", GraphResult.class) - .union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class)) - .union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class)) - .union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/result"); - } else { - write( - Utils - .readPath(spark, inputPath + "/result/publication", 
GraphResult.class), - outputPath + "/publication"); - write( - Utils - .readPath(spark, inputPath + "/result/dataset", GraphResult.class), - outputPath + "/dataset"); - write( - Utils - .readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class), - outputPath + "/otheresearchproduct"); - write( - Utils - .readPath(spark, inputPath + "/result/software", GraphResult.class), - outputPath + "/software"); - - } - - Utils - .readPath(spark, inputPath + "/relation/publication", Relation.class) - .union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class)) - .union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class)) - .union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class)) - .union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class)) - .union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class)) - .union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/relation"); - - } - - private static void write(Dataset dataSet, String outputPath) { - dataSet - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java deleted file mode 100644 index 7cc05a8..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java +++ /dev/null @@ -1,54 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.OafEntity; - -/** - * Spark job that fires the dump for the entities - */ -public class SparkDumpEntitiesJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkDumpEntitiesJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - final String communityMapPath = parser.get("communityMapPath"); - - Class inputClazz = (Class) Class.forName(resultClassName); - - DumpGraphEntities dg = new DumpGraphEntities(); - dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath); - - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java deleted file mode 100644 index 1c99505..0000000 --- 
a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ /dev/null @@ -1,135 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Collections; -import java.util.HashSet; -import java.util.Optional; -import java.util.Set; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.Provenance; -import eu.dnetlib.dhp.oa.model.graph.Node; -import eu.dnetlib.dhp.oa.model.graph.RelType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.Relation; - -/** - * Dumps eu.dnetlib.dhp.schema.oaf.Relation in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation - */ -public class SparkDumpRelationJob implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkDumpRelationJob.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkDumpRelationJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - Optional rs = Optional.ofNullable(parser.get("removeSet")); - final Set removeSet = new HashSet<>(); - if (rs.isPresent()) { - Collections.addAll(removeSet, rs.get().split(";")); - } - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - dumpRelation(spark, inputPath, outputPath, removeSet); - - }); - - } - - private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set removeSet) { - Dataset relations = Utils.readPath(spark, inputPath, Relation.class); - relations - .filter((FilterFunction) r -> !removeSet.contains(r.getRelClass())) - .map((MapFunction) relation -> { - eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation(); - relNew - .setSource( - Node - .newInstance( - relation.getSource(), - ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)))); - - relNew - .setTarget( - Node - .newInstance( - relation.getTarget(), - ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)))); - - relNew - .setReltype( - RelType - .newInstance( - relation.getRelClass(), - relation.getSubRelType())); - - Optional odInfo = Optional.ofNullable(relation.getDataInfo()); - if (odInfo.isPresent()) { - DataInfo dInfo = odInfo.get(); - if 
(Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() && - Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { - relNew - .setProvenance( - Provenance - .newInstance( - dInfo.getProvenanceaction().getClassname(), - dInfo.getTrust())); - } - } - if (Boolean.TRUE.equals(relation.getValidated())) { - relNew.setValidated(relation.getValidated()); - relNew.setValidationDate(relation.getValidationDate()); - } - - return relNew; - - }, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); - - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkExtractRelationFromEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkExtractRelationFromEntities.java deleted file mode 100644 index 7f42b4a..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkExtractRelationFromEntities.java +++ /dev/null @@ -1,54 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.Result; - -/** - * Spark job that fires the extraction of relations from entities - */ -public class SparkExtractRelationFromEntities implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkExtractRelationFromEntities.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkExtractRelationFromEntities.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - final String communityMapPath = parser.get("communityMapPath"); - - Class inputClazz = (Class) Class.forName(resultClassName); - - Extractor extractor = new Extractor(); - extractor.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath); - - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java deleted file mode 100644 index d0ae79d..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ /dev/null @@ -1,179 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.function.Consumer; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; 
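
// Aside: a minimal sketch of SparkDumpRelationJob's removeSet handling shown above,
// where an optional semicolon-separated parameter becomes a Set and relations whose
// relClass is in it are filtered out. Plain Java collections stand in for the Spark
// Dataset, and the relClass values below are made up for the example.
import java.util.*;
import java.util.stream.Collectors;

public class RemoveSetSketch {
	public static void main(String[] args) {
		// In the job this value comes from parser.get("removeSet"); it is hard-coded here.
		Optional<String> rs = Optional.of("merges;isMergedIn");
		Set<String> removeSet = new HashSet<>();
		rs.ifPresent(s -> Collections.addAll(removeSet, s.split(";")));

		List<String> relClasses = Arrays.asList("merges", "cites", "isMergedIn", "references");
		List<String> kept = relClasses
			.stream()
			.filter(relClass -> !removeSet.contains(relClass))
			.collect(Collectors.toList());
		System.out.println(kept); // prints: [cites, references]
	}
}
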
-import org.apache.spark.sql.*; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gson.Gson; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.model.Provenance; -import eu.dnetlib.dhp.oa.model.graph.Node; -import eu.dnetlib.dhp.oa.model.graph.RelType; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Relation; - -/** - * Create new Relations between Context Entities and Organizations whose products are associated to the context. It - * produces relation such as: organization <-> isRelatedTo <-> context - */ -public class SparkOrganizationRelation implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkOrganizationRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final OrganizationMap organizationMap = new Gson() - .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class); - final String serializedOrganizationMap = new Gson().toJson(organizationMap); - log.info("organization map : {}", serializedOrganizationMap); - - final String communityMapPath = parser.get("communityMapPath"); - log.info("communityMapPath: {}", communityMapPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - extractRelation(spark, inputPath, organizationMap, outputPath, communityMapPath); - - }); - - } - - private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap, - String outputPath, String communityMapPath) { - - CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); - - Dataset relationDataset = Utils.readPath(spark, inputPath, Relation.class); - - relationDataset.createOrReplaceTempView("relation"); - - List relList = new ArrayList<>(); - - Dataset mergedRelsDataset = spark - .sql( - "SELECT target organizationId, source representativeId " + - "FROM relation " + - "WHERE datainfo.deletedbyinference = false " + - "AND relclass = 'merges' " + - "AND substr(source, 1, 2) = '20'") - .as(Encoders.bean(MergedRels.class)); - - mergedRelsDataset.map((MapFunction) mergedRels -> { - if (organizationMap.containsKey(mergedRels.getOrganizationId())) { - return mergedRels; - } - return null; - }, Encoders.bean(MergedRels.class)) - .filter(Objects::nonNull) - .collectAsList() - .forEach(getMergedRelsConsumer(organizationMap, relList, communityMap)); - - organizationMap - .keySet() - .forEach( - oId -> organizationMap - .get(oId) - .forEach(community -> { - if 
(communityMap.containsKey(community)) { - addRelations(relList, community, oId); - } - })); - - spark - .createDataset(relList, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - } - - @NotNull - private static Consumer getMergedRelsConsumer(OrganizationMap organizationMap, - List relList, CommunityMap communityMap) { - return mergedRels -> { - String oId = mergedRels.getOrganizationId(); - organizationMap - .get(oId) - .forEach(community -> { - if (communityMap.containsKey(community)) { - addRelations(relList, community, mergedRels.getRepresentativeId()); - } - - }); - organizationMap.remove(oId); - }; - } - - private static void addRelations(List relList, String community, - String organization) { - - String id = Utils.getContextId(community); - log.info("create relation for organization: {}", organization); - relList - .add( - eu.dnetlib.dhp.oa.model.graph.Relation - .newInstance( - Node.newInstance(id, Constants.CONTEXT_ENTITY), - Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))), - RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), - Provenance - .newInstance( - eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM, - eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST))); - - relList - .add( - eu.dnetlib.dhp.oa.model.graph.Relation - .newInstance( - Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))), - Node.newInstance(id, Constants.CONTEXT_ENTITY), - RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), - Provenance - .newInstance( - eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM, - eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST))); - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java deleted file mode 100644 index 1e5675e..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkSelectValidRelationsJob.java +++ /dev/null @@ -1,136 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.schema.oaf.*; - -/** - * It selects the valid relations among those present in the graph. One relation is valid if it is not deletedbyinference - * and if both the source and the target node are present in the graph and are not deleted by inference nor invisible. 
- * To check this, a view of the ids of all the entities in the graph is created, and the relations for which a join exists - * with this view for both the source and the target are selected - */ - -public class SparkSelectValidRelationsJob implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkSelectValidRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - selectValidRelation(spark, inputPath, outputPath); - - }); - - } - - private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) { - Dataset relation = Utils.readPath(spark, inputPath + "/relation", Relation.class); - Dataset publication = Utils.readPath(spark, inputPath + "/publication", Publication.class); - Dataset dataset = Utils - .readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class); - Dataset software = Utils.readPath(spark, inputPath + "/software", Software.class); - Dataset other = Utils - .readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class); - Dataset organization = Utils.readPath(spark, inputPath + "/organization", Organization.class); - Dataset project = Utils.readPath(spark, inputPath + "/project", Project.class); - Dataset datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class); - - relation.createOrReplaceTempView("relation"); - publication.createOrReplaceTempView("publication"); - dataset.createOrReplaceTempView("dataset"); - other.createOrReplaceTempView("other"); - software.createOrReplaceTempView("software"); - organization.createOrReplaceTempView("organization"); - project.createOrReplaceTempView("project"); - datasource.createOrReplaceTempView("datasource"); - - spark - .sql( - "SELECT id " + - "FROM publication " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM dataset " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM other " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM software " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM organization " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM project " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " + - "UNION ALL " + - "SELECT id " + - "FROM datasource " + - "WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false ") - 
.createOrReplaceTempView("identifiers"); - - spark - .sql( - "SELECT relation.* " + - "FROM relation " + - "JOIN identifiers i1 " + - "ON source = i1.id " + - "JOIN identifiers i2 " + - "ON target = i2.id " + - "WHERE datainfo.deletedbyinference = false") - .as(Encoders.bean(Relation.class)) - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath); - - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/CommunityMap.java similarity index 75% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunityMap.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/CommunityMap.java index d459063..5db2943 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/CommunityMap.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump.community; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.Serializable; import java.util.HashMap; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/Constants.java similarity index 94% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/Constants.java index 57708a7..f389ad5 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/Constants.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump.complete; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.Serializable; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java index 6f785d2..55bed6d 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendEoscResultWithOrganizationStep2.java @@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; -import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -20,13 +19,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.EoscResult; import eu.dnetlib.dhp.eosc.model.OrganizationPid; -import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; /** @@ -74,8 +71,8 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { private static void addOrganizations(SparkSession spark, String inputPath, String outputPath, String resultPath) { - Dataset results = Utils - .readPath(spark, resultPath, EoscResult.class); + Dataset results = Utils + .readPath(spark, resultPath, Result.class); Dataset relations = Utils .readPath(spark, inputPath + "/relation", Relation.class) @@ -126,14 +123,14 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { results 
.joinWith(resultOrganization, results.col("id").equalTo(resultOrganization.col("resultId")), "left") .groupByKey( - (MapFunction, String>) t2 -> t2._1().getId(), Encoders.STRING()) + (MapFunction, String>) t2 -> t2._1().getId(), Encoders.STRING()) .mapGroups( - (MapGroupsFunction, EoscResult>) (s, it) -> { - Tuple2 first = it.next(); + (MapGroupsFunction, Result>) (s, it) -> { + Tuple2 first = it.next(); if (first._2() == null) { return first._1(); } - EoscResult ret = first._1(); + Result ret = first._1(); List affiliation = new ArrayList<>(); Set alreadyInsertedAffiliations = new HashSet<>(); affiliation.add(first._2().getAffiliation()); @@ -147,7 +144,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable { }); ret.setAffiliation(affiliation); return ret; - }, Encoders.bean(EoscResult.class)) + }, Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendWithUsageCounts.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendWithUsageCounts.java index 5d0cd72..2d7e0b1 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendWithUsageCounts.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ExtendWithUsageCounts.java @@ -22,12 +22,10 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.EoscResult; import eu.dnetlib.dhp.eosc.model.Indicator; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.eosc.model.UsageCounts; -import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.action.AtomicAction; -import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; /** @@ -76,20 +74,20 @@ public class ExtendWithUsageCounts implements Serializable { private static void addIndicators(SparkSession spark, String actionSetPath, String outputPath, String resultPath) { final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - JavaRDD resultsWithIndicatorsRdd = sc + JavaRDD resultsWithIndicatorsRdd = sc .sequenceFile(actionSetPath, Text.class, Text.class) .map(value -> new ObjectMapper().readValue(value._2().toString(), AtomicAction.class)) - .map(aa -> ((Result) aa.getPayload())); + .map(aa -> ((eu.dnetlib.dhp.schema.oaf.Result) aa.getPayload())); - Dataset resultWithIndicators = spark - .createDataset(resultsWithIndicatorsRdd.rdd(), Encoders.bean(Result.class)); + Dataset resultWithIndicators = spark + .createDataset(resultsWithIndicatorsRdd.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.oaf.Result.class)); - Dataset result = Utils - .readPath(spark, resultPath, EoscResult.class); + Dataset result = Utils + .readPath(spark, resultPath, Result.class); result .joinWith(resultWithIndicators, result.col("id").equalTo(resultWithIndicators.col("id")), "left") - .map((MapFunction, EoscResult>) t2 -> { + .map((MapFunction, Result>) t2 -> { if (Optional.ofNullable(t2._2()).isPresent()) { Indicator indicator = new Indicator(); UsageCounts uc = new UsageCounts(); @@ -105,7 +103,7 @@ public class ExtendWithUsageCounts implements Serializable { } return t2._1(); - }, Encoders.bean(EoscResult.class)) + }, Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/QueryInformationSystem.java similarity index 
95% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/QueryInformationSystem.java index b972de6..156e1f8 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/QueryInformationSystem.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.StringReader; import java.util.List; @@ -10,7 +10,6 @@ import org.dom4j.Element; import org.dom4j.io.SAXReader; import org.xml.sax.SAXException; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/ResultProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultProject.java similarity index 82% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/ResultProject.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultProject.java index 599393a..4035d17 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/ResultProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/ResultProject.java @@ -1,10 +1,10 @@ -package eu.dnetlib.dhp.oa.graph.dump.community; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.Serializable; import java.util.List; -import eu.dnetlib.dhp.oa.model.community.Project; +import eu.dnetlib.dhp.eosc.model.Project; public class ResultProject implements Serializable { private String resultId; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java similarity index 98% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java index 51f1852..88be031 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.BufferedWriter; import java.io.IOException; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java index ee9e1e5..a6eab77 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java @@ -17,12 +17,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.EoscResult; -import eu.dnetlib.dhp.oa.graph.dump.Constants; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.schema.oaf.Result; /** * @author miriam.baglioni @@ -59,7 +55,8 @@ public class SelectEoscResultsJobStep1 implements Serializable { final String resultClassName = parser.get("resultTableName"); log.info("resultTableName: {}", resultClassName); - Class inputClazz = (Class) Class.forName(resultClassName); + Class 
inputClazz = (Class) Class + .forName(resultClassName); SparkConf conf = new SparkConf(); @@ -72,7 +69,8 @@ public class SelectEoscResultsJobStep1 implements Serializable { }); } - private static void selectEoscResults(SparkSession spark, String inputPath, String outputPath, + private static void selectEoscResults(SparkSession spark, + String inputPath, String outputPath, Class inputClazz, String communityMapPath) { CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); Utils @@ -80,18 +78,11 @@ public class SelectEoscResultsJobStep1 implements Serializable { .filter( (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible() && r.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) -// || -// Optional -// .ofNullable(r.getSubject()) -// .map( -// s -> s -// .stream() -// .anyMatch(sbj -> sbj.getValue().equalsIgnoreCase("EOSC::RO-crate"))) -// .orElse(false))) + .map( - (MapFunction) r -> (EoscResult) ResultMapper - .map(r, communityMap, Constants.DUMPTYPE.EOSC.getType()), - Encoders.bean(EoscResult.class)) + (MapFunction) r -> (Result) ResultMapper + .map(r, communityMap), + Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpRelation.java index fd1a5fc..607b014 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkDumpRelation.java @@ -4,14 +4,10 @@ package eu.dnetlib.dhp.oa.graph.dump.eosc; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.Collections; -import java.util.HashSet; import java.util.Optional; -import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -21,11 +17,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.Provenance; -import eu.dnetlib.dhp.oa.model.graph.Node; -import eu.dnetlib.dhp.oa.model.graph.RelType; -import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.eosc.model.Provenance; +import eu.dnetlib.dhp.eosc.model.RelType; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Relation; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java new file mode 100644 index 0000000..088dbed --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkExtendResultWithRelation.java @@ -0,0 +1,96 @@ + +package eu.dnetlib.dhp.oa.graph.dump.eosc; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Optional; + +import javax.rmi.CORBA.Util; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.MapGroupsFunction; +import org.apache.spark.sql.*; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.eosc.model.Relation; +import eu.dnetlib.dhp.eosc.model.Result; +import scala.Tuple2; + +/** + * @author miriam.baglioni + * @Date 02/02/23 + */ +public class SparkExtendResultWithRelation implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkExtendResultWithRelation.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + SparkExtendResultWithRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String resultPath = parser.get("resultPath"); + log.info("resultPath: {}", resultPath); + + final String relationPath = parser.get("relationPath"); + log.info("relationPath: {}", relationPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + extendResultWithRelation(spark, resultPath, relationPath, outputPath); + + }); + + } + + private static void extendResultWithRelation(SparkSession spark, String resultPath, String relationPath, + String outputPath) { + Dataset relationDataset = Utils.readPath(spark, relationPath, Relation.class); + Dataset resultDataset = Utils.readPath(spark, resultPath, Result.class); + + resultDataset + .joinWith(relationDataset, resultDataset.col("id").equalTo(relationDataset.col("source")), "left") + .groupByKey((MapFunction, String>) t2 -> t2._1().getId(), Encoders.STRING()) + .mapGroups((MapGroupsFunction, Result>) (k, it) -> { + Tuple2 first = it.next(); + Result r = first._1(); + if (Optional.ofNullable(first._2()).isPresent()) { + if (r.getRelations() == null) { + r.setRelations(new ArrayList<>()); + } + r.getRelations().add(first._2()); + it.forEachRemaining(t2 -> r.getRelations().add(t2._2())); + } + return r; + }, Encoders.bean(Result.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath); + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java similarity index 95% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java index 079e708..3e75f31 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkPrepareResultProject.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump.community; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -26,11 +26,10 @@ import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import 
eu.dnetlib.dhp.oa.model.Provenance; -import eu.dnetlib.dhp.oa.model.community.Funder; -import eu.dnetlib.dhp.oa.model.community.Project; -import eu.dnetlib.dhp.oa.model.community.Validated; +import eu.dnetlib.dhp.eosc.model.Funder; +import eu.dnetlib.dhp.eosc.model.Project; +import eu.dnetlib.dhp.eosc.model.Provenance; +import eu.dnetlib.dhp.eosc.model.Validated; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Field; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java index b4edf47..e820393 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkSelectRelation.java @@ -9,9 +9,7 @@ import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -20,9 +18,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.EoscResult; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.eosc.model.Provenance; +import eu.dnetlib.dhp.eosc.model.RelType; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.schema.oaf.*; import scala.Tuple2; @@ -81,24 +79,24 @@ public class SparkSelectRelation implements Serializable { && !removeSet.contains(r.getRelClass())); Dataset resultIds = Utils - .readPath(spark, outputPath + "/publication", EoscResult.class) + .readPath(spark, outputPath + "/publication", Result.class) - .map((MapFunction) p -> p.getId(), Encoders.STRING()) + .map((MapFunction) p -> p.getId(), Encoders.STRING()) .union( Utils - .readPath(spark, outputPath + "/dataset", EoscResult.class) + .readPath(spark, outputPath + "/dataset", Result.class) - .map((MapFunction) d -> d.getId(), Encoders.STRING())) + .map((MapFunction) d -> d.getId(), Encoders.STRING())) .union( Utils - .readPath(spark, outputPath + "/software", EoscResult.class) + .readPath(spark, outputPath + "/software", Result.class) - .map((MapFunction) s -> s.getId(), Encoders.STRING())) + .map((MapFunction) s -> s.getId(), Encoders.STRING())) .union( Utils - .readPath(spark, outputPath + "/otherresearchproduct", EoscResult.class) + .readPath(spark, outputPath + "/otherresearchproduct", Result.class) - .map((MapFunction) o -> o.getId(), Encoders.STRING())); + .map((MapFunction) o -> o.getId(), Encoders.STRING())); // select result -> result relations Dataset relResultResult = relation @@ -108,6 +106,42 @@ public class SparkSelectRelation implements Serializable { relResultResult .joinWith(resultIds, relResultResult.col("target").equalTo(resultIds.col("value"))) .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)) + .map((MapFunction) rel -> { + eu.dnetlib.dhp.eosc.model.Relation relNew = new eu.dnetlib.dhp.eosc.model.Relation(); + relNew + .setSource( + + rel.getSource()); + + relNew + .setTarget( + + rel.getTarget()); + + relNew + .setReltype( + RelType + .newInstance( + 
rel.getRelClass(), + rel.getSubRelType())); + + Optional odInfo = Optional.ofNullable(rel.getDataInfo()); + if (odInfo.isPresent()) { + DataInfo dInfo = odInfo.get(); + if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() && + Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { + relNew + .setProvenance( + Provenance + .newInstance( + dInfo.getProvenanceaction().getClassname(), + dInfo.getTrust())); + } + } + + return relNew; + + }, Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class)) .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java similarity index 72% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java index e50d011..3caa06b 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SparkUpdateProjectInfo.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump.community; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; @@ -19,12 +19,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.EoscResult; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.Result; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import scala.Tuple2; public class SparkUpdateProjectInfo implements Serializable { @@ -64,43 +60,33 @@ public class SparkUpdateProjectInfo implements Serializable { SparkConf conf = new SparkConf(); - Class clazz; - - if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - clazz = (Class) Class.forName("eu.dnetlib.dhp.eosc.model.EoscResult"); - } else { - clazz = (Class) Class - .forName("eu.dnetlib.dhp.oa.model.community.CommunityResult"); - } - runWithSparkSession( conf, isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, preparedInfoPath, clazz); + extend(spark, inputPath, outputPath, preparedInfoPath); }); } - private static void extend( + private static void extend( SparkSession spark, String inputPath, String outputPath, - String preparedInfoPath, - Class clazz) { + String preparedInfoPath) { - Dataset result = Utils.readPath(spark, inputPath, clazz); + Dataset result = Utils.readPath(spark, inputPath, Result.class); Dataset resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class); result .joinWith( resultProject, result.col("id").equalTo(resultProject.col("resultId")), "left") - .map((MapFunction, E>) value -> { - E r = value._1(); + .map((MapFunction, Result>) value -> { + Result r = value._1(); Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; - }, Encoders.bean(clazz)) + }, Encoders.bean(Result.class)) .write() .option("compression", "gzip") .mode(SaveMode.Append) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/Utils.java similarity 
index 93% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/Utils.java index 8e75e9d..df9fa6a 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/Utils.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump; +package eu.dnetlib.dhp.oa.graph.dump.eosc; import java.io.BufferedReader; import java.io.IOException; @@ -16,8 +16,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.complete.Constants; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java deleted file mode 100644 index a5b9575..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ /dev/null @@ -1,128 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.funderresults; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.oa.model.community.Funder; -import eu.dnetlib.dhp.oa.model.community.Project; - -/** - * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC - * for the EC it specifies also the fundingStream (FP7 or H2020) - */ -public class SparkDumpFunderResults implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkDumpFunderResults.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkDumpFunderResults.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - writeResultProjectList(spark, inputPath, outputPath); - }); - } - - private static void writeResultProjectList(SparkSession spark, String inputPath, String 
outputPath) { - Dataset result = Utils - .readPath(spark, inputPath + "/publication", CommunityResult.class) - .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class)) - .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); - log.info("Number of result {}", result.count()); - Dataset tmp = result - .flatMap((FlatMapFunction) cr -> cr.getProjects().stream().map(p -> { - return getFunderName(p); - }).collect(Collectors.toList()).iterator(), Encoders.STRING()) - .distinct(); - List funderList = tmp.collectAsList(); - funderList.forEach(funder -> { - dumpResults(funder, result, outputPath); - }); - } - - @NotNull - private static String getFunderName(Project p) { - Optional ofunder = Optional.ofNullable(p.getFunder()); - if (ofunder.isPresent()) { - String fName = ofunder.get().getShortName(); - if (fName.equalsIgnoreCase("ec")) { - fName += "_" + ofunder.get().getFundingStream(); - } - return fName; - } else { - String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase(); - if (fName.equalsIgnoreCase("ec")) { - if (p.getId().contains("h2020")) { - fName += "_H2020"; - } else { - fName += "_FP7"; - } - } else if (fName.equalsIgnoreCase("conicytf")) { - fName = "CONICYT"; - } else if (fName.equalsIgnoreCase("dfgf")) { - fName = "DFG"; - } else if (fName.equalsIgnoreCase("tubitakf")) { - fName = "TUBITAK"; - } else if (fName.equalsIgnoreCase("euenvagency")) { - fName = "EEA"; - } - return fName; - } - } - - private static void dumpResults(String funder, Dataset results, String outputPath) { - results.map((MapFunction) r -> { - if (!Optional.ofNullable(r.getProjects()).isPresent()) { - return null; - } - for (Project p : r.getProjects()) { - String fName = getFunderName(p); - if (fName.equalsIgnoreCase(funder)) { - return r; - } - } - return null; - }, Encoders.bean(CommunityResult.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "/" + funder); - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java deleted file mode 100644 index 476ca09..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ /dev/null @@ -1,111 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.funderresults; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.schema.oaf.Result; -import scala.Tuple2; - 
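
For reference, this is the folder-naming rule the removed SparkDumpFunderResults job applied when splitting the dump by funder: one bucket per funder short name, with only the EC split further by funding stream (FP7/H2020). A minimal recap sketch distilled from the deleted getFunderName above, not part of the patch; the method name is illustrative and plain strings stand in for the removed Project/Funder beans:

final class FunderBucketRecap {
	// Derives the output folder name for a project, as the deleted job did.
	static String bucket(String shortName, String fundingStream, String projectId) {
		if (shortName != null) {
			// funder metadata present: only the EC is split further, per funding stream
			return shortName.equalsIgnoreCase("ec") ? shortName + "_" + fundingStream : shortName;
		}
		// no funder metadata: fall back to the namespace prefix of the project id
		String fName = projectId.substring(3, projectId.indexOf("_")).toUpperCase();
		if (fName.equalsIgnoreCase("ec"))
			return projectId.contains("h2020") ? fName + "_H2020" : fName + "_FP7";
		if (fName.equalsIgnoreCase("conicytf"))
			return "CONICYT";
		if (fName.equalsIgnoreCase("dfgf"))
			return "DFG";
		if (fName.equalsIgnoreCase("tubitakf"))
			return "TUBITAK";
		if (fName.equalsIgnoreCase("euenvagency"))
			return "EEA";
		return fName;
	}
}
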
-/** - * Selects the results linked to projects. Only for these results the dump will be performed. - * The code to perform the dump and to expend the dumped results with the information related to projects - * is the one used for the dump of the community products - */ -public class SparkResultLinkedToProject implements Serializable { - - private static final Logger log = LoggerFactory.getLogger(SparkResultLinkedToProject.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SparkResultLinkedToProject.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - final String resultProjectsPath = parser.get("graphPath"); - log.info("graphPath: {}", resultProjectsPath); - - String communityMapPath = parser.get("communityMapPath"); - - @SuppressWarnings("unchecked") - Class inputClazz = (Class) Class.forName(resultClassName); - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - writeResultsLinkedToProjects( - communityMapPath, spark, inputClazz, inputPath, outputPath, resultProjectsPath); - }); - } - - private static void writeResultsLinkedToProjects(String communityMapPath, SparkSession spark, - Class inputClazz, - String inputPath, String outputPath, String resultProjectsPath) { - - Dataset results = Utils - .readPath(spark, inputPath, inputClazz) - .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - !r.getDataInfo().getInvisible()); - Dataset resultProjectDataset = Utils - .readPath(spark, resultProjectsPath, ResultProject.class); - CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); - results - .joinWith(resultProjectDataset, results.col("id").equalTo(resultProjectDataset.col("resultId"))) - .map((MapFunction, CommunityResult>) t2 -> { - CommunityResult cr = (CommunityResult) ResultMapper - .map( - t2._1(), - communityMap, Constants.DUMPTYPE.FUNDER.getType()); - cr.setProjects(t2._2().getProjectsList()); - return cr; - }, Encoders.bean(CommunityResult.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java deleted file mode 100644 index fb94eba..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectsSubsetSparkJob.java +++ /dev/null @@ -1,82 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.projectssubset; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Objects; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import 
org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.graph.Project; -import scala.Tuple2; - -public class ProjectsSubsetSparkJob implements Serializable { - private static final Logger log = LoggerFactory.getLogger(ProjectsSubsetSparkJob.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - ProjectsSubsetSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - final String projectListPath = parser.get("projectListPath"); - log.info("projectListPath: {}", projectListPath); - SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - getNewProjectList(spark, inputPath, outputPath, projectListPath); - }); - } - - private static void getNewProjectList(SparkSession spark, String inputPath, String outputPath, - String projectListPath) { - Dataset projectList = spark.read().textFile(projectListPath); - Dataset projects; - projects = Utils.readPath(spark, inputPath, Project.class); - projects - .joinWith(projectList, projects.col("id").equalTo(projectList.col("value")), "left") - .map((MapFunction, Project>) t2 -> { - if (Optional.ofNullable(t2._2()).isPresent()) - return null; - return t2._1(); - }, Encoders.bean(Project.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - Utils - .readPath(spark, outputPath, Project.class) - .map((MapFunction) p -> p.getId(), Encoders.STRING()) - .write() - .mode(SaveMode.Append) - .option("compression", "gzip") - .text(projectListPath); - } -} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml index 452c3c0..20affcb 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sourcePath @@ -92,7 +92,7 @@ - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap + eu.dnetlib.dhp.oa.graph.dump.eosc.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} --isLookUpUrl${isLookUpUrl} @@ -156,7 +156,7 @@ --resultPath${workingDir}/dump/publication --outputPath${workingDir}/dump/publicationextendedaffiliation - + @@ -231,7 +231,7 @@ --resultPath${workingDir}/dump/dataset --outputPath${workingDir}/dump/datasetextendedaffiliation - + @@ -306,7 +306,7 @@ --resultPath${workingDir}/dump/otherresearchproduct 
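
An aside on the other funder-era job deleted above, before continuing with the workflow changes: SparkResultLinkedToProject kept only the results with at least one project link and attached the prepared project list to each survivor. A recap sketch of its core inner join, not part of the patch, with simplified stand-ins for the Spark datasets and model beans:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

final class LinkedToProjectRecap {

	// Simplified stand-in for the dumped result bean.
	static class DumpedResult {
		String id;
		List<String> projects; // project identifiers only, for brevity
	}

	// Mirrors the deleted inner join on resultId: results without an entry in
	// the prepared (resultId -> projects) map are dropped, the rest get their
	// project list attached.
	static List<DumpedResult> attachProjects(List<DumpedResult> results,
		Map<String, List<String>> projectsByResultId) {
		return results
			.stream()
			.filter(r -> projectsByResultId.containsKey(r.id))
			.map(r -> {
				r.projects = projectsByResultId.get(r.id);
				return r;
			})
			.collect(Collectors.toList());
	}
}
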
--outputPath${workingDir}/dump/otherresearchproductextendedaffiliation - + @@ -381,7 +381,7 @@ --resultPath${workingDir}/dump/software --outputPath${workingDir}/dump/softwareextendedaffiliation - + @@ -409,7 +409,6 @@ - @@ -417,7 +416,7 @@ yarn cluster Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkPrepareResultProject dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -448,7 +447,7 @@ yarn cluster Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/publicationextended - --outputPath${workingDir}/tar/publication + --sourcePath${workingDir}/dump/publicationextendedaffiliation + --outputPath${workingDir}/dump/publicationextendedproject --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc @@ -474,7 +473,7 @@ yarn cluster Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/datasetextended - --outputPath${workingDir}/tar/dataset + --sourcePath${workingDir}/dump/datasetextendedaffiliation + --outputPath${workingDir}/dump/datasetextendedproject --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc @@ -500,7 +499,7 @@ yarn cluster Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/otherresearchproductextended - --outputPath${workingDir}/tar/otherresearchproduct + --sourcePath${workingDir}/dump/otherresearchproductextendedaffiliation + --outputPath${workingDir}/dump/otherresearchproductextendedproject --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc @@ -526,7 +525,7 @@ yarn cluster Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/softwareextended - --outputPath${workingDir}/tar/software + --sourcePath${workingDir}/dump/softwareextendedaffiliation + + --outputPath${workingDir}/dump/softwareextendedproject --preparedInfoPath${workingDir}/preparedInfo --dumpTypeeosc @@ -570,15 +570,23 @@ --outputPath${workingDir}/dump --removeSet${removeSet} - + - + + + + + + + + + yarn cluster - Select the set of relations between the results in the selected set - eu.dnetlib.dhp.oa.graph.dump.eosc.SparkDumpRelation + Extends the publication by adding the relations of which the publication is the source node
+ eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -590,12 +598,91 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --sourcePath${workingDir}/dump/relation - --outputPath${workingDir}/tar/relation + --relationPath${workingDir}/dump/relation + --resultPath${workingDir}/dump/publicationextendedproject + --outputPath${workingDir}/tar/publication - + + + + + yarn + cluster + Extends the dataset by adding the relations of which the dataset is the source node + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --relationPath${workingDir}/dump/relation + --resultPath${workingDir}/dump/datasetextendedproject + --outputPath${workingDir}/tar/dataset + + + + + + + + yarn + cluster + Extends the software by adding the relations of which the software is the source node + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --relationPath${workingDir}/dump/relation + --resultPath${workingDir}/dump/softwareextendedproject + --outputPath${workingDir}/tar/software + + + + + + + + yarn + cluster + Extends the ORP by adding the relations of which the ORP is the source node + eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --relationPath${workingDir}/dump/relation + --resultPath${workingDir}/dump/otherresearchproductextendedproject + --outputPath${workingDir}/tar/otherresearchproduct + + + + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json new file mode 100644 index 0000000..e72b629 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json @@ -0,0 +1,24 @@ +[ + + { + "paramName":"rp", + "paramLongName":"resultPath", + "paramDescription": "the path where the results are stored", + "paramRequired": true + 
}, + { + "paramName": "relP", + "paramLongName": "relationPath", + "paramDescription": "the path where the relations are stored", + "paramRequired": true + }, + { + "paramName": "op", + "paramLongName": "outputPath", + "paramDescription": "the path where to store the results", + "paramRequired": true + } +] + + + diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 98c6a52..cef9eca 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -6,13 +6,11 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.*; -import org.apache.commons.collections.map.HashedMap; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; @@ -25,16 +23,10 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; -import eu.dnetlib.dhp.eosc.model.EoscResult; +import eu.dnetlib.dhp.eosc.model.Result; import eu.dnetlib.dhp.eosc.model.Subject; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.eosc.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1; -import eu.dnetlib.dhp.oa.model.Instance; -import eu.dnetlib.dhp.oa.model.OpenAccessRoute; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Software; @@ -147,744 +139,6 @@ public class DumpJobTest { System.out.println(new Gson().toJson(map)); } - @Test - public void testPublicationDump() { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - GraphResult gr = verificationDataset.first(); - - Assertions.assertEquals(6, gr.getAuthor().size()); - Assertions - .assertTrue( - gr - .getAuthor() - .stream() - .anyMatch( - a -> a.getFullname().equals("Nikolaidou,Charitini") && - a.getName().equals("Charitini") && a.getSurname().equals("Nikolaidou") - && a.getRank() == 1 && a.getPid() == null)); 
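
Before the remaining test deletions, a note to make the new wiring concrete: each result type now flows through ${workingDir}/dump/&lt;type&gt; (selection), &lt;type&gt;extendedaffiliation (organization step), &lt;type&gt;extendedproject (project step), and finally ${workingDir}/tar/&lt;type&gt;, where SparkExtendResultWithRelation merges in the relations dumped under ${workingDir}/dump/relation. A hypothetical driver-side launch for the publication branch, matching input_extendwithrelation_parameters.json above; the paths are illustrative, and in the workflow this step actually runs through spark-submit:

public final class ExtendWithRelationExample {

	public static void main(String[] args) throws Exception {
		// Argument names come from input_extendwithrelation_parameters.json;
		// the path layout mirrors the oozie workflow above.
		eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation.main(new String[] {
			"--resultPath", "/tmp/workingDir/dump/publicationextendedproject",
			"--relationPath", "/tmp/workingDir/dump/relation",
			"--outputPath", "/tmp/workingDir/tar/publication"
		});
	}
}
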
- - Assertions - .assertTrue( - gr - .getAuthor() - .stream() - .anyMatch( - a -> a.getFullname().equals("Votsi,Nefta") && - a.getName().equals("Nefta") && a.getSurname().equals("Votsi") - && a.getRank() == 2 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") - && a.getPid().getProvenance() != null)); - - Assertions - .assertTrue( - gr - .getAuthor() - .stream() - .anyMatch( - a -> a.getFullname().equals("Sgardelis,Steanos") && - a.getName().equals("Steanos") && a.getSurname().equals("Sgardelis") - && a.getRank() == 3 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") - && a.getPid().getProvenance() != null)); - - Assertions - .assertTrue( - gr - .getAuthor() - .stream() - .anyMatch( - a -> a.getFullname().equals("Halley,John") && - a.getName().equals("John") && a.getSurname().equals("Halley") - && a.getRank() == 4 && a.getPid() == null)); - - Assertions - .assertTrue( - gr - .getAuthor() - .stream() - .anyMatch( - a -> a.getFullname().equals("Pantis,John") && - a.getName().equals("John") && a.getSurname().equals("Pantis") - && a.getRank() == 5 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") - && a.getPid().getProvenance() != null)); - - Assertions - .assertTrue( - gr - .getAuthor() - .stream() - .anyMatch( - a -> a.getFullname().equals("Tsiafouli,Maria") && - a.getName().equals("Maria") && a.getSurname().equals("Tsiafouli") - && a.getRank() == 6 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING) - && a.getPid().getId().getValue().equals("0000-0001-6651-1178") - && a.getPid().getProvenance() != null)); - - Assertions.assertEquals("publication", gr.getType()); - - Assertions.assertEquals("eng", gr.getLanguage().getCode()); - Assertions.assertEquals("English", gr.getLanguage().getLabel()); - - Assertions.assertEquals(1, gr.getCountry().size()); - Assertions.assertEquals("IT", gr.getCountry().get(0).getCode()); - Assertions.assertEquals("Italy", gr.getCountry().get(0).getLabel()); - Assertions.assertTrue(gr.getCountry().get(0).getProvenance() == null); - - Assertions.assertEquals(12, gr.getSubjects().size()); - Assertions - .assertTrue( - gr - .getSubjects() - .stream() - .anyMatch( - s -> s.getSubject().getValue().equals("Ecosystem Services hotspots") - && s.getSubject().getScheme().equals("ACM") && s.getProvenance() != null && - s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); - Assertions - .assertTrue( - gr - .getSubjects() - .stream() - .anyMatch( - s -> s.getSubject().getValue().equals("Natura 2000") - && s.getSubject().getScheme().equals("") && s.getProvenance() != null && - s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository"))); - - Assertions - .assertEquals( - "Ecosystem Service capacity is higher in areas of multiple designation types", - gr.getMaintitle()); - - Assertions.assertEquals(null, gr.getSubtitle()); - - Assertions.assertEquals(1, gr.getDescription().size()); - - Assertions - .assertTrue( - gr - .getDescription() - .get(0) - .startsWith("The implementation of the Ecosystem Service (ES) concept into practice")); - Assertions - .assertTrue( - gr - .getDescription() - .get(0) - .endsWith( - "start complying with new standards and demands for nature conservation and environmental management.")); - - Assertions.assertEquals("2017-01-01", gr.getPublicationdate()); - - 
Assertions.assertEquals("Pensoft Publishers", gr.getPublisher()); - - Assertions.assertEquals(null, gr.getEmbargoenddate()); - - Assertions.assertEquals(1, gr.getSource().size()); - Assertions.assertEquals("One Ecosystem 2: e13718", gr.getSource().get(0)); - - Assertions.assertEquals(1, gr.getFormat().size()); - Assertions.assertEquals("text/html", gr.getFormat().get(0)); - - Assertions.assertEquals(0, gr.getContributor().size()); - - Assertions.assertEquals(0, gr.getCoverage().size()); - - Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel()); - Assertions - .assertEquals( - Constants.ACCESS_RIGHTS_COAR_MAP.get(ModelConstants.ACCESS_RIGHT_OPEN), - gr.getBestaccessright().getCode()); - - Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); - Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); - Assertions.assertEquals("", gr.getContainer().getIssnPrinted()); - Assertions.assertEquals("", gr.getContainer().getIssnLinking()); - - Assertions.assertTrue(null == gr.getDocumentationUrl() || gr.getDocumentationUrl().size() == 0); - - Assertions.assertTrue(null == gr.getCodeRepositoryUrl()); - - Assertions.assertEquals(null, gr.getProgrammingLanguage()); - - Assertions.assertTrue(null == gr.getContactperson() || gr.getContactperson().size() == 0); - - Assertions.assertTrue(null == gr.getContactgroup() || gr.getContactgroup().size() == 0); - - Assertions.assertTrue(null == gr.getTool() || gr.getTool().size() == 0); - - Assertions.assertEquals(null, gr.getSize()); - - Assertions.assertEquals(null, gr.getVersion()); - - Assertions.assertTrue(null == gr.getGeolocation() || gr.getGeolocation().size() == 0); - - Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId()); - - Assertions.assertEquals(1, gr.getOriginalId().size()); - Assertions - .assertTrue( - gr.getOriginalId().contains("10.3897/oneeco.2.e13718")); - - Assertions.assertEquals(1, gr.getPid().size()); - Assertions - .assertTrue( - gr.getPid().get(0).getScheme().equals("doi") - && gr.getPid().get(0).getValue().equals("10.1016/j.triboint.2014.05.004")); - - Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateofcollection()); - - Assertions.assertEquals(1, gr.getInstance().size()); - - Instance instance = gr.getInstance().get(0); - Assertions.assertEquals(0, instance.getPid().size()); - Assertions.assertEquals(1, instance.getAlternateIdentifier().size()); - Assertions - .assertTrue( - instance.getAlternateIdentifier().get(0).getScheme().equals("doi") - && instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718")); - Assertions.assertEquals(null, instance.getLicense()); - Assertions - .assertTrue( - instance - .getAccessright() - .getCode() - .equals( - Constants.ACCESS_RIGHTS_COAR_MAP - .get(ModelConstants.ACCESS_RIGHT_OPEN))); - Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN)); - Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green)); - Assertions.assertTrue(instance.getType().equals("Article")); - Assertions.assertEquals(2, instance.getUrl().size()); - Assertions - .assertTrue( - instance.getUrl().contains("https://doi.org/10.3897/oneeco.2.e13718") - && instance.getUrl().contains("https://oneecosystem.pensoft.net/article/13718/")); - Assertions.assertEquals("2017-01-01", instance.getPublicationdate()); - Assertions.assertEquals(null, instance.getArticleprocessingcharge()); - 
Assertions.assertEquals("peerReviewed", instance.getRefereed()); - } - - @Test - public void testDatasetDump() { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - false, sourcePath, workingDir.toString() + "/result", - communityMapPath, Dataset.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count()); - - // the common fields in the result have been already checked. Now checking only - // community specific fields - - GraphResult gr = verificationDataset.first(); - - Assertions.assertEquals(2, gr.getGeolocation().size()); - Assertions.assertEquals(2, gr.getGeolocation().stream().filter(gl -> gl.getBox().equals("")).count()); - Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("")).count()); - Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("")).count()); - Assertions - .assertEquals( - 1, - gr - .getGeolocation() - .stream() - .filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada")) - .count()); - Assertions - .assertEquals( - 1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count()); - Assertions - .assertEquals( - 1, - gr - .getGeolocation() - .stream() - .filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals("")) - .count()); - Assertions - .assertEquals( - 1, - gr - .getGeolocation() - .stream() - .filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals("")) - .count()); - - Assertions.assertEquals("1024Gb", gr.getSize()); - - Assertions.assertEquals("1.01", gr.getVersion()); - - Assertions.assertEquals(null, gr.getContainer()); - Assertions.assertEquals(null, gr.getCodeRepositoryUrl()); - Assertions.assertEquals(null, gr.getProgrammingLanguage()); - Assertions.assertEquals(null, gr.getDocumentationUrl()); - Assertions.assertEquals(null, gr.getContactperson()); - Assertions.assertEquals(null, gr.getContactgroup()); - Assertions.assertEquals(null, gr.getTool()); - - } - - @Test - public void testSoftwareDump() { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - false, sourcePath, workingDir.toString() + "/result", - communityMapPath, Software.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); - - 
org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - Assertions.assertEquals(1, verificationDataset.filter("type = 'software'").count()); - - GraphResult gr = verificationDataset.first(); - - Assertions.assertEquals(2, gr.getDocumentationUrl().size()); - Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_1")); - Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_2")); - - Assertions.assertEquals("code_repo", gr.getCodeRepositoryUrl()); - - Assertions.assertEquals("perl", gr.getProgrammingLanguage()); - - Assertions.assertEquals(null, gr.getContainer()); - Assertions.assertEquals(null, gr.getContactperson()); - Assertions.assertEquals(null, gr.getContactgroup()); - Assertions.assertEquals(null, gr.getTool()); - Assertions.assertEquals(null, gr.getGeolocation()); - Assertions.assertEquals(null, gr.getSize()); - Assertions.assertEquals(null, gr.getVersion()); - - } - - @Test - public void testOrpDump() { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - false, sourcePath, workingDir.toString() + "/result", - communityMapPath, OtherResearchProduct.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - Assertions.assertEquals(1, verificationDataset.filter("type = 'other'").count()); - - GraphResult gr = verificationDataset.first(); - - Assertions.assertEquals(2, gr.getContactperson().size()); - Assertions.assertTrue(gr.getContactperson().contains(("contact_person1"))); - Assertions.assertTrue(gr.getContactperson().contains(("contact_person2"))); - - Assertions.assertEquals(1, gr.getContactgroup().size()); - Assertions.assertTrue(gr.getContactgroup().contains(("contact_group"))); - - Assertions.assertEquals(2, gr.getTool().size()); - Assertions.assertTrue(gr.getTool().contains("tool1")); - Assertions.assertTrue(gr.getTool().contains("tool2")); - - Assertions.assertEquals(null, gr.getContainer()); - Assertions.assertEquals(null, gr.getDocumentationUrl()); - Assertions.assertEquals(null, gr.getCodeRepositoryUrl()); - Assertions.assertEquals(null, gr.getProgrammingLanguage()); - Assertions.assertEquals(null, gr.getGeolocation()); - Assertions.assertEquals(null, gr.getSize()); - Assertions.assertEquals(null, gr.getVersion()); - - } - - @Test - public void testPublicationDumpCommunity() throws JsonProcessingException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - false, sourcePath, workingDir.toString() + "/result", communityMapPath, 
Publication.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count()); - - // the common fields in the result have been already checked. Now checking only - // community specific fields - - CommunityResult cr = verificationDataset.first(); - - Assertions.assertEquals(1, cr.getContext().size()); - Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode()); - Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel()); - Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size()); - Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance()); - Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust()); - - Assertions.assertEquals(1, cr.getCollectedfrom().size()); - Assertions - .assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey()); - Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue()); - - Assertions.assertEquals(1, cr.getInstance().size()); - Assertions - .assertEquals( - "10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", - cr.getInstance().get(0).getCollectedfrom().getKey()); - Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue()); - Assertions - .assertEquals( - "10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey()); - Assertions.assertEquals("One Ecosystem", cr.getInstance().get(0).getHostedby().getValue()); - - } - - @Test - public void testDataset() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(90, verificationDataset.count()); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset - .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'") - .count()); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset - .filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'") - .count()); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == 
verificationDataset - .filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'") - .count()); - - Assertions - .assertTrue( - verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset - .filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'") - .count()); - - Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90); - - Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90); - - } - - @Test - public void testDataset2All() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); - - Assertions.assertEquals(5, verificationDataset.count()); - - } - - @Test - public void testDataset2Communities() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(0, verificationDataset.count()); - - } - - @Test - public void testPublication() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(74, 
verificationDataset.count()); - verificationDataset.show(false); - - Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count()); - - } - - @Test - public void testSoftware() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(6, verificationDataset.count()); - - Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count()); - - } - - @Test - public void testORP() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - // false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class, - false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(3, verificationDataset.count()); - - Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count()); - - } - - @Test - public void testRecord() { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - DumpProducts dump = new DumpProducts(); - dump - .run( - false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class, - CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType()); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(2, verificationDataset.count()); - verificationDataset.show(false); - - Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count()); - - } - @Test public void 
testEOSCDump() throws Exception { final String sourcePath = getClass() @@ -910,12 +164,12 @@ public class DumpJobTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD<EoscResult> tmp = sc + JavaRDD<Result> tmp = sc .textFile(workingDir.toString() + "/working") - .map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class)); + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - org.apache.spark.sql.Dataset<EoscResult> verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(EoscResult.class)); + org.apache.spark.sql.Dataset<Result> verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Result.class)); Assertions.assertEquals(1, verificationDataset.count()); @@ -960,12 +214,11 @@ public class DumpJobTest { workingDir.toString() + "/orp", getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getPath(), - OtherResearchProduct.class, - EoscResult.class, Constants.DUMPTYPE.EOSC.getType()); + OtherResearchProduct.class); - JavaRDD<EoscResult> tmp = sc + JavaRDD<Result> tmp = sc .textFile(workingDir.toString() + "/orp") - .map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class)); + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); System.out.println(OBJECT_MAPPER.writeValueAsString(tmp.first())); @@ -977,12 +230,11 @@ public class DumpJobTest { workingDir.toString() + "/soft", getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getPath(), - Software.class, - EoscResult.class, Constants.DUMPTYPE.EOSC.getType()); + Software.class); - JavaRDD<EoscResult> tmp2 = sc + JavaRDD<Result> tmp2 = sc .textFile(workingDir.toString() + "/soft") - .map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class)); + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); System.out.println(OBJECT_MAPPER.writeValueAsString(tmp2.first())); @@ -994,12 +246,11 @@ public class DumpJobTest { workingDir.toString() + "/soft2", getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getPath(), - Software.class, - EoscResult.class, Constants.DUMPTYPE.EOSC.getType()); + Software.class); - JavaRDD<EoscResult> tmp3 = sc + JavaRDD<Result> tmp3 = sc .textFile(workingDir.toString() + "/soft2") - .map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class)); + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); System.out.println(OBJECT_MAPPER.writeValueAsString(tmp3.first())); @@ -1011,101 +262,38 @@ public class DumpJobTest { workingDir.toString() + "/orp2", getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") .getPath(), - OtherResearchProduct.class, - EoscResult.class, Constants.DUMPTYPE.EOSC.getType()); + OtherResearchProduct.class); - JavaRDD<EoscResult> tmp4 = sc + JavaRDD<Result> tmp4 = sc .textFile(workingDir.toString() + "/orp2") - .map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class)); + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); System.out.println(OBJECT_MAPPER.writeValueAsString(tmp4.first())); } @Test - public void testArticlePCA() { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca") - .getPath(); + public void testEOSCDumpIndicators() throws Exception { - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); DumpProducts dump = new DumpProducts(); dump .run( - false, sourcePath, workingDir.toString() + "/result", 
communityMapPath, Publication.class, - GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType()); + false, getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input/indicators/publication.json") + .getPath(), + workingDir.toString() + "/publication", getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(), + Publication.class); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaRDD<Result> tmp = sc + .textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - JavaRDD<GraphResult> tmp = sc - .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class)); + System.out.println(OBJECT_MAPPER.writeValueAsString(tmp.first())); - org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(GraphResult.class)); - - Assertions.assertEquals(23, verificationDataset.count()); - - Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count()); - - verificationDataset.createOrReplaceTempView("check"); - - org.apache.spark.sql.Dataset<Row> temp = spark - .sql( - "select id " + - "from check " + - "lateral view explode (instance) i as inst " + - "where inst.articleprocessingcharge is not null"); - - Assertions.assertTrue(temp.count() == 2); - - Assertions.assertTrue(temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").count() == 1); - - Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1); - - temp = spark - .sql( - "select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " + - "from check " + - "lateral view explode (instance) i as inst " + - "where inst.articleprocessingcharge is not null"); - - Assertions - .assertEquals( - "3131.64", - temp - .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") - .collectAsList() - .get(0) - .getString(1)); - Assertions - .assertEquals( - "EUR", - temp - .filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'") - .collectAsList() - .get(0) - .getString(2)); - - Assertions - .assertEquals( - "2578.35", - temp - .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") - .collectAsList() - .get(0) - .getString(1)); - Assertions - .assertEquals( - "EUR", - temp - .filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'") - .collectAsList() - .get(0) - .getString(2)); } @Test diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index a23f184..872d59b 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -6,7 +6,7 @@ import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.JsonNode; import com.github.victools.jsonschema.generator.*; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; +import eu.dnetlib.dhp.eosc.model.Result; //@Disabled class GenerateJsonSchema { @@ -20,7 +20,7 @@ class GenerateJsonSchema { configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(GraphResult.class); + JsonNode jsonSchema = generator.generateSchema(Result.class); 
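As an aside on the hunk above: GenerateJsonSchema now derives the published JSON schema from the relocated eu.dnetlib.dhp.eosc.model.Result instead of the removed GraphResult. A minimal, self-contained sketch of that generation step follows; the DRAFT_7 schema version, the PLAIN_JSON preset, and the class name EoscResultSchemaSketch are illustrative assumptions, not taken from this patch.

import com.fasterxml.jackson.databind.JsonNode;
import com.github.victools.jsonschema.generator.OptionPreset;
import com.github.victools.jsonschema.generator.SchemaGenerator;
import com.github.victools.jsonschema.generator.SchemaGeneratorConfig;
import com.github.victools.jsonschema.generator.SchemaGeneratorConfigBuilder;
import com.github.victools.jsonschema.generator.SchemaVersion;

import eu.dnetlib.dhp.eosc.model.Result;

public class EoscResultSchemaSketch {

	public static void main(String[] args) {
		// Configure victools as in the test above; the schema version and
		// option preset are assumptions, any supported combination would do.
		SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(
			SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON);
		configBuilder
			.forFields()
			.withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
		SchemaGeneratorConfig config = configBuilder.build();
		SchemaGenerator generator = new SchemaGenerator(config);

		// The schema is generated straight from the EOSC model class, so any
		// field added to Result shows up in the dumped schema automatically.
		JsonNode jsonSchema = generator.generateSchema(Result.class);
		System.out.println(jsonSchema.toPrettyString());
	}
}

Generating the schema from the model class keeps the schema and the dumped records in lock-step, which is why the test only needs to swap the class literal.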
System.out.println(jsonSchema.toString()); } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index 9522a97..736c2ac 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -23,8 +23,8 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject; -import eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject; +import eu.dnetlib.dhp.oa.graph.dump.eosc.ResultProject; +import eu.dnetlib.dhp.oa.graph.dump.eosc.SparkPrepareResultProject; public class PrepareResultProjectJobTest { diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index 8391c89..fed1203 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -16,6 +16,7 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.xml.sax.SAXException; +import eu.dnetlib.dhp.oa.graph.dump.eosc.QueryInformationSystem; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java deleted file mode 100644 index a3506b5..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ /dev/null @@ -1,143 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import org.apache.commons.io.FileUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; - -public class SplitForCommunityTest { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private static SparkSession spark; - - private static Path workingDir; - - private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class); - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(SplitForCommunityTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - conf.setAppName(SplitForCommunityTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - spark = SparkSession - .builder() 
- .appName(SplitForCommunityTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - @Test - void testCommunitySplit() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - CommunitySplit split = new CommunitySplit(); - - split.run(false, sourcePath, workingDir.toString() + "/split", communityMapPath); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/split/Digital_Humanities_and_Cultural_Heritage") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(19, verificationDataset.count()); - - Assertions - .assertEquals( - 1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); - - tmp = sc - .textFile(workingDir.toString() + "/split/EGI_Federation") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - Assertions - .assertEquals( - 1, verificationDataset.filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); - - tmp = sc - .textFile(workingDir.toString() + "/split/Neuroinformatics") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(5, verificationDataset.count()); - - Assertions - .assertEquals( - 1, verificationDataset.filter("id = '50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795'").count()); - - tmp = sc - .textFile(workingDir.toString() + "/split/Science_and_Innovation_Policy_Studies") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); - - verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); - - Assertions.assertEquals(4, verificationDataset.count()); - - Assertions - .assertEquals( - 1, verificationDataset.filter("id = '50|dedup_wf_001::0347b1cd516fc59e41ba92e0d74e4e9f'").count()); - Assertions - .assertEquals( - 1, verificationDataset.filter("id = '50|dedup_wf_001::1432beb6171baa5da8a85a7f99545d69'").count()); - Assertions - .assertEquals( - 1, verificationDataset.filter("id = '50|dedup_wf_001::1c8bd19e633976e314b88ce5c3f92d69'").count()); - Assertions - .assertEquals( - 1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); - - } -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index b352d5f..33c1963 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -24,9 +24,9 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo; -import 
eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.oa.model.community.Project; +import eu.dnetlib.dhp.eosc.model.Project; +import eu.dnetlib.dhp.eosc.model.Result; +import eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo; public class UpdateProjectInfoTest { @@ -85,12 +85,12 @@ public class UpdateProjectInfoTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD<CommunityResult> tmp = sc + JavaRDD<Result> tmp = sc .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + org.apache.spark.sql.Dataset<Result> verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Result.class)); verificationDataset.show(false); @@ -152,12 +152,12 @@ public class UpdateProjectInfoTest { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - JavaRDD<CommunityResult> tmp = sc + JavaRDD<Result> tmp = sc .textFile(workingDir.toString() + "/result") - .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + .map(item -> OBJECT_MAPPER.readValue(item, Result.class)); - org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class)); + org.apache.spark.sql.Dataset<Result> verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Result.class)); verificationDataset.show(false); @@ -196,7 +196,7 @@ public class UpdateProjectInfoTest { Project project = verificationDataset .map( - (MapFunction<CommunityResult, Project>) cr -> cr + (MapFunction<Result, Project>) cr -> cr .getProjects() .stream() .filter(p -> p.getValidated() != null) @@ -213,7 +213,7 @@ public class UpdateProjectInfoTest { project = verificationDataset .map( - (MapFunction<CommunityResult, Project>) cr -> cr + (MapFunction<Result, Project>) cr -> cr .getProjects() .stream() .filter(p -> p.getValidated() == null) diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 99f6219..d705946 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -14,7 +14,8 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.MissingConceptDoiException; import eu.dnetlib.dhp.common.api.ZenodoAPIClient; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.eosc.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.eosc.Utils; @Disabled public class ZenodoUploadTest { diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java deleted file mode 100644 index 37ba576..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ /dev/null @@ -1,173 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static org.mockito.Mockito.lenient; - -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.function.Consumer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import 
org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.junit.jupiter.api.*; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -import com.google.gson.Gson; - -import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity; -import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -@ExtendWith(MockitoExtension.class) -public class CreateEntityTest { - - private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return " - + - "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " + - "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " - + - "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)"; - - List communityMap = Arrays - .asList( - "clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri", - "ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. " - + - "SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - " - + - "Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community", - "dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. 
This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community", - "fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community", - "ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community", - "mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. 
JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community", - "instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community", - "elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri", - "aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community", - "dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri", - "epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri", - "covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. 
All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community"); - - @Mock - private ISLookUpService isLookUpService; - - private QueryInformationSystem queryInformationSystem; - - private static String workingDir; - - @BeforeEach - public void setUp() throws ISLookUpException { - lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(isLookUpService); - } - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(CreateEntityTest.class.getSimpleName()) - .toString(); - } - - @Test - void test1() throws ISLookUpException, IOException { - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); - - List riList = new ArrayList<>(); - cInfoList.forEach(cInfo -> riList.add(Process.getEntity(cInfo))); - - Assertions.assertEquals(12, riList.size()); - - riList.stream().forEach(c -> { - switch (c.getAcronym()) { - case "mes": - Assertions - .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY)); - Assertions.assertTrue(((ResearchCommunity) c).getSubject().size() == 5); - Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("marine")); - Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("ocean")); - Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("fish")); - Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("aqua")); - Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("sea")); - Assertions - .assertTrue( - c - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getAcronym())))); - Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes")); - Assertions.assertTrue("mes".equals(c.getAcronym())); - break; - case "clarin": - Assertions - .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_INFRASTRUCTURE)); - Assertions - .assertTrue( - c - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getAcronym())))); - Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin")); - Assertions.assertTrue("clarin".equals(c.getAcronym())); - break; - } - // TODO add check for all the others Entities - - }); - - riList.forEach(c -> System.out.println(new Gson().toJson(c))); - } - - @Test - @Disabled - void test2() throws IOException, ISLookUpException { - LocalFileSystem fs = FileSystem.getLocal(new Configuration()); - - Path hdfsWritePath = new Path(workingDir + "/prova"); - FSDataOutputStream fsDataOutputStream = null; - if (fs.exists(hdfsWritePath)) { - fsDataOutputStream = fs.append(hdfsWritePath); - } else { - fsDataOutputStream = 
fs.create(hdfsWritePath); - } - CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf()); - CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec"); - - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream), - StandardCharsets.UTF_8)); - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); - - for (ContextInfo cInfo : cInfoList) { - writer.write(new Gson().toJson(Process.getEntity(cInfo))); - } - writer.close(); - - } -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java deleted file mode 100644 index 487e820..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ /dev/null @@ -1,723 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.util.*; -import java.util.function.Consumer; -import java.util.stream.Collectors; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import com.google.gson.Gson; - -import eu.dnetlib.dhp.oa.model.graph.Relation; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.utils.DHPUtils; - -class CreateRelationTest { - - List communityContext = Arrays - .asList( - "\n" + - " all\n" + - " CLARIN\n" + - " https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg\n" - + - " Common Language Resources and Technology Infrastructure\n" + - " maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " (Part of) the work reported here was made possible by using the CLARIN infrastructure.\n" - + - " The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.\n" - + - " The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n" - + - " (E.g. No 676529 for project CLARIN-PLUS.)\n" + - " oac_clarin\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " CLARIN-PLUS\n" + - " \n" + - " \n" + - " 676529\n" + - " http://www.clarin.eu\n" + - " EC\n" + - " H2020-INFRADEV-1-2015-1\n" + - " CLARIN+\n" + - " \n" + - " \n" + - " Common Language Resources and Technology Infrastructure\n" - + - " CLARIN\n" + - " 212230\n" + - " EC\n" + - " corda_______::ef782b2d85676aa3e5a907427feb18c4\n" + - " \n" + - " \n" + - " " + - "\n" + - " \n" + - " LINDAT/CLARIN repository\n" + - " LINDAT/CLARIN repository\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " CLARIN-D\n" + - " https://www.clarin-d.de/en/\n" + - " \n" + - " http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf\n" - + - " Germany\n" + - " \n" + - " \n" + - " \n" - + - " \n" + - "", - "\n" + - " all\n" + - " This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. 
This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.\n" - + - " http://sanmamante.org/DH_CH_logo.png\n" + - " Digital Humanities and Cultural Heritage\n" + - " ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk\n" - + - " modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " oac_dh-ch\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " \n" + - " 654119\n" + - " http://www.parthenos-project.eu\n" + - " EC\n" + - " PARTHENOS\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " re3data_____::9ebe127e5f3a0bf401875690f3bb6b81\n" + - " The UK's largest collection of digital research data in the social sciences and humanities\n" - + - " UK Data Archive\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::c6cd4b532e12868c1d760a8d7cda6815\n" + - " Journal of Data Mining and Digital Humanities\n" + - " Journal of Data Mining and Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b\n" + - " Frontiers in Digital Humanities\n" + - " Frontiers in Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::6eb31d13b12bc06bbac06aef63cf33c9\n" + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " true\n" + - " \n" + - " \n" - + - " doajarticles::0da84e9dfdc8419576169e027baa8028\n" + - " Conservation Science in Cultural Heritage\n" + - " Conservation Science in Cultural Heritage\n" + - " true\n" + - " \n" + - " \n" - + - " re3data_____::84e123776089ce3c7a33db98d9cd15a8\n" + - " Electronic Archiving System\n" + - " EASY\n" + - " true\n" + - " \n" + - " \n" + - " openaire____::c5502a43e76feab55dd00cf50f519125\n" + - " DANS-KB Harvester\n" + - " Gemeenschappelijke Harvester DANS-KB\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::a48f09c562b247a9919acfe195549b47\n" + - " ads\n" + - " Archaeology Data Service\n" + - " true\n" + - " \n" + - " \n" + - " opendoar____::97275a23ca44226c9964043c8462be96\n" + - " KNAW Repository\n" + - " KNAW Repository\n" + - " true\n" + - " \n" + - 
" \n" - + - " doajarticles::2899208a99aa7d142646e0a80bfeef05\n" + - " Internet Archaeology\n" + - " Internet Archaeology\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.\n" - + - " https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png\n" - + - " Neuroinformatics\n" + - " sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it\n" - + - " brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities\n" - + - " \n" + - " oac_ni\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " \n" + - " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" + - " Formerly:OpenFMRI\n" + - " OpenNeuro\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::c7d3de67dc77af72f6747157441252ec\n" + - " Research Ideas and Outcomes\n" + - " Research Ideas and Outcomes\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::8515794670370f49c1d176c399c714f5\n" + - " Neuroimaging Informatics Tools and Resources Clearinghouse\n" - + - " NITRC\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::d640648c84b10d425f96f11c3de468f3\n" + - " Frontiers in Neuroinformatics\n" + - " Frontiers in Neuroinformatics\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\n" + - " NeuroImage: Clinical\n" + - " NeuroImage: Clinical\n" + - " true\n" + - " \n" + - " \n" + - " rest________::fb1a3d4523c95e63496e3bc7ba36244b\n" + - " NeuroVault\n" + - " NeuroVault\n" + - " true\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " Instruct-ERIC is the European Research Infrastructure for Structural Biology\n" - + - " https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png\n" - + - " Instruct-ERIC\n" + - " claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " The authors acknowledge the support and the use of resources of Instruct-ERIC.\n" - + - " The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project\n" - + - " oac_instruct\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Authentication and Authorisation For Research and Collaboration\n" - + - " \n" + - " 730941\n" + - " \n" + - " H2020-EINFRA-2016-1\n" + - " AARC2\n" + - " EC\n" + - " \n" + - " \n" - + - " Building data bridges between biological and medical infrastructures in Europe\n" - + - " \n" + - " 284209\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioMedBridges\n" + - " \n" + - " \n" - + - " 
Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\n" - + - " \n" + - " 283570\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioStruct-X\n" + - " \n" + - " \n" - + - " Coordinated Research Infrastructures Building Enduring Life-science services\n" - + - " \n" + - " 654248\n" + - " \n" + - " H2020-INFRADEV-1-2014-1\n" + - " EC\n" + - " CORBEL\n" + - " \n" + - " \n" - + - " Infrastructure for NMR, EM and X-rays for translational research\n" - + - " \n" + - " 653706\n" + - " \n" + - " H2020-INFRAIA-2014-2015\n" + - " EC\n" + - " iNEXT\n" + - " \n" + - " \n" - + - " Integrated Structural Biology Infrastructure\n" + - " \n" + - " 211252\n" + - " \n" + - " FP7-INFRASTRUCTURES-2007-1\n" + - " EC\n" + - " INSTRUCT\n" + - " \n" + - " \n" - + - " Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\n" - + - " \n" + - " 731005\n" + - " \n" + - " H2020-INFRADEV-2016-1\n" + - " EC\n" + - " INSTRUCT-ULTRA\n" + - " \n" + - " \n" - + - " Opening Synchrotron Light for Experimental Science and Applications in the Middle East\n" - + - " \n" + - " 730943\n" + - " \n" + - " H2020-INFRASUPP-2016-1\n" + - " EC\n" + - " OPEN SESAME\n" + - " \n" + - " \n" - + - " Infrastructure for Protein Production Platforms\n" - + - " \n" + - " 227764\n" + - " \n" + - " FP7-INFRASTRUCTURES-2008-1\n" + - " EC\n" + - " PCUBE\n" + - " \n" + - " \n" - + - " European Vaccine Research and Development Infrastructure\n" - + - " \n" + - " 730964\n" + - " \n" + - " H2020-INFRAIA-2016-1\n" + - " EC\n" + - " TRAMSVAC2\n" + - " \n" + - " \n" - + - " World-wide E-infrastructure for structural biology\n" - + - " \n" + - " 675858\n" + - " \n" + - " EC | H2020 | RIA\n" + - " EC\n" + - " West-Life\n" + - " \n" + - " \n" + - " Expanding research infrastructure visibility to strengthen strategic partnerships\n" - + - " RI-VIS\n" + - " 824063\n" + - " EC\n" + - " corda__h2020::af93b591b76991d8437993a8f6fc6538\n" + - " \n" + - " \n" + - " \n" - + - " \n" - + - " \n" + - " instruct\n" + - " \n" + - " \n" + - " \n" - + - " west-life\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " FRISBI\n" + - " aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==\n" - + - " aHR0cDovL2ZyaXNiaS5ldS8=\n" + - " \n" + - " \n" + - " RI-VIS\n" + - " aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=\n" - + - " aHR0cHM6Ly9yaS12aXMuZXU=\n" + - " \n" + - " \n" + - " CIISB\n" + - " aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=\n" + - " aHR0cHM6Ly93d3cuY2lpc2Iub3Jn\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. 
More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.\n" - + - " https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png\n" - + - " The Greek National Node of the ESFRI European RI ELIXIR\n" + - " vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " \n" + - " oaa_elixir-gr\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)\n" - + - " \n" + - " 1U24RR025736-01\n" + - " NIH\n" + - " \n" + - " \n" + - " COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning\n" - + - " \n" + - " 0223843\n" + - " NSF\n" + - " \n" + - " \n" + - " The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia\n" - + - " \n" + - " 5R01MH082795-05\n" + - " NIH\n" + - " \n" + - " \n" + - " Fragmented early life environmental and emotional / cognitive vulnerabilities\n" - + - " \n" + - " 1P50MH096889-01A1\n" + - " NIH\n" + - " \n" + - " \n" + - " Enhancement of the 1000 Functional Connectome Project\n" - + - " \n" + - " 1R03MH096321-01A1\n" + - " TUBITAK\n" + - " \n" + - " \n" + - " CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project\n" - + - " \n" + - " 1131441\n" + - " NSF\n" + - " \n" + - " \n" + - " Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI\n" - + - " \n" + - " 0121950\n" + - " NSF\n" + - " \n" + - " \n" + - " Transforming statistical methodology for neuroimaging meta-analysis.\n" - + - " \n" + - " 100309\n" + - " WT\n" + - " \n" + - " " + - - " \n" - + - " \n" + - " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" + - " bio.tools\n" + - " bio.tools\n" + - " false\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " \n" + - " ATHENA RC\n" + - " aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=\n" - + - " aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=\n" + - " \n" + - " \n" - + - ""); - - private QueryInformationSystem queryInformationSystem; - - private Map map; - - @BeforeEach - public void setUp() { - - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setContextRelationResult(communityContext); - } - - @Test - void test1() { - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - - queryInformationSystem - .getContextRelation(consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class)); - - cInfoList.forEach(c -> System.out.println(new Gson().toJson(c))); - - List rList = new ArrayList<>(); - - cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add)); - - Assertions.assertEquals(34, rList.size()); - - Assertions - .assertTrue( - rList - .stream() - .map(r -> r.getSource().getId()) - .collect(Collectors.toSet()) - .contains( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))); - - Assertions - .assertEquals( - 10, - rList - .stream() - .filter( - r -> r - .getSource() - .getId() - .equals( - String - .format( - "%s|%s::%s", 
Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) - .collect(Collectors.toList()) - .size()); - - Assertions - .assertEquals( - 10, - rList - .stream() - .filter( - r -> r - .getTarget() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) - .collect(Collectors.toList()) - .size()); - - Set tmp = rList - .stream() - .filter( - r -> r - .getSource() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) - .map(r -> r.getTarget().getId()) - .collect(Collectors.toSet()); - - Assertions - .assertTrue( - tmp.contains("10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81") && - tmp.contains("10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815") && - tmp.contains("10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") && - tmp.contains("10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") && - tmp.contains("10|doajarticles::0da84e9dfdc8419576169e027baa8028") && - tmp.contains("10|re3data_____::84e123776089ce3c7a33db98d9cd15a8") && - tmp.contains("10|openaire____::c5502a43e76feab55dd00cf50f519125") && - tmp.contains("10|re3data_____::a48f09c562b247a9919acfe195549b47") && - tmp.contains("10|opendoar____::97275a23ca44226c9964043c8462be96") && - tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05")); - - } - - @Test - public void test2() { - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - - queryInformationSystem - .getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class)); - - cInfoList.forEach(c -> System.out.println(new Gson().toJson(c))); - - List rList = new ArrayList<>(); - - cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add)); - - Assertions.assertEquals(44, rList.size()); - - Assertions - .assertFalse( - rList - .stream() - .map(r -> r.getSource().getId()) - .collect(Collectors.toSet()) - .contains( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))); - - Assertions - .assertEquals( - 2, - rList - .stream() - .filter( - r -> r - .getSource() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .collect(Collectors.toList()) - .size()); - - Assertions - .assertEquals( - 2, - rList - .stream() - .filter( - r -> r - .getTarget() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .collect(Collectors.toList()) - .size()); - - Set tmp = rList - .stream() - .filter( - r -> r - .getSource() - .getId() - .equals( - String - .format( - "%s|%s::%s", Constants.CONTEXT_ID, - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .map(r -> r.getTarget().getId()) - .collect(Collectors.toSet()); - - Assertions - .assertTrue( - tmp.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") && - tmp.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4")); - - rList.forEach(rel -> { - if (rel.getSource().getId().startsWith("40|")) { - String proj = rel.getSource().getId().substring(3); - Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12); - } - }); - - } -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java 
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java
deleted file mode 100644
index 4ff5382..0000000
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpOrganizationProjectDatasourceTest.java
+++ /dev/null
@@ -1,157 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.complete;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.HashMap;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.ForeachFunction;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.dhp.schema.oaf.Organization;
-import eu.dnetlib.dhp.schema.oaf.Project;
-
-public class DumpOrganizationProjectDatasourceTest {
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	private static SparkSession spark;
-
-	private static Path workingDir;
-
-	private static final Logger log = LoggerFactory
-		.getLogger(DumpOrganizationProjectDatasourceTest.class);
-
-	private static final HashMap map = new HashMap<>();
-
-	@BeforeAll
-	public static void beforeAll() throws IOException {
-		workingDir = Files
-			.createTempDirectory(DumpOrganizationProjectDatasourceTest.class.getSimpleName());
-		log.info("using work dir {}", workingDir);
-
-		SparkConf conf = new SparkConf();
-		conf.setAppName(DumpOrganizationProjectDatasourceTest.class.getSimpleName());
-
-		conf.setMaster("local[*]");
-		conf.set("spark.driver.host", "localhost");
-		conf.set("hive.metastore.local", "true");
-		conf.set("spark.ui.enabled", "false");
-		conf.set("spark.sql.warehouse.dir", workingDir.toString());
-		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
-
-		spark = SparkSession
-			.builder()
-			.appName(DumpOrganizationProjectDatasourceTest.class.getSimpleName())
-			.config(conf)
-			.getOrCreate();
-	}
-
-	@AfterAll
-	public static void afterAll() throws IOException {
-		FileUtils.deleteDirectory(workingDir.toFile());
-		spark.stop();
-	}
-
-	@Test
-	public void dumpOrganizationTest() throws Exception {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/organization")
-			.getPath();
-
-		DumpGraphEntities dg = new DumpGraphEntities();
-
-		dg.run(false, sourcePath, workingDir.toString() + "/dump", Organization.class, null);
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<eu.dnetlib.dhp.oa.model.graph.Organization> tmp = sc
-			.textFile(workingDir.toString() + "/dump")
-			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Organization.class));
-
-		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.oa.model.graph.Organization> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Organization.class));
-
-		Assertions.assertEquals(15, verificationDataset.count());
-
-		verificationDataset
-			.foreach(
-				(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Organization>) o -> System.out
-					.println(OBJECT_MAPPER.writeValueAsString(o)));
-
-	}
-
-	@Test
-	public void dumpProjectTest() throws NoAvailableEntityTypeException {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
-			.getPath();
-
-		DumpGraphEntities dg = new DumpGraphEntities();
-
-		dg.run(false, sourcePath, workingDir.toString() + "/dump", Project.class, null);
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<eu.dnetlib.dhp.oa.model.graph.Project> tmp = sc
-			.textFile(workingDir.toString() + "/dump")
-			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Project.class));
-
-		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.oa.model.graph.Project> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Project.class));
-
-		Assertions.assertEquals(12, verificationDataset.count());
-
-		verificationDataset
-			.foreach(
-				(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Project>) o -> System.out
-					.println(OBJECT_MAPPER.writeValueAsString(o)));
-
-	}
-
-	@Test
-	public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
-			.getPath();
-
-		DumpGraphEntities dg = new DumpGraphEntities();
-
-		dg.run(false, sourcePath, workingDir.toString() + "/dump", Datasource.class, null);
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<eu.dnetlib.dhp.oa.model.graph.Datasource> tmp = sc
-			.textFile(workingDir.toString() + "/dump")
-			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Datasource.class));
-
-		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.oa.model.graph.Datasource> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Datasource.class));
-
-		Assertions.assertEquals(5, verificationDataset.count());
-
-		verificationDataset
-			.foreach(
-				(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Datasource>) o -> System.out
-					.println(OBJECT_MAPPER.writeValueAsString(o)));
-	}
-
-}
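The next deleted file, DumpRelationTest, drives SparkDumpRelationJob and, in test3/test4 below, its optional -removeSet argument. The sketch that follows restates the behaviour those tests assert: relation names listed in the semicolon-separated set are dropped from the dump. The splitting and filtering shown here are assumptions read off the test arguments, not the job's verified implementation.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

// Sketch of the -removeSet semantics that test3/test4 below check: a
// semicolon-separated list of relation names to exclude from the dump.
public class RemoveSetSketch {
	public static void main(String[] args) {
		Set<String> removeSet = new HashSet<>(
			Arrays.asList("isParticipant;isAuthorInstitutionOf".split(";")));

		List<String> relationNames = Arrays
			.asList("isProvidedBy", "isParticipant", "isAuthorInstitutionOf");

		// Only "isProvidedBy" survives, matching the counts asserted in test4.
		List<String> kept = relationNames
			.stream()
			.filter(name -> !removeSet.contains(name))
			.collect(Collectors.toList());

		System.out.println(kept); // [isProvidedBy]
	}
}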
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java
deleted file mode 100644
index a768ab1..0000000
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpRelationTest.java
+++ /dev/null
@@ -1,305 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.complete;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.HashMap;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.ForeachFunction;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.oa.model.graph.Relation;
-
-public class DumpRelationTest {
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	private static SparkSession spark;
-
-	private static Path workingDir;
-
-	private static final Logger log = LoggerFactory
-		.getLogger(DumpRelationTest.class);
-
-	private static final HashMap map = new HashMap<>();
-
-	@BeforeAll
-	public static void beforeAll() throws IOException {
-		workingDir = Files
-			.createTempDirectory(DumpRelationTest.class.getSimpleName());
-		log.info("using work dir {}", workingDir);
-
-		SparkConf conf = new SparkConf();
-		conf.setAppName(DumpRelationTest.class.getSimpleName());
-
-		conf.setMaster("local[*]");
-		conf.set("spark.driver.host", "localhost");
-		conf.set("hive.metastore.local", "true");
-		conf.set("spark.ui.enabled", "false");
-		conf.set("spark.sql.warehouse.dir", workingDir.toString());
-		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
-
-		spark = SparkSession
-			.builder()
-			.appName(DumpRelationTest.class.getSimpleName())
-			.config(conf)
-			.getOrCreate();
-	}
-
-	@AfterAll
-	public static void afterAll() throws IOException {
-		FileUtils.deleteDirectory(workingDir.toFile());
-		spark.stop();
-	}
-
-	@Test
-	public void test1() throws Exception {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation")
-			.getPath();
-
-		SparkDumpRelationJob.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/relation",
-			"-sourcePath", sourcePath
-		});
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<Relation> tmp = sc
-			.textFile(workingDir.toString() + "/relation")
-			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
-
-		Dataset<Relation> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
-
-		verificationDataset.createOrReplaceTempView("table");
-
-		verificationDataset
-			.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
-
-		Dataset<Row> check = spark
-			.sql(
-				"SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance "
-					+
-					"from table ");
-
-		Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
-		Assertions
-			.assertEquals(
-				22, check
-					.filter(
-						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " +
-							"provenance = 'Harvested'")
-					.count());
-
-		Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count());
-		Assertions
-			.assertEquals(
-				7, check
-					.filter(
-						"name = 'isParticipant' and stype = 'organization' and ttype = 'project' " +
-							"and provenance = 'Harvested'")
-					.count());
-
-		Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
-		Assertions
-			.assertEquals(
-				1, check
					.filter(
-						"name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " +
-							"and provenance = 'Inferred by OpenAIRE'")
-					.count());
-	}
-
-	@Test
-	public void test2() throws Exception {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated")
-			.getPath();
-
-		SparkDumpRelationJob.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/relation",
-			"-sourcePath", sourcePath
-		});
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<Relation> tmp = sc
-			.textFile(workingDir.toString() + "/relation")
-			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
-
-		Dataset<Relation> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
-
-		verificationDataset.createOrReplaceTempView("table");
-
-		verificationDataset
-			.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
-
-		Dataset<Row> check = spark
-			.sql(
-				"SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance
" - + - "from table "); - - Assertions.assertEquals(20, check.filter("name = 'isProvidedBy'").count()); - Assertions - .assertEquals( - 20, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); - - Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count()); - Assertions - .assertEquals( - 7, check - .filter( - "name = 'isParticipant' and stype = 'organization' and ttype = 'project' " + - "and provenance = 'Harvested'") - .count()); - - Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); - Assertions - .assertEquals( - 1, check - .filter( - "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + - "and provenance = 'Inferred by OpenAIRE'") - .count()); - - Assertions.assertEquals(2, check.filter("name = 'isProducedBy'").count()); - Assertions - .assertEquals( - 2, check - .filter( - "name = 'isProducedBy' and stype = 'project' and ttype = 'result' " + - "and provenance = 'Harvested' and validated = true " + - "and validationDate = '2021-08-06'") - .count()); - } - - @Test - public void test3() throws Exception {// - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") - .getPath(); - - SparkDumpRelationJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath, - "-removeSet", "isParticipant" - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); - - verificationDataset.createOrReplaceTempView("table"); - - verificationDataset - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); - - Dataset check = spark - .sql( - "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " - + - "from table "); - - Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); - Assertions - .assertEquals( - 22, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); - - Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); - - Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count()); - Assertions - .assertEquals( - 1, check - .filter( - "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " + - "and provenance = 'Inferred by OpenAIRE'") - .count()); - } - - @Test - public void test4() throws Exception {// - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation") - .getPath(); - - SparkDumpRelationJob.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath, - "-removeSet", "isParticipant;isAuthorInstitutionOf" - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - Dataset verificationDataset = spark - .createDataset(tmp.rdd(), 
Encoders.bean(Relation.class)); - - verificationDataset.createOrReplaceTempView("table"); - - verificationDataset - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); - - Dataset check = spark - .sql( - "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance " - + - "from table "); - - Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count()); - Assertions - .assertEquals( - 22, check - .filter( - "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " + - "provenance = 'Harvested'") - .count()); - - Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count()); - - Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count()); - - } - -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java deleted file mode 100644 index 2e670eb..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ /dev/null @@ -1,175 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import org.apache.commons.io.FileUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.oa.model.graph.Relation; - -public class ExtractRelationFromEntityTest { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private static SparkSession spark; - - private static Path workingDir; - - private static final Logger log = LoggerFactory - .getLogger(ExtractRelationFromEntityTest.class); - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(ExtractRelationFromEntityTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - conf.setAppName(ExtractRelationFromEntityTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - spark = SparkSession - .builder() - .appName(ExtractRelationFromEntityTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - @Test - void test1() { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - Extractor ex = new Extractor(); - ex - .run( - 
false, sourcePath, workingDir.toString() + "/relation", - // eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath); - eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); - - Assertions - .assertEquals( - 9, - verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daeab3685c3'").count()); - - Assertions - .assertEquals( - 9, - verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daxab3685c3'").count()); - - Assertions - .assertEquals( - "IsRelatedTo", verificationDataset - .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) - .collectAsList() - .get(0) - .getReltype() - .getName()); - - Assertions - .assertEquals( - "relationship", verificationDataset - .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) - .collectAsList() - .get(0) - .getReltype() - .getType()); - - Assertions - .assertEquals( - "context", verificationDataset - .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) - .collectAsList() - .get(0) - .getSource() - .getType()); - - Assertions - .assertEquals( - "result", verificationDataset - .filter((FilterFunction) row -> row.getSource().getId().startsWith("00")) - .collectAsList() - .get(0) - .getTarget() - .getType()); - Assertions - .assertEquals( - "IsRelatedTo", verificationDataset - .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) - .collectAsList() - .get(0) - .getReltype() - .getName()); - - Assertions - .assertEquals( - "relationship", verificationDataset - .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) - .collectAsList() - .get(0) - .getReltype() - .getType()); - - Assertions - .assertEquals( - "context", verificationDataset - .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) - .collectAsList() - .get(0) - .getTarget() - .getType()); - - Assertions - .assertEquals( - "result", verificationDataset - .filter((FilterFunction) row -> row.getTarget().getId().startsWith("00")) - .collectAsList() - .get(0) - .getSource() - .getType()); - } - -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java deleted file mode 100644 index 05e822d..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java +++ /dev/null @@ -1,70 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import org.dom4j.DocumentException; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; -import org.xml.sax.SAXException; - -import eu.dnetlib.dhp.oa.model.graph.Funder; - -class FunderParsingTest { - - @Test - void testFunderTwoLevels() throws DocumentException { - - String funding_Stream = "nsf_________::NSFNSFNational Science " - + - "FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision " - + - "of Computing and Communication FoundationsDivision of Computing and Communication " + - "Foundationsnsf_________::NSF::CISE/OADDirectorate for " - + - "Computer & Information Science & EngineeringDirectorate for Computer & " + - "Information Science & 
Engineeringnsf:fundingStream"; - - Funder f = DumpGraphEntities.getFunder(funding_Stream); - - Assertions.assertEquals("NSF", f.getShortName()); - Assertions.assertEquals("National Science Foundation", f.getName()); - Assertions.assertEquals("US", f.getJurisdiction()); - - Assertions.assertEquals("NSF::CISE/OAD::CISE/CCF", f.getFunding_stream().getId()); - Assertions - .assertEquals( - "Directorate for Computer & Information Science & Engineering - Division of Computing and Communication Foundations", - f.getFunding_stream().getDescription()); - - } - - @Test - void testFunderThreeeLevels() throws DocumentException, SAXException { - String funding_stream = "ec__________::EC" + - "EC" + - "European Commission" + - "EU" + - "" + - "ec__________::EC::H2020::ERC::ERC-COG" + - "Consolidator Grant" + - "ERC-COGec:h2020toas" + - "ec__________::EC::H2020::ERC" + - "European Research Council" + - "ERCec:h2020fundings" + - "ec__________::EC::H2020H2020" + - "Horizon 2020 Framework Programme" + - "ec:h2020fundings"; - - Funder f = DumpGraphEntities.getFunder(funding_stream); - - Assertions.assertEquals("EC", f.getShortName()); - Assertions.assertEquals("European Commission", f.getName()); - Assertions.assertEquals("EU", f.getJurisdiction()); - - Assertions.assertEquals("EC::H2020::ERC::ERC-COG", f.getFunding_stream().getId()); - Assertions - .assertEquals( - "Horizon 2020 Framework Programme - European Research Council - Consolidator Grant", - f.getFunding_stream().getDescription()); - - } -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java deleted file mode 100644 index 08fcd49..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ /dev/null @@ -1,810 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static org.mockito.Mockito.lenient; - -import java.util.*; -import java.util.function.Consumer; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -@ExtendWith(MockitoExtension.class) -class QueryInformationSystemTest { - - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//context/param[./@name = 'status']/text() = 'all' " + - " return " + - "$x//context"; - - private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return " - + - "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " + - "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " - + - "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)"; - - List communityMap = Arrays - .asList( - "clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri", - "ee@@Sustainable 
Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. " - + - "SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - " - + - "Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community", - "dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community", - "fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community", - "ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. 
The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community", - "mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community", - "instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community", - "elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri", - "aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community", - "dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. 
@@@@dariah@@ri", - "epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri", - "covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community"); - - List communityContext = Arrays - .asList( - "\n" + - " all\n" + - " CLARIN\n" + - " https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg\n" - + - " Common Language Resources and Technology Infrastructure\n" + - " maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " (Part of) the work reported here was made possible by using the CLARIN infrastructure.\n" - + - " The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.\n" - + - " The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n" - + - " (E.g. No 676529 for project CLARIN-PLUS.)\n" + - " oac_clarin\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " CLARIN-PLUS\n" + - " \n" + - " \n" + - " 676529\n" + - " http://www.clarin.eu\n" + - " EC\n" + - " H2020-INFRADEV-1-2015-1\n" + - " CLARIN+\n" + - " \n" + - " \n" + - " Common Language Resources and Technology Infrastructure\n" - + - " CLARIN\n" + - " 212230\n" + - " EC\n" + - " corda_______::ef782b2d85676aa3e5a907427feb18c4\n" + - " \n" + - " \n" + - " " + - "\n" + - " \n" + - " LINDAT/CLARIN repository\n" + - " LINDAT/CLARIN repository\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " CLARIN-D\n" + - " https://www.clarin-d.de/en/\n" + - " \n" + - " http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf\n" - + - " Germany\n" + - " \n" + - " \n" + - " \n" - + - " \n" + - "", - "\n" + - " all\n" + - " This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. 
This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.\n" - + - " http://sanmamante.org/DH_CH_logo.png\n" + - " Digital Humanities and Cultural Heritage\n" + - " ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk\n" - + - " modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " oac_dh-ch\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " \n" + - " 654119\n" + - " http://www.parthenos-project.eu\n" + - " EC\n" + - " PARTHENOS\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " re3data_____::9ebe127e5f3a0bf401875690f3bb6b81\n" + - " The UK's largest collection of digital research data in the social sciences and humanities\n" - + - " UK Data Archive\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::c6cd4b532e12868c1d760a8d7cda6815\n" + - " Journal of Data Mining and Digital Humanities\n" + - " Journal of Data Mining and Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b\n" + - " Frontiers in Digital Humanities\n" + - " Frontiers in Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::6eb31d13b12bc06bbac06aef63cf33c9\n" + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " true\n" + - " \n" + - " \n" - + - " doajarticles::0da84e9dfdc8419576169e027baa8028\n" + - " Conservation Science in Cultural Heritage\n" + - " Conservation Science in Cultural Heritage\n" + - " true\n" + - " \n" + - " \n" - + - " re3data_____::84e123776089ce3c7a33db98d9cd15a8\n" + - " Electronic Archiving System\n" + - " EASY\n" + - " true\n" + - " \n" + - " \n" + - " openaire____::c5502a43e76feab55dd00cf50f519125\n" + - " DANS-KB Harvester\n" + - " Gemeenschappelijke Harvester DANS-KB\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::a48f09c562b247a9919acfe195549b47\n" + - " ads\n" + - " Archaeology Data Service\n" + - " true\n" + - " \n" + - " \n" + - " opendoar____::97275a23ca44226c9964043c8462be96\n" + - " KNAW Repository\n" + - " KNAW Repository\n" + - " true\n" + - " \n" + - 
" \n" - + - " doajarticles::2899208a99aa7d142646e0a80bfeef05\n" + - " Internet Archaeology\n" + - " Internet Archaeology\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.\n" - + - " https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png\n" - + - " Neuroinformatics\n" + - " sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it\n" - + - " brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities\n" - + - " \n" + - " oac_ni\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" + - " Formerly:OpenFMRI\n" + - " OpenNeuro\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::c7d3de67dc77af72f6747157441252ec\n" + - " Research Ideas and Outcomes\n" + - " Research Ideas and Outcomes\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::8515794670370f49c1d176c399c714f5\n" + - " Neuroimaging Informatics Tools and Resources Clearinghouse\n" - + - " NITRC\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::d640648c84b10d425f96f11c3de468f3\n" + - " Frontiers in Neuroinformatics\n" + - " Frontiers in Neuroinformatics\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\n" + - " NeuroImage: Clinical\n" + - " NeuroImage: Clinical\n" + - " true\n" + - " \n" + - " \n" + - " rest________::fb1a3d4523c95e63496e3bc7ba36244b\n" + - " NeuroVault\n" + - " NeuroVault\n" + - " true\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " Instruct-ERIC is the European Research Infrastructure for Structural Biology\n" - + - " https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png\n" - + - " Instruct-ERIC\n" + - " claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " The authors acknowledge the support and the use of resources of Instruct-ERIC.\n" - + - " The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project\n" - + - " oac_instruct\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Authentication and Authorisation For Research and Collaboration\n" - + - " \n" + - " 730941\n" + - " \n" + - " H2020-EINFRA-2016-1\n" + - " AARC2\n" + - " EC\n" + - " \n" + - " \n" - + - " Building data bridges between biological and medical infrastructures in Europe\n" - + - " \n" + - " 284209\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioMedBridges\n" + - " \n" + - " \n" - + - " Transnational 
access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\n" - + - " \n" + - " 283570\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioStruct-X\n" + - " \n" + - " \n" - + - " Coordinated Research Infrastructures Building Enduring Life-science services\n" - + - " \n" + - " 654248\n" + - " \n" + - " H2020-INFRADEV-1-2014-1\n" + - " EC\n" + - " CORBEL\n" + - " \n" + - " \n" - + - " Infrastructure for NMR, EM and X-rays for translational research\n" - + - " \n" + - " 653706\n" + - " \n" + - " H2020-INFRAIA-2014-2015\n" + - " EC\n" + - " iNEXT\n" + - " \n" + - " \n" - + - " Integrated Structural Biology Infrastructure\n" + - " \n" + - " 211252\n" + - " \n" + - " FP7-INFRASTRUCTURES-2007-1\n" + - " EC\n" + - " INSTRUCT\n" + - " \n" + - " \n" - + - " Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\n" - + - " \n" + - " 731005\n" + - " \n" + - " H2020-INFRADEV-2016-1\n" + - " EC\n" + - " INSTRUCT-ULTRA\n" + - " \n" + - " \n" - + - " Opening Synchrotron Light for Experimental Science and Applications in the Middle East\n" - + - " \n" + - " 730943\n" + - " \n" + - " H2020-INFRASUPP-2016-1\n" + - " EC\n" + - " OPEN SESAME\n" + - " \n" + - " \n" - + - " Infrastructure for Protein Production Platforms\n" - + - " \n" + - " 227764\n" + - " \n" + - " FP7-INFRASTRUCTURES-2008-1\n" + - " EC\n" + - " PCUBE\n" + - " \n" + - " \n" - + - " European Vaccine Research and Development Infrastructure\n" - + - " \n" + - " 730964\n" + - " \n" + - " H2020-INFRAIA-2016-1\n" + - " EC\n" + - " TRAMSVAC2\n" + - " \n" + - " \n" - + - " World-wide E-infrastructure for structural biology\n" - + - " \n" + - " 675858\n" + - " \n" + - " H2020-EINFRA-2015-1\n" + - " EC\n" + - " West-Life\n" + - " \n" + - " \n" + - " Expanding research infrastructure visibility to strengthen strategic partnerships\n" - + - " RI-VIS\n" + - " 824063\n" + - " EC\n" + - " corda__h2020::af93b591b76991d8437993a8f6fc6538\n" + - " \n" + - " \n" + - " \n" - + - " \n" - + - " \n" + - " instruct\n" + - " \n" + - " \n" + - " \n" - + - " west-life\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " FRISBI\n" + - " aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==\n" - + - " aHR0cDovL2ZyaXNiaS5ldS8=\n" + - " \n" + - " \n" + - " RI-VIS\n" + - " aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=\n" - + - " aHR0cHM6Ly9yaS12aXMuZXU=\n" + - " \n" + - " \n" + - " CIISB\n" + - " aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=\n" + - " aHR0cHM6Ly93d3cuY2lpc2Iub3Jn\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. 
More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.\n" - + - " https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png\n" - + - " The Greek National Node of the ESFRI European RI ELIXIR\n" + - " vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " \n" + - " oaa_elixir-gr\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " \n" + - " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" + - " bio.tools\n" + - " bio.tools\n" + - " false\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " \n" + - " ATHENA RC\n" + - " aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=\n" - + - " aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=\n" + - " \n" + - " \n" - + - ""); - - @Mock - private ISLookUpService isLookUpService; - - private QueryInformationSystem queryInformationSystem; - - private Map map; - - @BeforeEach - public void setUp() throws ISLookUpException { - lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap); - lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityContext); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(isLookUpService); - } - - @Test - void testSizeEntity() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); - - Assertions.assertEquals(12, cInfoList.size()); - } - - @Test - void testSizeRelation() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.execContextRelationQuery(); - queryInformationSystem - .getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); - - Assertions.assertEquals(5, cInfoList.size()); - } - - @Test - void testContentRelation() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.execContextRelationQuery(); - queryInformationSystem - .getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); - - cInfoList.forEach(contextInfo -> { - switch (contextInfo.getId()) { - case "elixir-gr": - Assertions.assertEquals(1, contextInfo.getDatasourceList().size()); - Assertions - .assertEquals( - "10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5", - contextInfo.getDatasourceList().get(0)); - break; - case "instruct": - Assertions.assertEquals(0, contextInfo.getDatasourceList().size()); - break; - case "ni": - Assertions.assertEquals(6, contextInfo.getDatasourceList().size()); - Assertions - .assertTrue( - contextInfo - .getDatasourceList() - .contains("10|rest________::fb1a3d4523c95e63496e3bc7ba36244b")); - break; - case "dh-ch": - Assertions.assertEquals(10, contextInfo.getDatasourceList().size()); - break; - case "clarin": - Assertions.assertEquals(0, contextInfo.getDatasourceList().size()); - break; - } - }); - } - - @Test - 
void testContentEntity() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); - - cInfoList.forEach(context -> { - switch (context.getId()) { - case "clarin":// clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin", - Assertions - .assertEquals("Common Language Resources and Technology Infrastructure", context.getName()); - Assertions.assertEquals("CLARIN", context.getDescription()); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals("oac_clarin", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - break; - case "ee": - Assertions.assertEquals("Sustainable Development Solutions Network - Greece", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(17, context.getSubject().size()); - Assertions.assertEquals("oac_sdsn-greece", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - break; - case "dh-ch": - Assertions.assertEquals("Digital Humanities and Cultural Heritage", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(67, context.getSubject().size()); - Assertions.assertEquals("oac_dh-ch", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - break; - case "fam": - Assertions.assertEquals("Fisheries and Aquaculture Management", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith("Conservation of marine resources for sustainable development")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(19, context.getSubject().size()); - Assertions.assertEquals("fisheries", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - break; - case "ni": - Assertions.assertEquals("Neuroinformatics", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith("The neuroinformatics dashboard gathers research outputs from the")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(18, context.getSubject().size()); - Assertions.assertEquals("oac_ni", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("brain")); - break; - case "mes": - Assertions.assertEquals("European Marine Science", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "This community was initially defined to include a very broad range of topics")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(5, context.getSubject().size()); - 
Assertions.assertEquals("oac_mes", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("sea")); - Assertions.assertTrue(context.getSubject().contains("fish")); - Assertions.assertTrue(context.getSubject().contains("ocean")); - Assertions.assertTrue(context.getSubject().contains("aqua")); - Assertions.assertTrue(context.getSubject().contains("marine")); - break; - case "instruct": - Assertions.assertEquals("Instruct-ERIC", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .equals( - "Instruct-ERIC is the European Research Infrastructure for Structural Biology")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals("oac_instruct", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - - break; - case "elixir-gr": - Assertions - .assertEquals("The Greek National Node of the ESFRI European RI ELIXIR", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals("oaa_elixir-gr", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - - break; - case "aginfra": - Assertions.assertEquals("Agricultural and Food Sciences", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "The scope of this community is to provide access to publications, research data, projects and software")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(18, context.getSubject().size()); - Assertions.assertEquals("oac_aginfra", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("food distribution")); - break; - case "dariah": - Assertions.assertEquals("DARIAH EU", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support ")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - - Assertions.assertEquals("dariah", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - - break; - case "epos": - Assertions.assertEquals("European Plate Observing System", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of ")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - - Assertions.assertEquals("", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - - break; - case "covid-19": - 
Assertions.assertEquals("Corona Virus Disease", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "This portal provides access to publications, research data, projects and ")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(25, context.getSubject().size()); - Assertions.assertEquals("covid-19", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("coronavirus disease 2019")); - break; - - } - }); - - } -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java deleted file mode 100644 index b9a0814..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java +++ /dev/null @@ -1,121 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; - -import org.apache.commons.io.FileUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.oa.model.graph.Relation; - -public class RelationFromOrganizationTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private static SparkSession spark; - - private static Path workingDir; - - private static final Logger log = LoggerFactory - .getLogger(RelationFromOrganizationTest.class); - - private static final HashMap map = new HashMap<>(); - - String organizationCommunityMap = "{\"20|grid________::afaa39865943381c51f76c08725ffa75\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8\":[\"mes\",\"euromarine\"], 
\"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb\":[\"mes\",\"euromarine\"],\"20|rcuk________::e054eea0a47665af8c3656b5785ccf76\":[\"mes\",\"euromarine\"],\"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151\":[\"mes\",\"euromarine\"],\"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27\":[\"mes\",\"euromarine\"],\"20|snsf________::8fa091f8f25a846779acb4ea97b50aef\":[\"mes\",\"euromarine\"],\"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|corda_______::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78\":[\"mes\",\"euromarine\"],\"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70\":[\"mes\",\"euromarine\"],\"20|rcuk________::e16010089551a1a9182a94604fc0ea59\":[\"mes\",\"euromarine\"],\"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7\":[\"mes\",\"euromarine\"],\"20|snsf________::74730ef1439d7f7636a8be58a6b471b8\":[\"mes\",\"euromarine\"],\"20|nsf_________::ad72e19043a5a467e35f9b444d11563e\":[\"mes\",\"euromarine\"],\"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3\":[\"mes\",\"euromarine\"],\"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea\":[\"mes\",\"euromarine\"],\"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317\":[\"mes\",\"euromarine\"], \"20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f\":[\"mes\",\"euromarine\"], \"20|corda__h2020::65531bd11be9935948c7f2f4db1c1832\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946\":[\"mes\",\"euromarine\"], \"20|snsf________::3eb43582ac27601459a8d8b3e195724b\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6\":[\"mes\",\"euromarine\"], \"20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0\":[\"mes\",\"euromarine\"], \"20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0\":[\"beopen\"], " - + - "\"20|grid________::a867f78acdc5041b34acfe4f9a349157\":[\"beopen\"], \"20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff\":[\"beopen\"], \"20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad\":[\"beopen\"], \"20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda_______::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda__h2020::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::15911e01e9744d57205825d77c218737\":[\"beopen\"], \"20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab\":[\"beopen\"], \"20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3\":[\"beopen\"], \"20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3\":[\"beopen\"], \"20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9\":[\"beopen\"], \"20|corda_______::3ff558e30c2e434d688539548300b050\":[\"beopen\"], \"20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39\":[\"beopen\"], \"20|corda__h2020::5187217e2e806a6df3579c46f82401bc\":[\"beopen\"], \"20|grid________::5fa7e2709bcd945e26bfa18689adeec1\":[\"beopen\"], \"20|corda_______::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], \"20|corda__h2020::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], 
\"20|rcuk________::23a79ebdfa59790864e4a485881568c1\":[\"beopen\"], \"20|corda__h2020::b76cf8fe49590a966953c37e18608af9\":[\"beopen\"], \"20|grid________::d2f0204126ee709244a488a4cd3b91c2\":[\"beopen\"], \"20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6\":[\"beopen\"], \"20|grid________::802401579481dc32062bdee69f5e6a34\":[\"beopen\"], \"20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d\":[\"beopen\"]}"; - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(RelationFromOrganizationTest.class.getSimpleName()); - log.info("using work dir {}", workingDir); - - SparkConf conf = new SparkConf(); - conf.setAppName(RelationFromOrganizationTest.class.getSimpleName()); - - conf.setMaster("local[*]"); - conf.set("spark.driver.host", "localhost"); - conf.set("hive.metastore.local", "true"); - conf.set("spark.ui.enabled", "false"); - conf.set("spark.sql.warehouse.dir", workingDir.toString()); - conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); - - spark = SparkSession - .builder() - .appName(RelationFromOrganizationTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); - } - - @AfterAll - public static void afterAll() throws IOException { - FileUtils.deleteDirectory(workingDir.toFile()); - spark.stop(); - } - - @Test - void test1() throws Exception { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation") - .getPath(); - - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymapservices.json") - .getPath(); - - SparkOrganizationRelation.main(new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-outputPath", workingDir.toString() + "/relation", - "-sourcePath", sourcePath, - "-organizationCommunityMap", organizationCommunityMap, - "-communityMapPath", communityMapPath - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/relation") - .map(item -> OBJECT_MAPPER.readValue(item, Relation.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Relation.class)); - - verificationDataset.createOrReplaceTempView("table"); - - // Assertions.assertEquals(170, verificationDataset.count()); - Assertions.assertEquals(0, verificationDataset.count()); - -// Dataset checkDs = spark -// .sql( -// "Select source.id, source.type " + -// "from table "); -// -// Assertions.assertEquals(2, checkDs.filter("substr(id, 4, 5) = 'dedup' ").count()); -// -// Assertions.assertEquals(0, checkDs.filter("id = '20|grid________::afaa39865943381c51f76c08725ffa75'").count()); -// -// Assertions.assertEquals(25, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("beopen") + "'").count()); -// -// Assertions -// .assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("euromarine") + "'").count()); -// -// Assertions.assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("mes") + "'").count()); - } - -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java deleted file mode 100644 index d430376..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java +++ /dev/null @@ -1,95 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.IOException; 
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java
deleted file mode 100644
index d430376..0000000
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/SelectRelationTest.java
+++ /dev/null
@@ -1,95 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.complete;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.HashMap;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.schema.oaf.Relation;
-
-public class SelectRelationTest {
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	private static SparkSession spark;
-
-	private static Path workingDir;
-
-	private static final Logger log = LoggerFactory
-		.getLogger(SelectRelationTest.class);
-
-	private static HashMap<String, String> map = new HashMap<>();
-
-	@BeforeAll
-	public static void beforeAll() throws IOException {
-		workingDir = Files
-			.createTempDirectory(SelectRelationTest.class.getSimpleName());
-		log.info("using work dir {}", workingDir);
-
-		SparkConf conf = new SparkConf();
-		conf.setAppName(SelectRelationTest.class.getSimpleName());
-
-		conf.setMaster("local[*]");
-		conf.set("spark.driver.host", "localhost");
-		conf.set("hive.metastore.local", "true");
-		conf.set("spark.ui.enabled", "false");
-		conf.set("spark.sql.warehouse.dir", workingDir.toString());
-		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
-
-		spark = SparkSession
-			.builder()
-			.appName(SelectRelationTest.class.getSimpleName())
-			.config(conf)
-			.getOrCreate();
-	}
-
-	@AfterAll
-	public static void afterAll() throws IOException {
-		FileUtils.deleteDirectory(workingDir.toFile());
-		spark.stop();
-	}
-
-	@Test
-	public void test1() throws Exception {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/selectrelations")
-			.getPath();
-
-		SparkSelectValidRelationsJob.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/relation",
-			"-sourcePath", sourcePath
-		});
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<Relation> tmp = sc
-			.textFile(workingDir.toString() + "/relation")
-			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
-
-		Dataset<Relation> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
-
-		Assertions.assertTrue(verificationDataset.count() == 7);
-
-	}
-
-}
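The deleted tests above all verify job output the same way: read the dumped text files line by line, deserialize each line with Jackson, and wrap the resulting JavaRDD in a typed Dataset for counting and filtering. A minimal, self-contained sketch of that pattern, using only the classes these tests already import:

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.oaf.Relation;

public class VerificationDatasetSketch {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	// Reads newline-delimited JSON and exposes it as a typed Dataset,
	// mirroring the verification blocks of the deleted tests above.
	public static Dataset<Relation> load(SparkSession spark, String path) {
		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
		JavaRDD<Relation> rdd = sc
			.textFile(path)
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
		return spark.createDataset(rdd.rdd(), Encoders.bean(Relation.class));
	}
}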
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java
index 48f0b31..d3f7c07 100644
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java
+++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultTest.java
@@ -6,7 +6,6 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Optional;
 
 import org.apache.commons.io.FileUtils;
@@ -15,8 +14,6 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Assertions;
@@ -27,15 +24,10 @@ import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 
-import eu.dnetlib.dhp.eosc.model.EoscResult;
 import eu.dnetlib.dhp.eosc.model.Indicator;
 import eu.dnetlib.dhp.eosc.model.Organization;
-import eu.dnetlib.dhp.oa.graph.dump.Utils;
-import eu.dnetlib.dhp.oa.graph.dump.complete.SelectRelationTest;
-import eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob;
+import eu.dnetlib.dhp.eosc.model.Result;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.schema.oaf.Result;
 import scala.Tuple2;
 
 /**
@@ -61,7 +53,7 @@ public class SelectEoscResultTest {
 		log.info("using work dir {}", workingDir);
 
 		SparkConf conf = new SparkConf();
-		conf.setAppName(SelectRelationTest.class.getSimpleName());
+		conf.setAppName(SelectEoscResultTest.class.getSimpleName());
 
 		conf.setMaster("local[*]");
 		conf.set("spark.driver.host", "localhost");
@@ -72,7 +64,7 @@ public class SelectEoscResultTest {
 
 		spark = SparkSession
 			.builder()
-			.appName(SelectRelationTest.class.getSimpleName())
+			.appName(SelectEoscResultTest.class.getSimpleName())
 			.config(conf)
 			.getOrCreate();
 	}
@@ -104,9 +96,9 @@ public class SelectEoscResultTest {
 
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 
-		JavaRDD<EoscResult> tmp = sc
+		JavaRDD<Result> tmp = sc
 			.textFile(workingDir.toString() + "/publication")
-			.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
 
 		Assertions.assertEquals(3, tmp.count());
 
@@ -155,9 +147,9 @@ public class SelectEoscResultTest {
 
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 
-		JavaRDD<EoscResult> tmp = sc
+		JavaRDD<Result> tmp = sc
 			.textFile(workingDir.toString() + "/publication")
-			.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
 
 		Assertions.assertEquals(3, tmp.count());
 
@@ -276,7 +268,7 @@ public class SelectEoscResultTest {
 			.getPath();
 
 		Utils
-			.readPath(spark, actionSetPath, Result.class)
+			.readPath(spark, actionSetPath, eu.dnetlib.dhp.schema.oaf.Result.class)
 			.toJavaRDD()
 			.map(p -> new AtomicAction(p.getClass(), p))
 			.mapToPair(
@@ -294,9 +286,9 @@ public class SelectEoscResultTest {
 				});
 
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-		JavaRDD<EoscResult> tmp = sc
+		JavaRDD<Result> tmp = sc
 			.textFile(workingDir.toString() + "/publication")
-			.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
+			.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
 
 		Assertions
 			.assertEquals(
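The usage-counts scenario in SelectEoscResultTest stages its input as a Hadoop sequence file of serialized AtomicAction objects. The body of the mapToPair call is elided between the hunks above, so the key/value layout below (class name as Text key, JSON payload as Text value) is an assumption sketched from the surrounding imports, not the verbatim test code:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.api.java.JavaRDD;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;

public class ActionSetStagingSketch {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	// Wraps each Result in an AtomicAction and stores the pairs as a
	// sequence file, the input layout the tested job reads back.
	public static void stage(JavaRDD<Result> results, String outputPath) {
		results
			.mapToPair(
				p -> new Tuple2<>(
					new Text(p.getClass().getCanonicalName()),
					new Text(OBJECT_MAPPER.writeValueAsString(new AtomicAction(p.getClass(), p)))))
			.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
	}
}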
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java
deleted file mode 100644
index a26772f..0000000
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java
+++ /dev/null
@@ -1,139 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.funderresult;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.HashMap;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject;
-import eu.dnetlib.dhp.oa.model.community.CommunityResult;
-
-public class ResultLinkedToProjectTest {
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	private static SparkSession spark;
-
-	private static Path workingDir;
-
-	private static final Logger log = LoggerFactory
-		.getLogger(ResultLinkedToProjectTest.class);
-
-	private static final HashMap<String, String> map = new HashMap<>();
-
-	@BeforeAll
-	public static void beforeAll() throws IOException {
-		workingDir = Files
-			.createTempDirectory(
-				ResultLinkedToProjectTest.class.getSimpleName());
-		log.info("using work dir {}", workingDir);
-
-		SparkConf conf = new SparkConf();
-		conf.setAppName(ResultLinkedToProjectTest.class.getSimpleName());
-
-		conf.setMaster("local[*]");
-		conf.set("spark.driver.host", "localhost");
-		conf.set("hive.metastore.local", "true");
-		conf.set("spark.ui.enabled", "false");
-		conf.set("spark.sql.warehouse.dir", workingDir.toString());
-		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
-
-		spark = SparkSession
-			.builder()
-			.appName(ResultLinkedToProjectTest.class.getSimpleName())
-			.config(conf)
-			.getOrCreate();
-	}
-
-	@AfterAll
-	public static void afterAll() throws IOException {
-		FileUtils.deleteDirectory(workingDir.toFile());
-		spark.stop();
-	}
-
-	@Test
-	void testNoMatch() throws Exception {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json")
-			.getPath();
-
-		final String graphPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
-			.getPath();
-
-		final String communityMapPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
-			.getPath();
-
-		SparkResultLinkedToProject.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/preparedInfo",
-			"-sourcePath", sourcePath,
-			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
-			"-graphPath", graphPath,
-			"-communityMapPath", communityMapPath
-
-		});
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<CommunityResult> tmp = sc
-			.textFile(workingDir.toString() + "/preparedInfo")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-
-		Assertions.assertEquals(0, tmp.count());
-
-	}
-
-	@Test
-	void testMatchOne() throws Exception {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json")
-			.getPath();
-
-		final String graphPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
-			.getPath();
-
-		final String communityMapPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
-			.getPath();
-
-		SparkResultLinkedToProject.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/preparedInfo",
-			"-sourcePath", sourcePath,
-			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
-			"-graphPath", graphPath,
-			"-communityMapPath", communityMapPath
-
-		});
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		JavaRDD<CommunityResult> tmp = sc
-			.textFile(workingDir.toString() + "/preparedInfo")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-
-		Assertions.assertEquals(1, tmp.count());
-
-	}
-
-}
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java
deleted file mode 100644
index eed07ae..0000000
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java
+++ /dev/null
@@ -1,145 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.funderresult;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults;
-import eu.dnetlib.dhp.oa.model.community.CommunityResult;
-
-public class SplitPerFunderTest {
-
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-	private static SparkSession spark;
-
-	private static Path workingDir;
-
-	private static final Logger log = LoggerFactory.getLogger(SplitPerFunderTest.class);
-
-	@BeforeAll
-	public static void beforeAll() throws IOException {
-		workingDir = Files.createTempDirectory(SplitPerFunderTest.class.getSimpleName());
-		log.info("using work dir {}", workingDir);
-
-		SparkConf conf = new SparkConf();
-		conf.setAppName(SplitPerFunderTest.class.getSimpleName());
-
-		conf.setMaster("local[*]");
-		conf.set("spark.driver.host", "localhost");
-		conf.set("hive.metastore.local", "true");
-		conf.set("spark.ui.enabled", "false");
-		conf.set("spark.sql.warehouse.dir", workingDir.toString());
-		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
-
-		spark = SparkSession
-			.builder()
-			.appName(SplitPerFunderTest.class.getSimpleName())
-			.config(conf)
-			.getOrCreate();
-	}
-
-	@AfterAll
-	public static void afterAll() throws IOException {
-		FileUtils.deleteDirectory(workingDir.toFile());
-		spark.stop();
-	}
-
-	@Test
-	void test1() throws Exception {
-
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext")
-			.getPath();
-
-		SparkDumpFunderResults.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/split",
-			"-sourcePath", sourcePath
-
-		});
-
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-
-		// FP7 3 and H2020 3
-		JavaRDD<CommunityResult> tmp = sc
-			.textFile(workingDir.toString() + "/split/EC_FP7")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-
-		Dataset<CommunityResult> verificationDataset = spark
-			.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
-
-		Assertions.assertEquals(3, verificationDataset.count());
-
-		Assertions
-			.assertEquals(
-				1, verificationDataset.filter("id = '50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776'").count());
-
-		// CIHR 2
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/CIHR")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(2, tmp.count());
-
-		// NWO 1
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/NWO")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(1, tmp.count());
-
-		// NIH 2
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/NIH")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(2, tmp.count());
-
-		// NSF 1
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/NSF")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(1, tmp.count());
-
-		// SNSF 1
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/SNSF")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(1, tmp.count());
-
-		// NHMRC 1
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/NHMRC")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(1, tmp.count());
-
-		// H2020 3
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/EC_H2020")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(3, tmp.count());
-
-		// MZOS 1
-		tmp = sc
-			.textFile(workingDir.toString() + "/split/MZOS")
-			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
-		Assertions.assertEquals(1, tmp.count());
-
-	}
-
-}
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectSubsetTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectSubsetTest.java
deleted file mode 100644
index ffbe8ce..0000000
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/projectssubset/ProjectSubsetTest.java
+++ /dev/null
@@ -1,124 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SaveMode;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.oa.model.graph.Project;
-
-public class ProjectSubsetTest {
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-	private static SparkSession spark;
-	private static Path workingDir;
-	private static final Logger log = LoggerFactory
-		.getLogger(ProjectSubsetTest.class);
-
-	@BeforeAll
-	public static void beforeAll() throws IOException {
-		workingDir = Files
-			.createTempDirectory(
-				ProjectSubsetTest.class.getSimpleName());
-		log.info("using work dir {}", workingDir);
-		SparkConf conf = new SparkConf();
-		conf.setAppName(ProjectSubsetTest.class.getSimpleName());
-		conf.setMaster("local[*]");
-		conf.set("spark.driver.host", "localhost");
-		conf.set("hive.metastore.local", "true");
-		conf.set("spark.ui.enabled", "false");
-		conf.set("spark.sql.warehouse.dir", workingDir.toString());
-		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
-		spark = SparkSession
-			.builder()
-			.appName(ProjectSubsetTest.class.getSimpleName())
-			.config(conf)
-			.getOrCreate();
-	}
-
-	@AfterAll
-	public static void afterAll() throws IOException {
-		FileUtils.deleteDirectory(workingDir.toFile());
-		spark.stop();
-	}
-
-	@Test
-	void testAllNew() throws Exception {
-		final String projectListPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
-			.getPath();
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/allnew/projects")
-			.getPath();
-		spark
-			.read()
-			.textFile(projectListPath)
-			.write()
-			.mode(SaveMode.Overwrite)
-			.text(workingDir.toString() + "/projectIds");
-		ProjectsSubsetSparkJob.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/projects",
-			"-sourcePath", sourcePath,
-			"-projectListPath", workingDir.toString() + "/projectIds"
-		});
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-		JavaRDD<Project> tmp = sc
-			.textFile(workingDir.toString() + "/projects")
-			.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
-		Assertions.assertEquals(12, tmp.count());
-		Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
-		Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
-		Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
-		Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
-		Assertions.assertEquals(1, tmp.filter(p -> p.getId().substring(3, 15).equals("corda_______")).count());
-		Assertions.assertEquals(40, sc.textFile(workingDir.toString() + "/projectIds").count());
-	}
-
-	@Test
-	void testMatchOne() throws Exception {
-		final String projectListPath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
-			.getPath();
-		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/matchOne/projects")
-			.getPath();
-		spark
-			.read()
-			.textFile(projectListPath)
-			.write()
-			.mode(SaveMode.Overwrite)
-			.text(workingDir.toString() + "/projectIds");
-		ProjectsSubsetSparkJob.main(new String[] {
-			"-isSparkSessionManaged", Boolean.FALSE.toString(),
-			"-outputPath", workingDir.toString() + "/projects",
-			"-sourcePath", sourcePath,
-			"-projectListPath", workingDir.toString() + "/projectIds"
-		});
-		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
-		JavaRDD<Project> tmp = sc
-			.textFile(workingDir.toString() + "/projects")
-			.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
-		Assertions.assertEquals(11, tmp.count());
-		Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("aka_________")).count());
-		Assertions.assertEquals(2, tmp.filter(p -> p.getId().substring(3, 15).equals("anr_________")).count());
-		Assertions.assertEquals(4, tmp.filter(p -> p.getId().substring(3, 15).equals("arc_________")).count());
-		Assertions.assertEquals(3, tmp.filter(p -> p.getId().substring(3, 15).equals("conicytf____")).count());
-		Assertions.assertEquals(0, tmp.filter(p -> p.getId().substring(3, 15).equals("corda__h2020")).count());
-		Assertions.assertEquals(39, sc.textFile(workingDir.toString() + "/projectIds").count());
-	}
-}
diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel
index
979cbf1..0c41057 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -1,2 +1,2 @@ -{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. 
Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. 
This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} -{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services 
hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. 
We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file 
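The rewritten fixture records below follow the trimmed eu.dnetlib.dhp.eosc.model.Result shape used by the updated tests: note "subject":null where the old records carried a populated "subjects" array. A minimal sketch of how such a newline-delimited record is consumed, mirroring the readValue pattern in the tests above:

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.eosc.model.Result;

public class FixtureLineSketch {

	// Each fixture line is one complete JSON record of the dump model.
	public static Result parse(String line) throws Exception {
		return new ObjectMapper().readValue(line, Result.class);
	}
}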
+{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subject":null,"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. 
We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} 
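The second rewritten record follows; it differs from the first only in its identifier prefix (fakeoft_____ instead of pensoft_____). A hedged one-assertion check against the remodelled subject field (the getSubject accessor is assumed from the JSON key and is not shown in this patch):

import com.fasterxml.jackson.databind.ObjectMapper;

import org.junit.jupiter.api.Assertions;

import eu.dnetlib.dhp.eosc.model.Result;

public class SubjectRegressionSketch {

	// The remodelled records carry "subject":null; accessor name assumed.
	public static void assertNoSubject(String line) throws Exception {
		Assertions.assertNull(new ObjectMapper().readValue(line, Result.class).getSubject());
	}
}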
+{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subject":null,"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. 
We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"measures":[{"key":"influence","value":"1.62759106106e-08"},{"key":"popularity","value":"0.22519296"}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/software.json b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/software.json index 1e5ed66..b5c885a 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/software.json +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/software.json @@ -1,6 +1,6 @@ -{"author":[],"collectedfrom":[{"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"}],"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["Chilibot searches the PubMed literature database based on specific relationships between proteins, genes, or keywords. 
The results are returned as a graph."],"documentationUrl":["http://www.chilibot.net"],"format":[],"id":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80","instance":[{"collectedfrom":{"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"},"hostedby":{"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"},"publicationdate":"2017-05-01","type":"Software","url":["https://bio.tools/chilibot"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1591282924188,"maintitle":"Chilibot","originalId":["__bioTools__::c17ebaca97f287b181090c9b4cba766e"],"pid":[],"programmingLanguage":"UNKNOWN","publicationdate":"2017-05-01","publisher":"bio.tools","subjects":[{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Natural language processing"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Molecular interactions, pathways and networks"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Proteins"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Literature and language"}},{"provenance":{"provenance":"iis","trust":"0.8739"},"subject":{"scheme":"mesheuropmc","value":"human activities"}}],"type":"software"} -{"author":[{"fullname":"Agustoni, Marco","name":"Marco","rank":1,"surname":"Agustoni"},{"fullname":"Marti, Lukas","name":"Lukas","rank":2,"surname":"Marti"},{"fullname":"ATLAS, Collaboration","name":"Collaboration","rank":3,"surname":"Atlas"},{"fullname":"Schneider, Basil","name":"Basil","rank":4,"surname":"Schneider"},{"fullname":"Gallo, Valentina","name":"Valentina","rank":5,"surname":"Gallo"},{"fullname":"Ereditato, Antonio","name":"Antonio","rank":6,"surname":"Ereditato"},{"fullname":"Sciacca, Gianfranco","name":"Gianfranco","rank":7,"surname":"Sciacca"},{"fullname":"Haug, Sigve","name":"Sigve","rank":8,"surname":"Haug"},{"fullname":"Kabana, Sonja","name":"Sonja","rank":9,"surname":"Kabana"},{"fullname":"Kruker, Tobias","name":"Tobias","rank":10,"surname":"Kruker"},{"fullname":"Ancu, Lucian","name":"Lucian","rank":11,"surname":"Ancu"},{"fullname":"Battaglia, Andreas","name":"Andreas","rank":12,"surname":"Battaglia"},{"fullname":"Beck, Hans Peter","name":"Hans Peter","rank":13,"surname":"Beck"},{"fullname":"Pretzl, Klaus-Peter","name":"Klaus-Peter","rank":14,"surname":"Pretzl"},{"fullname":"Borer, Claudia","name":"Claudia","rank":15,"surname":"Borer"},{"fullname":"Weber, Michael","name":"Michael","rank":16,"surname":"Weber"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"code":"egi","label":"EGI Federation","provenance":[{"provenance":"iis","trust":"0.9"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":[],"documentationUrl":[],"embargoenddate":"2013-01-01","format":["application/pdf"],"id":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository"},"publicationdate":"2013-01-01","type":"Software","url":["http://dx.doi.org/10.7892/boris.58468"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1591283098072,"maintitle":"Search for pair-produced massive coloured scalars in four-jet final states with the ATLAS detector in proton-proton collisions at sqrts=7 TeV","originalId":["datacite____::04dd1f84f3a429ecf1e838afcd94cb3f"],"pid":[{"scheme":"doi","value":"10.7892/boris.58468"}],"programmingLanguage":"application/pdf","publicationdate":"2013-01-01","publisher":"EDP Sciences","subjects":[{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"530 Physics"}}],"type":"software"} -{"author":[{"fullname":"Regev, Mor","name":"Mor","rank":1,"surname":"Regev"},{"fullname":"Simony, Erez","name":"Erez","rank":2,"surname":"Simony"},{"fullname":"Lee, Katherine","name":"Katherine","rank":3,"surname":"Lee"},{"fullname":"Tan, Kean Ming","name":"Kean Ming","rank":4,"surname":"Tan"},{"fullname":"Chen, Janice","name":"Janice","rank":5,"surname":"Chen"},{"fullname":"Hasson, Uri","name":"Uri","rank":6,"surname":"Hasson"}],"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"code":"ni","label":"Neuroinformatics","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":["Regev, Mor"],"country":[],"coverage":[],"dateofcollection":"","description":["This capsule demonstrates the inter-subject functional correlation (ISFC) analysis described in \"Propagation of information along the cortical hierarchy as a function of attention while reading and listening to stories \" by Regev, Simony, Lee, Tan, Chen and Hasson."],"documentationUrl":[],"format":[],"id":"50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795","instance":[{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::c52707e6a6b63c5aeac022e62cc8cee7","value":"Code Ocean"},"license":"https://opensource.org/licenses/MIT","publicationdate":"2018-01-01","type":"Software","url":["http://dx.doi.org/10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c","https://codeocean.com/2018/10/30/intersubject-functional-correlation-lpar-isfc-rpar-as-a-function-of-attention"]}],"language":{"code":"en-us","label":"en-us"},"lastupdatetimestamp":1591282729250,"maintitle":"Intersubject functional correlation (ISFC) as a function of attention","originalId":["datacite____::6b1e3a2fa60ed8c27317a66d6357f795"],"pid":[{"scheme":"doi","value":"10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c"}],"programmingLanguage":"UNKNOWN","publicationdate":"2018-01-01","publisher":"Code Ocean","subjects":[{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Capsule"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Biology"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"fmri"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"neuroscience"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"language"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"attention"}}],"type":"software"} -{"author":[{"fullname":"Winebrenner, 
Dale","name":"Dale","pid":{"id":{"scheme":"ORCID","value":"0000-0003-2341-1083"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},"rank":1,"surname":"Winebrenner"},{"fullname":"MacGregor, Joseph","name":"Joseph","rank":2,"surname":"Macgregor"},{"fullname":"Kintner, Paul","name":"Paul","rank":3,"surname":"Kintner"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"code":"science-innovation-policy","label":"Science and Innovation Policy Studies","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["

Collection of Matlab scripts and data files to implement and exercise data analysis and modeling in support of the paper \"New Estimates of Ice and Oxygen Fluxes Across the Entire Lid of Lake Vostok from Observations of Englacial Radiowave Attenuation\", in review at the Journal of Geophysical Research - Earth Surface, October 2018. 

"],"documentationUrl":[],"format":[],"id":"50|dedup_wf_001::0347b1cd516fc59e41ba92e0d74e4e9f","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"https://creativecommons.org/licenses/by/4.0","publicationdate":"2018-01-01","type":"Software","url":["https://zenodo.org/record/1467433","http://dx.doi.org/10.5281/zenodo.1467432"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2018-01-01","type":"Software","url":["https://figshare.com/articles/Lake_Vostok_radar_attenuation_and_basal_accretion_data_models_and_computations/7246043","http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"https://creativecommons.org/licenses/by/4.0","publicationdate":"2018-01-01","type":"Software","url":["https://zenodo.org/record/1467433","http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2018-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2018-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.1467433"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1591291157172,"maintitle":"Lake Vostok Radar Attenuation And Basal Accretion Data, Models And Computations","originalId":["datacite____::6ad543d8217b00d3c6e96f0b0a310d9f","r37980778c78::1a67aade293d5f0c985073a3470fdd9c","datacite____::51f29f85c008323ab696ef1c51ab242c","r37b0ad08687::213cee42e7f14a78806470a35a09f87a","od______2659::213cee42e7f14a78806470a35a09f87a"],"pid":[{"scheme":"doi","value":"10.5281/zenodo.1467432"},{"scheme":"doi","value":"10.5281/zenodo.1467433"}],"programmingLanguage":"UNKNOWN","publicationdate":"2018-01-01","publisher":"Zenodo","subjects":[{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Lake Vostok, 
radioglaciology"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Cancer"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Inorganic Chemistry"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Science Policy"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"59999 Environmental Sciences not elsewhere classified"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"69999 Biological Sciences not elsewhere classified"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"80699 Information Systems not elsewhere classified"}}],"type":"software"} -{"author":[{"fullname":"Laboissière, Rafael","name":"Rafael","rank":1,"surname":"Laboissière"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"}],"context":[{"code":"science-innovation-policy","label":"Science and Innovation Policy Studies","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["

This release contains the raw data, as well as the pre-processing and statistical analysis scripts (in R) for the experiments of perception of object stability under vection (illusion of self movement).

"],"documentationUrl":[],"format":[],"id":"50|dedup_wf_001::1432beb6171baa5da8a85a7f99545d69","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://www.opensource.org/licenses/GPL-3.0","publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://www.opensource.org/licenses/GPL-3.0","publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400","https://zenodo.org/record/801400"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400","https://figshare.com/articles/rlaboiss_vextab-data_Raw_data_and_statistical_analysis_code_for_the_vextab_study/11470608"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://www.opensource.org/licenses/GPL-3.0","publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801399","https://zenodo.org/record/801400"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1591291157172,"maintitle":"rlaboiss/vextab-data: Raw data and statistical analysis code for the vextab study","originalId":["r37b0ad08687::25acd09277dae71c80810366a599421c","datacite____::b353952b541b80dfed37f4e48b86e4c2","od______2659::25acd09277dae71c80810366a599421c","r37980778c78::a02038e6624d01df9cf22cb709d7fe92","datacite____::c2c7b341f563b61294ec4c7396a3f984"],"pid":[{"scheme":"doi","value":"10.5281/zenodo.801400"},{"scheme":"doi","value":"10.5281/zenodo.801399"}],"programmingLanguage":"UNKNOWN","publicationdate":"2017-01-01","publisher":"Zenodo","subjects":[{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"perceived object stability, vection, gravity perception, illusory body tilt, frames of 
reference"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Sociology"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Immunology"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Inorganic Chemistry"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Science Policy"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"69999 Biological Sciences not elsewhere classified"}}],"type":"software"} -{"author":[{"fullname":"Gaberial Campese","name":"","rank":1,"surname":""}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"}],"context":[{"code":"science-innovation-policy","label":"Science and Innovation Policy Studies","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["

A visualization project that researches trends in military, healthcare, and education expenditures by government. This project uses the Google Charts API.

"],"documentationUrl":[],"format":[],"id":"50|dedup_wf_001::1c8bd19e633976e314b88ce5c3f92d69","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2019-01-01","type":"Software","url":["https://zenodo.org/record/3490231","http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2019-01-01","type":"Software","url":["https://figshare.com/articles/Military_Healthcare_and_Education_Visualization_Project/11451567","http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490167","https://zenodo.org/record/3490167"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2019-01-01","type":"Software","url":["https://zenodo.org/record/3490231","http://dx.doi.org/10.5281/zenodo.3490166"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490167","https://figshare.com/articles/Military_Healthcare_and_Education_Visualization_Project/11536080"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490167"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"
lastupdatetimestamp":1591291157172,"maintitle":"Military, Healthcare, and Education Visualization Project","originalId":["datacite____::fc293ea8b1058dc6adaa87ad19a01123","od______2659::ea8c041f18c6201e4f7cd2764b408d20","r37980778c78::a1412e0bb17acd68cd6604c22386a988","datacite____::d8408df60f8d9b703e500ca210de53cb","datacite____::42607e67f94a1d82aebb02eeb7a69d7d","r37980778c78::2dd6d1ca777a3ca6e5f253362dcf1968","od______2659::c27f64126331e6a4a0a82a17aef4b48e"],"pid":[{"scheme":"doi","value":"10.5281/zenodo.3490231"},{"scheme":"doi","value":"10.5281/zenodo.3490167"},{"scheme":"doi","value":"10.5281/zenodo.3490166"}],"programmingLanguage":"UNKNOWN","publicationdate":"2019-01-01","publisher":"Zenodo","subjects":[{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Medicine"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Neuroscience"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Cancer"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Science Policy"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"111714 Mental Health"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"19999 Mathematical Sciences not elsewhere classified"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"80699 Information Systems not elsewhere classified"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"trend"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"healthcare"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Education Visualization Project"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Google Charts API"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"education expenditures"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"Military"}},{"provenance":{"provenance":"sysimport:crosswalk:datasetarchive","trust":"0.9"},"subject":{"scheme":"","value":"visualization project"}},{"provenance":{"provenance":"iis","trust":"0.7542"},"subject":{"scheme":"acm","value":"ComputingMilieux_LEGALASPECTSOFCOMPUTING"}}],"type":"software"} \ No newline at end of file +{"author":[],"collectedfrom":[{"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"}],"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["Chilibot searches the PubMed literature database based on specific relationships between proteins, genes, or keywords. 
The results are returned as a graph."],"documentationUrl":["http://www.chilibot.net"],"format":[],"id":"50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80","instance":[{"collectedfrom":{"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"},"hostedby":{"key":"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5","value":"bio.tools"},"publicationdate":"2017-05-01","type":"Software","url":["https://bio.tools/chilibot"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1591282924188,"maintitle":"Chilibot","originalId":["__bioTools__::c17ebaca97f287b181090c9b4cba766e"],"pid":[],"programmingLanguage":"UNKNOWN","publicationdate":"2017-05-01","publisher":"bio.tools","subject":null,"type":"software"} +{"author":[{"fullname":"Agustoni, Marco","name":"Marco","rank":1,"surname":"Agustoni"},{"fullname":"Marti, Lukas","name":"Lukas","rank":2,"surname":"Marti"},{"fullname":"ATLAS, Collaboration","name":"Collaboration","rank":3,"surname":"Atlas"},{"fullname":"Schneider, Basil","name":"Basil","rank":4,"surname":"Schneider"},{"fullname":"Gallo, Valentina","name":"Valentina","rank":5,"surname":"Gallo"},{"fullname":"Ereditato, Antonio","name":"Antonio","rank":6,"surname":"Ereditato"},{"fullname":"Sciacca, Gianfranco","name":"Gianfranco","rank":7,"surname":"Sciacca"},{"fullname":"Haug, Sigve","name":"Sigve","rank":8,"surname":"Haug"},{"fullname":"Kabana, Sonja","name":"Sonja","rank":9,"surname":"Kabana"},{"fullname":"Kruker, Tobias","name":"Tobias","rank":10,"surname":"Kruker"},{"fullname":"Ancu, Lucian","name":"Lucian","rank":11,"surname":"Ancu"},{"fullname":"Battaglia, Andreas","name":"Andreas","rank":12,"surname":"Battaglia"},{"fullname":"Beck, Hans Peter","name":"Hans Peter","rank":13,"surname":"Beck"},{"fullname":"Pretzl, Klaus-Peter","name":"Klaus-Peter","rank":14,"surname":"Pretzl"},{"fullname":"Borer, Claudia","name":"Claudia","rank":15,"surname":"Borer"},{"fullname":"Weber, Michael","name":"Michael","rank":16,"surname":"Weber"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"code":"egi","label":"EGI Federation","provenance":[{"provenance":"iis","trust":"0.9"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":[],"documentationUrl":[],"embargoenddate":"2013-01-01","format":["application/pdf"],"id":"50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"publicationdate":"2013-01-01","type":"Software","url":["http://dx.doi.org/10.7892/boris.58468"]}],"language":{"code":"eng","label":"English"},"lastupdatetimestamp":1591283098072,"maintitle":"Search for pair-produced massive coloured scalars in four-jet final states with the ATLAS detector in proton-proton collisions at sqrts=7 TeV","originalId":["datacite____::04dd1f84f3a429ecf1e838afcd94cb3f"],"pid":[{"scheme":"doi","value":"10.7892/boris.58468"}],"programmingLanguage":"application/pdf","publicationdate":"2013-01-01","publisher":"EDP Sciences","subject":null,"type":"software"} +{"author":[{"fullname":"Regev, 
Mor","name":"Mor","rank":1,"surname":"Regev"},{"fullname":"Simony, Erez","name":"Erez","rank":2,"surname":"Simony"},{"fullname":"Lee, Katherine","name":"Katherine","rank":3,"surname":"Lee"},{"fullname":"Tan, Kean Ming","name":"Kean Ming","rank":4,"surname":"Tan"},{"fullname":"Chen, Janice","name":"Janice","rank":5,"surname":"Chen"},{"fullname":"Hasson, Uri","name":"Uri","rank":6,"surname":"Hasson"}],"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"code":"ni","label":"Neuroinformatics","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":["Regev, Mor"],"country":[],"coverage":[],"dateofcollection":"","description":["This capsule demonstrates the inter-subject functional correlation (ISFC) analysis described in \"Propagation of information along the cortical hierarchy as a function of attention while reading and listening to stories \" by Regev, Simony, Lee, Tan, Chen and Hasson."],"documentationUrl":[],"format":[],"id":"50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795","instance":[{"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::c52707e6a6b63c5aeac022e62cc8cee7","value":"Code Ocean"},"license":"https://opensource.org/licenses/MIT","publicationdate":"2018-01-01","type":"Software","url":["http://dx.doi.org/10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c","https://codeocean.com/2018/10/30/intersubject-functional-correlation-lpar-isfc-rpar-as-a-function-of-attention"]}],"language":{"code":"en-us","label":"en-us"},"lastupdatetimestamp":1591282729250,"maintitle":"Intersubject functional correlation (ISFC) as a function of attention","originalId":["datacite____::6b1e3a2fa60ed8c27317a66d6357f795"],"pid":[{"scheme":"doi","value":"10.24433/co.12957bc5-fa2b-488f-ae72-52e3fe362b5c"}],"programmingLanguage":"UNKNOWN","publicationdate":"2018-01-01","publisher":"Code Ocean","subject":null,"type":"software"} +{"author":[{"fullname":"Winebrenner, Dale","name":"Dale","pid":{"id":{"scheme":"ORCID","value":"0000-0003-2341-1083"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},"rank":1,"surname":"Winebrenner"},{"fullname":"MacGregor, Joseph","name":"Joseph","rank":2,"surname":"Macgregor"},{"fullname":"Kintner, Paul","name":"Paul","rank":3,"surname":"Kintner"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"}],"context":[{"code":"science-innovation-policy","label":"Science and Innovation Policy Studies","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["

Collection of Matlab scripts and data files to implement and exercise data analysis and modeling in support of the paper \"New Estimates of Ice and Oxygen Fluxes Across the Entire Lid of Lake Vostok from Observations of Englacial Radiowave Attenuation\", in review at the Journal of Geophysical Research - Earth Surface, October 2018. 

"],"documentationUrl":[],"format":[],"id":"50|dedup_wf_001::0347b1cd516fc59e41ba92e0d74e4e9f","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"https://creativecommons.org/licenses/by/4.0","publicationdate":"2018-01-01","type":"Software","url":["https://zenodo.org/record/1467433","http://dx.doi.org/10.5281/zenodo.1467432"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2018-01-01","type":"Software","url":["https://figshare.com/articles/Lake_Vostok_radar_attenuation_and_basal_accretion_data_models_and_computations/7246043","http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"https://creativecommons.org/licenses/by/4.0","publicationdate":"2018-01-01","type":"Software","url":["https://zenodo.org/record/1467433","http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2018-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.1467433"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2018-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.1467433"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1591291157172,"maintitle":"Lake Vostok Radar Attenuation And Basal Accretion Data, Models And Computations","originalId":["datacite____::6ad543d8217b00d3c6e96f0b0a310d9f","r37980778c78::1a67aade293d5f0c985073a3470fdd9c","datacite____::51f29f85c008323ab696ef1c51ab242c","r37b0ad08687::213cee42e7f14a78806470a35a09f87a","od______2659::213cee42e7f14a78806470a35a09f87a"],"pid":[{"scheme":"doi","value":"10.5281/zenodo.1467432"},{"scheme":"doi","value":"10.5281/zenodo.1467433"}],"programmingLanguage":"UNKNOWN","publicationdate":"2018-01-01","publisher":"Zenodo","subject":null,"type":"software"} +{"author":[{"fullname":"Laboissière, 
Rafael","name":"Rafael","rank":1,"surname":"Laboissière"}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"}],"context":[{"code":"science-innovation-policy","label":"Science and Innovation Policy Studies","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["

This release contains the raw data, as well as the pre-processing and statistical analysis scripts (in R) for the experiments of perception of object stability under vection (illusion of self movement).

"],"documentationUrl":[],"format":[],"id":"50|dedup_wf_001::1432beb6171baa5da8a85a7f99545d69","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://www.opensource.org/licenses/GPL-3.0","publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://www.opensource.org/licenses/GPL-3.0","publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400","https://zenodo.org/record/801400"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801400","https://figshare.com/articles/rlaboiss_vextab-data_Raw_data_and_statistical_analysis_code_for_the_vextab_study/11470608"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://www.opensource.org/licenses/GPL-3.0","publicationdate":"2017-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.801399","https://zenodo.org/record/801400"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"lastupdatetimestamp":1591291157172,"maintitle":"rlaboiss/vextab-data: Raw data and statistical analysis code for the vextab study","originalId":["r37b0ad08687::25acd09277dae71c80810366a599421c","datacite____::b353952b541b80dfed37f4e48b86e4c2","od______2659::25acd09277dae71c80810366a599421c","r37980778c78::a02038e6624d01df9cf22cb709d7fe92","datacite____::c2c7b341f563b61294ec4c7396a3f984"],"pid":[{"scheme":"doi","value":"10.5281/zenodo.801400"},{"scheme":"doi","value":"10.5281/zenodo.801399"}],"programmingLanguage":"UNKNOWN","publicationdate":"2017-01-01","publisher":"Zenodo","subject":null,"type":"software"} +{"author":[{"fullname":"Gaberial 
Campese","name":"","rank":1,"surname":""}],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"}],"context":[{"code":"science-innovation-policy","label":"Science and Innovation Policy Studies","provenance":[{"provenance":"Bulktagging for Community - Subject"}]}],"contributor":[],"country":[],"coverage":[],"dateofcollection":"","description":["

A visualization project that researches trends in military, healthcare, and education expenditures by government. This project uses the Google Charts API.

"],"documentationUrl":[],"format":[],"id":"50|dedup_wf_001::1c8bd19e633976e314b88ce5c3f92d69","instance":[{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2019-01-01","type":"Software","url":["https://zenodo.org/record/3490231","http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2019-01-01","type":"Software","url":["https://figshare.com/articles/Military_Healthcare_and_Education_Visualization_Project/11451567","http://dx.doi.org/10.5281/zenodo.3490231"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490167","https://zenodo.org/record/3490167"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"hostedby":{"key":"10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6","value":"Zenodo"},"license":"http://creativecommons.org/licenses/by/4.0/legalcode","publicationdate":"2019-01-01","type":"Software","url":["https://zenodo.org/record/3490231","http://dx.doi.org/10.5281/zenodo.3490166"]},{"accessright":{"code":"c_14cb","label":"CLOSED","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"hostedby":{"key":"10|re3data_____::7980778c78fb4cf0fab13ce2159030dc","value":"figshare"},"publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490167","https://figshare.com/articles/Military_Healthcare_and_Education_Visualization_Project/11536080"]},{"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"collectedfrom":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"hostedby":{"key":"10|opendoar____::358aee4cc897452c00244351e4d91f69","value":"ZENODO"},"publicationdate":"2019-01-01","type":"Software","url":["http://dx.doi.org/10.5281/zenodo.3490167"]}],"language":{"code":"UNKNOWN","label":"UNKNOWN"},"
lastupdatetimestamp":1591291157172,"maintitle":"Military, Healthcare, and Education Visualization Project","originalId":["datacite____::fc293ea8b1058dc6adaa87ad19a01123","od______2659::ea8c041f18c6201e4f7cd2764b408d20","r37980778c78::a1412e0bb17acd68cd6604c22386a988","datacite____::d8408df60f8d9b703e500ca210de53cb","datacite____::42607e67f94a1d82aebb02eeb7a69d7d","r37980778c78::2dd6d1ca777a3ca6e5f253362dcf1968","od______2659::c27f64126331e6a4a0a82a17aef4b48e"],"pid":[{"scheme":"doi","value":"10.5281/zenodo.3490231"},{"scheme":"doi","value":"10.5281/zenodo.3490167"},{"scheme":"doi","value":"10.5281/zenodo.3490166"}],"programmingLanguage":"UNKNOWN","publicationdate":"2019-01-01","publisher":"Zenodo","subject":null,"type":"software"} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/indicators/publication.json b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/indicators/publication.json new file mode 100644 index 0000000..aad4d09 --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/input/indicators/publication.json @@ -0,0 +1 @@ +{"context": [], "dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "conference paper", "classname": "conference paper", "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource"}, "pid": [], "contributor": [], "bestaccessright": {"classid": "CLOSED", "classname": "Closed Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "issued", "classname": "issued", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2002-01-01"}], "collectedfrom": [{"key": "10|openaire____::345c9d171ef3c5d706d08041d506428c", "value": "Croatian Scientific Bibliography - CROSBI"}], "id": "50|57a035e5b1ae::bf0d0fcf6b76658c1a1c283d957bf5e9", "subject": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Genetic Taxonom; IS Modelling Methods"}], "lastupdatetimestamp": 1673908725321, "author": [{"surname": "Brumec", "name": "J.", "pid": [], "rank": 1, "affiliation": [], "fullname": "Brumec, J."}, {"surname": "Vrc\\u030cek", "name": "N.", "pid": [], "rank": 2, "affiliation": [], "fullname": "Vr\\u010dek, N."}], "instance": [{"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|openaire____::345c9d171ef3c5d706d08041d506428c", "value": "Croatian Scientific Bibliography - CROSBI"}, "url": ["https://www.bib.irb.hr/185431"], "pid": [], "alternateIdentifier": [], "dateofacceptance": {"dataInfo": 
{"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "2002-01-01"}, "collectedfrom": {"key": "10|openaire____::345c9d171ef3c5d706d08041d506428c", "value": "Croatian Scientific Bibliography - CROSBI"}, "accessright": {"classid": "CLOSED", "classname": "Closed Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0038", "classname": "Other literature type", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2023-01-08T01:03:51+0000", "fulltext": [], "dateoftransformation": "2023-01-08T02:34:01.814Z", "description": [], "format": [], "measures": [{"id": "downloads", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:usage_counts", "classname": "measure:usage_counts", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "count", "value": "0"}]}, {"id": "views", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:usage_counts", "classname": "measure:usage_counts", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "count", "value": "1"}]}], "coverage": [], "externalReference": [], "eoscifguidelines": [], "language": {"classid": "eng", "classname": "English", "schemeid": "dnet:languages", "schemename": "dnet:languages"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [{"classid": "HR", "classname": "Croatia", "dataInfo": {"provenanceaction": {"classid": "country:instrepos", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "propagation", "invisible": false, "trust": "0.85"}, "schemeid": "dnet:countries", "schemename": "dnet:countries"}], "extraInfo": [], "originalId": ["50|57a035e5b1ae::bf0d0fcf6b76658c1a1c283d957bf5e9", "185431"], "source": [], "dateofacceptance": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "2002-01-01"}, "title": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Genetic Taxonomy: the Theoretical Source for IS Modelling Methods"}]} \ No newline at end of file