From f85db930d948de2da8a92e92e3c62ede07cdf905 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Fri, 5 Apr 2024 12:59:41 +0200
Subject: [PATCH] [SKG-IF-EOSC] Added the new indicators field to the product,
 and the scheme to the denormalized topic in the result. Added the provenance
 classname to the provenance field of the topic in the result
---
 .../eu/dnetlib/dhp/ExecCreateSchemas.java | 18 +-
 .../java/eu/dnetlib/dhp/oa/model/APC.java | 29 -
 .../eu/dnetlib/dhp/oa/model/AccessRight.java | 27 -
 .../dhp/oa/model/AlternateIdentifier.java | 44 -
 .../java/eu/dnetlib/dhp/oa/model/Author.java | 75 --
 .../eu/dnetlib/dhp/oa/model/AuthorPid.java | 52 --
 .../dhp/oa/model/AuthorPidSchemeValue.java | 40 -
 .../dnetlib/dhp/oa/model/BestAccessRight.java | 60 --
 .../eu/dnetlib/dhp/oa/model/Container.java | 143 ---
 .../java/eu/dnetlib/dhp/oa/model/Country.java | 47 -
 .../java/eu/dnetlib/dhp/oa/model/Funder.java | 43 -
 .../eu/dnetlib/dhp/oa/model/GeoLocation.java | 53 --
 .../dhp/oa/model/ImpactIndicators.java | 56 --
 .../eu/dnetlib/dhp/oa/model/Indicator.java | 34 -
 .../eu/dnetlib/dhp/oa/model/Instance.java | 152 ----
 .../eu/dnetlib/dhp/oa/model/Language.java | 38 -
 .../java/eu/dnetlib/dhp/oa/model/Measure.java | 49 -
 .../dnetlib/dhp/oa/model/OpenAccessColor.java | 15 -
 .../dnetlib/dhp/oa/model/OpenAccessRoute.java | 13 -
 .../java/eu/dnetlib/dhp/oa/model/Project.java | 57 --
 .../eu/dnetlib/dhp/oa/model/Provenance.java | 41 -
 .../java/eu/dnetlib/dhp/oa/model/Result.java | 511 -----------
 .../dnetlib/dhp/oa/model/ResultCountry.java | 41 -
 .../eu/dnetlib/dhp/oa/model/ResultPid.java | 44 -
 .../java/eu/dnetlib/dhp/oa/model/Score.java | 46 -
 .../java/eu/dnetlib/dhp/oa/model/Subject.java | 40 -
 .../dhp/oa/model/SubjectSchemeValue.java | 42 -
 .../eu/dnetlib/dhp/oa/model/UsageCounts.java | 29 -
 .../dhp/oa/model/community/CfHbKeyValue.java | 47 -
 .../oa/model/community/CommunityInstance.java | 43 -
 .../oa/model/community/CommunityResult.java | 75 --
 .../dhp/oa/model/community/Context.java | 73 --
 .../dhp/oa/model/community/Funder.java | 24 -
 .../dhp/oa/model/community/Project.java | 58 --
 .../dhp/oa/model/community/Validated.java | 39 -
 .../dnetlib/dhp/oa/model/graph/Constants.java | 21 -
 .../dhp/oa/model/graph/Datasource.java | 358 --------
 .../dhp/oa/model/graph/DatasourcePid.java | 41 -
 .../oa/model/graph/DatasourceSchemeValue.java | 41 -
 .../eu/dnetlib/dhp/oa/model/graph/Funder.java | 23 -
 .../dnetlib/dhp/oa/model/graph/Fundings.java | 38 -
 .../dnetlib/dhp/oa/model/graph/Granted.java | 61 --
 .../dhp/oa/model/graph/GraphResult.java | 28 -
 .../oa/model/graph/H2020Classification.java | 82 --
 .../dhp/oa/model/graph/Organization.java | 94 --
 .../dhp/oa/model/graph/OrganizationPid.java | 42 -
 .../dnetlib/dhp/oa/model/graph/Programme.java | 41 -
 .../dnetlib/dhp/oa/model/graph/Project.java | 211 -----
 .../dnetlib/dhp/oa/model/graph/RelType.java | 39 -
 .../dnetlib/dhp/oa/model/graph/Relation.java | 124 ---
 .../dhp/oa/model/graph/ResearchCommunity.java | 27 -
 .../oa/model/graph/ResearchInitiative.java | 89 --
 .../dhp/skgif/model/DownloadsAndViews.java | 47 +
 .../eu/dnetlib/dhp/skgif/model/Indicator.java | 33 +
 .../eu/dnetlib/dhp/skgif/model/MinTopic.java | 9 +
 .../dhp/skgif/model/ResearchProduct.java | 10 +
 .../src/test/java/GenerateJsonSchema.java | 8 +-
 .../dump/filterentities/FilterEntities.java | 2 +-
 .../SelectConnectedEntities.java | 2 +-
 .../graph/dump/skgif/DumpResearchProduct.java | 19 +-
 .../oa/graph/dump/skgif/EmitFromEntities.java | 6 +-
 .../dhp/oa/graph/dump/skgif/ResultMapper.java | 31 +-
 .../copygraph/oozie_app/config-default.xml | 30 -
 .../dump/copygraph/oozie_app/workflow.xml | 110 ---
 .../oozie_app/config-default.xml | 30 -
 .../countryresults/oozie_app/workflow.xml | 511 -----------
 .../dump/csv/oozie_app/config-default.xml | 30 -
 .../oa/graph/dump/csv/oozie_app/workflow.xml | 282 ------
 .../oa/graph/dump/input_cm_parameters.json | 31 -
 .../oa/graph/dump/input_collect_and_save.json | 30 -
 .../graph/dump/input_complete_parameters.json | 30 -
 .../oa/graph/dump/input_dump_csv_ste1.json | 23 -
 .../oa/graph/dump/input_dump_csv_ste2.json | 38 -
 .../oa/graph/dump/input_dump_csv_ste3.json | 36 -
 .../oa/graph/dump/input_dump_csv_ste4.json | 25 -
 .../oa/graph/dump/input_entity_parameter.json | 20 -
 .../dump/input_organization_parameters.json | 31 -
 .../dump/input_parameter_select_relation.json | 20 -
 .../dhp/oa/graph/dump/input_parameters.json | 75 --
 .../graph/dump/input_parameters_link_prj.json | 41 -
 .../dump/input_relationdump_parameters.json | 44 -
 .../oa/graph/dump/input_select_context.json | 37 -
 ...ect_valid_relation_context_parameters.json | 27 -
 ...nput_select_valid_relation_parameters.json | 27 -
 .../oozie_app/config-default.xml | 30 -
 .../organizationonly/oozie_app/workflow.xml | 88 --
 .../graph/dump/project_input_parameters.json | 29 -
 .../graph/dump/project_prep_parameters.json | 26 -
 .../graph/dump/project_subset_parameters.json | 27 -
 .../oozie_app/config-default.xml | 30 -
 .../dump/projectsubset/oozie_app/workflow.xml | 171 ----
 .../graph/dump/result_country_parameters.json | 42 -
 .../result_related_country_parameters.json | 29 -
 .../serafeim/oozie_app/config-default.xml | 30 -
 .../dump/serafeim/oozie_app/workflow.xml | 102 ---
 .../filter_entities_parameters.json | 0
 .../graph/dump/skgif/oozie_app/workflow.xml | 2 +
 .../select_connected_entities_parameters.json | 0
 .../dhp/oa/graph/dump/split_parameters.json | 37 -
 .../dump/wf/main/oozie_app/config-default.xml | 30 -
 .../graph/dump/wf/main/oozie_app/import.txt | 5 -
 .../graph/dump/wf/main/oozie_app/workflow.xml | 337 -------
 .../community/oozie_app/config-default.xml | 30 -
 .../community/oozie_app/workflow.xml | 362 --------
 .../complete/oozie_app/config-default.xml | 30 -
 .../complete/oozie_app/workflow.xml | 525 -----------
 .../funder/oozie_app/config-default.xml | 30 -
 .../funder/oozie_app/workflow.xml | 257 ------
 .../subset/oozie_app/config-default.xml | 30 -
 .../subset/oozie_app/workflow.xml | 858 ------------------
 .../dump/oozie_app/config-default.xml | 30 -
 .../scholexplorer/dump/oozie_app/workflow.xml | 132 ---
 .../dhp/oa/graph/dump/GenerateJsonSchema.java | 4 +-
 .../oa/graph/dump/skgif/DumpResultTest.java | 1 -
 114 files changed, 154 insertions(+), 8403 deletions(-)
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/APC.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AccessRight.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AlternateIdentifier.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Author.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPid.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPidSchemeValue.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/BestAccessRight.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Container.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Country.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Funder.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/GeoLocation.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ImpactIndicators.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Indicator.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Language.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Measure.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessColor.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessRoute.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Project.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Provenance.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultCountry.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultPid.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/UsageCounts.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CfHbKeyValue.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Context.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Project.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Validated.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Constants.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourcePid.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourceSchemeValue.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Funder.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Fundings.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Granted.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/H2020Classification.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/OrganizationPid.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Programme.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Project.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/RelType.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java
 delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java
 create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/DownloadsAndViews.java
 create mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/Indicator.java
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_context.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_context_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_country_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_related_country_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml
 rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{ => skgif}/filter_entities_parameters.json (100%)
 rename dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/{ => skgif}/select_connected_entities_parameters.json (100%)
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/config-default.xml
 delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml

diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java
index b669da2..6b94228 100644
--- a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java
+++ b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java
@@ -12,8 +12,7 @@ import com.fasterxml.jackson.databind.SerializationFeature;
 import com.github.imifou.jsonschema.module.addon.AddonModule;
 import com.github.victools.jsonschema.generator.*;
 
-import eu.dnetlib.dhp.oa.model.community.CommunityResult;
-import eu.dnetlib.dhp.oa.model.graph.*;
+import eu.dnetlib.dhp.skgif.model.*;
 
 public class ExecCreateSchemas {
 	final static String DIRECTORY = "/eu/dnetlib/dhp/schema/dump/jsonschemas/";
@@ -61,14 +60,13 @@ public class ExecCreateSchemas {
 		ExecCreateSchemas ecs = new ExecCreateSchemas();
 		ecs.init();
 
-		ecs.generate(GraphResult.class, DIRECTORY, "result_schema.json");
-		ecs.generate(ResearchCommunity.class, DIRECTORY, "community_infrastructure_schema.json");
-		ecs.generate(Datasource.class, DIRECTORY, "datasource_schema.json");
-		ecs.generate(Project.class, DIRECTORY, "project_schema.json");
-		ecs.generate(Relation.class, DIRECTORY, "relation_schema.json");
-		ecs.generate(Organization.class, DIRECTORY, "organization_schema.json");
-
-		ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json");
+		ecs.generate(Venue.class, DIRECTORY, "venue.json");
+		ecs.generate(Grant.class, DIRECTORY, "grant.json");
+		ecs.generate(ResearchProduct.class, DIRECTORY, "product.json");
+		ecs.generate(Persons.class, DIRECTORY, "person.json");
+		ecs.generate(Topic.class, DIRECTORY, "topic.json");
+		ecs.generate(Organization.class, DIRECTORY, "organization.json");
+		ecs.generate(Datasource.class, DIRECTORY, "datasource.json");
 	}
 }
diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/APC.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/APC.java
deleted file mode 100644
index 96c7b62..0000000
--- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/APC.java
+++ /dev/null
@@ -1,29 +0,0 @@
-
-package eu.dnetlib.dhp.oa.model;
-
-import java.io.Serializable;
-
-/**
- * Used to refer to the Article Processing Charge information. It contains two parameters: -
- * currency of type String to store the currency of the APC - amount of type String to stores the charged amount
- */
-public class APC implements Serializable {
-	private String currency;
-	private String amount;
-
-	public String getCurrency() {
-		return currency;
-	}
-
-	public void setCurrency(String currency) {
-		this.currency = currency;
-	}
-
-	public String getAmount() {
-		return amount;
-	}
-
-	public void setAmount(String amount) {
-		this.amount = amount;
-	}
-}
diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AccessRight.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AccessRight.java
deleted file mode 100644
index 16d0088..0000000
--- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AccessRight.java
+++ /dev/null
@@ -1,27 +0,0 @@
-
-package eu.dnetlib.dhp.oa.model;
-
-/**
- * AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.BestAccessRight
- * element with value for the openaccess route
- */
-public class AccessRight extends BestAccessRight {
-
-	private OpenAccessRoute openAccessRoute;
-
-	public static AccessRight newInstance(String code, String label, String scheme) {
-		AccessRight ar = new AccessRight();
-		ar.setCode(code);
-		ar.setLabel(label);
-		ar.setScheme(scheme);
-		return ar;
-	}
-
-	public OpenAccessRoute getOpenAccessRoute() {
-		return openAccessRoute;
-	}
-
-	public void setOpenAccessRoute(OpenAccessRoute openAccessRoute) {
-		this.openAccessRoute = openAccessRoute;
-	}
-}
diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AlternateIdentifier.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AlternateIdentifier.java
deleted file mode 100644
index db7313d..0000000
--- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AlternateIdentifier.java
+++ /dev/null
@@ -1,44 +0,0 @@
-
-package eu.dnetlib.dhp.oa.model;
-
-import java.io.Serializable;
-
-import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
-
-public class AlternateIdentifier implements Serializable {
-	@JsonSchema(
-		description = "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers "
-			+
-			"it means it has not been forged by an authority for that pid. 
For example we collect metadata from an institutional repository that provides " - + - "as identifier for the result also the doi") - private String scheme; - - @JsonSchema(description = "The value expressed in the scheme") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static AlternateIdentifier newInstance(String scheme, String value) { - AlternateIdentifier cf = new AlternateIdentifier(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Author.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Author.java deleted file mode 100644 index ed2babb..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Author.java +++ /dev/null @@ -1,75 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * Used to represent the generic author of the result. It has six parameters: - name of type String to store the given - * name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of - * type String to store the family name of the author. The value for this parameter corresponds to - * eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for - * this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank on - * the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author - * rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the - * moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the - * eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is - * instantiated only if the dataInfo is set for the pid in the result to be dumped. 
The provenance element is - * instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: * - * dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust - */ -public class Author implements Serializable { - - private String fullname; - - private String name; - - private String surname; - - private Integer rank; - - @JsonSchema(description = "The author's persistent identifiers") - private AuthorPid pid; - - public String getFullname() { - return fullname; - } - - public void setFullname(String fullname) { - this.fullname = fullname; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getSurname() { - return surname; - } - - public void setSurname(String surname) { - this.surname = surname; - } - - public Integer getRank() { - return rank; - } - - public void setRank(Integer rank) { - this.rank = rank; - } - - public AuthorPid getPid() { - return pid; - } - - public void setPid(AuthorPid pid) { - this.pid = pid; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPid.java deleted file mode 100644 index f66e586..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPid.java +++ /dev/null @@ -1,52 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent the generic persistent identifier. It has two parameters: - * - id of type - * eu.dnetlib.dhp.schema.dump.oaf.AuthorPidSchemeValue to store the scheme and value of the Persistent Identifier. - * - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information - */ -public class AuthorPid implements Serializable { - - private AuthorPidSchemeValue id; - - @JsonSchema(description = "The reason why the pid was associated to the author") - private Provenance provenance; - - public AuthorPidSchemeValue getId() { - return id; - } - - public void setId(AuthorPidSchemeValue pid) { - this.id = pid; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public static AuthorPid newInstance(AuthorPidSchemeValue pid, Provenance provenance) { - AuthorPid p = new AuthorPid(); - p.id = pid; - p.provenance = provenance; - - return p; - } - - public static AuthorPid newInstance(AuthorPidSchemeValue pid) { - AuthorPid p = new AuthorPid(); - p.id = pid; - - return p; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPidSchemeValue.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPidSchemeValue.java deleted file mode 100644 index 09eaba5..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/AuthorPidSchemeValue.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class AuthorPidSchemeValue implements Serializable { - - @JsonSchema(description = "The author's pid scheme. OpenAIRE currently supports 'ORCID'") - private String scheme; - - @JsonSchema(description = "The author's pid value in that scheme (i.e. 
0000-1111-2222-3333)") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static AuthorPidSchemeValue newInstance(String scheme, String value) { - AuthorPidSchemeValue cf = new AuthorPidSchemeValue(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/BestAccessRight.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/BestAccessRight.java deleted file mode 100644 index abde526..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/BestAccessRight.java +++ /dev/null @@ -1,60 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * BestAccessRight. Used to represent the result best access rights. Values for this element are found against the - * COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get - * the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the - * COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR - * access right scheme - */ - -public class BestAccessRight implements Serializable { - - @JsonSchema( - description = "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/") - private String code; // the classid in the Qualifier - - @JsonSchema(description = "Label for the access mode") - private String label; // the classname in the Qualifier - - @JsonSchema( - description = "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/") - private String scheme; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - public static BestAccessRight newInstance(String code, String label, String scheme) { - BestAccessRight ar = new BestAccessRight(); - ar.code = code; - ar.label = label; - ar.scheme = scheme; - return ar; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Container.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Container.java deleted file mode 100644 index 4fdeff4..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Container.java +++ /dev/null @@ -1,143 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To store information about the conference or journal where the result has been presented or published. It contains - * eleven parameters: - name of type String to store the name of the journal or conference. It corresponds to the - * parameter name of eu.dnetlib.dhp.schema.oaf.Journal - issnPrinted ot type String to store the journal printed issn. 
- * It corresponds to the parameter issnPrinted of eu.dnetlib.dhp.schema.oaf.Journal - issnOnline of type String to store - * the journal online issn. It corresponds to the parameter issnOnline of eu.dnetlib.dhp.schema.oaf.Journal - - * issnLinking of type String to store the journal linking issn. It corresponds to the parameter issnLinking of - * eu.dnetlib.dhp.schema.oaf.Journal - ep of type String to store the end page. It corresponds to the parameter ep of - * eu.dnetlib.dhp.schema.oaf.Journal - iss of type String to store the journal issue. It corresponds to the parameter - * iss of eu.dnetlib.dhp.schema.oaf.Journal - sp of type String to store the start page. It corresponds to the parameter - * sp of eu.dnetlib.dhp.schema.oaf.Journal - vol of type String to store the Volume. It corresponds to the parameter vol - * of eu.dnetlib.dhp.schema.oaf.Journal - edition of type String to store the edition of the journal or conference - * proceeding. It corresponds to the parameter edition of eu.dnetlib.dhp.schema.oaf.Journal - conferenceplace of type - * String to store the place of the conference. It corresponds to the parameter conferenceplace of - * eu.dnetlib.dhp.schema.oaf.Journal - conferencedate of type String to store the date of the conference. It corresponds - * to the parameter conferencedate of eu.dnetlib.dhp.schema.oaf.Journal - */ -public class Container implements Serializable { - - @JsonSchema(description = "Name of the journal or conference") - private String name; - - private String issnPrinted; - - private String issnOnline; - - private String issnLinking; - - @JsonSchema(description = "End page") - private String ep; - - @JsonSchema(description = "Journal issue number") - private String iss; - - @JsonSchema(description = "Start page") - private String sp; - - @JsonSchema(description = "Volume") - private String vol; - - @JsonSchema(description = "Edition of the journal or conference proceeding") - private String edition; - - private String conferenceplace; - - private String conferencedate; - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getIssnPrinted() { - return issnPrinted; - } - - public void setIssnPrinted(String issnPrinted) { - this.issnPrinted = issnPrinted; - } - - public String getIssnOnline() { - return issnOnline; - } - - public void setIssnOnline(String issnOnline) { - this.issnOnline = issnOnline; - } - - public String getIssnLinking() { - return issnLinking; - } - - public void setIssnLinking(String issnLinking) { - this.issnLinking = issnLinking; - } - - public String getEp() { - return ep; - } - - public void setEp(String ep) { - this.ep = ep; - } - - public String getIss() { - return iss; - } - - public void setIss(String iss) { - this.iss = iss; - } - - public String getSp() { - return sp; - } - - public void setSp(String sp) { - this.sp = sp; - } - - public String getVol() { - return vol; - } - - public void setVol(String vol) { - this.vol = vol; - } - - public String getEdition() { - return edition; - } - - public void setEdition(String edition) { - this.edition = edition; - } - - public String getConferenceplace() { - return conferenceplace; - } - - public void setConferenceplace(String conferenceplace) { - this.conferenceplace = conferenceplace; - } - - public String getConferencedate() { - return conferencedate; - } - - public void setConferencedate(String conferencedate) { - this.conferencedate = conferencedate; - } - -} diff --git 
a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Country.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Country.java deleted file mode 100644 index 7a69de0..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Country.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * Represents the country associated to the generic entity. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a - * provenance parameter of type eu.dnetlib.dhp.schema.dumo.oaf.Provenance. The country in not mapped if its value in the - * result reprensented in the internal format is Unknown. The value for this element correspond to: - code corresponds - * to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of - * eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be - * dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with - * datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust - */ -public class Country implements Serializable { - @JsonSchema(description = "ISO 3166-1 alpha-2 country code (i.e. IT)") - private String code; // the classid in the Qualifier - - @JsonSchema(description = "The label for that code (i.e. Italy)") - private String label; // the classname in the Qualifier - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - public static Country newInstance(String code, String label) { - Country c = new Country(); - c.setCode(code); - c.setLabel(label); - return c; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Funder.java deleted file mode 100644 index 49376fc..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Funder.java +++ /dev/null @@ -1,43 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class Funder implements Serializable { - - @JsonSchema(description = "The short name of the funder (EC)") - private String shortName; - - @JsonSchema(description = "The name of the funder (European Commission)") - private String name; - - @JsonSchema( - description = "Geographical jurisdiction (e.g. 
for European Commission is EU, for Croatian Science Foundation is HR)") - private String jurisdiction; - - public String getJurisdiction() { - return jurisdiction; - } - - public void setJurisdiction(String jurisdiction) { - this.jurisdiction = jurisdiction; - } - - public String getShortName() { - return shortName; - } - - public void setShortName(String shortName) { - this.shortName = shortName; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/GeoLocation.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/GeoLocation.java deleted file mode 100644 index d8cbb39..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/GeoLocation.java +++ /dev/null @@ -1,53 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; - -/** - * Represents the geolocation information. It has three parameters: - point of type String to store the point - * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box ot type String to store the box - * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place - * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place - */ -public class GeoLocation implements Serializable { - - private String point; - - private String box; - - private String place; - - public String getPoint() { - return point; - } - - public void setPoint(String point) { - this.point = point; - } - - public String getBox() { - return box; - } - - public void setBox(String box) { - this.box = box; - } - - public String getPlace() { - return place; - } - - public void setPlace(String place) { - this.place = place; - } - - @JsonIgnore - public boolean isBlank() { - return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place); - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ImpactIndicators.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ImpactIndicators.java deleted file mode 100644 index b98d89e..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ImpactIndicators.java +++ /dev/null @@ -1,56 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -/** - * @author miriam.baglioni - * @Date 07/11/22 - */ -public class ImpactIndicators implements Serializable { - Score influence; - Score influence_alt; - Score popularity; - Score popularity_alt; - Score impulse; - - public Score getInfluence() { - return influence; - } - - public void setInfluence(Score influence) { - this.influence = influence; - } - - public Score getInfluence_alt() { - return influence_alt; - } - - public void setInfluence_alt(Score influence_alt) { - this.influence_alt = influence_alt; - } - - public Score getPopularity() { - return popularity; - } - - public void setPopularity(Score popularity) { - this.popularity = popularity; - } - - public Score getPopularity_alt() { - return popularity_alt; - } - - public void setPopularity_alt(Score popularity_alt) { - this.popularity_alt = popularity_alt; - } - - public Score getImpulse() { - return impulse; - } - - public void setImpulse(Score impulse) { - this.impulse = impulse; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Indicator.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Indicator.java 
deleted file mode 100644 index 65883f8..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Indicator.java +++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class Indicator implements Serializable { - @JsonSchema(description = "The impact measures (i.e. popularity)") - List bipIndicators; - - @JsonSchema(description = "The usage counts (i.e. downloads)") - UsageCounts usageCounts; - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getBipIndicators() { - return bipIndicators; - } - - public void setBipIndicators(List bipIndicators) { - this.bipIndicators = bipIndicators; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public UsageCounts getUsageCounts() { - return usageCounts; - } - - public void setUsageCounts(UsageCounts usageCounts) { - this.usageCounts = usageCounts; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java deleted file mode 100644 index c35f93c..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java +++ /dev/null @@ -1,152 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published - * versions are two manifestations of the same research result. It has the following parameters: - license of type - * String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be - * dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. - - * type of type String to store the type of the instance as defined in the corresponding dnet vocabulary - * (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type - * List list of locations where the instance is accessible. It corresponds to url of the instance to be dumped - - * publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type - * String to store information abour the review status of the instance. Possible values are 'Unknown', - * 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped - * - articleprocessingcharge of type APC to store the article processing charges possibly associated to the instance - * -pid of type List that is the list of pids associated to the result coming from authoritative sources for that pid - * -alternateIdentifier of type List that is the list of pids associated to the result coming from NON authoritative - * sources for that pid - * -measure list to represent the measure computed for this instance (for example the Bip!Finder ones). It corresponds to measures in the model - */ -public class Instance implements Serializable { - -// @JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones") -// private Indicator indicators; - - private List pid; - - @JsonSchema( - description = "All the identifiers other than pids forged by an authorithy for the pid type (i.e. 
Crossref for DOIs") - private List alternateIdentifier; - - private String license; - - @JsonSchema(description = "The accessRights for this materialization of the result") - private AccessRight accessright; - - @JsonSchema( - description = "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)") - private String type; - - @JsonSchema( - description = "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ") - private List url; - - @JsonSchema( - description = "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.") - private APC articleprocessingcharge; - - @JsonSchema(description = "Date of the research product") - private String publicationdate;// dateofacceptance; - - @JsonSchema( - description = "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, " + - "nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)") - private String refereed; // peer-review status - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getLicense() { - return license; - } - - public void setLicense(String license) { - this.license = license; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public AccessRight getAccessright() { - return accessright; - } - - public void setAccessright(AccessRight accessright) { - this.accessright = accessright; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getUrl() { - return url; - } - - public void setUrl(List url) { - this.url = url; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getPublicationdate() { - return publicationdate; - } - - public void setPublicationdate(String publicationdate) { - this.publicationdate = publicationdate; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getRefereed() { - return refereed; - } - - public void setRefereed(String refereed) { - this.refereed = refereed; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public APC getArticleprocessingcharge() { - return articleprocessingcharge; - } - - public void setArticleprocessingcharge(APC articleprocessingcharge) { - this.articleprocessingcharge = articleprocessingcharge; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getAlternateIdentifier() { - return alternateIdentifier; - } - - public void setAlternateIdentifier(List alternateIdentifier) { - this.alternateIdentifier = alternateIdentifier; - } - -// @JsonInclude(JsonInclude.Include.NON_NULL) -// public Indicator getIndicators() { -// return indicators; -// } -// -// public void setIndicators(Indicator indicators) { -// this.indicators = indicators; -// } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Language.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Language.java deleted file mode 100644 index 82a8794..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Language.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public 
class Language implements Serializable { - - @JsonSchema(description = "alpha-3/ISO 639-2 code of the language") - private String code; // the classid in the Qualifier - - @JsonSchema(description = "Language label in English") - private String label; // the classname in the Qualifier - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - public static Language newInstance(String code, String value) { - Language qualifier = new Language(); - qualifier.setCode(code); - qualifier.setLabel(value); - return qualifier; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Measure.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Measure.java deleted file mode 100644 index 3a210cc..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Measure.java +++ /dev/null @@ -1,49 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * @author miriam.baglioni - * @Date 03/08/22 - */ -public class Measure implements Serializable { - @JsonSchema(description = "The measure (i.e. class)") - private String key; - - @JsonSchema(description = "The value for that measure") - private String value; - - public String getKey() { - return key; - } - - public void setKey(String key) { - this.key = key; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static Measure newInstance(String key, String value) { - Measure mes = new Measure(); - mes.key = key; - mes.value = value; - return mes; - } - - @JsonIgnore - public boolean isBlank() { - return StringUtils.isBlank(key) && StringUtils.isBlank(value); - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessColor.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessColor.java deleted file mode 100644 index 5baeb33..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessColor.java +++ /dev/null @@ -1,15 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -/** - * @author miriam.baglioni - * @Date 19/12/23 - */ -/** - * The OpenAccess color meant to be used on the result level - */ -public enum OpenAccessColor { - - gold, hybrid, bronze - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessRoute.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessRoute.java deleted file mode 100644 index 09a0236..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessRoute.java +++ /dev/null @@ -1,13 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -/** - * This Enum models the OpenAccess status, currently including only the values from Unpaywall - * - * https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean- - */ -public enum OpenAccessRoute { - - gold, green, hybrid, bronze - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Project.java deleted file mode 100644 index 9b08427..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Project.java +++ /dev/null @@ -1,57 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import 
java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * This class to store the common information about the project that will be dumped for community and for the whole - * graph - private String id to store the id of the project (OpenAIRE id) - private String code to store the grant - * agreement of the project - private String acronym to store the acronym of the project - private String title to store - * the tile of the project - */ -public class Project implements Serializable { - @JsonSchema(description = "The OpenAIRE id for the project") - protected String id;// OpenAIRE id - - @JsonSchema(description = "The grant agreement number") - protected String code; - - @JsonSchema(description = "The acronym of the project") - protected String acronym; - - protected String title; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Provenance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Provenance.java deleted file mode 100644 index 1be96dc..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Provenance.java +++ /dev/null @@ -1,41 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -/** - * Indicates the process that produced (or provided) the information, and the trust associated to the information. It - * has two parameters: - provenance of type String to store the provenance of the information, - trust of type String to - * store the trust associated to the information - */ -public class Provenance implements Serializable { - private String provenance; - private String trust; - - public String getProvenance() { - return provenance; - } - - public void setProvenance(String provenance) { - this.provenance = provenance; - } - - public String getTrust() { - return trust; - } - - public void setTrust(String trust) { - this.trust = trust; - } - - public static Provenance newInstance(String provenance, String trust) { - Provenance p = new Provenance(); - p.provenance = provenance; - p.trust = trust; - return p; - } - - public String toString() { - return provenance + trust; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java deleted file mode 100644 index bc455dc..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java +++ /dev/null @@ -1,511 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent the dumped result. It will be extended in the dump for Research Communities - Research - * Initiative/Infrastructures. It has the following parameters: - * - author of type - * List to describe the authors of a result. For each author in the result - * represented in the internal model one author in the esternal model is produced. 
- * - type of type String to represent - * the category of the result. Possible values are publication, dataset, software, other. It corresponds to - * resulttype.classname of the dumped result - * - language of type eu.dnetlib.dhp.schema.dump.oaf.Language to store - * information about the language of the result. It is dumped as - code corresponds to language.classid - value - * corresponds to language.classname - * - country of type List to store the country - * list to which the result is associated. For each country in the result respresented in the internal model one country - * in the external model is produces - subjects of type List to store the subjects for - * the result. For each subject in the result represented in the internal model one subject in the external model is - * produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first - * title in the resul to be dumped having classid equals to "main title" - subtitle of type String to store the subtitle - * of the result. It corresponds to the value of the first title in the resul to be dumped having classid equals to - * "subtitle" - description of type List to store the description of the result. It corresponds to the list of - * description.value in the result represented in the internal model - publicationdate of type String to store the - * pubblication date. It corresponds to dateofacceptance.value in the result represented in the internal model - - * publisher of type String to store information about the publisher. It corresponds to publisher.value of the result - * represented in the intrenal model - embargoenddate of type String to store the embargo end date. It corresponds to - * embargoenddate.value of the result represented in the internal model - source of type List See definition of - * Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal - * model - format of type List It corresponds to the list of format.value in the result represented in the - * internal model - contributor of type List to represent contributors for this result. It corresponds to the - * list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds - * to the list of coverage.value in the result represented in the internal model - bestaccessright of type - * eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store informatin about the openest access right associated to the - * manifestations of this research results. It corresponds to the same parameter in the result represented in the - * internal model - container of type eu.dnetlib.dhp.schema/dump.oaf.Container (only for result of type publication). It - * corresponds to the parameter journal of the result represented in the internal model - documentationUrl of type - * List (only for results of type software) to store the URLs to the software documentation. It corresponds to - * the list of documentationUrl.value of the result represented in the internal model - codeRepositoryUrl of type String - * (only for results of type software) to store the URL to the repository with the source code. It corresponds to - * codeRepositoryUrl.value of the result represented in the internal model - programmingLanguage of type String (only - * for results of type software) to store the programming language. 
It corresponds to programmingLanguage.classid of the - * result represented in the internal model - contactperson of type List (only for results of type other) to - * store the contact person for this result. It corresponds to the list of contactperson.value of the result represented - * in the internal model - contactgroup of type List (only for results of type other) to store the information - * for the contact group. It corresponds to the list of contactgroup.value of the result represented in the internal - * model - tool of type List (only for results of type other) to store information about tool useful for the - * interpretation and/or re-use of the research product. It corresponds to the list of tool.value in the result - * represented in the internal model - size of type String (only for results of type dataset) to store the size of the - * dataset. It corresponds to size.value in the result represented in the internal model - version of type String (only - * for results of type dataset) to store the version. It corresponds to version.value of the result represented in the - * internal model - geolocation of type List (only for results of type - * dataset) to store geolocation information. For each geolocation element in the result represented in the internal - * model a GeoLocation in the external model is produced - id of type String to store the OpenAIRE id of the result. It - * corresponds to the id of the result represented in the internal model - originalId of type List to store the - * original ids of the result. It corresponds to the originalId of the result represented in the internal model - pid of - * type List to store the persistent identifiers for the result. For - * each pid in the results represented in the internal model one pid in the external model is produced. The value - * correspondence is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - - * value corresponds to the pid.value of the result represented in the internal model - dateofcollection of type String - * to store information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result - * represented in the internal model - lastupdatetimestamp of type Long to store the timestamp of the last update of - * the record.
It corresponds to lastupdatetimestamp of the record represented in the internal model - * - */ -public class Result implements Serializable { - - private List author; - - // resulttype allows subclassing results into publications | datasets | software - - @JsonProperty("isGreen") - @JsonSchema(description = "True if the result is green Open Access") - private Boolean isGreen; - - @JsonSchema(description = "The Open Access Color of the publication") - private OpenAccessColor openAccessColor; - - @JsonProperty("isInDiamondJournal") - @JsonSchema(description = "True if the result is published in a Diamond Journal") - private Boolean isInDiamondJournal; - - @JsonSchema(description = "True if the result is the outcome of a project") - private Boolean publiclyFunded; - - public Boolean getGreen() { - return isGreen; - } - - public void setGreen(Boolean green) { - isGreen = green; - } - - public OpenAccessColor getOpenAccessColor() { - return openAccessColor; - } - - public void setOpenAccessColor(OpenAccessColor openAccessColor) { - this.openAccessColor = openAccessColor; - } - - public Boolean getInDiamondJournal() { - return isInDiamondJournal; - } - - public void setInDiamondJournal(Boolean inDiamondJournal) { - isInDiamondJournal = inDiamondJournal; - } - - public Boolean getPubliclyFunded() { - return publiclyFunded; - } - - public void setPubliclyFunded(Boolean publiclyFunded) { - this.publiclyFunded = publiclyFunded; - } - - @JsonSchema( - description = "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)") - private String type; // resulttype - - // common fields - private Language language; - - @JsonSchema(description = "The list of countries associated to this result") - private List country; - - @JsonSchema(description = "Keywords associated to the result") - private List subjects; - - @JsonSchema( - description = "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software.") - private String maintitle; - - @JsonSchema(description = "Explanatory or alternative name by which a scientific result is known.") - private String subtitle; - - private List description; - - @JsonSchema( - description = "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and sets a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date.
However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date.") - private String publicationdate; // dateofacceptance; - - @JsonSchema( - description = "The name of the entity that holds, archives, publishes, prints, distributes, releases, issues, or produces the resource.") - private String publisher; - - @JsonSchema(description = "Date when the embargo ends and this result turns Open Access") - private String embargoenddate; - - @JsonSchema(description = "See definition of Dublin Core field dc:source") - private List source; - - private List format; - - @JsonSchema(description = "Contributors for the result") - private List contributor; - - private List coverage; - - @JsonSchema(description = "The openest of the access rights of this result.") - private BestAccessRight bestaccessright; - - @JsonSchema( - description = "Container has information about the conference or journal where the result has been presented or published") - private Container container;// Journal - - @JsonSchema(description = "Only for results with type 'software': URL to the software documentation") - private List documentationUrl; // software - - @JsonSchema(description = "Only for results with type 'software': the URL to the repository with the source code") - private String codeRepositoryUrl; // software - - @JsonSchema(description = "Only for results with type 'software': the programming language") - private String programmingLanguage; // software - - @JsonSchema( - description = "Only for results with type 'other': Information on the person responsible for providing further information regarding the resource") - private List contactperson; // orp - - @JsonSchema( - description = "Only for results with type 'other': Information on the group responsible for providing further information regarding the resource") - private List contactgroup; // orp - - @JsonSchema( - description = "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product") - private List tool; // orp - - @JsonSchema(description = "Only for results with type 'dataset': the declared size of the dataset") - private String size; // dataset - - @JsonSchema(description = "Version of the result") - private String version; // dataset - - @JsonSchema(description = "Geolocation information") - private List geolocation; // dataset - - @JsonSchema(description = "The OpenAIRE identifier for this result") - private String id; - - @JsonSchema(description = "Identifiers of the record at the original sources") - private List originalId; - - @JsonSchema(description = "Persistent identifiers of the result") - private List pid; - - @JsonSchema(description = "When OpenAIRE collected the record the last time") - private String dateofcollection; - - @JsonSchema(description = "Timestamp of last update of the record in OpenAIRE") - private Long lastupdatetimestamp; - - @JsonSchema(description = "Indicators computed for this result, for example UsageCount ones") - private Indicator indicators; - - @JsonInclude(JsonInclude.Include.NON_NULL) - public Indicator getIndicators() { - return indicators; - } - - public void setIndicators(Indicator indicators) { - this.indicators = indicators; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public Long getLastupdatetimestamp() { - return lastupdatetimestamp; - } - - public void setLastupdatetimestamp(Long lastupdatetimestamp) { - 
this.lastupdatetimestamp = lastupdatetimestamp; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getOriginalId() { - return originalId; - } - - public void setOriginalId(List originalId) { - this.originalId = originalId; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getDateofcollection() { - return dateofcollection; - } - - public void setDateofcollection(String dateofcollection) { - this.dateofcollection = dateofcollection; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getAuthor() { - return author; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public Container getContainer() { - return container; - } - - public void setContainer(Container container) { - this.container = container; - } - - public void setAuthor(List author) { - this.author = author; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public Language getLanguage() { - return language; - } - - public void setLanguage(Language language) { - this.language = language; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getCountry() { - return country; - } - - public void setCountry(List country) { - this.country = country; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getSubjects() { - return subjects; - } - - public void setSubjects(List subjects) { - this.subjects = subjects; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getMaintitle() { - return maintitle; - } - - public void setMaintitle(String maintitle) { - this.maintitle = maintitle; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getSubtitle() { - return subtitle; - } - - public void setSubtitle(String subtitle) { - this.subtitle = subtitle; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getDescription() { - return description; - } - - public void setDescription(List description) { - this.description = description; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getPublicationdate() { - return publicationdate; - } - - public void setPublicationdate(String publicationdate) { - this.publicationdate = publicationdate; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getEmbargoenddate() { - return embargoenddate; - } - - public void setEmbargoenddate(String embargoenddate) { - this.embargoenddate = embargoenddate; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getSource() { - return source; - } - - public void setSource(List source) { - this.source = source; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getFormat() { - return format; - } - - public void setFormat(List format) { - this.format = format; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getContributor() { - return contributor; - } - - public void setContributor(List contributor) { - this.contributor = contributor; - } - - 
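The @JsonInclude(JsonInclude.Include.NON_NULL) annotation repeated on the getters above is what keeps unset properties out of the dumped JSON. A minimal, self-contained sketch of the effect — the NonNullDemo class and its sample values are illustrative only, not part of this schema:

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;

public class NonNullDemo {

	@JsonInclude(JsonInclude.Include.NON_NULL)
	public String getMaintitle() {
		return "A sample title"; // non-null, so it is serialized
	}

	@JsonInclude(JsonInclude.Include.NON_NULL)
	public String getSubtitle() {
		return null; // null, so it is omitted from the JSON output
	}

	public static void main(String[] args) throws Exception {
		// Prints {"maintitle":"A sample title"}: the null subtitle is dropped.
		System.out.println(new ObjectMapper().writeValueAsString(new NonNullDemo()));
	}
}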
@JsonInclude(JsonInclude.Include.NON_NULL) - public List getCoverage() { - return coverage; - } - - public void setCoverage(List coverage) { - this.coverage = coverage; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public BestAccessRight getBestaccessright() { - return bestaccessright; - } - - public void setBestaccessright(BestAccessRight bestaccessright) { - this.bestaccessright = bestaccessright; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getDocumentationUrl() { - return documentationUrl; - } - - public void setDocumentationUrl(List documentationUrl) { - this.documentationUrl = documentationUrl; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getCodeRepositoryUrl() { - return codeRepositoryUrl; - } - - public void setCodeRepositoryUrl(String codeRepositoryUrl) { - this.codeRepositoryUrl = codeRepositoryUrl; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getProgrammingLanguage() { - return programmingLanguage; - } - - public void setProgrammingLanguage(String programmingLanguage) { - this.programmingLanguage = programmingLanguage; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getContactperson() { - return contactperson; - } - - public void setContactperson(List contactperson) { - this.contactperson = contactperson; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getContactgroup() { - return contactgroup; - } - - public void setContactgroup(List contactgroup) { - this.contactgroup = contactgroup; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getTool() { - return tool; - } - - public void setTool(List tool) { - this.tool = tool; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getSize() { - return size; - } - - public void setSize(String size) { - this.size = size; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public String getVersion() { - return version; - } - - public void setVersion(String version) { - this.version = version; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getGeolocation() { - return geolocation; - } - - public void setGeolocation(List geolocation) { - this.geolocation = geolocation; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultCountry.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultCountry.java deleted file mode 100644 index 324d382..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultCountry.java +++ /dev/null @@ -1,41 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * Represents the country associated to the generic result. It extends eu.dnetlib.dhp.schema.dump.oaf.Country with a - * provenance parameter of type eu.dnetlib.dhp.schema.dump.oaf.Provenance. The country is not mapped if its value in the - * result represented in the internal format is Unknown. The values for this element correspond to: - * - code corresponds to the classid of eu.dnetlib.dhp.schema.oaf.Country - * - label corresponds to the classname of eu.dnetlib.dhp.schema.oaf.Country - * - provenance set only if the dataInfo associated to the Country of the result to be dumped is not null.
In this case: - * - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with datainfo.provenanceaction.classname) - * - trust corresponds to dataInfo.trust - */ - -public class ResultCountry extends Country { - - @JsonSchema(description = "Why this result is associated to the country.") - private Provenance provenance; - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public static ResultCountry newInstance(String code, String label, Provenance provenance) { - ResultCountry c = new ResultCountry(); - c.setProvenance(provenance); - c.setCode(code); - c.setLabel(label); - return c; - } - - public static ResultCountry newInstance(String code, String label, String provenance, String trust) { - return newInstance(code, label, Provenance.newInstance(provenance, trust)); - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultPid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultPid.java deleted file mode 100644 index 991412f..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/ResultPid.java +++ /dev/null @@ -1,44 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class ResultPid implements Serializable { - @JsonSchema( - description = "The scheme of the persistent identifier for the result (e.g. doi). " + - "If the pid is here it means the information for the pid has been collected from an authority for " + - "that pid type (e.g. Crossref/Datacite for doi). The set of authoritative pids is: doi when collected from Crossref or Datacite, " - + - "pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories") - private String scheme; - - @JsonSchema(description = "The value expressed in the scheme (e.g. 
10.1000/182)") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static ResultPid newInstance(String scheme, String value) { - ResultPid cf = new ResultPid(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java deleted file mode 100644 index 5d0e1dc..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java +++ /dev/null @@ -1,46 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.fasterxml.jackson.annotation.JsonGetter; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.annotation.JsonSetter; - -/** - * @author miriam.baglioni - * @Date 07/11/22 - */ -public class Score implements Serializable { - private String indicator; - private String score; - - @JsonProperty("class") - private String clazz; - - public String getScore() { - return score; - } - - public void setScore(String score) { - this.score = score; - } - - @JsonGetter("class") - public String getClazz() { - return clazz; - } - - @JsonSetter("class") - public void setClazz(String clazz) { - this.clazz = clazz; - } - - public String getIndicator() { - return indicator; - } - - public void setIndicator(String indicator) { - this.indicator = indicator; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java deleted file mode 100644 index b7e1872..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Subject.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent keywords associated to the result. It has two parameters: - * - subject of type eu.dnetlib.dhp.schema.dump.oaf.SubjectSchemeValue to describe the subject. It mapped as: - * - schema it corresponds to qualifier.classid of the dumped subject - * - value it corresponds to the subject value - * - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo - * is not null. 
In this case: - * - provenance corresponds to dataInfo.provenanceaction.classname - * - trust corresponds to dataInfo.trust - */ -public class Subject implements Serializable { - private SubjectSchemeValue subject; - - @JsonSchema(description = "Why this subject is associated to the result") - private Provenance provenance; - - public SubjectSchemeValue getSubject() { - return subject; - } - - public void setSubject(SubjectSchemeValue subject) { - this.subject = subject; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java deleted file mode 100644 index a7e665d..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/SubjectSchemeValue.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class SubjectSchemeValue implements Serializable { - @JsonSchema( - description = "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies).") - private String scheme; - - @JsonSchema( - description = "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary).") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static SubjectSchemeValue newInstance(String scheme, String value) { - SubjectSchemeValue cf = new SubjectSchemeValue(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/UsageCounts.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/UsageCounts.java deleted file mode 100644 index 2f894e0..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/UsageCounts.java +++ /dev/null @@ -1,29 +0,0 @@ - -package eu.dnetlib.dhp.oa.model; - -import java.io.Serializable; - -/** - * @author miriam.baglioni - * @Date 07/11/22 - */ -public class UsageCounts implements Serializable { - private String downloads; - private String views; - - public String getDownloads() { - return downloads; - } - - public void setDownloads(String downloads) { - this.downloads = downloads; - } - - public String getViews() { - return views; - } - - public void setViews(String views) { - this.views = views; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CfHbKeyValue.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CfHbKeyValue.java deleted file mode 100644 index 46ce417..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CfHbKeyValue.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import java.io.Serializable; - -import org.apache.commons.lang3.StringUtils; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class CfHbKeyValue implements Serializable { - - @JsonSchema(description = "the OpenAIRE identifier of the data source") - private String key; - 
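As a usage sketch for the factory helpers above, the snippet below assembles a Subject from SubjectSchemeValue.newInstance and Provenance.newInstance; the scheme, provenance and trust strings are illustrative placeholders, not values mandated by the schema:

import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.Subject;
import eu.dnetlib.dhp.oa.model.SubjectSchemeValue;

public class SubjectSketch {

	public static Subject keywordSubject(String value) {
		Subject s = new Subject();
		// 'keyword' marks a free-text subject, per the SubjectSchemeValue description above
		s.setSubject(SubjectSchemeValue.newInstance("keyword", value));
		// provenance and trust are placeholder values for this sketch
		s.setProvenance(Provenance.newInstance("Harvested", "0.9"));
		return s;
	}
}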
- @JsonSchema(description = "the name of the data source") - private String value; - - public String getKey() { - return key; - } - - public void setKey(String key) { - this.key = key; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static CfHbKeyValue newInstance(String key, String value) { - CfHbKeyValue inst = new CfHbKeyValue(); - inst.key = key; - inst.value = value; - return inst; - } - - @JsonIgnore - public boolean isBlank() { - return StringUtils.isBlank(key) && StringUtils.isBlank(value); - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java deleted file mode 100644 index b58468b..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityInstance.java +++ /dev/null @@ -1,43 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Instance; - -/** - * It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. In the Result dump this - * information is not present because it is dumped as a set of relations between the result and the datasource. - - * hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the - * instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - - * key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type - * eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been - * collected. 
It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to - * collectedfrom.key - value corresponds to collectedfrom.value - */ -public class CommunityInstance extends Instance { - @JsonSchema(description = "Information about the source from which the instance can be viewed or downloaded.") - private CfHbKeyValue hostedby; - - @JsonSchema(description = "Information about the source from which the record has been collected") - private CfHbKeyValue collectedfrom; - - @JsonInclude(JsonInclude.Include.NON_NULL) - public CfHbKeyValue getHostedby() { - return hostedby; - } - - public void setHostedby(CfHbKeyValue hostedby) { - this.hostedby = hostedby; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public CfHbKeyValue getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(CfHbKeyValue collectedfrom) { - this.collectedfrom = collectedfrom; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java deleted file mode 100644 index fc8b792..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/CommunityResult.java +++ /dev/null @@ -1,75 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Result; - -/** - * extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type - * List to store the list of projects related to the result. The - * information is added after the result is mapped to the external model - context of type - * List to store information about the RC/RI related to the result. - * For each context in the result represented in the internal model one context in the external model is produced - - * collectedfrom of type List to store information about the sources from which - * the record has been collected. For each collectedfrom in the result represented in the internal model one - * collectedfrom in the external model is produced - instance of type - * List to store all the instances associated to the result. - * It corresponds to the same parameter in the result represented in the internal model - */ -public class CommunityResult extends Result { - - @JsonSchema(description = "List of projects (i.e. grants) that (co-)funded the production of the research results") - private List projects; - - @JsonSchema( - description = "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu") - private List context; - - @JsonSchema(description = "Information about the sources from which the record has been collected") - protected List collectedfrom; - - @JsonSchema( - description = "Each instance is one specific materialisation or version of the result. 
For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version") - private List instance; - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getInstance() { - return instance; - } - - public void setInstance(List instance) { - this.instance = instance; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getCollectedfrom() { - return collectedfrom; - } - - public void setCollectedfrom(List collectedfrom) { - this.collectedfrom = collectedfrom; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getProjects() { - return projects; - } - - public void setProjects(List projects) { - this.projects = projects; - } - - @JsonInclude(JsonInclude.Include.NON_NULL) - public List getContext() { - return context; - } - - public void setContext(List context) { - this.context = context; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Context.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Context.java deleted file mode 100644 index 90dd431..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Context.java +++ /dev/null @@ -1,73 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Provenance; - -/** - * Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with - * OpenAIRE. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a parameter provenance of type - * List to store the provenances of the association between the result and - * the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result - * to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::" - * will be used as value for code - label: it corresponds to the label associated to the id. The information is taken - * from the profile of the RC/RI - provenance: it is set only if the dataInfo associated to the context element of the - * result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is - * instantiated if the element datainfo.provenanceaction is not null.
In this case - provenance corresponds to - * dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust - */ -public class Context { - @JsonSchema(description = "Code identifying the RI/RC") - private String code; - - @JsonSchema(description = "Label of the RI/RC") - private String label; - - @JsonSchema(description = "Why this result is associated to the RI/RC.") - private List provenance; - - public List getProvenance() { - return provenance; - } - - public void setProvenance(List provenance) { - this.provenance = provenance; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - @Override - public int hashCode() { - final String p = Optional - .ofNullable(getProvenance()) - .map( - prov -> prov - .stream() - .map(Provenance::toString) - .collect(Collectors.joining())) - .orElse(""); - - return Objects.hash(getCode(), getLabel(), p); - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java deleted file mode 100644 index 2099429..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Funder.java +++ /dev/null @@ -1,24 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To store information about the funder funding the project related to the result. It has the following parameters: - - * shortName of type String to store the funder short name (e.g. AKA). - name of type String to store the funder name - * (e.g. Academy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to - * store the jurisdiction of the funder - */ -public class Funder extends eu.dnetlib.dhp.oa.model.Funder { - - @JsonSchema(description = "Stream of funding (e.g. for European Commission can be H2020 or FP7)") - private String fundingStream; - - public String getFundingStream() { - return fundingStream; - } - - public void setFundingStream(String fundingStream) { - this.fundingStream = fundingStream; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Project.java deleted file mode 100644 index 421f772..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Project.java +++ /dev/null @@ -1,58 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Provenance; - -/** - * To store information about the project related to the result. This information is not directly mapped from the result - * represented in the internal model because it is not there. The mapped result will be enriched with project - * information derived from relations between results and projects. Project extends eu.dnetlib.dhp.schema.dump.oaf.Project - * with the following parameters: - funder of type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information - * about the funder funding the project - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store - * information about the
provenance of the association between the result and the project - */ -public class Project extends eu.dnetlib.dhp.oa.model.Project { - - @JsonSchema(description = "Information about the funder funding the project") - private Funder funder; - - private Provenance provenance; - - private Validated validated; - - public void setValidated(Validated validated) { - this.validated = validated; - } - - public Validated getValidated() { - return validated; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public Funder getFunder() { - return funder; - } - - public void setFunder(Funder funders) { - this.funder = funders; - } - - public static Project newInstance(String id, String code, String acronym, String title, Funder funder) { - Project project = new Project(); - project.setAcronym(acronym); - project.setCode(code); - project.setFunder(funder); - project.setId(id); - project.setTitle(title); - return project; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Validated.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Validated.java deleted file mode 100644 index 015f282..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/community/Validated.java +++ /dev/null @@ -1,39 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.community; - -import java.io.Serializable; - -/** - * To store information about the validation of the relation between the project and the result. It has the following - * parameters: - validationDate of type String to store the date when the relation was validated - validatedByFunder of - * type Boolean to state whether the relation was validated by the funder - */ -public class Validated implements Serializable { - - private String validationDate; - private Boolean validatedByFunder; - - public void setValidationDate(String validationDate) { - this.validationDate = validationDate; - } - - public String getValidationDate() { - return validationDate; - } - - public void setValidatedByFunder(Boolean validated) { - this.validatedByFunder = validated; - } - - public Boolean getValidatedByFunder() { - return validatedByFunder; - } - - public static Validated newInstance(Boolean validated, String validationDate) { - Validated v = new Validated(); - v.validatedByFunder = validated; - v.validationDate = validationDate; - return v; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Constants.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Constants.java deleted file mode 100644 index 8c92aff..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Constants.java +++ /dev/null @@ -1,21 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -public class Constants implements Serializable { - // collectedFrom goes with isProvidedBy -> taken from ModelSupport - - public static final String HOSTED_BY = "isHostedBy"; - public static final String HOSTS = "hosts"; - - // for community result we use isrelatedto - - public static final String RESULT_ENTITY = "result"; - public static final String DATASOURCE_ENTITY = "datasource"; - public static final String CONTEXT_ENTITY = "context"; - - public static final String CONTEXT_ID = "60"; - public static final String CONTEXT_NS_PREFIX = "context____"; - -} diff --git 
a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java deleted file mode 100644 index 7b21379..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Datasource.java +++ /dev/null @@ -1,358 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; -import java.util.List; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Container; -import eu.dnetlib.dhp.oa.model.Indicator; - -/** - * To store information about the datasource OpenAIRE collects information from. It contains the following parameters: - - * id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource - * represented in the internal model - originalId of type List to store the list of original ids associated to - * the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The - * null values are filtered out - pid of type List to store the - * persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid - * in the external model is produced as: - scheme corresponds to pid.qualifier.classid of the datasource represented in - * the internal model - value corresponds to pid.value of the datasource represented in the internal model - - * datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g. - * pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It - * corresponds to datasourcetype of the datasource represented in the internal model and: - code corresponds to - * datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to - * store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to). - * It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname - * of type String to store the official name of the datasource. It corresponds to officialname.value of the datasource - * represented in the internal model - englishname of type String to store the English name of the datasource. It - * corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to - * store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in - * the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to - * logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the date - * of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the - * datasource represented in the internal model - description of type String to store the description for the - * datasource.
It corresponds to description.value of the datasource represented in the internal model - */ -public class Datasource implements Serializable { - @JsonSchema(description = "The OpenAIRE id of the data source") - private String id; // string - - @JsonSchema(description = "Original identifiers for the datasource") - private List originalId; // list string - - @JsonSchema(description = "Persistent identifiers of the datasource") - private List pid; // list - - @JsonSchema( - description = "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies") - private DatasourceSchemeValue datasourcetype; // value - - @JsonSchema( - description = "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu.") - private String openairecompatibility; // value - - @JsonSchema(description = "The official name of the datasource") - private String officialname; // string - - @JsonSchema(description = "The English name of the datasource") - private String englishname; // string - - private String websiteurl; // string - - private String logourl; // string - - @JsonSchema(description = "The date of last validation against the OpenAIRE guidelines for the datasource records") - private String dateofvalidation; // string - - private String description; // description - - @JsonSchema(description = "List of subjects associated to the datasource") - private List subjects; // List - - // opendoar specific fields (od*) - - @JsonSchema(description = "The languages present in the data source's content, as defined by OpenDOAR.") - private List languages; // odlanguages List - - @JsonSchema(description = "Types of content in the data source, as defined by OpenDOAR") - private List contenttypes; // odcontent types List - - // re3data fields - @JsonSchema(description = "Releasing date of the data source, as defined by re3data.org") - private String releasestartdate; // string - - @JsonSchema( - description = "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org") - private String releaseenddate; // string - - @JsonSchema( - description = "The URL of a mission statement describing the designated community of the data source. As defined by re3data.org") - private String missionstatementurl; // string - - @JsonSchema( - description = "Type of access to the data source, as defined by re3data.org. Possible values: " + - "{open, restricted, closed}") - private String accessrights; // databaseaccesstype string - - // {open, restricted or closed} - @JsonSchema(description = "Type of data upload. As defined by re3data.org: one of {open, restricted, closed}") - private String uploadrights; // datauploadtype string - - @JsonSchema( - description = "Access restrictions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}") - private String databaseaccessrestriction; // string - - @JsonSchema( - description = "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}") - private String datauploadrestriction; // string - - @JsonSchema(description = "As defined by re3data.org: 'yes' if the data source supports versioning, 'no' otherwise.") - private Boolean versioning; // boolean - - @JsonSchema( - description = "The URL of the data source providing information on how to cite its items. 
As defined by re3data.org.") - private String citationguidelineurl; // string - - // {yes, no, uknown} - @JsonSchema( - description = "The persistent identifier system that is used by the data source. As defined by re3data.org") - private String pidsystems; // string - - @JsonSchema( - description = "The certificate, seal or standard the data source complies with. As defined by re3data.org.") - private String certificates; // string - - @JsonSchema(description = "Policies of the data source, as defined in OpenDOAR.") - private List policies; // - - @JsonSchema(description = "Information about the journal, if this data source is of type Journal.") - private Container journal; // issn etc del Journal - -// @JsonSchema(description = "Indicators computed for this Datasource, for example UsageCount ones") -// private Indicator indicators; -// -// public Indicator getIndicators() { -// return indicators; -// } -// -// public void setIndicators(Indicator indicators) { -// this.indicators = indicators; -// } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getOriginalId() { - return originalId; - } - - public void setOriginalId(List originalId) { - this.originalId = originalId; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - - public DatasourceSchemeValue getDatasourcetype() { - return datasourcetype; - } - - public void setDatasourcetype(DatasourceSchemeValue datasourcetype) { - this.datasourcetype = datasourcetype; - } - - public String getOpenairecompatibility() { - return openairecompatibility; - } - - public void setOpenairecompatibility(String openairecompatibility) { - this.openairecompatibility = openairecompatibility; - } - - public String getOfficialname() { - return officialname; - } - - public void setOfficialname(String officialname) { - this.officialname = officialname; - } - - public String getEnglishname() { - return englishname; - } - - public void setEnglishname(String englishname) { - this.englishname = englishname; - } - - public String getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } - - public String getLogourl() { - return logourl; - } - - public void setLogourl(String logourl) { - this.logourl = logourl; - } - - public String getDateofvalidation() { - return dateofvalidation; - } - - public void setDateofvalidation(String dateofvalidation) { - this.dateofvalidation = dateofvalidation; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public List getSubjects() { - return subjects; - } - - public void setSubjects(List subjects) { - this.subjects = subjects; - } - - public List getLanguages() { - return languages; - } - - public void setLanguages(List languages) { - this.languages = languages; - } - - public List getContenttypes() { - return contenttypes; - } - - public void setContenttypes(List contenttypes) { - this.contenttypes = contenttypes; - } - - public String getReleasestartdate() { - return releasestartdate; - } - - public void setReleasestartdate(String releasestartdate) { - this.releasestartdate = releasestartdate; - } - - public String getReleaseenddate() { - return releaseenddate; - } - - public void setReleaseenddate(String releaseenddate) { - this.releaseenddate = releaseenddate; - } - - public String getMissionstatementurl() { - return 
missionstatementurl; - } - - public void setMissionstatementurl(String missionstatementurl) { - this.missionstatementurl = missionstatementurl; - } - - public String getAccessrights() { - return accessrights; - } - - public void setAccessrights(String accessrights) { - this.accessrights = accessrights; - } - - public String getUploadrights() { - return uploadrights; - } - - public void setUploadrights(String uploadrights) { - this.uploadrights = uploadrights; - } - - public String getDatabaseaccessrestriction() { - return databaseaccessrestriction; - } - - public void setDatabaseaccessrestriction(String databaseaccessrestriction) { - this.databaseaccessrestriction = databaseaccessrestriction; - } - - public String getDatauploadrestriction() { - return datauploadrestriction; - } - - public void setDatauploadrestriction(String datauploadrestriction) { - this.datauploadrestriction = datauploadrestriction; - } - - public Boolean getVersioning() { - return versioning; - } - - public void setVersioning(Boolean versioning) { - this.versioning = versioning; - } - - public String getCitationguidelineurl() { - return citationguidelineurl; - } - - public void setCitationguidelineurl(String citationguidelineurl) { - this.citationguidelineurl = citationguidelineurl; - } - - public String getPidsystems() { - return pidsystems; - } - - public void setPidsystems(String pidsystems) { - this.pidsystems = pidsystems; - } - - public String getCertificates() { - return certificates; - } - - public void setCertificates(String certificates) { - this.certificates = certificates; - } - - public List getPolicies() { - return policies; - } - - public void setPolicies(List policiesr3) { - this.policies = policiesr3; - } - - public Container getJournal() { - return journal; - } - - public void setJournal(Container journal) { - this.journal = journal; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourcePid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourcePid.java deleted file mode 100644 index 50eddbc..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourcePid.java +++ /dev/null @@ -1,41 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public class DatasourcePid implements Serializable { - - @JsonSchema(description = "The scheme used to express the value ") - private String scheme; - - @JsonSchema(description = "The value expressed in the scheme ") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static DatasourcePid newInstance(String scheme, String value) { - DatasourcePid cf = new DatasourcePid(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourceSchemeValue.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourceSchemeValue.java deleted file mode 100644 index a437403..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/DatasourceSchemeValue.java +++ /dev/null @@ -1,41 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public // TODO change the 
DatasourceSchemeValue to DatasourceKeyValue. The scheme is always the dnet one. What we show - // here is the entry in the scheme (the key) and its understandable value -class DatasourceSchemeValue implements Serializable { - @JsonSchema(description = "The scheme used to express the value (e.g. pubsrepository::journal)") - private String scheme; - - @JsonSchema(description = "The value expressed in the scheme (e.g. Journal)") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static DatasourceSchemeValue newInstance(String scheme, String value) { - DatasourceSchemeValue cf = new DatasourceSchemeValue(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Funder.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Funder.java deleted file mode 100644 index 84c09d2..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Funder.java +++ /dev/null @@ -1,23 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To store information about the funder funding the project related to the result. It extends - * eu.dnetlib.dhp.schema.dump.oaf.Funder with the following parameter: - - private - * eu.dnetlib.dhp.schema.dump.oaf.graph.Fundings funding_stream to store the funding stream - */ -public class Funder extends eu.dnetlib.dhp.oa.model.Funder { - - @JsonSchema(description = "Description of the funding stream") - private Fundings funding_stream; - - public Fundings getFunding_stream() { - return funding_stream; - } - - public void setFunding_stream(Fundings funding_stream) { - this.funding_stream = funding_stream; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Fundings.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Fundings.java deleted file mode 100644 index b0330d7..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Fundings.java +++ /dev/null @@ -1,38 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To store information about the funding stream. It has two parameters: - private String id to store the id of the - * funding stream. The id is created by appending the shortname of the funder to the name of each level in the xml - * representing the funding stream. For example: if the funder is the European Commission, the funding level 0 name is - * FP7, the funding level 1 name is SP3 and the funding level 2 name is PEOPLE then the id will be: EC::FP7::SP3::PEOPLE - * - private String description to describe the funding stream. 
It is created by concatenating the description of each - * funding level so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People - - * Marie-Curie Actions - */ -public class Fundings implements Serializable { - - @JsonSchema(description = "Id of the funding stream") - private String id; - private String description; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Granted.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Granted.java deleted file mode 100644 index c5fb352..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Granted.java +++ /dev/null @@ -1,61 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To describe the funded amount. It has the following parameters: - private String currency to store the currency of - * the fund - private float totalcost to store the total cost of the project - private float fundedamount to store the - * funded amount by the funder - */ -public class Granted implements Serializable { - @JsonSchema(description = "The currency of the granted amount (e.g. EUR)") - private String currency; - - @JsonSchema(description = "The total cost of the project") - private float totalcost; - - @JsonSchema(description = "The funded amount") - private float fundedamount; - - public String getCurrency() { - return currency; - } - - public void setCurrency(String currency) { - this.currency = currency; - } - - public float getTotalcost() { - return totalcost; - } - - public void setTotalcost(float totalcost) { - this.totalcost = totalcost; - } - - public float getFundedamount() { - return fundedamount; - } - - public void setFundedamount(float fundedamount) { - this.fundedamount = fundedamount; - } - - public static Granted newInstance(String currency, float totalcost, float fundedamount) { - Granted granted = new Granted(); - granted.currency = currency; - granted.totalcost = totalcost; - granted.fundedamount = fundedamount; - return granted; - } - - public static Granted newInstance(String currency, float fundedamount) { - Granted granted = new Granted(); - granted.currency = currency; - granted.fundedamount = fundedamount; - return granted; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java deleted file mode 100644 index 9b95881..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/GraphResult.java +++ /dev/null @@ -1,28 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.util.List; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Instance; -import eu.dnetlib.dhp.oa.model.Result; - -/** - * It extends the eu.dnetlib.dhp.schema.dump.oaf.Result with - instance of type - * List to store all the instances associated to the result. It corresponds to - * the same parameter in the result represented in the internal model - */ -public class GraphResult extends Result { - @JsonSchema( - description = "Each instance is one specific materialisation or version of the result. 
For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version") - private List instance; - - public List getInstance() { - return instance; - } - - public void setInstance(List instance) { - this.instance = instance; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/H2020Classification.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/H2020Classification.java deleted file mode 100644 index 96d1ed8..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/H2020Classification.java +++ /dev/null @@ -1,82 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -/** - * To store information about the classification for the project. The classification depends on the programme. For example - * H2020-EU.3.4.5.3 can be classified as - * H2020-EU.3. => Societal Challenges (level1) - * H2020-EU.3.4. => Transport (level2) - * H2020-EU.3.4.5. => CLEANSKY2 (level3) - * H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4) - * - * We decided to explicitly represent up to three levels in the classification. - * - * H2020Classification has the following parameters: - * - private Programme programme to store the information about the programme related to this classification - * - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC) - * - private String level2 to store the information about the level2 of the classification (Objectives (?)) - * - private String level3 to store the information about the level3 of the classification - * - private String classification to store the entire classification related to the programme - */ -public class H2020Classification implements Serializable { - private Programme programme; - - private String level1; - private String level2; - private String level3; - - private String classification; - - public Programme getProgramme() { - return programme; - } - - public void setProgramme(Programme programme) { - this.programme = programme; - } - - public String getLevel1() { - return level1; - } - - public void setLevel1(String level1) { - this.level1 = level1; - } - - public String getLevel2() { - return level2; - } - - public void setLevel2(String level2) { - this.level2 = level2; - } - - public String getLevel3() { - return level3; - } - - public void setLevel3(String level3) { - this.level3 = level3; - } - - public String getClassification() { - return classification; - } - - public void setClassification(String classification) { - this.classification = classification; - } - - public static H2020Classification newInstance(String programme_code, String programme_description, String level1, - String level2, String level3, String classification) { - H2020Classification h2020classification = new H2020Classification(); - h2020classification.programme = Programme.newInstance(programme_code, programme_description); - h2020classification.level1 = level1; - h2020classification.level2 = level2; - h2020classification.level3 = level3; - h2020classification.classification = classification; - return h2020classification; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java deleted file mode 100644 index 65211c8..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Organization.java +++ /dev/null @@ -1,94 +0,0 @@ - -package
eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; -import java.util.List; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Country; - -/** - * To represent the generic organization. It has the following parameters: - * - private String legalshortname to store the legalshortname of the organization - * - private String legalname to store the legal name of the organization - * - private String websiteurl to store the websiteurl of the organization - * - private List alternativenames to store the alternative names of the organization - * - private Country country to store the country of the organization - * - private String id to store the openaire id of the organization - * - private List pid to store the list of pids for the organization - */ -public class Organization implements Serializable { - private String legalshortname; - private String legalname; - private String websiteurl; - - @JsonSchema(description = "Alternative names that identify the organisation") - private List alternativenames; - - @JsonSchema(description = "The organisation country") - private Country country; - - @JsonSchema(description = "The OpenAIRE id for the organisation") - private String id; - - @JsonSchema(description = "Persistent identifiers for the organisation i.e. isni 0000000090326370") - private List pid; - - public String getLegalshortname() { - return legalshortname; - } - - public void setLegalshortname(String legalshortname) { - this.legalshortname = legalshortname; - } - - public String getLegalname() { - return legalname; - } - - public void setLegalname(String legalname) { - this.legalname = legalname; - } - - public String getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } - - public List getAlternativenames() { - return alternativenames; - } - - public void setAlternativenames(List alternativenames) { - this.alternativenames = alternativenames; - } - - public Country getCountry() { - return country; - } - - public void setCountry(Country country) { - this.country = country; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public List getPid() { - return pid; - } - - public void setPid(List pid) { - this.pid = pid; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/OrganizationPid.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/OrganizationPid.java deleted file mode 100644 index 575f4c8..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/OrganizationPid.java +++ /dev/null @@ -1,42 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -public - -class OrganizationPid implements Serializable { - @JsonSchema(description = "The scheme of the identifier (i.e. isni)") - private String scheme; - - @JsonSchema(description = "The value in the schema (i.e.
0000000090326370)") - private String value; - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } - - public String getValue() { - return value; - } - - public void setValue(String value) { - this.value = value; - } - - public static OrganizationPid newInstance(String scheme, String value) { - OrganizationPid cf = new OrganizationPid(); - - cf.setScheme(scheme); - cf.setValue(value); - - return cf; - } - -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Programme.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Programme.java deleted file mode 100644 index 14c9f40..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Programme.java +++ /dev/null @@ -1,41 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To store information about the ec programme for the project. It has the following parameters: - private String code - * to store the code of the programme - private String description to store the description of the programme - */ -public class Programme implements Serializable { - @JsonSchema(description = "The code of the programme") - private String code; - - @JsonSchema(description = "The description of the programme") - private String description; - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public static Programme newInstance(String code, String description) { - Programme p = new Programme(); - p.code = code; - p.description = description; - return p; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Project.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Project.java deleted file mode 100644 index 6ef3a5c..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Project.java +++ /dev/null @@ -1,211 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; -import java.util.List; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Indicator; - -/** - * This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump - * of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and - * Projects but we put the information about the Funder within the Project representation. We also removed the - * collected from element from the Project. No relation between the Project and the Datasource entity from which it is - * collected will be created. We will never create relations between Project and Datasource. In case some relation will - * be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project, - * project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to - * 0. 
It has the following parameters: - * - private String id to store the id of the project (OpenAIRE id) - * - private String websiteurl to store the websiteurl of the project - * - private String code to store the grant agreement of the project - * - private String acronym to store the acronym of the project - * - private String title to store the title of the project - * - private String startdate to store the start date - * - private String enddate to store the end date - * - private String callidentifier to store the call identifier - * - private String keywords to store the keywords - * - private boolean openaccessmandateforpublications to store if the project must comply with the open access mandate - * for publications. This value will be set to true if one of the fields in the project represented in the internal model - * is set to true - * - private boolean openaccessmandatefordataset to store if the project must comply with the open access mandate for - * dataset. It is set to the value in the corresponding field of the project represented in the internal model - * - private List subject to store the list of subjects of the project - * - private List funding to store the list of funders of the project - * - private String summary to store the summary of the project - * - private Granted granted to store the granted amount - * - private List h2020programme to store the list of programmes the project is related to - */ - -public class Project implements Serializable { - private String id; - - private String websiteurl; - private String code; - private String acronym; - private String title; - - private String startdate; - - private String enddate; - - private String callidentifier; - - private String keywords; - - private boolean openaccessmandateforpublications; - - private boolean openaccessmandatefordataset; - private List subject; - - @JsonSchema(description = "Funding information for the project") - private List funding; - - private String summary; - - @JsonSchema(description = "The money granted to the project") - private Granted granted; - - @JsonSchema(description = "The h2020 programme funding the project") - private List h2020programme; - -// @JsonSchema(description = "Indicators computed for this project, for example UsageCount ones") -// private Indicator indicators; -// -// public Indicator getIndicators() { -// return indicators; -// } -// -// public void setIndicators(Indicator indicators) { -// this.indicators = indicators; -// } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getWebsiteurl() { - return websiteurl; - } - - public void setWebsiteurl(String websiteurl) { - this.websiteurl = websiteurl; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getStartdate() { - return startdate; - } - - public void setStartdate(String startdate) { - this.startdate = startdate; - } - - public String getEnddate() { - return enddate; - } - - public void setEnddate(String enddate) { - this.enddate = enddate; - } - - public String getCallidentifier() { - return callidentifier; - } - - public void setCallidentifier(String callidentifier) { - this.callidentifier = callidentifier; - } - -
public String getKeywords() { - return keywords; - } - - public void setKeywords(String keywords) { - this.keywords = keywords; - } - - public boolean isOpenaccessmandateforpublications() { - return openaccessmandateforpublications; - } - - public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) { - this.openaccessmandateforpublications = openaccessmandateforpublications; - } - - public boolean isOpenaccessmandatefordataset() { - return openaccessmandatefordataset; - } - - public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) { - this.openaccessmandatefordataset = openaccessmandatefordataset; - } - - public List getSubject() { - return subject; - } - - public void setSubject(List subject) { - this.subject = subject; - } - - public List getFunding() { - return funding; - } - - public void setFunding(List funding) { - this.funding = funding; - } - - public String getSummary() { - return summary; - } - - public void setSummary(String summary) { - this.summary = summary; - } - - public Granted getGranted() { - return granted; - } - - public void setGranted(Granted granted) { - this.granted = granted; - } - - public List getH2020programme() { - return h2020programme; - } - - public void setH2020programme(List h2020programme) { - this.h2020programme = h2020programme; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/RelType.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/RelType.java deleted file mode 100644 index ea85187..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/RelType.java +++ /dev/null @@ -1,39 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -/** - * To represent the semantics of the generic relation between two entities. It has the following parameters: - private - * String name to store the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the relclass - * parameter in the relation represented in the internal model - private String type - * to store the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter of the relation - * represented in the internal model - */ -public class RelType implements Serializable { - private String name; // relclass - private String type; // subreltype - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public static RelType newInstance(String name, String type) { - RelType rel = new RelType(); - rel.name = name; - rel.type = type; - return rel; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java deleted file mode 100644 index 2cc4280..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/Relation.java +++ /dev/null @@ -1,124 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; -import java.util.Objects; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.Provenance; - -/** - * To represent the generic relation between two entities.
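// Editor's sketch (illustrative, not part of this patch): how RelType pairs the internal-model
// relclass/subreltype and feeds Relation.newInstance as defined a few lines below. The relclass
// and subreltype values come from the RelType Javadoc above; the identifiers, the entity-type
// strings and the helper method are placeholders for illustration only.
static Relation exampleAffiliation(String orgId, String resultId, Provenance provenance) {
	RelType affiliation = RelType.newInstance("isAuthorInstitutionOf", "affiliation");
	return Relation.newInstance(orgId, "organization", resultId, "result", affiliation, provenance);
}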
It has the following parameters: - private Node source to - * represent the entity source of the relation - private Node target to represent the entity target of the relation - - * private RelType reltype to represent the semantics of the relation - private Provenance provenance to represent the - * provenance of the relation - */ -public class Relation implements Serializable { - @JsonSchema(description = "The identifier of the source in the relation") - private String source; - - @JsonSchema(description = "The entity type of the source in the relation") - private String sourceType; - - @JsonSchema(description = "The identifier of the target in the relation") - private String target; - - @JsonSchema(description = "The entity type of the target in the relation") - private String targetType; - - @JsonSchema(description = "To represent the semantics of a relation between two entities") - private RelType reltype; - - @JsonSchema(description = "The reason why OpenAIRE holds the relation ") - private Provenance provenance; - - @JsonSchema( - description = "True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)") - private boolean validated; - - @JsonSchema(description = "The date when the relation was collected from OpenAIRE") - private String validationDate; - - public String getSource() { - return source; - } - - public void setSource(String source) { - this.source = source; - } - - public String getSourceType() { - return sourceType; - } - - public void setSourceType(String sourceType) { - this.sourceType = sourceType; - } - - public String getTarget() { - return target; - } - - public void setTarget(String target) { - this.target = target; - } - - public String getTargetType() { - return targetType; - } - - public void setTargetType(String targetType) { - this.targetType = targetType; - } - - public RelType getReltype() { - return reltype; - } - - public void setReltype(RelType reltype) { - this.reltype = reltype; - } - - public Provenance getProvenance() { - return provenance; - } - - public void setProvenance(Provenance provenance) { - this.provenance = provenance; - } - - public void setValidated(boolean validate) { - this.validated = validate; - } - - public boolean getValidated() { - return validated; - } - - public void setValidationDate(String validationDate) { - this.validationDate = validationDate; - } - - public String getValidationDate() { - return validationDate; - } - - @Override - public int hashCode() { - - return Objects.hash(source, target, reltype.getType() + ":" + reltype.getName()); - } - - public static Relation newInstance(String source, String sourceType, String target, String targetType, - RelType reltype, Provenance provenance) { - Relation relation = new Relation(); - relation.source = source; - relation.sourceType = sourceType; - relation.target = target; - relation.targetType = targetType; - relation.reltype = reltype; - relation.provenance = provenance; - return relation; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java deleted file mode 100644 index 0baf21a..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchCommunity.java +++ /dev/null @@ -1,27 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonInclude; -import 
com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.graph.ResearchInitiative by adding the parameter subject - * to store the list of subjects related to the community - */ -public class ResearchCommunity extends ResearchInitiative { - @JsonSchema( - description = "Only for research communities: the list of the subjects associated to the research community") - - @JsonInclude(JsonInclude.Include.NON_NULL) - private List subject; - - public List getSubject() { - return subject; - } - - public void setSubject(List subject) { - this.subject = subject; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java deleted file mode 100644 index 7a82401..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/graph/ResearchInitiative.java +++ /dev/null @@ -1,89 +0,0 @@ - -package eu.dnetlib.dhp.oa.model.graph; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * To represent an entity of type RC/RI. It has the following parameters, which are mostly derived from the profile - * - private - * String id to store the openaire id for the entity. It has code 00 and will be created as - * 00|context_____::md5(originalId) private - * String originalId to store the id of the context as provided in the profile - * (i.e. mes) - * - private String name to store the name of the context (got from the label attribute in the context - * definition) - * - private String type to store the type of the context (i.e.: research initiative or research community) - * - private String description to store the description of the context as given in the profile - * -private String - * zenodo_community to store the zenodo community associated to the context (main zenodo community) - */ -public class ResearchInitiative implements Serializable { - @JsonSchema(description = "The OpenAIRE id for the community/research infrastructure") - private String id; // openaireId - - @JsonSchema(description = "The acronym of the community") - private String acronym; // context id - - @JsonSchema(description = "The long name of the community") - private String name; // context name - - @JsonSchema(description = "One of {Research Community, Research infrastructure}") - private String type; // context type: research initiative or research community - - @JsonSchema(description = "Description of the research community/research infrastructure") - private String description; - - @JsonSchema( - description = "The URL of the Zenodo community associated to the Research community/Research infrastructure") - private String zenodo_community; - - public String getZenodo_community() { - return zenodo_community; - } - - public void setZenodo_community(String zenodo_community) { - this.zenodo_community = zenodo_community; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getName() { - return name; - } - - public void setName(String label) { - this.name = label; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description)
{ - this.description = description; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/DownloadsAndViews.java b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/DownloadsAndViews.java new file mode 100644 index 0000000..5b0bfb3 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/DownloadsAndViews.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.skgif.model; + +/** + * @author miriam.baglioni + * @Date 04/04/24 + */ +import java.io.Serializable; + +import org.apache.commons.lang3.StringUtils; + +public class DownloadsAndViews implements Serializable { + private String views; + private String downloads; + + public String getViews() { + return views; + } + + public void setViews(String views) { + this.views = views; + } + + public String getDownloads() { + return downloads; + } + + public void setDownloads(String downloads) { + this.downloads = downloads; + } + + public static DownloadsAndViews newInstance(String views, String downloads) { + DownloadsAndViews uc = new DownloadsAndViews(); + uc.setViews(views); + uc.setDownloads(downloads); + return uc; + } + + public boolean isEmpty() { + return StringUtils.isEmpty(this.downloads) || StringUtils.isEmpty(this.views); + } + +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/Indicator.java b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/Indicator.java new file mode 100644 index 0000000..7b2018d --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/Indicator.java @@ -0,0 +1,33 @@ + +package eu.dnetlib.dhp.skgif.model; + +import java.io.Serializable; + +/** + * @author miriam.baglioni + * @Date 04/04/24 + */ +public class Indicator implements Serializable { + private DownloadsAndViews downloadsAndViews; + + public DownloadsAndViews getDownloadsAndViews() { + return downloadsAndViews; + } + + public void setDownloadsAndViews(DownloadsAndViews downloadsAndViews) { + this.downloadsAndViews = downloadsAndViews; + } + + public static Indicator newInstance(DownloadsAndViews uc) { + Indicator i = new Indicator(); + i.setDownloadsAndViews(uc); + return i; + } + + public static Indicator newInstance(String downloads, String views) { + Indicator i = new Indicator(); + i.setDownloadsAndViews(DownloadsAndViews.newInstance(views, downloads)); + return i; + } + +}
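// Editor's sketch (illustrative, not part of this patch): the two equivalent ways the new
// Indicator added above can be built from usage counts. Note the argument order:
// Indicator.newInstance takes (downloads, views), while DownloadsAndViews.newInstance takes
// (views, downloads). The sample counts are made up for illustration.
static Indicator exampleIndicator() {
	DownloadsAndViews uc = DownloadsAndViews.newInstance("342", "1250"); // views, downloads
	return Indicator.newInstance(uc);
	// equivalently: Indicator.newInstance("1250", "342"), i.e. (downloads, views)
}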
diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/MinTopic.java b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/MinTopic.java index a230354..adba4aa 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/MinTopic.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/MinTopic.java @@ -10,6 +10,15 @@ import java.io.Serializable; public class MinTopic implements Serializable { private String local_identifier; private String value; + private String scheme; + + public String getScheme() { + return scheme; + } + + public void setScheme(String scheme) { + this.scheme = scheme; + } public String getLocal_identifier() { return local_identifier; diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/ResearchProduct.java b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/ResearchProduct.java index 126a3ad..24bd645 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/ResearchProduct.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/skgif/model/ResearchProduct.java @@ -28,6 +28,16 @@ public class ResearchProduct implements Serializable { @JsonProperty("related_products") private List related_products; + private Indicator indicator; + + public Indicator getIndicator() { + return indicator; + } + + public void setIndicator(Indicator indicator) { + this.indicator = indicator; + } + public String getLocal_identifier() { return local_identifier; } diff --git a/dump-schema/src/test/java/GenerateJsonSchema.java b/dump-schema/src/test/java/GenerateJsonSchema.java index c7916e2..dd9e7aa 100644 --- a/dump-schema/src/test/java/GenerateJsonSchema.java +++ b/dump-schema/src/test/java/GenerateJsonSchema.java @@ -9,9 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.ExecCreateSchemas; -import eu.dnetlib.dhp.oa.model.Result; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; -import eu.dnetlib.dhp.oa.model.graph.*; +import eu.dnetlib.dhp.skgif.model.ResearchProduct; //@Disabled class GenerateJsonSchema { @@ -25,7 +23,7 @@ class GenerateJsonSchema { configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(CommunityResult.class); + JsonNode jsonSchema = generator.generateSchema(ResearchProduct.class); System.out.println(jsonSchema.toString()); } @@ -42,7 +40,7 @@ class GenerateJsonSchema { .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(Result.class); + JsonNode jsonSchema = generator.generateSchema(ResearchProduct.class); System.out.println(jsonSchema.toString()); } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/FilterEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/FilterEntities.java index fcc59a8..3cdb299 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/FilterEntities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/FilterEntities.java @@ -36,7 +36,7 @@ public class FilterEntities implements Serializable { .toString( FilterEntities.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/filter_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/skgif/filter_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectConnectedEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectConnectedEntities.java index efac959..6dd618c 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectConnectedEntities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectConnectedEntities.java @@ -46,7 +46,7 @@ public class SelectConnectedEntities implements Serializable { .toString( SelectConnectedEntities.class .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/select_connected_entities_parameters.json")); + "/eu/dnetlib/dhp/oa/graph/dump/skgif/select_connected_entities_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResearchProduct.java
b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResearchProduct.java index b0377a5..1878a30 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResearchProduct.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResearchProduct.java @@ -87,20 +87,17 @@ public class DumpResearchProduct implements Serializable { } private static void moveDumpedProducts(SparkSession spark, String workingDir, String outputPath) { - Dataset researchProducts = spark.emptyDataset(Encoders.bean(ResearchProduct.class)); + for (EntityType e : ModelSupport.entityTypes.keySet()) { if (ModelSupport.isResult(e)) - researchProducts = researchProducts - .union( - Utils - .readPath( - spark, workingDir + "products" + e.name() + "/researchproduct", ResearchProduct.class)); + Utils + .readPath( + spark, workingDir + "products" + e.name() + "/researchproduct", ResearchProduct.class) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath + "products"); } - researchProducts - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "products"); } private static void dumpResearchProduct(SparkSession spark, String inputPath, String workingDir, diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/EmitFromEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/EmitFromEntities.java index 277063c..2566221 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/EmitFromEntities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/EmitFromEntities.java @@ -70,6 +70,7 @@ public class EmitFromEntities implements Serializable { }); } + public static void emitFromResult(SparkSession spark, String inputPath, String outputPath, String workingDir) { @@ -78,7 +79,7 @@ public class EmitFromEntities implements Serializable { emitDatasourcePublisher(spark, inputPath, workingDir); } - + private static void emitDatasourcePublisher(SparkSession spark, String inputPath, String workingDir) { Dataset journalIds = spark .read() @@ -130,7 +131,8 @@ public class EmitFromEntities implements Serializable { .stream() .filter( s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") - || s.getQualifier().getClassid().equalsIgnoreCase("sdg")) + || s.getQualifier().getClassid().equalsIgnoreCase("sdg") + || s.getQualifier().getClassid().equalsIgnoreCase("keyword")) .map(s -> { Topic t = new Topic(); t diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/ResultMapper.java index 4b83965..937f995 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/skgif/ResultMapper.java @@ -9,7 +9,6 @@ import eu.dnetlib.dhp.oa.graph.dump.skgif.exception.NoAllowedTypeException; import eu.dnetlib.dhp.oa.graph.dump.skgif.exception.NoTitleFoundException; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.skgif.model.*; -import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; /** @@ -35,6 +34,7 @@ public class ResultMapper implements Serializable { mapType(out, input); mapTopic(out, input); mapContribution(out, input); + mapMeasure(out, input); //The manifestation will be included extending the result as well as the relations to funder, organization and other results return out; @@ -47,6 +47,25 @@ public class ResultMapper implements Serializable { } + private static void mapMeasure(ResearchProduct out, eu.dnetlib.dhp.schema.oaf.Result 
input) { + if (Optional.ofNullable(input.getMeasures()).isPresent()) { + Indicator i = new Indicator(); + DownloadsAndViews uc = new DownloadsAndViews(); + input.getMeasures().forEach(m -> { + if (m.getId().equals("downloads")) { + uc.setDownloads(m.getUnit().get(0).getValue()); + } + if (m.getId().equals("views")) { + uc.setViews(m.getUnit().get(0).getValue()); + } + }); + if (!uc.isEmpty()) { + i.setDownloadsAndViews(uc); + out.setIndicator(i); + } + } + } + private static void mapContribution(ResearchProduct out, E input) { if (Optional.ofNullable(input.getAuthor()).isPresent()) { int count = 0; @@ -93,9 +112,10 @@ public class ResultMapper implements Serializable { .getSubject() .stream() .filter( - s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")) -// || -// s.getQualifier().getClassid().equalsIgnoreCase("sdg")) + s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") + || + s.getQualifier().getClassid().equalsIgnoreCase("sdg") + || s.getQualifier().getClassid().equalsIgnoreCase("keyword")) .map(s -> { ResultTopic topic = new ResultTopic(); MinTopic minTopic = new MinTopic(); @@ -103,6 +123,7 @@ public class ResultMapper implements Serializable { .setLocal_identifier( Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue())); minTopic.setValue(s.getValue()); + minTopic.setScheme(s.getQualifier().getClassid()); topic .setTopic(minTopic); if (Optional.ofNullable(s.getDataInfo()).isPresent()) { @@ -112,7 +133,7 @@ public class ResultMapper implements Serializable { } catch (NumberFormatException nfe) { } - provenance.setType(s.getDataInfo().getInferenceprovenance()); + provenance.setType(s.getDataInfo().getProvenanceaction().getClassname()); topic.setProvenance(provenance); } diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/workflow.xml deleted file mode 100644 index 47420ed..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/workflow.xml +++ /dev/null @@ -1,110 +0,0 @@ - - - - - outputPath - the output path - - - hivePath - the country for which to produce the dump - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 
2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - yarn - cluster - Copy graph - eu.dnetlib.dhp.oa.graph.dump.SparkCopyGraph - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --hivePath${hivePath} - --outputPath${outputPath} - - - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml deleted file mode 100644 index 0abe7cb..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml +++ /dev/null @@ -1,511 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - country - the country for which to produce the dump - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - 
--nameNode${nameNode} - - - - - - - - yarn - cluster - Dump table publication - eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultsRelatedToCountry - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/resultsInCountry - --country${country} - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication - eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry - dump-${projectVersion}.jar - - --executor-memory=7G - --executor-cores=2 - --driver-memory=7G - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath} - --resultWithCountry${workingDir}/resultsInCountry - --resultTypepublication - - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath} - --resultTypedataset - --resultWithCountry${workingDir}/resultsInCountry - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath} - --resultTypeotherresearchproduct - --resultWithCountry${workingDir}/resultsInCountry - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath} - --resultTypesoftware - --resultWithCountry${workingDir}/resultsInCountry - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/dump/publication - --communityMapPath${workingDir}/communityMap - --dumpTypecountry - - - - - - - - yarn - cluster - Dump table dataset for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dump/dataset - --communityMapPath${workingDir}/communityMap - --dumpTypecountry - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/dump/otherresearchproduct - --communityMapPath${workingDir}/communityMap - --dumpTypecountry - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/dump/software - --communityMapPath${workingDir}/communityMap - --dumpTypecountry - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/publication - --outputPath${outputPath}/dump/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/dataset - --outputPath${outputPath}/dump/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/otherresearchproduct - --outputPath${outputPath}/dump/otherresearchproduct - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - 
eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/software - --outputPath${outputPath}/dump/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath}/tar - --nameNode${nameNode} - --sourcePath${outputPath}/dump - - - - - - - Sub-workflow dump complete failed with error message ${wf:errorMessage()} - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml deleted file mode 100644 index c29798d..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml +++ /dev/null @@ -1,282 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - communities - the communities whose products should be dumped - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities - --outputPath${outputPath}/community - --nameNode${nameNode} - --communities${communities} - - - - - - - - - yarn - cluster - select results ids connected to communities and dump relation - eu.dnetlib.dhp.oa.graph.dump.csv.SparkSelectResultsAndDumpRelations - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf 
spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --workingPath${outputPath}/workingDir - --outputPath${outputPath} - --communities${communities} - - - - - - - - - - - - - - - - yarn - cluster - select results from publication - eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults - dump-${projectVersion}.jar - - --executor-memory=9G - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --workingPath${outputPath}/workingDir - --resultTypepublication - - - - - - - yarn - cluster - select results from dataset - eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --workingPath${outputPath}/workingDir - --resultTypedataset - - - - - - - yarn - cluster - select results from other - eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --workingPath${outputPath}/workingDir - --resultTypeotherresearchproduct - - - - - - - yarn - cluster - select results from software - eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --workingPath${outputPath}/workingDir - --resultTypesoftware - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - yarn - cluster - Dump single results - 
eu.dnetlib.dhp.oa.graph.dump.csv.SparkMoveOnSigleDir - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --workingPath${outputPath}/workingDir - --outputPath${outputPath} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/tar - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json deleted file mode 100644 index 31d8619..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json +++ /dev/null @@ -1,31 +0,0 @@ - -[ - - { - "paramName":"nn", - "paramLongName":"nameNode", - "paramDescription": "the name node", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "sd", - "paramLongName": "singleDeposition", - "paramDescription": "true if the dump should be created for a single community", - "paramRequired": false - }, - { - "paramName": "ci", - "paramLongName": "communityId", - "paramDescription": "the id of the community for which to create the dump", - "paramRequired": false - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json deleted file mode 100644 index 2b42217..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "ra", - "paramLongName": "resultAggregation", - "paramDescription": "true if all the result type should be saved under the generic result name. 
false to get a different dump for each result type", - "paramRequired": true -} -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json deleted file mode 100644 index a59a5ce..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_complete_parameters.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json deleted file mode 100644 index 00376d9..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json +++ /dev/null @@ -1,23 +0,0 @@ -[ - - - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "nn", - "paramLongName": "nameNode", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": true - }, - { - "paramName":"c", - "paramLongName":"communities", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json deleted file mode 100644 index a78b1be..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste2.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"c", - "paramLongName":"communities", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] - - - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json deleted file mode 100644 index 1aceb18..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste3.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the 
sequential file to read", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the working path", - "paramRequired": true - }, - { - "paramName":"rt", - "paramLongName":"resultType", - "paramDescription": "the type of the result currently being dumped", - "paramRequired": true - }, - { - "paramName":"rtn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json deleted file mode 100644 index 706e7e9..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste4.json +++ /dev/null @@ -1,25 +0,0 @@ -[ - - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the working path", - "paramRequired": true - }, - - { - "paramName":"o", - "paramLongName":"outputPath", - "paramDescription": "the output path", - "paramRequired": true - } -] - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json deleted file mode 100644 index ba359ce..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - - - { - "paramName": "hdfs", - "paramLongName": "hdfsPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "nn", - "paramLongName": "nameNode", - "paramDescription": "the name node", - "paramRequired": true - } - -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json deleted file mode 100644 index 70abce6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json +++ /dev/null @@ -1,31 +0,0 @@ -[ - - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequential file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json deleted file mode 100644 index 1a67134..0000000 ---
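All of the deleted *_parameters.json files in this patch share one spec convention: a JSON array of {paramName, paramLongName, paramDescription, paramRequired} objects that drives command-line parsing in the dump jobs. As a rough, self-contained sketch of how such a spec can be consumed (the repository's actual parser, presumably an ArgumentApplicationParser-style helper, may differ; the class and method names below are illustrative, not taken from the patch):

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashMap;
import java.util.Map;

/** Minimal sketch of a parser driven by the paramName/paramLongName JSON specs above. */
public class ParamSpecParser {
    public static Map<String, String> parse(String specJson, String[] args) throws Exception {
        JsonNode spec = new ObjectMapper().readTree(specJson);
        // index the spec by both the short and the long option name
        Map<String, JsonNode> byName = new HashMap<>();
        for (JsonNode p : spec) {
            byName.put("-" + p.get("paramName").asText(), p);
            byName.put("--" + p.get("paramLongName").asText(), p);
        }
        // consume args as alternating option/value pairs
        Map<String, String> values = new HashMap<>();
        for (int i = 0; i + 1 < args.length; i += 2) {
            JsonNode p = byName.get(args[i]);
            if (p == null) throw new IllegalArgumentException("unknown option " + args[i]);
            values.put(p.get("paramLongName").asText(), args[i + 1]);
        }
        // enforce paramRequired
        for (JsonNode p : spec) {
            String longName = p.get("paramLongName").asText();
            if (p.get("paramRequired").asBoolean() && !values.containsKey(longName))
                throw new IllegalArgumentException("missing required option --" + longName);
        }
        return values;
    }
}

Indexing both names keeps -s and --sourcePath interchangeable, which matches how each spec pairs paramName with paramLongName.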
a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameter_select_relation.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequential file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json deleted file mode 100644 index 4962eae..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json +++ /dev/null @@ -1,75 +0,0 @@ -[ - - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": false - }, - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequential file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"dt", - "paramLongName":"dumpType", - "paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project)", - "paramRequired": false - }, - { - "paramName":"cid", - "paramLongName":"communityId", - "paramDescription": "the id of the community to be dumped", - "paramRequired": false -}, - { - "paramName":"sc", - "paramLongName":"selectionCriteria", - "paramDescription": "the selection criteria to choose the results", - "paramRequired": false - }, - - { - "paramName":"pm", - "paramLongName":"pathMap", - "paramDescription": "the map to find fields in the json", - "paramRequired": false - }, - { - "paramName":"rt", - "paramLongName":"resultType", - "paramDescription": "the type of the result currently being dumped", - "paramRequired": false - }, - { - "paramName":"md", - "paramLongName":"masterDuplicatePath", - "paramDescription": "the path to the master-duplicate mapping", - "paramRequired": false - } - - -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json deleted file mode 100644 index 6c45538..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json +++ /dev/null @@ -1,41 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequential file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName":
"isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, - { - "paramName":"gp", - "paramLongName":"graphPath", - "paramDescription": "the path to the relations", - "paramRequired": true - }, - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the relations", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json deleted file mode 100644 index 22e054c..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json +++ /dev/null @@ -1,44 +0,0 @@ - - -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "rs", - "paramLongName": "removeSet", - "paramDescription": "the list of classname relations, split by ';', not to be dumped", - "paramRequired": false - }, - { - "paramName": "wd", - "paramLongName": "workingDir", - "paramDescription": "the list of classname relations, split by ';', not to be dumped", - "paramRequired": false - }, - { - "paramName": "mdp", - "paramLongName": "masterDuplicatePath", - "paramDescription": "the list of classname relations, split by ';', not to be dumped", - "paramRequired": false - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_context.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_context.json deleted file mode 100644 index fec6997..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_context.json +++ /dev/null @@ -1,37 +0,0 @@ -[ - - - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": false - }, - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"cp", - "paramLongName":"contextPath", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_context_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_context_parameters.json deleted file mode 100644 index 4926569..0000000 --- 
a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_context_parameters.json +++ /dev/null @@ -1,27 +0,0 @@ -[ - - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - - { - "paramName":"crp", - "paramLongName":"contextRelationPath", - "paramDescription": "the map to find fields in the json", - "paramRequired": false - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_parameters.json deleted file mode 100644 index 91b4d20..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_parameters.json +++ /dev/null @@ -1,27 +0,0 @@ -[ - - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - - { - "paramName":"rp", - "paramLongName":"relationPath", - "paramDescription": "the map to find fields in the json", - "paramRequired": false - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/workflow.xml deleted file mode 100644 index 41b0ebe..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/workflow.xml +++ /dev/null @@ -1,88 +0,0 @@ - - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}] - - - - yarn - cluster - Dump table organization and related relations - eu.dnetlib.dhp.oa.graph.dump.organizationonly.SparkDumpOrganizationJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${workingDir}/project - --communityMapPathnoneed - - - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json deleted file mode 100644 index f2dc02b..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json +++ /dev/null @@ -1,29 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "pip", - "paramLongName": "preparedInfoPath", - "paramDescription": "the path of the association result projectlist", - "paramRequired": true - } -] - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json deleted file mode 100644 index a4ebd34..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "sb", - "paramLongName": "substring", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json deleted file mode 100644 index ed23136..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json +++ /dev/null @@ -1,27 +0,0 @@ -[ - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - 
"paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "pl", - "paramLongName": "projectListPath", - "paramDescription": "the path of the association result projectlist", - "paramRequired": true - } -] - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml deleted file mode 100644 index 620f761..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml +++ /dev/null @@ -1,171 +0,0 @@ - - - - sourcePath - the source path - - - projectListPath - the path to the project list - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. 
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${workingDir}/project - --communityMapPathnoneed - - - - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/project - --outputPath${workingDir}/tar/project - --projectListPath${projectListPath} - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/tar - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_country_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_country_parameters.json deleted file mode 100644 index 
5aefc18..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_country_parameters.json +++ /dev/null @@ -1,42 +0,0 @@ - -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "c", - "paramLongName": "resultWithCountry", - "paramDescription": "the path of the id of results associated to a given country", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", -"paramDescription": "the name of the result table we are currently working on", -"paramRequired": true -}, -{ -"paramName":"rt", -"paramLongName":"resultType", -"paramDescription": "", -"paramRequired": true -} -] - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_related_country_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_related_country_parameters.json deleted file mode 100644 index 729a405..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_related_country_parameters.json +++ /dev/null @@ -1,29 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName": "c", - "paramLongName": "country", - "paramDescription": "the path of the association result projectlist", - "paramRequired": true - } -] - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml deleted file mode 100644 index dc9ead6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/serafeim/oozie_app/workflow.xml +++ /dev/null @@ -1,102 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - communities - the communities whose products should be dumped - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action 
sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - yarn - cluster - select results ids connected to communities and dump relation - eu.dnetlib.dhp.oa.graph.dump.serafeim.SparkSelectResultsAndDumpRelations - dump-${projectVersion}.jar - - --executor-memory=10G - --executor-cores=3 - --driver-memory=10G - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath} - --workingPath${workingDir} - --outputPath${outputPath} - --communities${communities} - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/filter_entities_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/filter_entities_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/filter_entities_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/filter_entities_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/oozie_app/workflow.xml index 74854db..1a0f489 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/oozie_app/workflow.xml @@ -68,6 +68,8 @@ ${wf:conf('resumeFrom') eq "MapEoscDsIds"} + ${wf:conf('resumeFrom') eq "DumpResearchProduct"} + ${wf:conf('resumeFrom') eq "EmitFromResult"} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/select_connected_entities_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/select_connected_entities_parameters.json similarity index 100% rename from dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/select_connected_entities_parameters.json rename to dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/skgif/select_connected_entities_parameters.json diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json deleted file mode 100644 index dec82bc..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json +++ /dev/null @@ -1,37 +0,0 @@ - -[ - - { - "paramName":"cmp", - "paramLongName":"communityMapPath", - "paramDescription": "the path to the serialization of the community map", - "paramRequired": false - }, - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - 
"paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, { - "paramName":"cid", - "paramLongName":"communityId", - "paramDescription": "the id of the community to be dumped", - "paramRequired": false -} -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt deleted file mode 100644 index d43d3ee..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/import.txt +++ /dev/null @@ -1,5 +0,0 @@ -## This is a classpath-based import file (this header is required) -dump_complete classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app -dump_funder classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app -dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app -dump_subset classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml deleted file mode 100644 index d79cfd1..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml +++ /dev/null @@ -1,337 +0,0 @@ - - - - - singleDeposition - false - Indicates if it is a single community deposition - - - communityId - none - the id of the community to be dumped if a dump for a single community should be done - - - dumpType - the type of the dump one of {complete, community, funder} - - - onlyUpload - true if the dump is already done and should only be upload in zenodo - - - upload - true if the dump should be upload in zenodo - - - sourcePath - the source path - - - outputPath - the output path - - - resultAggregation - false - true if all the result type have to be dumped under result. false otherwise - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - "" - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. 
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - none - for new version, the id of the record for the old deposition - - - depositionId - none - the depositionId of a deposition open that has to be added content - - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${wf:conf('makeArchive') eq true} - ${wf:conf('onlyUpload') eq true} - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap - --outputPath${workingDir}/communityMap - --nameNode${nameNode} - --singleDeposition${singleDeposition} - --communityId${communityId} - - - - - - - - - - ${wf:conf('dumpType') eq "funder"} - ${wf:conf('dumpType') eq "community"} - ${wf:conf('dumpType') eq "subset"} - - - - - - - - ${wf:appPath()}/dump_subset - - - - - communityMapPath - ${workingDir}/communityMap - - - outputPath - ${outputPath} - - - sourcePath - ${sourcePath} - - - - - - - - - - - ${wf:appPath()}/dump_complete - - - - - communityMapPath - ${workingDir}/communityMap - - - outputPath - ${outputPath}/dump - - - sourcePath - ${sourcePath} - - - - resultAggregation - ${resultAggregation} - - - postgresURL - ${postgresURL} - - - postgresUser - ${postgresUser} - - - postgresPassword - ${postgresPassword} - - - - - - - - - - - ${wf:appPath()}/dump_community - - - - - sourcePath - ${sourcePath} - - - communityMapPath - ${workingDir}/communityMap - - - outputPath - ${outputPath}/dump - - - - - - - - - - ${wf:appPath()}/dump_funder - - - - - communityMapPath - ${workingDir}/communityMap - - - outputPath - ${outputPath}/dump - - - sourcePath - ${sourcePath} - - - dumpType - ${dumpType} - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath}/tar - --nameNode${nameNode} - --sourcePath${outputPath}/dump - - - - - - - - ${wf:conf('upload') eq true} - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath}/tar/ - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml 
b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml deleted file mode 100644 index a39980e..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml +++ /dev/null @@ -1,362 +0,0 @@ - - - - - sourcePath - the source path - - - outputPath - the output path - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - yarn - cluster - Dump table publication for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/dump/publication - --communityMapPath${communityMapPath} - --dumpType${dumpType} - - - - - - - - yarn - cluster - Dump table dataset for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} 
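Every spark action in these workflows forwards the same --conf block (extra listeners, history server address, event log dir, warehouse dir), and the parameter specs expose an isSparkSessionManaged flag. A generic sketch of the pattern that flag usually implies, i.e. whether the job owns and therefore stops the SparkSession; the helper name and exact semantics here are assumptions, not taken from the patch:

import java.util.function.Consumer;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

/** Sketch of the isSparkSessionManaged pattern used by Spark dump jobs. */
public class SparkSessionSketch {
    public static void runWithSparkSession(SparkConf conf, Boolean isManaged, Consumer<SparkSession> job) {
        SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
        try {
            job.accept(spark);
        } finally {
            if (Boolean.TRUE.equals(isManaged)) {
                spark.stop(); // only close the session when this method owns it
            }
        }
    }

    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
            .setAppName("dump-sketch")
            // mirrors the --conf flags the workflow hands to spark-submit
            .set("spark.sql.warehouse.dir", "/tmp/warehouse")
            .set("spark.sql.shuffle.partitions", "3840");
        runWithSparkSession(conf, Boolean.TRUE, spark -> spark.read().text("/tmp/in").show(1));
    }
}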
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dump/dataset - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/dump/otherresearchproduct - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/dump/software - --communityMapPath${communityMapPath} - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/publication - --outputPath${workingDir}/ext/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped 
dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/dataset - --outputPath${workingDir}/ext/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/otherresearchproduct - --outputPath${workingDir}/ext/orp - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/software - --outputPath${workingDir}/ext/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - yarn - cluster - Split dumped result for community - eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/ext - --outputPath${outputPath} - --communityMapPath${communityMapPath} - - - - - - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - 
thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml deleted file mode 100644 index f744f6d..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml +++ /dev/null @@ -1,525 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - resultAggregation - true if all the result type have to be dumped under result. false otherwise - - - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - - - - - - - - - - - - - - yarn - cluster - Dump table publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - 
--executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - - - - - - - - yarn - cluster - Dump table organization - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/organization - --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization - --outputPath${outputPath}/organization - - - - - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${outputPath}/project - - - - - - - - yarn - cluster - Dump table datasource - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/datasource - --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource - --outputPath${outputPath}/datasource - - - - - - - - yarn - cluster - Select valid table 
relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath} - --outputPath${workingDir}/validrelation - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/validrelation - --outputPath${workingDir}/relation/relation - --removeSet${removeSet} - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities - --hdfsPath${outputPath}/communities_infrastructures/community_infrastructure.json.gz - --nameNode${nameNode} - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation - --hdfsPath${workingDir}/relation/context - --nameNode${nameNode} - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/relation/contextOrg - --communityMapPath${communityMapPath} - - - - - - - - - - - - - - - - - yarn - cluster - Extract Relations from publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/relation/publication - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - 
--driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/relation/dataset - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/relation/orp - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/relation/software - --communityMapPath${communityMapPath} - - - - - - - - - - yarn - cluster - Collect Results and Relations and put them in the right path - eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir} - --outputPath${outputPath} - --resultAggregation${resultAggregation} - - - - - - - - - Sub-workflow dump complete failed with error message ${wf:errorMessage()} - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - 
thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml deleted file mode 100644 index e23778b..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml +++ /dev/null @@ -1,257 +0,0 @@ - - - - - sourcePath - the source path - - - outputPath - the output path - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath} - --outputPath${workingDir}/preparedInfo - --substringfalse - - - - - - - - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dump-${projectVersion}.jar - - --executor-memory=9G - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/result/publication - --graphPath${workingDir}/preparedInfo - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dump-${projectVersion}.jar - - 
--executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/result/dataset - --graphPath${workingDir}/preparedInfo - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/result/otherresearchproduct - --graphPath${workingDir}/preparedInfo - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/result/software - --graphPath${workingDir}/preparedInfo - --communityMapPath${communityMapPath} - - - - - - - - - - yarn - cluster - Dump funder results - eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/result - --outputPath${outputPath} - - - - - - - - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/config-default.xml deleted file mode 100644 index e5ec3d0..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - 
oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml deleted file mode 100644 index 837a844..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml +++ /dev/null @@ -1,858 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - pathMap - the path where to find the elements involved in the constraints within the json - - - selectionCriteria - the selection criteria used to select the results - - - mapAs - It specifies the type of model for the dump (community - complete) - - - hiveDbName - the target hive database name - - - hiveJdbcUrl - hive server jdbc url - - - hiveMetastoreUris - hive server metastore URIs - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB - --hdfsPath${workingDir}/masterduplicate - --hdfsNameNode${nameNode} - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} - - - - - - - - - - - - - - - yarn - cluster - Dump table publication - eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath} - --communityMapPath${communityMapPath} - --pathMap${pathMap} - --selectionCriteria${selectionCriteria} - --resultTypepublication - - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath} - --pathMap${pathMap} - --selectionCriteria${selectionCriteria} - --resultTypedataset - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath} - --pathMap${pathMap} - --selectionCriteria${selectionCriteria} - --resultTypeotherresearchproduct - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath} - --pathMap${pathMap} - --selectionCriteria${selectionCriteria} - --resultTypesoftware - - - - - - - - - - yarn - cluster - Select valid table relation - eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectSubset - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - --conf spark.sql.shuffle.partitions=3840 - - --sourcePath${sourcePath} - --outputPath${outputPath} - --removeSet${removeSet} - - - - - - - - - - ${wf:conf('mapAs') eq "community"} - - - - - - - - - - - - - - yarn - cluster - Dump table publication for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication 
- --outputPath${workingDir}/dump/publication - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table dataset for community/funder related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/dump/dataset - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table ORP for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/dump/otherresearchproduct - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table software for community related products - eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/dump/software - --communityMapPath${communityMapPath} - - - - - - - - - - yarn - cluster - Prepare association result subset of project info - eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original - --outputPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - - - yarn - cluster - Extend dumped publications with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - 
--executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/publication - --outputPath${outputPath}/dump/publication - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped dataset with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/dataset - --outputPath${outputPath}/dump/dataset - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped ORP with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/otherresearchproduct - --outputPath${outputPath}/dump/otherresearchproduct - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - yarn - cluster - Extend dumped software with information about project - eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${workingDir}/dump/software - --outputPath${outputPath}/dump/software - --preparedInfoPath${workingDir}/preparedInfo - - - - - - - - - - - - - - - - - yarn - cluster - Dump table organization - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/organization - 
--resultTableNameeu.dnetlib.dhp.schema.oaf.Organization - --outputPath${outputPath}/dump/organization - - - - - - - - yarn - cluster - Dump table project - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/project - --resultTableNameeu.dnetlib.dhp.schema.oaf.Project - --outputPath${outputPath}/dump/project - - - - - - - - yarn - cluster - Dump table datasource - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/datasource - --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource - --outputPath${outputPath}/dump/datasource - - - - - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities - --hdfsPath${workingDir}/context/community_infrastructure.json.gz - --nameNode${nameNode} - - - - - - - - yarn - cluster - Dump table software - eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidContext - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original - --contextPath${workingDir}/context/community_infrastructure.json.gz - --communityMapPath${communityMapPath} - --outputPath${outputPath}/dump/communities_infrastructures - - - - - - - eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation - --hdfsPath${workingDir}/dump/relation/context - --nameNode${nameNode} - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/relation - --outputPath${workingDir}/dump/relation/contextOrg - --communityMapPath${communityMapPath} - - - - - - - - - - yarn - cluster - Dump table software - 
eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidRelationContext - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/dump - --contextRelationPath${workingDir}/dump/relation - - - - - - - - - - yarn - cluster - Dump table relation - eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/relation - --outputPath${workingDir}/relation - --removeSet${removeSet} - - - - - - - - yarn - cluster - Extract Relations from publication - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/relation - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table dataset - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${outputPath}/original/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/relation - --communityMapPath${communityMapPath} - - - - - - - - yarn - cluster - Dump table ORP - eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - 
--sourcePath ${outputPath}/original/otherresearchproduct
- --resultTableName eu.dnetlib.dhp.schema.oaf.OtherResearchProduct
- --outputPath ${workingDir}/relation
- --communityMapPath ${communityMapPath}
-
- yarn
- cluster
- Dump table software
- eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities
- dump-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-
- --sourcePath ${outputPath}/original/software
- --resultTableName eu.dnetlib.dhp.schema.oaf.Software
- --outputPath ${workingDir}/relation
- --communityMapPath ${communityMapPath}
-
- yarn
- cluster
- Select valid relations
- eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidRelation
- dump-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.extraListeners=${spark2ExtraListeners}
- --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
- --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
- --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
- --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
-
- --sourcePath ${outputPath}/dump
- --relationPath ${workingDir}/relation
-
- Sub-workflow dump complete failed with error message ${wf:errorMessage()}
-
\ No newline at end of file
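For context, the "select valid relations" step removed above keeps only the relations whose two endpoints both survived the subset selection, so the dumped archive contains no dangling links. A minimal sketch of that filtering, assuming relations are JSON records with plain `source` and `target` id columns; the actual implementation lives in eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidRelation and may differ:

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class SelectValidRelationSketch {

        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                .appName("SelectValidRelationSketch")
                .getOrCreate();

            String dumpPath = args[0];     // e.g. ${outputPath}/dump
            String relationPath = args[1]; // e.g. ${workingDir}/relation

            // identifiers of every entity that made it into the dump
            Dataset<Row> ids = spark.read().json(dumpPath + "/*").select("id");

            // candidate relations extracted from the graph entities
            Dataset<Row> relations = spark.read().json(relationPath);

            // keep a relation only if both endpoints are among the dumped ids
            Dataset<Row> valid = relations
                .join(ids.withColumnRenamed("id", "source"), "source")
                .join(ids.withColumnRenamed("id", "target"), "target");

            valid
                .write()
                .mode("overwrite")
                .option("compression", "gzip")
                .json(dumpPath + "/relation");
        }
    }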
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - none - for new version, the id of the record for the old deposition - - - depositionId - none - the depositionId of a deposition open that has to be added content - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath}/tar - --nameNode${nameNode} - --sourcePath${sourcePath} - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath}/tar/ - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index a23f184..3887877 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -6,7 +6,7 @@ import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.JsonNode; import com.github.victools.jsonschema.generator.*; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; +import eu.dnetlib.dhp.skgif.model.ResearchProduct; //@Disabled class GenerateJsonSchema { @@ -20,7 +20,7 @@ class GenerateJsonSchema { configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(GraphResult.class); + JsonNode jsonSchema = generator.generateSchema(ResearchProduct.class); System.out.println(jsonSchema.toString()); } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResultTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResultTest.java index c7fc774..a6cdc48 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResultTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/skgif/DumpResultTest.java @@ -35,7 +35,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation; import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct; -import 
eu.dnetlib.dhp.oa.model.graph.Relation; import eu.dnetlib.dhp.skgif.model.*; import eu.dnetlib.dhp.utils.DHPUtils;
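For reference, the depositionType parameter of the Scholexplorer dump workflow above distinguishes three Zenodo interactions. A minimal sketch of how they could map onto Zenodo's public REST deposition API; the production upload is performed by eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS with its own API wrapper, so the class below is purely illustrative:

    import java.io.IOException;
    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class ZenodoDepositionSketch {

        private final HttpClient client = HttpClient.newHttpClient();
        private final String connectionUrl; // e.g. https://zenodo.org/api/deposit/depositions
        private final String accessToken;

        public ZenodoDepositionSketch(String connectionUrl, String accessToken) {
            this.connectionUrl = connectionUrl;
            this.accessToken = accessToken;
        }

        // depositionType = "new": open a brand new, empty deposition
        public String newDeposition() throws IOException, InterruptedException {
            HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(connectionUrl))
                .header("Content-Type", "application/json")
                .header("Authorization", "Bearer " + accessToken)
                .POST(HttpRequest.BodyPublishers.ofString("{}"))
                .build();
            // the JSON response carries the id of the deposition just opened
            return client.send(request, HttpResponse.BodyHandlers.ofString()).body();
        }

        // depositionType = "version": open a new version of a published record,
        // which is why the conceptRecordId of the old deposition must be provided
        public String newVersion(String conceptRecordId) throws IOException, InterruptedException {
            HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(connectionUrl + "/" + conceptRecordId + "/actions/newversion"))
                .header("Authorization", "Bearer " + accessToken)
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
            return client.send(request, HttpResponse.BodyHandlers.ofString()).body();
        }

        // depositionType = "upload": resume an already open deposition,
        // identified by the depositionId parameter
        public String openDeposition(String depositionId) throws IOException, InterruptedException {
            HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(connectionUrl + "/" + depositionId))
                .header("Authorization", "Bearer " + accessToken)
                .GET()
                .build();
            return client.send(request, HttpResponse.BodyHandlers.ofString()).body();
        }
    }

In all three cases the response identifies the deposition to which the tar archives produced by MakeTar are then uploaded.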