From 67ee82535a7b04f75244255a1947db744b0420a0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 4 Oct 2022 12:20:08 +0200 Subject: [PATCH 01/26] Changed the jar from dhp-graph-dump to dump --- .../dump/projectsubset/oozie_app/workflow.xml | 4 +-- .../community/oozie_app/workflow.xml | 20 ++++++------- .../complete/oozie_app/workflow.xml | 30 +++++++++---------- .../funder/oozie_app/workflow.xml | 12 ++++---- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml index 4013db9..620f761 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/projectsubset/oozie_app/workflow.xml @@ -99,7 +99,7 @@ cluster Dump table project eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -124,7 +124,7 @@ cluster Dump table project eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml index f1c7a8f..a39980e 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml @@ -96,7 +96,7 @@ cluster Dump table publication for community/funder related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -123,7 +123,7 @@ cluster Dump table dataset for community/funder related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -149,7 +149,7 @@ cluster Dump table ORP for community related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -175,7 +175,7 @@ cluster Dump table software for community related products eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -203,7 +203,7 @@ cluster Prepare association result subset of project info eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -234,7 +234,7 @@ cluster Extend dumped publications with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-dump-${projectVersion}.jar + 
dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -259,7 +259,7 @@ cluster Extend dumped dataset with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -284,7 +284,7 @@ cluster Extend dumped ORP with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -309,7 +309,7 @@ cluster Extend dumped software with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -336,7 +336,7 @@ cluster Split dumped result for community eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml index 306e1d3..569b143 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml @@ -106,7 +106,7 @@ cluster Dump table publication eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -132,7 +132,7 @@ cluster Dump table dataset eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -158,7 +158,7 @@ cluster Dump table ORP eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -184,7 +184,7 @@ cluster Dump table software eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -210,7 +210,7 @@ cluster Dump table organization eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -236,7 +236,7 @@ cluster Dump table project eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -262,7 +262,7 @@ cluster Dump table datasource eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -288,7 +288,7 @@ cluster Select valid table relation 
eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -313,7 +313,7 @@ cluster Dump table relation eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -368,7 +368,7 @@ cluster Dump table relation eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -403,7 +403,7 @@ cluster Extract Relations from publication eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -429,7 +429,7 @@ cluster Dump table dataset eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -455,7 +455,7 @@ cluster Dump table ORP eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -481,7 +481,7 @@ cluster Dump table software eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -509,7 +509,7 @@ cluster Collect Results and Relations and put them in the right path eu.dnetlib.dhp.oa.graph.dump.complete.SparkCollectAndSave - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml index 1cdece8..75124cf 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml @@ -89,7 +89,7 @@ cluster Prepare association result subset of project info eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -121,7 +121,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -148,7 +148,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -175,7 +175,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + 
dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -202,7 +202,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -231,7 +231,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} From 746a06d4ea1939483f6016d14bb0411e4cc382c5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 11 Oct 2022 10:57:50 +0200 Subject: [PATCH 02/26] [Dump] removing all EOSC related addition from master --- .../model/EoscInteroperabilityFramework.java | 67 ------------------- .../eu/dnetlib/dhp/eosc/model/EoscResult.java | 23 ------- .../dnetlib/dhp/oa/graph/dump/Constants.java | 24 +++---- .../dhp/oa/graph/dump/ResultMapper.java | 47 +++---------- .../dhp/oa/graph/dump/DumpJobTest.java | 4 +- 5 files changed, 22 insertions(+), 143 deletions(-) delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscInteroperabilityFramework.java delete mode 100644 dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscInteroperabilityFramework.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscInteroperabilityFramework.java deleted file mode 100644 index 8b92bb2..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscInteroperabilityFramework.java +++ /dev/null @@ -1,67 +0,0 @@ - -package eu.dnetlib.dhp.eosc.model; - -import java.io.Serializable; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -/** - * @author miriam.baglioni - * @Date 29/07/22 - */ -public class EoscInteroperabilityFramework implements Serializable { - @JsonSchema(description = "EOSC-IF label") - private String label; - - @JsonSchema( - description = "EOSC-IF local code. Later on it could be populated with a PID (e.g. DOI), but for the time being we stick to a more loose definition.") - private String code; - - @JsonSchema(description = "EOSC-IF url to the guidelines") - private String url; - - @JsonSchema(description = "EOSC-IF semantic relation (e.g. 
compliesWith)") - private String semanticRelation; - - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } - - public String getSemanticRelation() { - return semanticRelation; - } - - public void setSemanticRelation(String semanticRelation) { - this.semanticRelation = semanticRelation; - } - - public static EoscInteroperabilityFramework newInstance(String code, String label, String url, - String semanticRelation) { - EoscInteroperabilityFramework eif = new EoscInteroperabilityFramework(); - eif.label = label; - eif.code = code; - eif.url = url; - eif.semanticRelation = semanticRelation; - return eif; - } -} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java b/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java deleted file mode 100644 index 1bbc675..0000000 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/eosc/model/EoscResult.java +++ /dev/null @@ -1,23 +0,0 @@ - -package eu.dnetlib.dhp.eosc.model; - -import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; - -import eu.dnetlib.dhp.oa.model.graph.GraphResult; - -/** - * @author miriam.baglioni - * @Date 29/07/22 - */ -public class EoscResult extends GraphResult { - @JsonSchema(description = "Describes a reference to the EOSC Interoperability Framework (IF) Guidelines") - private EoscInteroperabilityFramework eoscIF; - - public EoscInteroperabilityFramework getEoscIF() { - return eoscIF; - } - - public void setEoscIF(EoscInteroperabilityFramework eoscIF) { - this.eoscIF = eoscIF; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java index 06f7d79..7c74066 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java @@ -9,8 +9,8 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; public class Constants { - protected static final Map accessRightsCoarMap = Maps.newHashMap(); - protected static final Map coarCodeLabelMap = Maps.newHashMap(); + protected static final Map ACCESS_RIGHTS_COAR_MAP = Maps.newHashMap(); + protected static final Map COAR_CODE_LABEL_MAP = Maps.newHashMap(); public static final String INFERRED = "Inferred by OpenAIRE"; public static final String CABF2 = "c_abf2"; @@ -28,22 +28,22 @@ public class Constants { public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative"; static { - accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2); - accessRightsCoarMap.put("RESTRICTED", "c_16ec"); - accessRightsCoarMap.put("OPEN SOURCE", CABF2); - accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb"); - accessRightsCoarMap.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf"); + ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2); + ACCESS_RIGHTS_COAR_MAP.put("RESTRICTED", "c_16ec"); + ACCESS_RIGHTS_COAR_MAP.put("OPEN SOURCE", CABF2); + ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_CLOSED, "c_14cb"); + ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_EMBARGO, "c_f1cf"); } static { - coarCodeLabelMap.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN); - coarCodeLabelMap.put("c_16ec", "RESTRICTED"); - coarCodeLabelMap.put("c_14cb", 
ModelConstants.ACCESS_RIGHT_CLOSED); - coarCodeLabelMap.put("c_f1cf", "EMBARGO"); + COAR_CODE_LABEL_MAP.put(CABF2, ModelConstants.ACCESS_RIGHT_OPEN); + COAR_CODE_LABEL_MAP.put("c_16ec", "RESTRICTED"); + COAR_CODE_LABEL_MAP.put("c_14cb", ModelConstants.ACCESS_RIGHT_CLOSED); + COAR_CODE_LABEL_MAP.put("c_f1cf", "EMBARGO"); } public enum DUMPTYPE { - COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder"), EOSC("eosc"); + COMPLETE("complete"), COMMUNITY("community"), FUNDER("funder"); private final String type; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 8d4035b..6e9d20f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -9,12 +9,6 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.eosc.model.EoscInteroperabilityFramework; -import eu.dnetlib.dhp.eosc.model.EoscResult; -import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1; import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException; import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; import eu.dnetlib.dhp.oa.model.*; @@ -43,8 +37,6 @@ public class ResultMapper implements Serializable { Result out; if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { out = new GraphResult(); - } else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - out = new EoscResult(); } else { out = new CommunityResult(); } @@ -64,15 +56,15 @@ public class ResultMapper implements Serializable { // I do not map Access Right UNKNOWN or OTHER Optional oar = Optional.ofNullable(input.getBestaccessright()); - if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(oar.get().getClassid()); + if (oar.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(oar.get().getClassid())) { + String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(oar.get().getClassid()); out .setBestaccessright( BestAccessRight .newInstance( code, - Constants.coarCodeLabelMap.get(code), + Constants.COAR_CODE_LABEL_MAP.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); } @@ -157,11 +149,7 @@ public class ResultMapper implements Serializable { ((GraphResult) out) .setInstance( oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); - } else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - ((EoscResult) out) - .setInstance( - oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); - } else { + } else { ((CommunityResult) out) .setInstance( oInst @@ -240,26 +228,7 @@ public class ResultMapper implements Serializable { out.setType(input.getResulttype().getClassid()); - if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) { - if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) { - List gei = input.getEoscifguidelines(); - if (gei.size() > 1) { - throw new CardinalityTooHighException( - "EOSC IF in the result has cardinality greater than one. 
Change dump!"); - } - if (gei.size() == 1) { - - EoscIfGuidelines ifra = gei.get(0); - ((EoscResult) out) - .setEoscIF( - EoscInteroperabilityFramework - .newInstance( - ifra.getCode(), ifra.getLabel(), ifra.getUrl(), - ifra.getSemanticRelation())); - - } - } - } else if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { + if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((CommunityResult) out) .setCollectedfrom( input @@ -472,15 +441,15 @@ public class ResultMapper implements Serializable { private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) { Optional opAr = Optional.ofNullable(i.getAccessright()); - if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) { - String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid()); + if (opAr.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(opAr.get().getClassid())) { + String code = Constants.ACCESS_RIGHTS_COAR_MAP.get(opAr.get().getClassid()); instance .setAccessright( AccessRight .newInstance( code, - Constants.coarCodeLabelMap.get(code), + Constants.COAR_CODE_LABEL_MAP.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA)); Optional> mes = Optional.ofNullable(i.getMeasures()); diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 34da999..0cfade8 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -317,7 +317,7 @@ public class DumpJobTest { Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel()); Assertions .assertEquals( - Constants.accessRightsCoarMap.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); + Constants.ACCESS_RIGHTS_COAR_MAP.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode()); Assertions.assertEquals("One Ecosystem", gr.getContainer().getName()); Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline()); @@ -373,7 +373,7 @@ public class DumpJobTest { .getAccessright() .getCode() .equals( - Constants.accessRightsCoarMap + Constants.ACCESS_RIGHTS_COAR_MAP .get(ModelConstants.ACCESS_RIGHT_OPEN))); Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN)); Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green)); From cebee39c22dfff9295f72d1e8d6cdf398c38b416 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 11 Oct 2022 10:58:44 +0200 Subject: [PATCH 03/26] [Dump] removing all EOSC related addition from master --- .../dump/eosc_select_result_parameters.json | 30 --- .../eoscdump/oozie_app/config-default.xml | 30 --- .../dump/eoscdump/oozie_app/workflow.xml | 231 ------------------ 3 files changed, 291 deletions(-) delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/config-default.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json deleted file mode 100644 index a59a5ce..0000000 --- 
a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "out", - "paramLongName": "outputPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - } -] - - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml deleted file mode 100644 index de85e94..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eoscdump/oozie_app/workflow.xml +++ /dev/null @@ -1,231 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. 
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - yarn - cluster - Dump Publication For EOSC - eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1 - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/tar/publication - - - - - - - - yarn - cluster - Dump Dataset For EOSC - eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1 - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/tar/dataset - - - - - - - - yarn - cluster - Dump ORP For EOSC - eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1 - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/otherresearchproduct - 
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/tar/otherresearchproduct - - - - - - - - yarn - cluster - Dump Software For EOSC - eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1 - dump-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/tar/software - - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/tar - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - \ No newline at end of file From 6ed41a927643ee7f96a7bfa98df15177ae0aa32e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 11 Oct 2022 11:55:32 +0200 Subject: [PATCH 04/26] [Dump] removing all EOSC related addition from master --- .../eu/dnetlib/dhp/ExecCreateSchemas.java | 2 - .../src/test/java/GenerateJsonSchema.java | 18 +--- .../dump/eosc/SelectEoscRelationsStep2.java | 83 ----------------- .../dump/eosc/SelectEoscResultsJobStep1.java | 89 ------------------- .../dhp/oa/graph/dump/DumpJobTest.java | 42 +-------- .../dhp/oa/graph/dump/eosc/eosctag.json | 1 - 6 files changed, 2 insertions(+), 233 deletions(-) delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscRelationsStep2.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java delete mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/eosctag.json diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java index 4adce65..e54f60e 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java @@ -12,7 +12,6 @@ import com.fasterxml.jackson.databind.SerializationFeature; import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; -import eu.dnetlib.dhp.eosc.model.EoscResult; import eu.dnetlib.dhp.oa.model.community.CommunityResult; import eu.dnetlib.dhp.oa.model.graph.*; @@ -70,7 +69,6 @@ public class ExecCreateSchemas { ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json"); - ecs.generate(EoscResult.class, DIRECTORY, "eosc_result_schema.json"); } } diff --git a/dump-schema/src/test/java/GenerateJsonSchema.java b/dump-schema/src/test/java/GenerateJsonSchema.java index 7fe8076..7b875ca 100644 --- a/dump-schema/src/test/java/GenerateJsonSchema.java +++ b/dump-schema/src/test/java/GenerateJsonSchema.java @@ -9,7 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.ExecCreateSchemas; -import 
eu.dnetlib.dhp.eosc.model.EoscResult; + import eu.dnetlib.dhp.oa.model.graph.GraphResult; //@Disabled @@ -46,22 +46,6 @@ class GenerateJsonSchema { System.out.println(jsonSchema.toString()); } - @Test - void generateSchema3() throws JsonProcessingException { - - ObjectMapper objectMapper = new ObjectMapper(); - AddonModule module = new AddonModule(); - SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper, - SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) - .with(module) - .with(Option.SCHEMA_VERSION_INDICATOR) - .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); - SchemaGeneratorConfig config = configBuilder.build(); - SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(EoscResult.class); - - System.out.println(new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(jsonSchema)); - } @Test void generateJsonSchema3() throws IOException { diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscRelationsStep2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscRelationsStep2.java deleted file mode 100644 index 72422f4..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscRelationsStep2.java +++ /dev/null @@ -1,83 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.eosc; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; -import eu.dnetlib.dhp.schema.oaf.Relation; - -/** - * @author miriam.baglioni - * @Date 27/07/22 - */ -public class SelectEoscRelationsStep2 implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SelectEoscRelationsStep2.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SelectEoscRelationsStep2.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/reletion_selection_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String resultPath = parser.get("resultPath"); - log.info("resultPath: {}", resultPath); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, resultPath + "/relation"); - selectRelations(spark, inputPath, resultPath + "/relation", resultPath); - }); - } - - private static void selectRelations(SparkSession spark, String inputPath, String outputPath, String resultPath) { - Dataset results = Utils - .readPath(spark, resultPath + "/publication", GraphResult.class) - .union( - Utils - .readPath(spark, resultPath + "/dataset", GraphResult.class)) - .union( - Utils - .readPath(spark, 
resultPath + "/software", GraphResult.class)) - .union( - Utils - .readPath(spark, resultPath + "/otherresearchproduct", GraphResult.class)); - - Dataset relations = Utils - .readPath(spark, inputPath + "/relation", Relation.class) - .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && - !r.getDataInfo().getInvisible()); - - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java deleted file mode 100644 index 304d891..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SelectEoscResultsJobStep1.java +++ /dev/null @@ -1,89 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.eosc; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.io.Serializable; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.eosc.model.EoscResult; -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.model.graph.GraphResult; -import eu.dnetlib.dhp.schema.oaf.Result; - -/** - * @author miriam.baglioni - * @Date 27/07/22 - */ -public class SelectEoscResultsJobStep1 implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SelectEoscResultsJobStep1.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - SelectEoscResultsJobStep1.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/eosc_select_result_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - Class inputClazz = (Class) Class.forName(resultClassName); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - selectEoscResults(spark, inputPath, outputPath, inputClazz); - }); - } - - private static void selectEoscResults(SparkSession spark, String inputPath, String outputPath, - Class inputClazz) { - Utils - .readPath(spark, inputPath, inputClazz) - .filter( - (FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible() - && r.getContext().stream().anyMatch(c -> c.getId().equals("eosc"))) - .map( - (MapFunction) r -> (EoscResult) ResultMapper - .map(r, null, Constants.DUMPTYPE.EOSC.getType()), - Encoders.bean(EoscResult.class)) - .write() - .mode(SaveMode.Overwrite) - 
.option("compression", "gzip") - .json(outputPath); - } - -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index 0cfade8..cddc530 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -25,9 +25,8 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; -import eu.dnetlib.dhp.eosc.model.EoscResult; + import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1; import eu.dnetlib.dhp.oa.model.Instance; import eu.dnetlib.dhp.oa.model.OpenAccessRoute; import eu.dnetlib.dhp.oa.model.community.CommunityResult; @@ -883,46 +882,7 @@ public class DumpJobTest { } - @Test - public void testEOSCDump() throws Exception { - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/eosctag.json") - .getPath(); - final String communityMapPath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") - .getPath(); - - SelectEoscResultsJobStep1 - .main( - new String[] { - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", - sourcePath, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/working" - - }); - - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - JavaRDD tmp = sc - .textFile(workingDir.toString() + "/working") - .map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class)); - - org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(EoscResult.class)); - - Assertions.assertEquals(1, verificationDataset.count()); - - Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count()); - - Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getCode().equals("EOSC::Twitter Data")).count()); - Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getLabel().equals("EOSC::Twitter Data")).count()); - Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getUrl().equals("")).count()); - Assertions.assertEquals(1, tmp.filter(d -> d.getEoscIF().getSemanticRelation().equals("compliesWith")).count()); - - } @Test public void testArticlePCA() { diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/eosctag.json b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/eosctag.json deleted file mode 100644 index cd17fa7..0000000 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/eosctag.json +++ /dev/null @@ -1 +0,0 @@ -{"geolocation": [], "dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "resourcetype": {"classid": "UNKNOWN", "classname": "Unknown", "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource"}, "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", 
"schemename": "dnet:pid_types"}, "value": "10.21227/mqmt-yq28"}], "contributor": [], "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "issued", "classname": "issued", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2020-11-21"}], "collectedfrom": [{"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}], "id": "50|doi_________::bbf3a8925017a575215fc7be77cab114", "subject": [{"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "COVID-19"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Machine Learning"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Corona Tweets Dataset"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "COVID-19 Tweets Dataset"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Corona Tweets"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "COVID-19 Tweets"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Corona Twitter Sentiment"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "COVID-19 Twitter Sentiment"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "SARS-CoV-2 Tweets Dataset"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "SARS-CoV-2 Twitter Sentiment"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Coronavirus English Tweets Dataset"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "COVID-19 English Tweets Dataset"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Coronavirus Geotagged Tweets"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "COVID-19 Geotagged Tweets"}, {"dataInfo": {"provenanceaction": {"classid": "iis", 
"classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_classes", "invisible": false, "trust": "0.891"}, "qualifier": {"classid": "ACM", "classname": "ACM Computing Classification System", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "InformationSystems_MISCELLANEOUS"}, {"dataInfo": {"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_classes", "invisible": false, "trust": "0.8226"}, "qualifier": {"classid": "ACM", "classname": "ACM Computing Classification System", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "InformationSystems_INFORMATIONSTORAGEANDRETRIEVAL"}], "lastupdatetimestamp": 1657046634922, "author": [{"surname": "Lamsal", "name": "Rabindra", "pid": [], "rank": 1, "affiliation": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "School of Computer and Systems Sciences, JN"}], "fullname": "Lamsal, Rabindra"}], "instance": [{"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::3bc31eb6c47d0134a1ac576dc028c3b9", "value": "IEEE DataPort"}, "license": {"value": "https://creativecommons.org/licenses/by/4.0/legalcode"}, "url": ["https://dx.doi.org/10.21227/mqmt-yq28"], "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.21227/mqmt-yq28"}], "dateofacceptance": {"value": "2020-11-21"}, "collectedfrom": {"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2020-11-21T04:46:12+0000", "fulltext": [], "dateoftransformation": "2020-11-21T04:46:12+0000", "description": [{"value": "This dataset contains IDs and sentiment scores of the geo-tagged tweets related to the COVID-19 pandemic. The tweets are captured by an on-going project deployed at https://live.rlamsal.com.np. The model monitors the real-time Twitter feed for coronavirus-related tweets using 90+ different keywords and hashtags that are commonly used while referencing the pandemic. Complying with Twitter's content redistribution policy, only the tweet IDs are shared. You can re-construct the dataset by hydrating these IDs. 
The tweet IDs in this dataset belong to the tweets tweeted providing an exact location.The paper associated with this dataset is available here: Design and analysis of a large-scale COVID-19 tweets dataset"}], "format": [], "measures": [], "coverage": [], "externalReference": [], "publisher": {"value": "IEEE DataPort"}, "context": [{"dataInfo": [{"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_covid19", "invisible": false, "trust": "0.9"}, {"provenanceaction": {"classid": "community:subject", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "bulktagging", "invisible": false, "trust": "0.8"}, {"provenanceaction": {"classid": "community:datasource", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "bulktagging", "invisible": false, "trust": "0.8"}], "id": "covid-19"}, {"dataInfo": [{"provenanceaction": {"classid": "community:datasource", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "bulktagging", "invisible": false, "trust": "0.8"}], "id": "eosc"}], "eoscifguidelines": [{"semanticRelation": "compliesWith", "url": "", "code": "EOSC::Twitter Data", "label": "EOSC::Twitter Data"}], "language": {"classid": "und", "classname": "Undetermined", "schemeid": "dnet:languages", "schemename": "dnet:languages"}, "resulttype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["10.21227/mqmt-yq28"], "source": [], "dateofacceptance": {"value": "2020-11-21"}, "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Coronavirus (COVID-19) Geo-tagged Tweets Dataset"}]} \ No newline at end of file From db657d79a62dce28623560a77855ab36429412e6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 13 Oct 2022 11:50:48 +0200 Subject: [PATCH 05/26] [Dump] fixed issue on dump of datasource pids --- .../dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 43a569a..78e4eb8 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -126,10 +126,10 @@ public class DumpGraphEntities implements Serializable { Optional .ofNullable(d.getPid()) .ifPresent( - pids -> pids + pids -> datasource.setPid(pids .stream() .map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList())); + .collect(Collectors.toList()))); Optional .ofNullable(d.getDatasourcetype()) From 0a0e2cfc9cf79954e4dd994450a5b8428901e3c2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 29 Nov 2022 16:09:10 +0100 Subject: 
[PATCH 06/26] refactoring --- .../main/java/eu/dnetlib/dhp/ExecCreateSchemas.java | 1 - dump-schema/src/test/java/GenerateJsonSchema.java | 2 -- .../eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java | 2 +- .../dhp/oa/graph/dump/complete/DumpGraphEntities.java | 10 ++++++---- .../java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java | 6 ++---- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java index e54f60e..a9a0c49 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/ExecCreateSchemas.java @@ -69,6 +69,5 @@ public class ExecCreateSchemas { ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json"); - } } diff --git a/dump-schema/src/test/java/GenerateJsonSchema.java b/dump-schema/src/test/java/GenerateJsonSchema.java index 7b875ca..381ebb5 100644 --- a/dump-schema/src/test/java/GenerateJsonSchema.java +++ b/dump-schema/src/test/java/GenerateJsonSchema.java @@ -9,7 +9,6 @@ import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.ExecCreateSchemas; - import eu.dnetlib.dhp.oa.model.graph.GraphResult; //@Disabled @@ -46,7 +45,6 @@ class GenerateJsonSchema { System.out.println(jsonSchema.toString()); } - @Test void generateJsonSchema3() throws IOException { diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 6e9d20f..b9cd515 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -149,7 +149,7 @@ public class ResultMapper implements Serializable { ((GraphResult) out) .setInstance( oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); - } else { + } else { ((CommunityResult) out) .setInstance( oInst diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java index 78e4eb8..4e176e7 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java @@ -126,10 +126,12 @@ public class DumpGraphEntities implements Serializable { Optional .ofNullable(d.getPid()) .ifPresent( - pids -> datasource.setPid(pids - .stream() - .map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue())) - .collect(Collectors.toList()))); + pids -> datasource + .setPid( + pids + .stream() + .map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); Optional .ofNullable(d.getDatasourcetype()) diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java index cddc530..9d27cab 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java @@ -25,7 +25,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; - import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.model.Instance; import 
eu.dnetlib.dhp.oa.model.OpenAccessRoute;
@@ -316,7 +315,8 @@ public class DumpJobTest {
 		Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel());
 		Assertions
 			.assertEquals(
-				Constants.ACCESS_RIGHTS_COAR_MAP.get(ModelConstants.ACCESS_RIGHT_OPEN), gr.getBestaccessright().getCode());
+				Constants.ACCESS_RIGHTS_COAR_MAP.get(ModelConstants.ACCESS_RIGHT_OPEN),
+				gr.getBestaccessright().getCode());
 		Assertions.assertEquals("One Ecosystem", gr.getContainer().getName());
 		Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline());
@@ -882,8 +882,6 @@ public class DumpJobTest {
 	}
-
-
 	@Test
 	public void testArticlePCA() {
 		final String sourcePath = getClass()
From 62d818089140ec1889fea111f66fda4db614218b Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Thu, 22 Dec 2022 09:54:21 +0100
Subject: [PATCH 07/26] [ChangeMeasure] simplified workflow
---
 .../graph/dump/complete/SparkCollectAndSave.java |  3 ++-
 .../oa/graph/dump/wf/main/oozie_app/workflow.xml | 16 ++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java
index adb7394..0839133 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java
@@ -78,7 +78,8 @@ public class SparkCollectAndSave implements Serializable {
 				.union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class))
 				.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class))
 				.union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class))
-				.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING() )
+				.map(
+					(MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
 				.write()
 				.option("compression", "gzip")
 				.mode(SaveMode.Overwrite)
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
index 2a612de..229b57c 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
@@ -53,14 +53,14 @@
 			depositionType
 			the type of deposition we want to perform. 
"new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - conceptRecordId - for new version, the id of the record for the old deposition - - - depositionId - the depositionId of a deposition open that has to be added content - + + + + + + + + organizationCommunityMap the organization community map From 71862838b02ace8a8fe6fb9a2258367fb1d9f34f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 27 Dec 2022 10:00:47 +0100 Subject: [PATCH 08/26] [dump] removed relations extracted from products where the datasource was not in the graph --- .../eu/dnetlib/dhp/oa/graph/dump/Utils.java | 66 ++++++++++++++++ .../dump/complete/SparkCollectAndSave.java | 20 ++++- .../dump/subset/SparkSelectValidContext.java | 19 +++-- .../dump/subset/SparkSelectValidRelation.java | 79 ++++++------------- .../complete/oozie_app/workflow.xml | 2 +- .../subset/oozie_app/workflow.xml | 3 +- .../oa/graph/dump/subset/DumpSubsetTest.java | 3 +- .../dump/subset/dump/community_infrastructure | 3 +- .../oa/graph/dump/subset/original/publication | 3 +- 9 files changed, 122 insertions(+), 76 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index 8e75e9d..7328ce8 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -10,6 +10,7 @@ import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import com.fasterxml.jackson.databind.ObjectMapper; @@ -18,9 +19,13 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.complete.Constants; +import eu.dnetlib.dhp.oa.model.graph.GraphResult; +import eu.dnetlib.dhp.oa.model.graph.Relation; +import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import scala.Tuple2; public class Utils { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -74,4 +79,65 @@ public class Utils { return new Gson().fromJson(sb.toString(), CommunityMap.class); } + public static Dataset getEntitiesId(SparkSession spark, String inputPath) { + Dataset dumpedIds = Utils + .readPath(spark, inputPath + "/publication", GraphResult.class) + .map((MapFunction) r -> r.getId(), Encoders.STRING()) + .union( + Utils + .readPath(spark, inputPath + "/dataset", GraphResult.class) + .map((MapFunction) r -> r.getId(), Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/software", GraphResult.class) + .map((MapFunction) r -> r.getId(), Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class) + .map((MapFunction) r -> r.getId(), Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class) + .map( + (MapFunction) o -> o.getId(), + Encoders.STRING())) + .union( + Utils + .readPath(spark, inputPath + "/project", 
eu.dnetlib.dhp.oa.model.graph.Project.class)
+					.map(
+						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(), Encoders.STRING()))
+			.union(
+				Utils
+					.readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
+					.map(
+						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
+						Encoders.STRING()))
+			.union(
+				Utils
+					.readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class)
+					.map((MapFunction<ResearchCommunity, String>) c -> c.getId(), Encoders.STRING()));
+		return dumpedIds;
+	}
+
+	public static Dataset<Relation> getValidRelations(SparkSession spark, Dataset<Relation> relations,
+		Dataset<String> entitiesIds) {
+		Dataset<Tuple2<String, Relation>> relationSource = relations
+			.map(
+				(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource().getId(), r),
+				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
+
+		Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
+			.joinWith(entitiesIds, relationSource.col("_1").equalTo(entitiesIds.col("value")))
+			.map(
+				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
+					t2._1()._2().getTarget().getId(), t2._1()._2()),
+				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
+
+		return relJoinSource
+			.joinWith(entitiesIds, relJoinSource.col("_1").equalTo(entitiesIds.col("value")))
+			.map(
+				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
+				Encoders.bean(Relation.class));
+	}
+
 }
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java
index c6d7616..ade2fb9 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkCollectAndSave.java
@@ -23,6 +23,7 @@ import eu.dnetlib.dhp.oa.graph.dump.Utils;
 import eu.dnetlib.dhp.oa.model.graph.GraphResult;
 import eu.dnetlib.dhp.oa.model.graph.Relation;
 import it.unimi.dsi.fastutil.objects.Object2BooleanMap;
+import scala.Tuple2;

 /**
  * Reads all the entities of the same type (Relation / Results) and saves them in the same folder
@@ -96,7 +97,7 @@ public class SparkCollectAndSave implements Serializable {
 			write(
 				Utils
 					.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class),
-				outputPath + "/otheresearchproduct");
+				outputPath + "/otherresearchproduct");
 			write(
 				Utils
 					.readPath(spark, inputPath + "/result/software", GraphResult.class),
 				outputPath + "/software");
 		}
-		Utils
+//		Dataset<String> dumpedIds = Utils.getEntitiesId(spark, outputPath);
+
+		Dataset<Relation> relations = Utils
 			.readPath(spark, inputPath + "/relation/publication", Relation.class)
 			.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
 			.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
 			.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
 			.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
 			.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
-			.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
+			.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class));
+
+		Utils.getValidRelations(spark, relations, Utils.getEntitiesId(spark, outputPath))
+//		Dataset<Tuple2<Relation, String>> relJoinSource = relations
+//			.joinWith(dumpedIds, relations.col("source.id").equalTo(dumpedIds.col("value")))
+//			.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
+//				Encoders.bean(Relation.class));
+//
+//		relJoinSource
+//			.joinWith(dumpedIds, relJoinSource.col("target.id").equalTo(dumpedIds.col("value")))
+//			.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
+//				Encoders.bean(Relation.class))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidContext.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidContext.java
index 0b90c70..0dcdbcd 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidContext.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidContext.java
@@ -4,10 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.subset;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;
 import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
@@ -88,6 +85,8 @@ public class SparkSelectValidContext implements Serializable {
 			.union(getFilter(spark, inputPath + "/otherresearchproduct", keys, OtherResearchProduct.class))
 			.distinct();

+		context.foreach((ForeachFunction<String>) c -> System.out.println(c));
+
 		Dataset<ResearchCommunity> researchCommunity = Utils.readPath(spark, contextPath, ResearchCommunity.class);

 		researchCommunity
@@ -112,11 +111,11 @@ public class SparkSelectValidContext implements Serializable {
 				(FlatMapFunction<R, String>) r -> r
 					.getContext()
 					.stream()
-					.map(c -> c.getId())
+					.map(c -> extract(c.getId(), keys))
 					.collect(Collectors.toList())
 					.iterator(),
 				Encoders.STRING())
-			.filter((FilterFunction<String>) c -> extracted(c, keys));
+			.filter(Objects::nonNull);
 	}

@@ -124,11 +123,11 @@
 		return Optional.ofNullable(r.getContext()).isPresent();
 	}

-	private static boolean extracted(String c, List<String> keySet) {
+	private static String extract(String c, List<String> keySet) {
 		if (keySet.contains(c))
-			return true;
+			return c;
 		if (c.contains(":") && keySet.contains(c.substring(0, c.indexOf(":"))))
-			return true;
-		return false;
+			return c.substring(0, c.indexOf(":"));
+		return null;
 	}
 }
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelation.java
index 84ca4f6..8e1db7b 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelation.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelation.java
@@ -2,6 +2,8 @@
 package eu.dnetlib.dhp.oa.graph.dump.subset;

 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntitiesId;
+import static eu.dnetlib.dhp.oa.graph.dump.Utils.getValidRelations;

 import java.io.Serializable;
 import java.util.Optional;
@@ -69,65 +71,28 @@ public class SparkSelectValidRelation implements Serializable {
 	private static void selectValidRelation(SparkSession spark, String inputPath, String relationPath) {
 		// read the results
-		Dataset<String> dumpedIds = Utils
-			.readPath(spark, inputPath + "/publication", GraphResult.class)
-			.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
-			.union(
-				Utils
-					.readPath(spark, inputPath + "/dataset", GraphResult.class)
-					.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
-			.union(
-				Utils
-					.readPath(spark, inputPath + "/software", GraphResult.class)
-					.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()))
-			.union(
-				Utils
-					.readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class)
-					.map((MapFunction<GraphResult, String>) 
r -> r.getId(), Encoders.STRING()))
-			.union(
-				Utils
-					.readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
-					.map(
-						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) o -> o.getId(),
-						Encoders.STRING()))
-			.union(
-				Utils
-					.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
-					.map(
-						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(), Encoders.STRING()))
-			.union(
-				Utils
-					.readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
-					.map(
-						(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
-						Encoders.STRING()))
-			.union(
-				Utils
-					.readPath(spark, inputPath + "/community_infrastructure", ResearchCommunity.class)
-					.map((MapFunction<ResearchCommunity, String>) c -> c.getId(), Encoders.STRING()));

-		Dataset<Tuple2<String, Relation>> relationSource = Utils
-			.readPath(spark, relationPath, Relation.class)
-			.map(
-				(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource().getId(), r),
-				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
+		getValidRelations(
+			spark, Utils
+				.readPath(spark, relationPath, Relation.class),
+			getEntitiesId(spark, inputPath))

-		Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
-			.joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
-			.map(
-				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
-					t2._1()._2().getTarget().getId(), t2._1()._2()),
-				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
-
-		relJoinSource
-			.joinWith(dumpedIds, relJoinSource.col("_1").equalTo(dumpedIds.col("value")))
-			.map(
-				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
-				Encoders.bean(Relation.class))
-			.write()
-			.mode(SaveMode.Overwrite)
-			.option("compression", "gzip")
-			.json(inputPath + "/relation");
+//		Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
+//			.joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
+//			.map(
+//				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
+//					t2._1()._2().getTarget().getId(), t2._1()._2()),
+//				Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
+//
+//		relJoinSource
+//			.joinWith(dumpedIds, relJoinSource.col("_1").equalTo(dumpedIds.col("value")))
+//			.map(
+//				(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
+//				Encoders.bean(Relation.class))
+			.write()
+			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
+			.json(inputPath + "/relation");

 //		relJoinSource = relationSource
 //			.joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
index df36e30..ab598f4 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
@@ -336,7 +336,7 @@
 			eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities
-			--hdfsPath${outputPath}/communities_infrastructures/communities_infrastructure.json.gz
+			--hdfsPath${outputPath}/communities_infrastructures/community_infrastructure.json.gz
 			--nameNode${nameNode}
 			--isLookUpUrl${isLookUpUrl}
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml
index a301b10..3660a12 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml
+++ 
b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml @@ -380,7 +380,7 @@ --sourcePath${outputPath}/original --contextPath${workingDir}/context/community_infrastructure.json.gz --communityMapPath${communityMapPath} - --outputPath${outputPath}/dump/community_infrastructure + --outputPath${outputPath}/dump/communities_infrastructures @@ -598,7 +598,6 @@ --sourcePath${outputPath}/dump --relationPath${workingDir}/relation - diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java index 2035af1..5fa9286 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java @@ -388,13 +388,14 @@ public class DumpSubsetTest { .textFile(workingDir.toString() + "/dump/community_infrastructure") .map(item -> OBJECT_MAPPER.readValue(item, ResearchCommunity.class)); - Assertions.assertEquals(5, tmp.count()); + Assertions.assertEquals(6, tmp.count()); Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("enermaps")).count()); Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("eutopia")).count()); Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("dh-ch")).count()); Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("beopen")).count()); Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("neanias-underwater")).count()); + Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("sdsn-gr")).count()); } diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure index 6470db8..d72fa66 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure @@ -2,4 +2,5 @@ {"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","acronym":"enermaps","name":"Welcome to EnerMaps Gateway! Find the latest scientific data.","type":"Research Community","description":"","zenodo_community":null,"subject":[]} {"id":"00|context_____::6f567d9abd1c6603b0c0205a832bc757","acronym":"neanias-underwater","name":"NEANIAS Underwater Research Community","type":"Research Community","description":"","zenodo_community":null,"subject":["Ocean mapping","Multibeam Backscatter","Bathymetry","Seabed classification","Submarine Geomorphology","Underwater Photogrammetry"]} {"id":"00|context_____::04a00617ca659adc944977ac700ea14b","acronym":"dh-ch","name":"Digital Humanities and Cultural Heritage","type":"Research Community","description":"This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. 
This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.","zenodo_community":"https://zenodo.org/communities/oac_dh-ch","subject":["modern art","monuments","europeana data model","field walking","frescoes","LIDO metadata schema","art history","excavation","Arts and Humanities General","coins","temples","numismatics","lithics","environmental archaeology","digital cultural heritage","archaeological reports","history","CRMba","churches","cultural heritage","archaeological stratigraphy","religious art","digital humanities","archaeological sites","linguistic studies","bioarchaeology","architectural orders","palaeoanthropology","fine arts","europeana","CIDOC CRM","decorations","classic art","stratigraphy","digital archaeology","intangible cultural heritage","walls","chapels","CRMtex","Language and Literature","paintings","archaeology","mosaics","burials","medieval art","castles","CARARE metadata schema","statues","natural language processing","inscriptions","CRMsci","vaults","contemporary art","Arts and Humanities","CRMarchaeo","pottery"]} -{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","acronym":"beopen","name":"Transport Research","type":"Research Community","description":"Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research.\nThe TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)","zenodo_community":"https://zenodo.org/communities/be-open-transport","subject":["Green Transport","City mobility systems","Vulnerable road users","Traffic engineering","Transport electrification","Intermodal freight transport","Clean vehicle fleets","Intelligent mobility","Inflight refueling","District mobility systems","Navigation and control systems for optimised planning and routing","European Space Technology Platform","European Transport networks","Green cars","Inter-modality infrastructures","Advanced Take Off and Landing Ideas","Sustainable urban systems","port-area railway networks","Innovative forms of urban transport","Alliance for Logistics Innovation through Collaboration in Europe","Advisory Council for Aeronautics Research in Europe","Mobility services for people and goods","Guidance and traffic management","Passenger mobility","Smart mobility and services","transport innovation","high-speed railway","Vehicle design","Inland shipping","public transportation","aviation’s climate impact","Road transport","On-demand public transport","Personal Air Transport","Pipeline transport","European Association of Aviation Training and Education Organisations","Defrosting of railway infrastructure","Inclusive and affordable transport","River Information Services","jel:L92","Increased use of public transport","Seamless mobility","STRIA","trolleybus transport","Intelligent Transport System","Low-emission alternative energy for transport","Shared mobility for people and goods","Business model for urban mobility","Interoperability of transport systems","Cross-border train slot booking","Air transport","Transport pricing","Sustainable transport","European Rail Transport Research Advisory Council","Alternative aircraft configurations","Railways applications","urban transport","Environmental impact of transport","urban 
freight delivery systems","Automated Road Transport","Alternative fuels in public transport","Active LIDAR-sensor for GHG-measurements","Autonomous logistics operations","Rational use of motorised transport","Network and traffic management systems","electrification of railway wagons","Single European Sky","Electrified road systems","Railway dynamics","Motorway of the Sea","smart railway communications","Maritime transport","Environmental- friendly transport","Combined transport","Connected automated driving technology","Innovative freight logistics services","automated and shared vehicles","Alternative Aircraft Systems","Land-use and transport interaction","Public transport system","Business plan for shared mobility","Shared mobility","Growing of mobility demand","European Road Transport Research Advisory Council","WATERBORNE ETP","Effective transport management system","Short Sea Shipping","air traffic management","Sea hubs and the motorways of the sea","Urban mobility solutions","Smart city planning","Maritime spatial planning","EUropean rail Research Network of Excellence","ENERGY CONSUMPTION BY THE TRANSPORT SECTOR","Integrated urban plan","inland waterway services","European Conference of Transport Research Institutes","air vehicles","E-freight","Automated Driving","Automated ships","pricing for cross-border passenger transport","Vehicle efficiency","Railway transport","Electric vehicles","Road traffic monitoring","Deep sea shipping","Circular economy in transport","Traffic congestion","air transport system","Urban logistics","Rail transport","OpenStreetMap","high speed rail","Transportation engineering","Intermodal travel information","Flight Data Recorders","Advanced driver assistance systems","long distance freight transport","Inland waterway transport","Smart mobility","Mobility integration","Personal Rapid Transit system","Safety measures & requirements for roads","Green rail transport","Vehicle manufacturing","Future Airport Layout","Rail technologies","European Intermodal Research Advisory Council","inland navigation","Automated urban vehicles","ECSS-standards","Traveller services","Polluting transport","Air Traffic Control","Cooperative and connected and automated transport","Innovative powertrains","Quality of transport system and services","door-to- door logistics chain","Inter-modal aspects of urban mobility","Innovative freight delivery systems","urban freight delivery infrastructures"]} \ No newline at end of file +{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","acronym":"beopen","name":"Transport Research","type":"Research Community","description":"Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. 
It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research.\nThe TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)","zenodo_community":"https://zenodo.org/communities/be-open-transport","subject":["Green Transport","City mobility systems","Vulnerable road users","Traffic engineering","Transport electrification","Intermodal freight transport","Clean vehicle fleets","Intelligent mobility","Inflight refueling","District mobility systems","Navigation and control systems for optimised planning and routing","European Space Technology Platform","European Transport networks","Green cars","Inter-modality infrastructures","Advanced Take Off and Landing Ideas","Sustainable urban systems","port-area railway networks","Innovative forms of urban transport","Alliance for Logistics Innovation through Collaboration in Europe","Advisory Council for Aeronautics Research in Europe","Mobility services for people and goods","Guidance and traffic management","Passenger mobility","Smart mobility and services","transport innovation","high-speed railway","Vehicle design","Inland shipping","public transportation","aviation’s climate impact","Road transport","On-demand public transport","Personal Air Transport","Pipeline transport","European Association of Aviation Training and Education Organisations","Defrosting of railway infrastructure","Inclusive and affordable transport","River Information Services","jel:L92","Increased use of public transport","Seamless mobility","STRIA","trolleybus transport","Intelligent Transport System","Low-emission alternative energy for transport","Shared mobility for people and goods","Business model for urban mobility","Interoperability of transport systems","Cross-border train slot booking","Air transport","Transport pricing","Sustainable transport","European Rail Transport Research Advisory Council","Alternative aircraft configurations","Railways applications","urban transport","Environmental impact of transport","urban freight delivery systems","Automated Road Transport","Alternative fuels in public transport","Active LIDAR-sensor for GHG-measurements","Autonomous logistics operations","Rational use of motorised transport","Network and traffic management systems","electrification of railway wagons","Single European Sky","Electrified road systems","Railway dynamics","Motorway of the Sea","smart railway communications","Maritime transport","Environmental- friendly transport","Combined transport","Connected automated driving technology","Innovative freight logistics services","automated and shared vehicles","Alternative Aircraft Systems","Land-use and transport interaction","Public transport system","Business plan for shared mobility","Shared mobility","Growing of mobility demand","European Road Transport Research Advisory Council","WATERBORNE ETP","Effective transport management system","Short Sea Shipping","air traffic management","Sea hubs and the motorways of the sea","Urban mobility solutions","Smart city planning","Maritime spatial planning","EUropean rail Research Network of Excellence","ENERGY CONSUMPTION BY THE TRANSPORT SECTOR","Integrated urban plan","inland waterway services","European Conference of Transport Research Institutes","air vehicles","E-freight","Automated Driving","Automated ships","pricing for cross-border passenger transport","Vehicle efficiency","Railway transport","Electric vehicles","Road traffic monitoring","Deep sea shipping","Circular economy in 
transport","Traffic congestion","air transport system","Urban logistics","Rail transport","OpenStreetMap","high speed rail","Transportation engineering","Intermodal travel information","Flight Data Recorders","Advanced driver assistance systems","long distance freight transport","Inland waterway transport","Smart mobility","Mobility integration","Personal Rapid Transit system","Safety measures & requirements for roads","Green rail transport","Vehicle manufacturing","Future Airport Layout","Rail technologies","European Intermodal Research Advisory Council","inland navigation","Automated urban vehicles","ECSS-standards","Traveller services","Polluting transport","Air Traffic Control","Cooperative and connected and automated transport","Innovative powertrains","Quality of transport system and services","door-to- door logistics chain","Inter-modal aspects of urban mobility","Innovative freight delivery systems","urban freight delivery infrastructures"]} +{"id":"00|context_____::a38bf77184799906a6ce86b9eb761c80","acronym":"sdsn-gr","name":"Sustainable Development Solutions Network - Greece","type":"Research Community","description":"The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.","zenodo_community":"https://zenodo.org/communities/oac_sdsn-greece","subject":["SDG13 - Climate action","SDG8 - Decent work and economic\n\t\t\t\t\tgrowth","SDG15 - Life on land","SDG2 - Zero hunger","SDG17 - Partnerships for the\n\t\t\t\t\tgoals","SDG10 - Reduced inequalities","SDG5 - Gender equality","SDG12 - Responsible\n\t\t\t\t\tconsumption and production","SDG14 - Life below water","SDG6 - Clean water and\n\t\t\t\t\tsanitation","SDG11 - Sustainable cities and communities","SDG1 - No poverty","SDG3 -\n\t\t\t\t\tGood health and well being","SDG7 - Affordable and clean energy","SDG4 - Quality\n\t\t\t\t\teducation","SDG9 - Industry innovation and infrastructure","SDG16 - Peace justice\n\t\t\t\t\tand strong institutions"]} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/original/publication b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/original/publication index 0fc3ab1..03d874c 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/original/publication +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/subset/original/publication @@ -13,4 +13,5 @@ {"author":[{"affiliation":[],"fullname":"Embree, Jennifer","name":"Jennifer","pid":[],"rank":1,"surname":"Embree"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - 
Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}],"id":"dh-ch"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Over the last century, conflicts across the world have resulted in an unprecedented number of cultural heritage sites being purposefully targeted for destruction. While there have been several historical attempts to combat this destruction, the emerging field of digital humanities is now using new digital technologies to also document and preserve cultural heritage demolishment. This article conducts case studies of two such projects: Project Syria, a virtual reality experience documenting the Syrian Civil War, and Manar al-Athar, a digital photo archive that collects pictures of cultural heritage sites in the Middle East. 
This exploratory study seeks to compare past methods of preservation and documentation of cultural heritage during times of conflict to current methods of preservation and documentation through digital humanities projects, and to determine what digital humanities projects can accomplish that more traditional methods of preservation cannot."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::26243564100cd29b39382d2321372a95","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2018-01-01"},"distributionlocation":"","hostedby":{"key":"10|re3data_____::0394b97eb11f19785cbca1ec830429da","value":"UNC Dataverse"},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://dx.doi.org/10.17615/xh7w-qv18"]}],"language":{"classid":"English","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282702184,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T05:09:49.694Z","identifier":"10.17615/xh7w-qv18","metadataNamespace":""}},"originalId":["datacite____::26243564100cd29b39382d2321372a95"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.17615/xh7w-qv18"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The University of North Carolina at Chapel Hill University Libraries"},"relevantdate":[],"resourcetype":{"classid":"Masters Paper","classname":"Masters Paper","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Destruction of cultural 
property"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Digital humanities"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Syria--History--Syrian Civil War, 2011-"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Archives by Any Other name: Archiving Memory During Times of Conflict through Non-Traditional Methods--A Case Study on Digital Humanities Projects Manar al-Athar and Project Syria"}]} {"author":[{"affiliation":[],"fullname":"Huber, Brigitte","name":"Brigitte","pid":[],"rank":1,"surname":"Huber"},{"affiliation":[],"fullname":"Barnidge, Matthew","name":"Matthew","pid":[],"rank":2,"surname":"Barnidge"},{"affiliation":[],"fullname":"Zúñiga, Homero Gil De","name":"Homero Gil","pid":[],"rank":3,"surname":"Zúñiga"},{"affiliation":[],"fullname":"Liu, James","name":"James","pid":[],"rank":4,"surname":"Liu"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - 
Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}],"id":"science-innovation-policy"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Supplemental material, Supplemental_Material for Fostering public trust in science: The role of social media by Brigitte Huber, Matthew Barnidge, Homero Gil de Zúñiga and James Liu in Public Understanding of Science"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::2d1773354e6c79eee7001407cd8da2f0","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://creativecommons.org/licenses/by/4.0"},"url":["http://dx.doi.org/10.25384/sage.9869183.v1"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591282547342,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T04:44:26.733Z","identifier":"10.25384/sage.9869183.v1","metadataNamespace":""}},"originalId":["datacite____::2d1773354e6c79eee7001407cd8da2f0"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.25384/sage.9869183.v1"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"SAGE Journals"},"relevantdate":[],"resourcetype":{"classid":"Journal contribution","classname":"Journal contribution","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"200199 Communication and Media Studies not elsewhere classified"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"Science Policy"}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Supplemental_Material – Supplemental material for Fostering public trust in science: The role of social media"}]} 
{"author":[{"affiliation":[],"fullname":"Шогенцукова Залина Хасановна","name":"","pid":[],"rank":1,"surname":""},{"affiliation":[],"fullname":"Гедгафова Ирина Юрьевна","name":"","pid":[],"rank":2,"surname":""},{"affiliation":[],"fullname":"Мирзоева Жанна Мухарбиевна","name":"","pid":[],"rank":3,"surname":""},{"affiliation":[],"fullname":"Шогенцуков Али Хасанович","name":"","pid":[],"rank":4,"surname":""}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a","value":"Datacite"}],"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}],"id":"aginfra"}],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"dateofcollection":"","dateoftransformation":"","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Статья посвящена исследованию понятия кластера и его использования как инструмента управления повышения эффективности деятельности агропромышленным комплексом Кабардино-Балкарской Республики. Рассматриваются предпосылки и особенности кластеризации АПК как в отдельном регионе, так и в России в целом. Реализация кластерной политики в области сельского хозяйства России является инновационным подходом развития отрасли и повышения конкурентоспособности производимой продукции на рынке, повышения эффективности производственного процесса и т.д. В статье исследована модель «тройной спирали», используемой при создании и функционировании кластеров в сфере АПК. 
Исследование кластеров, как инструмент управления АПК отдельного региона, в частности Кабардино-Балкарской Республики, позволяет выявить факторы, обуславливающие необходимость данного процесса с одной стороны, а также выявлять резервы и иные возможности для общего развития эффективности АПК России и активации внедрения инновационных механизмов в сельское хозяйство."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Article is devoted to the study of the concept of cluster and their use as a management tool to improve the efficiency of the agro-industrial complex of the KabardinoBalkaria Republic. The prerequisites and features of agribusiness clustering both in a separate region and in Russia as a whole are considered. The implementations of the cluster policy in the field of agriculture in Russia are an innovative approach to the development of the industry and improve the competitiveness of products in the market, improve the efficiency of the production process, etc. The article investigates the model of “triple helix” used in the creation and functioning of clusters in the field of agriculture. The study of clusters as an instrument of agribusiness management in a particular region, in particular the Kabardino-Balkaria Republic, allows to identify the factors causing the need for this process on the one hand, as well as to identify reserves and other opportunities for the overall development of the efficiency of the Russian agribusiness and the activation of the introduction of innovative mechanisms in agriculture."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|datacite____::2fa3de5d0846180a43214310234e5526","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2019-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository"},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"url":["http://dx.doi.org/10.24411/2413-046x-2019-16022"]}],"language":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1591283178985,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fdatacite-api.dnet.d4science.org","datestamp":"","harvestDate":"2020-04-11T02:21:45.926Z","identifier":"10.24411/2413-046x-2019-16022","metadataNamespace":""}},"originalId":["datacite____::2fa3de5d0846180a43214310234e5526"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.24411/2413-046x-2019-16022"}],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Московский экономический журнал"},"relevantdate":[],"resourcetype":{"classid":"Paper","classname":"Paper","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"кластеры"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"урожайность в овощеводстве"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"селекция"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"современные 
технологии"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"«продовольственная безопасность»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"АПК"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"растениеводство"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"животноводство"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"модель «тройной 
спирали»"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"модернизация"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"селекция."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"clusters"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"productivity in vegetable growing"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modern technologies"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“food 
security”"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"agriculture"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"crop production"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"animal husbandry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“triple helix” model"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modernization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"clusters"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"productivity in vegetable 
growing"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modern technologies"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“food security”"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"agriculture"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"crop production"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"animal husbandry"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"“triple helix” 
model"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"modernization"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"value":"selection."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Кластеры как инструмент управления агробизнесом Кабардино-Балкарской Республики"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Clusters as instrument of management of agrobusiness of Kabardino-Balkar Republic"}]} -{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall","dataInfo":null},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},{"key":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","value":"ZENODO","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.8","inferenceprovenance":"decisiontree-dedup-test","provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1591290751234,"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"value":"10.1524/phil.1866.24.14.561","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":null,"author":[{"fullname":"Schubart, 
A.","name":"A.","surname":"Schubart","rank":1,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Theology","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"History","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Art history","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. Nach Pausanias","qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null},{"value":"xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-03-26T10:09:50Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"1866-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"n/a","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2022-11-01","dataInfo":null},"publisher":{"value":"Walter de Gruyter GmbH","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null},{"value":null,"dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"id":"dh-ch","dataInfo":[{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":null,"inferenceprovenance":"bulktagging","provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - 
Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}]}],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus","dataInfo":null},"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":{"value":"2022-11-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"measures":null},{"license":{"value":"cc0","dataInfo":null},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus","dataInfo":null},"url":["https://zenodo.org/record/1634826/files/article.pdf"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":{"value":"2006-11-12","dataInfo":null},"processingchargeamount":{"value":"2578.35","dataInfo":{"invisible":true,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargecurrency":{"value":"EUR","dataInfo":{"invisible":true,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"refereed":null,"measures":null},{"license":null,"accessright":null,"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus","dataInfo":null},"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"measures":null},{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0038","classname":"Other literature 
type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","value":"ZENODO","dataInfo":null},"url":["https://zenodo.org/record/1634826"],"distributionlocation":"","collectedfrom":{"key":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","value":"ZENODO","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":{"value":"1866-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"measures":null}],"eoscifguidelines":null,"journal":{"name":"Philologus","issnPrinted":"0031-7985","issnOnline":"2196-7008","issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":"24","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall","dataInfo":null},{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},{"key":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","value":"ZENODO","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.8","inferenceprovenance":"decisiontree-dedup-test","provenanceaction":{"classid":"sysimport:dedup","classname":"sysimport:dedup","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1591290751234,"id":"50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8","originalId":["10.1524/phil.1866.24.14.561","2601803436","od______2659::f9ba286bcf8429160b58072028052325"],"pid":[{"value":"10.1524/phil.1866.24.14.561","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2019-02-17T01:33:44Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":null,"author":[{"fullname":"Schubart, A.","name":"A.","surname":"Schubart","rank":1,"pid":[],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Theology","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"History","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Art history","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"XIV. Die wörter άγαλμα, είχών, ξόανον, ανδρuις und verwandte, in ihren verschiedenen beziehungen. 
Nach Pausanias","qualifier":{"classid":"alternative title","classname":"alternative title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null},{"value":"xiv die worter άγαλμα eίχών ξόανον ανδρuις und verwandte in ihren verschiedenen beziehungen nach pausanias","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-03-26T10:09:50Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"1866-01-01","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"n/a","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2022-11-01","dataInfo":null},"publisher":{"value":"Walter de Gruyter GmbH","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null},{"value":null,"dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"Other","classname":"Other","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"id":"dh-ch","dataInfo":[{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":null,"inferenceprovenance":"bulktagging","provenanceaction":{"classid":"community:subject","classname":"Bulktagging for Community - Subject","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}]}],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus","dataInfo":null},"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.pdf","http://dx.doi.org/10.1524/phil.1866.24.14.561"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":{"value":"2022-11-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"measures":null},{"license":{"value":"cc0","dataInfo":null},"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus","dataInfo":null},"url":["https://zenodo.org/record/1634826/files/article.pdf"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::8ac8380272269217cb09a928c8caa993","value":"UnpayWall","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":{"value":"2006-11-12","dataInfo":null},"processingchargeamount":{"value":"2578.35","dataInfo":{"invisible":true,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargecurrency":{"value":"EUR","dataInfo":{"invisible":true,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"refereed":null,"measures":null},{"license":null,"accessright":null,"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|issn___print::4a110be99bfeda7a5e85b5e4c7e95d87","value":"Philologus","dataInfo":null},"url":["http://www.degruyter.com/view/j/phil.1866.24.issue-1-4/phil.1866.24.14.561/phil.1866.24.14.561.xml","https://academic.microsoft.com/#/detail/2601803436"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a","value":"Microsoft Academic Graph","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":null,"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"measures":null},{"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0038","classname":"Other literature type","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","value":"ZENODO","dataInfo":null},"url":["https://zenodo.org/record/1634826"],"distributionlocation":"","collectedfrom":{"key":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","value":"ZENODO","dataInfo":null},"pid":null,"alternateIdentifier":null,"dateofacceptance":{"value":"1866-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null,"measures":null}],"eoscifguidelines":null,"journal":{"name":"Philologus","issnPrinted":"0031-7985","issnOnline":"2196-7008","issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":"24","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} +{"context": [{"dataInfo": [{"provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, 
"deletedbyinference": false, "inferred": true, "inferenceprovenance": "iis::document_referencedProjects", "invisible": false, "trust": "0.9"}], "id": "sdsn-gr::projects::1063"}], "dataInfo": {"provenanceaction": {"classid": "sysimport:dedup", "classname": "Inferred by OpenAIRE", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "dedup-result-decisiontree-v3", "invisible": false, "trust": "0.8"}, "resourcetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}, "pid": [{"qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.3390/su14148496"}], "contributor": [], "oaiprovenance": {"originDescription": {"metadataNamespace": "http://www.openarchives.org/OAI/2.0/oai_dc/", "harvestDate": "2022-07-23T08:22:08.955Z", "baseURL": "http%3A%2F%2Foai.mdpi.com%2Foai%2Foai2.php", "datestamp": "2022-07-12", "altered": true, "identifier": "oai:mdpi.com:/2071-1050/14/14/8496/"}}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [{"qualifier": {"classid": "created", "classname": "created", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2022-07-12"}, {"qualifier": {"classid": "published-online", "classname": "published-online", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2022-07-11"}], "collectedfrom": [{"key": "10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311", "value": "Multidisciplinary Digital Publishing Institute"}, {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, {"key": "10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8", "value": "ORCID"}], "id": "50|doi_dedup___::e5b323368a52ae63a3878685496d987c", "subject": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "resilience; wave climate; tourism management; surfing; climatology; decision making; climate service; sustainability; adaptation"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Management, Monitoring, Policy and Law"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Renewable Energy, Sustainability and the Environment"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Geography, Planning and Development"}, {"qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Building and Construction"}], "lastupdatetimestamp": 1671511486850, 
"author": [{"pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "orcid_pending", "classname": "Open Researcher and Contributor ID", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "0000-0002-9816-5641"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.91", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "orcid", "classname": "Open Researcher and Contributor ID", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "0000-0002-9816-5641"}], "fullname": "Anna Boqu\\u00e9 Ciurana", "surname": "Boqu\\u00e9 Ciurana", "name": "Anna", "rank": 1}, {"pid": [], "fullname": "Melisa M\\u00e9nendez", "surname": "M\\u00e9nendez", "name": "Melisa", "rank": 2}, {"pid": [], "fullname": "Mar\\u00eda Su\\u00e1rez Bilbao", "surname": "Su\\u00e1rez Bilbao", "name": "Mar\\u00eda", "rank": 3}, {"pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "orcid_pending", "classname": "Open Researcher and Contributor ID", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "0000-0002-8384-377x"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:entityregistry", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.91", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "orcid", "classname": "Open Researcher and Contributor ID", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "0000-0002-8384-377x"}], "fullname": "Enric Aguilar", "surname": "Aguilar", "name": "Enric", "rank": 4}], "instance": [{"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|doajarticles::d5c4679e64974dc24c29d1a19841ba88", "value": "Sustainability"}, "license": {"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "https://creativecommons.org/licenses/by/4.0/"}, "url": ["https://dx.doi.org/10.3390/su14148496"], "pid": [], "distributionlocation": "", "alternateIdentifier": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.3390/su14148496"}], "dateofacceptance": {"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", 
"classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "2022-07-11"}, "collectedfrom": {"key": "10|openaire____::c2cdfa5866e03cdd07d313cbc8fb8311", "value": "Multidisciplinary Digital Publishing Institute"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "openAccessRoute": "gold", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0038", "classname": "Other literature type", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}, {"refereed": {"classid": "0000", "classname": "UNKNOWN", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|doajarticles::d5c4679e64974dc24c29d1a19841ba88", "value": "Sustainability"}, "license": {"value": "https://creativecommons.org/licenses/by/4.0/"}, "url": ["https://doi.org/10.3390/su14148496"], "measures": [{"id": "influence", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "5.2774247E-9"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "popularity", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "8.507125E-9"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "influence_alt", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "popularity_alt", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0.0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": 
"dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "impulse", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}], "pid": [{"qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.3390/su14148496"}], "dateofacceptance": {"value": "2022-07-11"}, "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "openAccessRoute": "gold", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2022-07-23T08:22:08.955Z", "fulltext": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "http://www.mdpi.com/2071-1050/14/14/8496/pdf"}], "dateoftransformation": "2022-09-30T01:32:34.169Z", "description": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "Surfing is one of the most popular activities in coastal tourism resorts. However, the sport depends strongly on the met-ocean weather conditions, particularly on the surface wind-generated waves that reach the coast. This study provides examples of how users’ needs and user perspectives are considered by climate data specialists to develop needed, highly useful information addressing human and social needs. In this vein, the climate analysis of such data can provide input on the expected length of a surfing season, according to the surfer’s level of expertise. In addition, other water sports, such as SUP Wave and windsurfing, among others, might be indicated when surfing conditions are not optimal. Finally, the safety of surfers and other tourists who venture into the sea is also dependent on those conditions. We collaborated with the surfing community to define a series of indices for quantifying surfing days (SD), surfing days stratified by surfers’ skills (SDS), alternate offers (AOs), and surfers’ and swimmers’ safety (SuS and SwS). These are of general applications but require wind and wave data at a very fine scale as the input. To illustrate the potential of our indices, we applied them to the Somo beach (Cantabria, Spain). 
We downscaled a global wave hindcast dataset covering a 30-year period to a spatial resolution of 100 m to obtain wave-surfing information at Somo’s surf spot. The results confirmed Somo’s status as a year-round surf spot, with SD values of 229.5 days/year and monthly values between 22 days/month and 16 days/month. SDS showed different seasonal peaks according to the surfers’ skills. Beginners’ conditions occurred more often in the summer (18.1 days/month in July), intermediate surfers’ conditions appeared in the transitional seasons (14.1 days/month in April), and advanced and big-wave riders in the winter (15.1 days/month in January and 0.7 days/month, respectively). The AO index identified the SUP wave values of 216 days/year. Wind water sports presented values of 141.6 days/year; conversely, SUP sports were possible on only 7.4 days/year. SuS and SwS identified different seasonal hazard values, decreasing from the winter, autumn, and spring to minimum values in the summer."}], "format": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "application/pdf"}], "journal": {"issnPrinted": "2071-1050", "dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "name": "Sustainability; Volume 14; Issue 14; Pages: 8496", "edition": "", "vol": "", "sp": "", "iss": "", "issnOnline": "", "ep": "", "issnLinking": ""}, "measures": [{"id": "influence", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "4.842839E-9"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "popularity", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "6.6174897E-9"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "influence_alt", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0"}, {"dataInfo": {"provenanceaction": 
{"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "popularity_alt", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0.0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}, {"id": "impulse", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "score", "value": "0"}, {"dataInfo": {"provenanceaction": {"classid": "measure:bip", "classname": "measure:bip", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "class", "value": "C"}]}], "coverage": [], "externalReference": [], "publisher": {"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "Multidisciplinary Digital Publishing Institute"}, "eoscifguidelines": [], "language": {"classid": "eng", "classname": "English", "schemeid": "dnet:languages", "schemename": "dnet:languages"}, "resulttype": {"classid": "publication", "classname": "publication", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["50|multidiscipl::7a0f78728f1dbdebe536842e145bca44", "oai:mdpi.com:/2071-1050/14/14/8496/", "su14148496", "10.3390/su14148496", "50|doiboost____::e5b323368a52ae63a3878685496d987c"], "source": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "Sustainability; Volume 14; Issue 14; Pages: 8496"}, {"value": "Crossref"}], "dateofacceptance": {"value": "2022-07-11"}, "title": [{"dataInfo": {"provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Exploring the Climatic Potential of Somo’s Surf Spot for Tourist Destination Management"}]} \ No newline at end of file From 
2d2b62386f410b5f625970ba9157db83354c48b1 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Wed, 28 Dec 2022 21:50:48 +0100
Subject: [PATCH 09/26] removed indicators from Instance

---
 .../eu/dnetlib/dhp/oa/model/Instance.java | 20 +++++++--------
 .../dhp/oa/graph/dump/ResultMapper.java | 6 ++---
 .../SparkSelectValidRelationContext.java | 2 +-
 .../graph/dump/wf/main/oozie_app/workflow.xml | 25 +++++++++++++------
 .../dhp/oa/graph/dump/DumpJobTest.java | 8 +++---
 5 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java
index bff44ad..c35f93c 100644
--- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java
+++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Instance.java
@@ -26,8 +26,8 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
 */
 public class Instance implements Serializable {
- @JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones")
- private Indicator indicators;
+// @JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones")
+// private Indicator indicators;
 private List pid;
@@ -141,12 +141,12 @@ public class Instance implements Serializable {
 this.alternateIdentifier = alternateIdentifier;
 }
- @JsonInclude(JsonInclude.Include.NON_NULL)
- public Indicator getIndicators() {
- return indicators;
- }
-
- public void setIndicators(Indicator indicators) {
- this.indicators = indicators;
- }
+// @JsonInclude(JsonInclude.Include.NON_NULL)
+// public Indicator getIndicators() {
+// return indicators;
+// }
+//
+// public void setIndicators(Indicator indicators) {
+// this.indicators = indicators;
+// }
 }

diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java
index f26a4b4..0035c1c 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java
@@ -548,7 +548,7 @@ public class ResultMapper implements Serializable {
 Constants.COAR_ACCESS_RIGHT_SCHEMA));
 Optional> mes = Optional.ofNullable(i.getMeasures());
- if (mes.isPresent()) {
+// if (mes.isPresent()) {
 // List indicators = new ArrayList<>();
 // mes
 // .get()
 // .forEach(
 // m -> indicators
 // .add(
 // Measure
 // .newInstance(
 // m.getId(),
 // m
 // .getUnit()
 // .stream()
 // .map(u -> Measure.newInstance(u.getKey(), u.getValue()))
 // .collect(Collectors.toList()))));
- instance.setIndicators(getIndicator(mes.get()));
- }
+// instance.setIndicators(getIndicator(mes.get()));
+// }
 if (opAr.get().getOpenAccessRoute() != null) {
 switch (opAr.get().getOpenAccessRoute()) {

diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelationContext.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelationContext.java
index 093fe32..073782d 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelationContext.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkSelectValidRelationContext.java
@@ -112,7 +112,7 @@ public class SparkSelectValidRelationContext implements Serializable {
 Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
 Dataset allowedContext = Utils
- .readPath(spark, inputPath + "/community_infrastructure", ResearchCommunity.class);
+ .readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class);
 Dataset> relJoinSource = relationSource
 .joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))

diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
index b39c07c..ff4d0fe 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
@@ -65,7 +65,6 @@
 organizationCommunityMap
 the organization community map
-
 hiveDbName
 the target hive database name
@@ -134,8 +133,8 @@
-
-
+
+
 Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
@@ -215,7 +214,7 @@
-
+
@@ -309,12 +308,23 @@
+
+
+
+
+
+
+
+
+
+
+
 eu.dnetlib.dhp.oa.graph.dump.MakeTar
- --hdfsPath${outputPath}
+ --hdfsPath${outputPath}/tar
 --nameNode${nameNode}
- --sourcePath${workingDir}/tar
+ --sourcePath${outputPath}/dump
@@ -330,7 +340,8 @@
 eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS
- --hdfsPath${outputPath}
+
+ --hdfsPath${outputPath}/tar
 --nameNode${nameNode}
 --accessToken${accessToken}
 --connectionUrl${connectionUrl}

diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
index 9e2b837..e1d96e6 100644
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
+++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
@@ -438,10 +438,10 @@ public class DumpJobTest {
 Assertions.assertEquals("2017-01-01", instance.getPublicationdate());
 Assertions.assertEquals(null, instance.getArticleprocessingcharge());
 Assertions.assertEquals("peerReviewed", instance.getRefereed());
- Indicator indicator = instance.getIndicators();
-
- Assertions.assertFalse(Optional.ofNullable(indicator.getUsageCounts()).isPresent());
- Assertions.assertTrue(Optional.ofNullable(indicator.getImpactMeasures()).isPresent());
+// Indicator indicator = instance.getIndicators();
+//
+// Assertions.assertFalse(Optional.ofNullable(indicator.getUsageCounts()).isPresent());
+// Assertions.assertTrue(Optional.ofNullable(indicator.getImpactMeasures()).isPresent());
 }

 @Test

From 8ec02787f2fdf01032cecd1dcb52e69935e08cc0 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Wed, 28 Dec 2022 23:00:37 +0100
Subject: [PATCH 10/26] minor changes

---
 .../graph/dump/wf/main/oozie_app/workflow.xml | 58 ++++++++++---------
 .../community/oozie_app/workflow.xml | 20 +++----
 .../funder/oozie_app/workflow.xml | 12 ++--
 3 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
index ff4d0fe..46a4d32 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
@@ -3,10 +3,12 @@
 singleDeposition
+ false
 Indicates if it is a single community deposition
 communityId
+ none
 the id of the community to be dumped if a dump for a single community should be done
@@ -35,6 +37,7 @@
 resultAggregation
+ false
 true if all the result type have to be dumped under result. false otherwise
@@ -47,22 +50,26 @@
 metadata
+ ""
 the metadata associated to the deposition
 depositionType
 the type of deposition we want to perform.
 "new" for a brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)
-
-
-
-
-
-
-
-
+
+ conceptRecordId
+ none
+ for a new version, the id of the record of the old deposition
+
+
+ depositionId
+ none
+ the depositionId of the open deposition to which the content has to be added
+
 organizationCommunityMap
+ none
 the organization community map
@@ -133,8 +140,8 @@
-
-
+
+
 Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
@@ -255,7 +262,7 @@
-
+
 ${wf:appPath()}/dump_community
@@ -308,28 +315,28 @@
-
-
-
-
-
-
-
-
-
-
-
 eu.dnetlib.dhp.oa.graph.dump.MakeTar
- --hdfsPath${outputPath}/tar
+ --hdfsPath${outputPath}
 --nameNode${nameNode}
- --sourcePath${outputPath}/dump
+ --sourcePath${workingDir}/tar
+
+
+
+
+
+
+
+
+
+
+
 ${wf:conf('upload') eq true}
@@ -340,8 +347,7 @@
 eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS
-
- --hdfsPath${outputPath}/tar
+ --hdfsPath${outputPath}
 --nameNode${nameNode}
 --accessToken${accessToken}
 --connectionUrl${connectionUrl}

diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml
index f1c7a8f..a39980e 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app/workflow.xml
@@ -96,7 +96,7 @@
 cluster
 Dump table publication for community/funder related products
 eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
- dhp-graph-dump-${projectVersion}.jar
+ dump-${projectVersion}.jar
 --executor-memory=${sparkExecutorMemory}
 --executor-cores=${sparkExecutorCores}
@@ -123,7 +123,7 @@
 cluster
 Dump table dataset for community/funder related products
 eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
- dhp-graph-dump-${projectVersion}.jar
+ dump-${projectVersion}.jar
 --executor-memory=${sparkExecutorMemory}
 --executor-cores=${sparkExecutorCores}
@@ -149,7 +149,7 @@
 cluster
 Dump table ORP for community related products
 eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
- dhp-graph-dump-${projectVersion}.jar
+ dump-${projectVersion}.jar
 --executor-memory=${sparkExecutorMemory}
 --executor-cores=${sparkExecutorCores}
@@ -175,7 +175,7 @@
 cluster
 Dump table software for community related products
 eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
- dhp-graph-dump-${projectVersion}.jar
+ dump-${projectVersion}.jar
 --executor-memory=${sparkExecutorMemory}
 --executor-cores=${sparkExecutorCores}
@@ -203,7 +203,7 @@
 cluster
 Prepare association result subset of project info
 eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject
- dhp-graph-dump-${projectVersion}.jar
+ dump-${projectVersion}.jar
 --executor-memory=${sparkExecutorMemory}
 --executor-cores=${sparkExecutorCores}
@@ -234,7 +234,7 @@
 cluster
 Extend dumped publications with information about project
 eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo
- dhp-graph-dump-${projectVersion}.jar
+ dump-${projectVersion}.jar
 --executor-memory=${sparkExecutorMemory}
 --executor-cores=${sparkExecutorCores}
@@ -259,7 +259,7 @@
 cluster
 Extend dumped dataset with information about project
eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -284,7 +284,7 @@ cluster Extend dumped ORP with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -309,7 +309,7 @@ cluster Extend dumped software with information about project eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -336,7 +336,7 @@ cluster Split dumped result for community eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml index 1cdece8..75124cf 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app/workflow.xml @@ -89,7 +89,7 @@ cluster Prepare association result subset of project info eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -121,7 +121,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -148,7 +148,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -175,7 +175,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -202,7 +202,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -231,7 +231,7 @@ cluster Dump funder results eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults - dhp-graph-dump-${projectVersion}.jar + dump-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} From ad1ba563cdc09257e0b1ff9c4a422decca5f2082 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 29 Dec 2022 14:46:51 +0100 Subject: [PATCH 11/26] update of the schema of the dump --- .../java/eu/dnetlib/dhp/oa/model/Score.java | 7 +- .../jsonschemas/datasource_schema.json | 110 ++-- .../openaire-community-dump-schema.json | 563 ++++++++++++++++++ .../jsonschemas/organization_schema.json | 26 +- 
.../resources/jsonschemas/result_schema.json | 389 ++++++------ .../src/test/java/GenerateJsonSchema.java | 6 +- .../graph/dump/community/CommunitySplit.java | 9 +- .../community/SparkUpdateProjectInfo.java | 5 +- .../oa/graph/dump/SplitForCommunityTest.java | 23 + .../dhp/oa/graph/dump/communityResult/dataset | 0 .../dhp/oa/graph/dump/communityResult/orp | 0 .../oa/graph/dump/communityResult/publication | 1 + .../oa/graph/dump/communityResult/software | 0 13 files changed, 863 insertions(+), 276 deletions(-) create mode 100644 dump-schema/src/main/resources/jsonschemas/openaire-community-dump-schema.json create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/dataset create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/orp create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/publication create mode 100644 dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/software diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java index df68a4d..4b48346 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Score.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.model; import java.io.Serializable; import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonSetter; /** @@ -12,6 +13,8 @@ import com.fasterxml.jackson.annotation.JsonSetter; */ public class Score implements Serializable { private String score; + + @JsonProperty("class") private String clazz; public String getScore() { @@ -28,7 +31,7 @@ public class Score implements Serializable { } @JsonSetter("class") - public void setClazz(String classe) { - this.clazz = classe; + public void setClazz(String clazz) { + this.clazz = clazz; } } diff --git a/dump-schema/src/main/resources/jsonschemas/datasource_schema.json b/dump-schema/src/main/resources/jsonschemas/datasource_schema.json index c416f23..9ef7d38 100644 --- a/dump-schema/src/main/resources/jsonschemas/datasource_schema.json +++ b/dump-schema/src/main/resources/jsonschemas/datasource_schema.json @@ -1,20 +1,6 @@ { - "$schema":"http://json-schema.org/draft-07/schema#", - "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - } - }, - "type":"object", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", "properties": { "accessrights": { "type": "string", @@ -26,13 +12,14 @@ }, "citationguidelineurl": { "type": "string", - "description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org." + "description": "The URL of the data source providing information on how to cite its items. As defined by re3data.org." }, "contenttypes": { "description": "Types of content in the data source, as defined by OpenDOAR", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Types of content in the data source, as defined by OpenDOAR" } }, "databaseaccessrestriction": { @@ -40,14 +27,18 @@ "description": "Access restrinctions to the data source, as defined by re3data.org. 
One of {feeRequired, registration, other}" }, "datasourcetype": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The scheme used to express the value (i.e. pubsrepository::journal)" }, - { - "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" + "value": { + "type": "string", + "description": "The value expressed in the scheme (Journal)" } - ] + }, + "description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies" }, "datauploadrestriction": { "type": "string", @@ -57,9 +48,7 @@ "type": "string", "description": "The date of last validation against the OpenAIRE guidelines for the datasource records" }, - "description": { - "type": "string" - }, + "description": {"type": "string"}, "englishname": { "type": "string", "description": "The English name of the datasource" @@ -71,14 +60,11 @@ "journal": { "type": "object", "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, + "conferencedate": {"type": "string"}, + "conferenceplace": {"type": "string"}, "edition": { - "type": "string" + "type": "string", + "description": "Edition of the journal or conference proceeding" }, "ep": { "type": "string", @@ -86,19 +72,14 @@ }, "iss": { "type": "string", - "description": "Issue number" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" + "description": "Journal issue number" }, + "issnLinking": {"type": "string"}, + "issnOnline": {"type": "string"}, + "issnPrinted": {"type": "string"}, "name": { - "type": "string" + "type": "string", + "description": "Name of the journal or conference" }, "sp": { "type": "string", @@ -115,15 +96,14 @@ "description": "The languages present in the data source's content, as defined by OpenDOAR.", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "The languages present in the data source's content, as defined by OpenDOAR." } }, - "logourl": { - "type": "string" - }, + "logourl": {"type": "string"}, "missionstatementurl": { "type": "string", - "description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" + "description": "The URL of a mission statement describing the designated community of the data source. As defined by re3data.org" }, "officialname": { "type": "string", @@ -134,21 +114,29 @@ "description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu." 
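A side note on the Score change above (a hedged sketch, not part of the patch): the new @JsonProperty("class") mapping is what lets the reserved word "class" appear as a JSON field name even though the Java field must be called clazz. Assuming only Jackson and the Score bean shown above, the round trip looks like this; the demo class name is illustrative:

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.model.Score;

public class ScoreSerializationSketch {
	public static void main(String[] args) throws Exception {
		Score score = new Score();
		score.setScore("3");
		score.setClazz("C");
		// With @JsonProperty("class") on the field (and the existing
		// @JsonSetter("class") on the setter) Jackson reads and writes the
		// property as "class", e.g. {"score":"3","class":"C"}, matching the
		// impactMeasures entries in the json schemas and test resources.
		String json = new ObjectMapper().writeValueAsString(score);
		System.out.println(json);
		Score back = new ObjectMapper().readValue(json, Score.class);
		System.out.println(back.getClazz()); // prints C
	}
}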
}, "originalId": { - "description": "Original identifiers for the datasource" + "description": "Original identifiers for the datasource", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Original identifiers for the datasource" } }, "pid": { "description": "Persistent identifiers of the datasource", "type": "array", "items": { - "allOf": [ - { - "$ref": "#/definitions/ControlledField" + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The scheme used to express the value " + }, + "value": { + "type": "string", + "description": "The value expressed in the scheme " } - ] + }, + "description": "Persistent identifiers of the datasource" } }, "pidsystems": { @@ -159,7 +147,8 @@ "description": "Policies of the data source, as defined in OpenDOAR.", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Policies of the data source, as defined in OpenDOAR." } }, "releaseenddate": { @@ -174,7 +163,8 @@ "description": "List of subjects associated to the datasource", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "List of subjects associated to the datasource" } }, "uploadrights": { @@ -185,8 +175,6 @@ "type": "boolean", "description": "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise." }, - "websiteurl": { - "type": "string" - } + "websiteurl": {"type": "string"} } } \ No newline at end of file diff --git a/dump-schema/src/main/resources/jsonschemas/openaire-community-dump-schema.json b/dump-schema/src/main/resources/jsonschemas/openaire-community-dump-schema.json new file mode 100644 index 0000000..2ca49f8 --- /dev/null +++ b/dump-schema/src/main/resources/jsonschemas/openaire-community-dump-schema.json @@ -0,0 +1,563 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "CfHbKeyValue": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "the OpenAIRE identifier of the data source" + }, + "value": { + "type": "string", + "description": "the name of the data source" + } + } + }, + "Provenance": { + "type": "object", + "properties": { + "provenance": {"type": "string"}, + "trust": {"type": "string"} + } + }, + "ResultPid": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories" + }, + "value": { + "type": "string", + "description": "The value expressed in the scheme (i.e. 10.1000/182)" + } + } + }, + "Score": { + "type": "object", + "properties": { + "clazz": {"type": "string"}, + "score": {"type": "string"} + } + } + }, + "type": "object", + "properties": { + "author": { + "type": "array", + "items": { + "type": "object", + "properties": { + "fullname": {"type": "string"}, + "name": {"type": "string"}, + "pid": { + "type": "object", + "properties": { + "id": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'" + }, + "value": { + "type": "string", + "description": "The author's pid value in that scheme (i.e. 
0000-1111-2222-3333)" + } + } + }, + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "The reason why the pid was associated to the author"} + ] + } + }, + "description": "The author's persistent identifiers" + }, + "rank": {"type": "integer"}, + "surname": {"type": "string"} + } + } + }, + "bestaccessright": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + }, + "description": "The openest of the access rights of this result." + }, + "codeRepositoryUrl": { + "type": "string", + "description": "Only for results with type 'software': the URL to the repository with the source code" + }, + "collectedfrom": { + "description": "Information about the sources from which the record has been collected", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/CfHbKeyValue"}, + {"description": "Information about the sources from which the record has been collected"} + ] + } + }, + "contactgroup": { + "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", + "type": "array", + "items": { + "type": "string", + "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource" + } + }, + "contactperson": { + "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", + "type": "array", + "items": { + "type": "string", + "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource" + } + }, + "container": { + "type": "object", + "properties": { + "conferencedate": {"type": "string"}, + "conferenceplace": {"type": "string"}, + "edition": { + "type": "string", + "description": "Edition of the journal or conference proceeding" + }, + "ep": { + "type": "string", + "description": "End page" + }, + "iss": { + "type": "string", + "description": "Journal issue number" + }, + "issnLinking": {"type": "string"}, + "issnOnline": {"type": "string"}, + "issnPrinted": {"type": "string"}, + "name": { + "type": "string", + "description": "Name of the journal or conference" + }, + "sp": { + "type": "string", + "description": "Start page" + }, + "vol": { + "type": "string", + "description": "Volume" + } + }, + "description": "Container has information about the conference or journal where the result has been presented or published" + }, + "context": { + "description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. 
Please see https://connect.openaire.eu", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Code identifying the RI/RC" + }, + "label": { + "type": "string", + "description": "Label of the RI/RC" + }, + "provenance": { + "description": "Why this result is associated to the RI/RC.", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Why this result is associated to the RI/RC."} + ] + } + } + }, + "description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu" + } + }, + "contributor": { + "description": "Contributors for the result", + "type": "array", + "items": { + "type": "string", + "description": "Contributors for the result" + } + }, + "country": { + "description": "The list of countries associated to this result", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "ISO 3166-1 alpha-2 country code (i.e. IT)" + }, + "label": { + "type": "string", + "description": "The label for that code (i.e. Italy)" + }, + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Why this result is associated to the country."} + ] + } + }, + "description": "The list of countries associated to this result" + } + }, + "coverage": { + "type": "array", + "items": {"type": "string"} + }, + "dateofcollection": { + "type": "string", + "description": "When OpenAIRE collected the record the last time" + }, + "description": { + "type": "array", + "items": {"type": "string"} + }, + "documentationUrl": { + "description": "Only for results with type 'software': URL to the software documentation", + "type": "array", + "items": { + "type": "string", + "description": "Only for results with type 'software': URL to the software documentation" + } + }, + "embargoenddate": { + "type": "string", + "description": "Date when the embargo ends and this result turns Open Access" + }, + "format": { + "type": "array", + "items": {"type": "string"} + }, + "geolocation": { + "description": "Geolocation information", + "type": "array", + "items": { + "type": "object", + "properties": { + "box": {"type": "string"}, + "place": {"type": "string"}, + "point": {"type": "string"} + }, + "description": "Geolocation information" + } + }, + "id": { + "type": "string", + "description": "The OpenAIRE identifiers for this result" + }, + "indicators": { + "type": "object", + "properties": { + "impactMeasures": { + "type": "object", + "properties": { + "impulse": {"$ref": "#/definitions/Score"}, + "influence": {"$ref": "#/definitions/Score"}, + "influence_alt": {"$ref": "#/definitions/Score"}, + "popularity": {"$ref": "#/definitions/Score"}, + "popularity_alt": {"$ref": "#/definitions/Score"} + }, + "description": "The impact measures (i.e. popularity)" + }, + "usageCounts": { + "type": "object", + "properties": { + "downloads": {"type": "string"}, + "views": {"type": "string"} + }, + "description": "The usage counts (i.e. downloads)" + } + }, + "description": "Indicators computed for this result, for example UsageCount ones" + }, + "instance": { + "description": "Each instance is one specific materialisation or version of the result. 
For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", + "type": "array", + "items": { + "type": "object", + "properties": { + "accessright": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, + "openAccessRoute": { + "type": "string", + "enum": [ + "gold", + "green", + "hybrid", + "bronze" + ] + }, + "scheme": { + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } + }, + "description": "The accessRights for this materialization of the result" + }, + "alternateIdentifier": { + "description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs", + "type": "array", + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi" + }, + "value": { + "type": "string", + "description": "The value expressed in the scheme" + } + }, + "description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs" + } + }, + "articleprocessingcharge": { + "type": "object", + "properties": { + "amount": {"type": "string"}, + "currency": {"type": "string"} + }, + "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative." + }, + "collectedfrom": { + "allOf": [ + {"$ref": "#/definitions/CfHbKeyValue"}, + {"description": "Information about the source from which the record has been collected"} + ] + }, + "hostedby": { + "allOf": [ + {"$ref": "#/definitions/CfHbKeyValue"}, + {"description": "Information about the source from which the instance can be viewed or downloaded."} + ] + }, + "license": {"type": "string"}, + "pid": { + "type": "array", + "items": {"$ref": "#/definitions/ResultPid"} + }, + "publicationdate": { + "type": "string", + "description": "Date of the research product" + }, + "refereed": { + "type": "string", + "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)" + }, + "type": { + "type": "string", + "description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" + }, + "url": { + "description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", + "type": "array", + "items": { + "type": "string", + "description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. " + } + } + }, + "description": "Each instance is one specific materialisation or version of the result. 
For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version" + } + }, + "language": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "alpha-3/ISO 639-2 code of the language" + }, + "label": { + "type": "string", + "description": "Language label in English" + } + } + }, + "lastupdatetimestamp": { + "type": "integer", + "description": "Timestamp of last update of the record in OpenAIRE" + }, + "maintitle": { + "type": "string", + "description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." + }, + "originalId": { + "description": "Identifiers of the record at the original sources", + "type": "array", + "items": { + "type": "string", + "description": "Identifiers of the record at the original sources" + } + }, + "pid": { + "description": "Persistent identifiers of the result", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/ResultPid"}, + {"description": "Persistent identifiers of the result"} + ] + } + }, + "programmingLanguage": { + "type": "string", + "description": "Only for results with type 'software': the programming language" + }, + "projects": { + "description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results", + "type": "array", + "items": { + "type": "object", + "properties": { + "acronym": { + "type": "string", + "description": "The acronym of the project" + }, + "code": { + "type": "string", + "description": "The grant agreement number" + }, + "funder": { + "type": "object", + "properties": { + "fundingStream": { + "type": "string", + "description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)" + }, + "jurisdiction": { + "type": "string", + "description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" + }, + "name": { + "type": "string", + "description": "The name of the funder (European Commission)" + }, + "shortName": { + "type": "string", + "description": "The short name of the funder (EC)" + } + }, + "description": "Information about the funder funding the project" + }, + "id": { + "type": "string", + "description": "The OpenAIRE id for the project" + }, + "provenance": {"$ref": "#/definitions/Provenance"}, + "title": {"type": "string"}, + "validated": { + "type": "object", + "properties": { + "validatedByFunder": {"type": "boolean"}, + "validationDate": {"type": "string"} + } + } + }, + "description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results" + } + }, + "publicationdate": { + "type": "string", + "description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 
2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date." + }, + "publisher": { + "type": "string", + "description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." + }, + "size": { + "type": "string", + "description": "Only for results with type 'dataset': the declared size of the dataset" + }, + "source": { + "description": "See definition of Dublin Core field dc:source", + "type": "array", + "items": { + "type": "string", + "description": "See definition of Dublin Core field dc:source" + } + }, + "subjects": { + "description": "Keywords associated to the result", + "type": "array", + "items": { + "type": "object", + "properties": { + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Why this subject is associated to the result"} + ] + }, + "subject": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)." + }, + "value": { + "type": "string", + "description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)." + } + } + } + }, + "description": "Keywords associated to the result" + } + }, + "subtitle": { + "type": "string", + "description": "Explanatory or alternative name by which a scientific result is known." + }, + "tool": { + "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", + "type": "array", + "items": { + "type": "string", + "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product" + } + }, + "type": { + "type": "string", + "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" + }, + "version": { + "type": "string", + "description": "Version of the result" + } + } +} \ No newline at end of file diff --git a/dump-schema/src/main/resources/jsonschemas/organization_schema.json b/dump-schema/src/main/resources/jsonschemas/organization_schema.json index 16afa38..6b2562b 100644 --- a/dump-schema/src/main/resources/jsonschemas/organization_schema.json +++ b/dump-schema/src/main/resources/jsonschemas/organization_schema.json @@ -6,7 +6,8 @@ "description": "Alternative names that identify the organisation", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Alternative names that identify the organisation" } }, "country": { @@ -14,25 +15,21 @@ "properties": { "code": { "type": "string", - "description": "The organisation country code" + "description": "ISO 3166-1 alpha-2 country code (i.e. IT)" }, "label": { "type": "string", - "description": "The organisation country label" + "description": "The label for that code (i.e. 
Italy)" } }, - "description": "The country of the organisation" + "description": "The organisation country" }, "id": { "type": "string", "description": "The OpenAIRE id for the organisation" }, - "legalname": { - "type": "string" - }, - "legalshortname": { - "type": "string" - }, + "legalname": {"type": "string"}, + "legalshortname": {"type": "string"}, "pid": { "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370", "type": "array", @@ -45,13 +42,12 @@ }, "value": { "type": "string", - "description": "the value in the schema (i.e. 0000000090326370)" + "description": "The value in the schema (i.e. 0000000090326370)" } - } + }, + "description": "Persistent identifiers for the organisation i.e. isni 0000000090326370" } }, - "websiteurl": { - "type": "string" - } + "websiteurl": {"type": "string"} } } \ No newline at end of file diff --git a/dump-schema/src/main/resources/jsonschemas/result_schema.json b/dump-schema/src/main/resources/jsonschemas/result_schema.json index 1a2e065..d9cc432 100644 --- a/dump-schema/src/main/resources/jsonschemas/result_schema.json +++ b/dump-schema/src/main/resources/jsonschemas/result_schema.json @@ -1,30 +1,32 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { - "ControlledField": { - "type": "object", - "properties": { - "scheme": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)" - }, "Provenance": { "type": "object", "properties": { - "provenance": { + "provenance": {"type": "string"}, + "trust": {"type": "string"} + } + }, + "ResultPid": { + "type": "object", + "properties": { + "scheme": { "type": "string", - "description": "The process that produced/provided the information" + "description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories" }, - "trust": { - "type": "string" + "value": { + "type": "string", + "description": "The value expressed in the scheme (i.e. 10.1000/182)" } - }, - "description": "Indicates the process that produced (or provided) the information, and the trust associated to the information" + } + }, + "Score": { + "type": "object", + "properties": { + "class": {"type": "string"}, + "score": {"type": "string"} + } } }, "type": "object", @@ -34,55 +36,56 @@ "items": { "type": "object", "properties": { - "fullname": { - "type": "string" - }, - "name": { - "type": "string" - }, + "fullname": {"type": "string"}, + "name": {"type": "string"}, "pid": { "type": "object", "properties": { "id": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"} - ] + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'" + }, + "value": { + "type": "string", + "description": "The author's pid value in that scheme (i.e. 
0000-1111-2222-3333)" + } + } }, "provenance": { "allOf": [ {"$ref": "#/definitions/Provenance"}, - {"description": "Provenance of author's pid"} + {"description": "The reason why the pid was associated to the author"} ] } - } + }, + "description": "The author's persistent identifiers" }, - "rank": { - "type": "integer" - }, - "surname": { - "type": "string" - } + "rank": {"type": "integer"}, + "surname": {"type": "string"} } } }, - "bestaccessright":{ - "type":"object", - "properties":{ - "code": { - "type": "string", - "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label": { - "type": "string", - "description": "Label for the access mode" - }, + "bestaccessright": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" + }, + "label": { + "type": "string", + "description": "Label for the access mode" + }, "scheme": { - "type": "string", - "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - } + "type": "string", + "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" + } }, + "description": "The openest of the access rights of this result." + }, "codeRepositoryUrl": { "type": "string", "description": "Only for results with type 'software': the URL to the repository with the source code" @@ -91,25 +94,23 @@ "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource" } }, "contactperson": { "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource" } }, "container": { "type": "object", "properties": { - "conferencedate": { - "type": "string" - }, - "conferenceplace": { - "type": "string" - }, + "conferencedate": {"type": "string"}, + "conferenceplace": {"type": "string"}, "edition": { "type": "string", "description": "Edition of the journal or conference proceeding" @@ -120,32 +121,28 @@ }, "iss": { "type": "string", - "description": "Journal issue" - }, - "issnLinking": { - "type": "string" - }, - "issnOnline": { - "type": "string" - }, - "issnPrinted": { - "type": "string" + "description": "Journal issue number" }, + "issnLinking": {"type": "string"}, + "issnOnline": {"type": "string"}, + "issnPrinted": {"type": "string"}, "name": { "type": "string", "description": "Name of the journal or conference" }, "sp": { "type": "string", - "description": "start page" + "description": "Start page" }, "vol": { - "type": "string" + "type": "string", + "description": "Volume" } }, "description": "Container has information about the conference or journal where the result has been presented or published" }, "contributor": { + "description": 
"Contributors for the result", "type": "array", "items": { "type": "string", @@ -153,16 +150,18 @@ } }, "country": { + "description": "The list of countries associated to this result", "type": "array", "items": { "type": "object", "properties": { "code": { "type": "string", - "description": "ISO 3166-1 alpha-2 country code" + "description": "ISO 3166-1 alpha-2 country code (i.e. IT)" }, "label": { - "type": "string" + "type": "string", + "description": "The label for that code (i.e. Italy)" }, "provenance": { "allOf": [ @@ -170,14 +169,13 @@ {"description": "Why this result is associated to the country."} ] } - } + }, + "description": "The list of countries associated to this result" } }, "coverage": { "type": "array", - "items": { - "type": "string" - } + "items": {"type": "string"} }, "dateofcollection": { "type": "string", @@ -185,15 +183,14 @@ }, "description": { "type": "array", - "items": { - "type": "string" - } + "items": {"type": "string"} }, "documentationUrl": { "description": "Only for results with type 'software': URL to the software documentation", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Only for results with type 'software': URL to the software documentation" } }, "embargoenddate": { @@ -202,9 +199,7 @@ }, "format": { "type": "array", - "items": { - "type": "string" - } + "items": {"type": "string"} }, "geolocation": { "description": "Geolocation information", @@ -212,31 +207,51 @@ "items": { "type": "object", "properties": { - "box": { - "type": "string" - }, - "place": { - "type": "string" - }, - "point": { - "type": "string" - } - } + "box": {"type": "string"}, + "place": {"type": "string"}, + "point": {"type": "string"} + }, + "description": "Geolocation information" } }, "id": { "type": "string", - "description": "OpenAIRE Identifier" + "description": "The OpenAIRE identifiers for this result" }, - "instance":{ - "description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", - "type":"array", - "items":{ - "type":"object", - "properties":{ - "accessright":{ - "type":"object", - "properties":{ + "indicators": { + "type": "object", + "properties": { + "impactMeasures": { + "type": "object", + "properties": { + "impulse": {"$ref": "#/definitions/Score"}, + "influence": {"$ref": "#/definitions/Score"}, + "influence_alt": {"$ref": "#/definitions/Score"}, + "popularity": {"$ref": "#/definitions/Score"}, + "popularity_alt": {"$ref": "#/definitions/Score"} + }, + "description": "The impact measures (i.e. popularity)" + }, + "usageCounts": { + "type": "object", + "properties": { + "downloads": {"type": "string"}, + "views": {"type": "string"} + }, + "description": "The usage counts (i.e. downloads)" + } + }, + "description": "Indicators computed for this result, for example UsageCount ones" + }, + "instance": { + "description": "Each instance is one specific materialisation or version of the result. 
For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", + "type": "array", + "items": { + "type": "object", + "properties": { + "accessright": { + "type": "object", + "properties": { "code": { "type": "string", "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" @@ -245,102 +260,75 @@ "type": "string", "description": "Label for the access mode" }, - "openAccessRoute":{ - "type":"string", - "enum":[ + "openAccessRoute": { + "type": "string", + "enum": [ "gold", "green", "hybrid", "bronze" - ], - "description":"The type of OpenAccess applied to the result" + ] }, "scheme": { "type": "string", "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" } - } + }, + "description": "The accessRights for this materialization of the result" }, - "alternateIdentifier":{ - "type":"array", - "items":{ - "allOf":[ - { - "$ref":"#/definitions/ControlledField" + "alternateIdentifier": { + "description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs", + "type": "array", + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi" }, - { - "description":"All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs" - } - ] - } - }, - "articleprocessingcharge":{ - "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.", - "type":"object", - "properties":{ - "amount":{ - "type":"string" - }, - "currency":{ - "type":"string" - } - } - }, - "license":{ - "type":"string" - }, - "measures":{ - - "type":"array", - "items":{ - "type":"object", - "properties":{ - "key":{ - "type":"string", - "description":"The measure" - }, - "value":{ - "type":"string", - "description":"The value for the measure" + "value": { + "type": "string", + "description": "The value expressed in the scheme" } }, - "description":"Measures computed for this instance, for example Bip!Finder ones" + "description": "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs" } }, - "pid":{ - "description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)", - "type":"array", - "items":{ - "allOf":[ - { - "$ref":"#/definitions/ControlledField" - }, - { - "description":"The persistent identifier associated to the result" - } - ] - } - }, - "publicationdate":{ - "type":"string", + "articleprocessingcharge": { + "type": "object", + "properties": { + "amount": {"type": "string"}, + "currency": {"type": "string"} + }, + "description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative." 
+ }, + "license": {"type": "string"}, + "pid": { + "type": "array", + "items": {"$ref": "#/definitions/ResultPid"} + }, + "publicationdate": { + "type": "string", "description": "Date of the research product" }, - "refereed":{ - "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)", - "type":"string" + "refereed": { + "type": "string", + "description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)" }, - "type":{ - "type":"string", - "description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" + "type": { + "type": "string", + "description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" }, - "url":{ - "description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", - "type":"array", - "items":{ - "type":"string" + "url": { + "description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", + "type": "array", + "items": { + "type": "string", + "description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. " } } - } + }, + "description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version" } }, "language": { @@ -362,17 +350,14 @@ }, "maintitle": { "type": "string", - "descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." - }, - "subtitle": { - "type": "string", - "descriptio": "Explanatory or alternative name by which a scientific result is known." + "description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." }, "originalId": { "description": "Identifiers of the record at the original sources", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Identifiers of the record at the original sources" } }, "pid": { @@ -380,8 +365,8 @@ "type": "array", "items": { "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "} + {"$ref": "#/definitions/ResultPid"}, + {"description": "Persistent identifiers of the result"} ] } }, @@ -391,7 +376,7 @@ }, "publicationdate": { "type": "string", - "description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. 
If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." + "description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date." }, "publisher": { "type": "string", @@ -405,7 +390,8 @@ "description": "See definition of Dublin Core field dc:source", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "See definition of Dublin Core field dc:source" } }, "subjects": { @@ -421,19 +407,32 @@ ] }, "subject": { - "allOf": [ - {"$ref": "#/definitions/ControlledField"}, - {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."} - ] + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)." + }, + "value": { + "type": "string", + "description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)." + } + } } - } + }, + "description": "Keywords associated to the result" } }, + "subtitle": { + "type": "string", + "description": "Explanatory or alternative name by which a scientific result is known." 
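The ResultPid definition referenced by the pid arrays above replaces the shared ControlledField object of the previous schema version with a dedicated scheme/value pair. As a hedged illustration, assuming the eu.dnetlib.dhp.oa.model.ResultPid bean behind that definition follows the same accessor conventions as Score, a pid taken from the publication test resource added later in this series parses as follows (the demo class name is illustrative):

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.model.ResultPid;

public class ResultPidParseSketch {
	public static void main(String[] args) throws Exception {
		// scheme/value pair copied from the dumped publication test resource
		String json = "{\"scheme\": \"doi\", \"value\": \"10.1023/a:1019971625315\"}";
		ResultPid pid = new ObjectMapper().readValue(json, ResultPid.class);
		System.out.println(pid.getScheme() + " -> " + pid.getValue());
	}
}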
+ }, "tool": { "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", "type": "array", "items": { - "type": "string" + "type": "string", + "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product" } }, "type": { diff --git a/dump-schema/src/test/java/GenerateJsonSchema.java b/dump-schema/src/test/java/GenerateJsonSchema.java index 381ebb5..59d5d74 100644 --- a/dump-schema/src/test/java/GenerateJsonSchema.java +++ b/dump-schema/src/test/java/GenerateJsonSchema.java @@ -1,5 +1,6 @@ import java.io.IOException; +import eu.dnetlib.dhp.oa.model.community.CommunityResult; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.core.JsonProcessingException; @@ -9,7 +10,10 @@ import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.ExecCreateSchemas; +import eu.dnetlib.dhp.oa.model.graph.Datasource; import eu.dnetlib.dhp.oa.model.graph.GraphResult; +import eu.dnetlib.dhp.oa.model.graph.Organization; +import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity; //@Disabled class GenerateJsonSchema { @@ -40,7 +44,7 @@ class GenerateJsonSchema { .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(GraphResult.class); + JsonNode jsonSchema = generator.generateSchema(CommunityResult.class); System.out.println(jsonSchema.toString()); } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 647f1fe..72af465 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -9,10 +9,14 @@ import java.util.stream.Collectors; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.model.community.CommunityResult; import eu.dnetlib.dhp.oa.model.community.Context; @@ -60,10 +64,13 @@ public class CommunitySplit implements Serializable { .filter((FilterFunction) r -> containsCommunity(r, c)); communityProducts + .map( + (MapFunction) cr -> new ObjectMapper().writeValueAsString(cr), + Encoders.STRING()) .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) - .json(outputPath); + .text(outputPath); } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 941f681..fef3359 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -82,10 +82,13 @@ public class SparkUpdateProjectInfo implements Serializable { }, Encoders.bean(CommunityResult.class)); long count = tmp.count(); tmp + .map( + (MapFunction) cr -> new ObjectMapper().writeValueAsString(cr), + 
Encoders.STRING()) .write() .option("compression", "gzip") .mode(SaveMode.Append) - .json(outputPath); + .text(outputPath); } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index a3506b5..a24d9bf 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -61,6 +61,29 @@ public class SplitForCommunityTest { spark.stop(); } + @Test + void testCommunitySplit2() { + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityResult") + .getPath(); + + final String communityMapPath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json") + .getPath(); + + CommunitySplit split = new CommunitySplit(); + + split.run(false, sourcePath, workingDir.toString() + "/split", communityMapPath); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/split/Digital_Humanities_and_Cultural_Heritage") + .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class)); + + } + @Test void testCommunitySplit() { diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/dataset b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/dataset new file mode 100644 index 0000000..e69de29 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/orp b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/orp new file mode 100644 index 0000000..e69de29 diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/publication b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/publication new file mode 100644 index 0000000..b7f9ced --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/publication @@ -0,0 +1 @@ +{"pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "contributor": [], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, {"key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a", "value": "Microsoft Academic Graph"}], "id": "50|doi_________::0027accd79214af151336e8237a2b084", "container": {"issnPrinted": "1607-6729", "conferencedate": null, "vol": "385", "conferenceplace": null, "name": "Doklady Biochemistry and Biophysics", "iss": null, "sp": "228", "edition": null, "issnOnline": null, "ep": "234", "issnLinking": null}, "lastupdatetimestamp": 1649039791345, "author": [{"surname": null, "fullname": "Vladimir S. 
Saakov", "pid": null, "name": null, "rank": 1}], "instance": [{"refereed": "UNKNOWN", "hostedby": {"key": "10|issn___print::55156520c3996f4d887f858c089d1e5f", "value": "Doklady Biochemistry and Biophysics"}, "url": ["https://doi.org/10.1023/a:1019971625315"], "pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "publicationdate": "2002-01-01", "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "type": "Article"}], "subjects": [{"provenance": null, "subject": {"scheme": "keyword", "value": "General Chemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biochemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "General Medicine"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biophysics"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photosystem II"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Ion"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Soil salinity"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Analytical chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Function (biology)"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Pulse (signal processing)"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Fluorescence"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Phototroph"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Kinetic energy"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photochemistry"}}], "publicationdate": "2002-01-01", "indicators": {"impactMeasures": {"influence": {"score": "4.901964E-9", "class": "C"}, "popularity": {"score": "6.185583E-10", "class": "C"}, "influence_alt": {"score": "3", "class": "C"}, "impulse": {"score": "0", "class": "C"}, "popularity_alt": {"score": "0.03722029", "class": "C"}}}, "dateofcollection": "2022-04-04T02:36:31Z", "type": "publication", "description": [], "format": [], "coverage": [], "publisher": "Springer Science and Business Media LLC", "language": {"code": "und", "label": "Undetermined"}, "country": [], "originalId": ["453197", "10.1023/a:1019971625315", "314096869"], "source": ["Crossref", null], "context": [{"code": "enermaps", "provenance": [{"provenance": "Inferred by OpenAIRE", "trust": "0.8"}], "label": "Energy Research"}]} \ No newline at end of file diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/software b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/communityResult/software new file mode 100644 index 0000000..e69de29 From b743dc29600d5bb6dd6cff04f99522168ffa9eaf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 29 Dec 2022 15:19:36 +0100 Subject: [PATCH 12/26] removed class --- .../eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/DumpGraphEntities.java deleted file mode 100644 index e69de29..0000000 From 4dcd03b78ee57c2769e7de0ccb3d63ba042dd8c4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 31 Dec 2022 13:00:00 +0100 Subject: [PATCH 13/26] minor and fixed wronf number is test because of change in 
the input resource --- .../src/test/java/GenerateJsonSchema.java | 2 +- .../community/SparkUpdateProjectInfo.java | 8 ++--- .../funderresults/SparkDumpFunderResults.java | 5 ++- .../SparkResultLinkedToProject.java | 13 +++++-- .../graph/dump/wf/main/oozie_app/workflow.xml | 34 +++++++++---------- .../oa/graph/dump/subset/DumpSubsetTest.java | 12 ++++--- .../addProjectInfo/publication_extendedmodel | 8 ++--- 7 files changed, 46 insertions(+), 36 deletions(-) diff --git a/dump-schema/src/test/java/GenerateJsonSchema.java b/dump-schema/src/test/java/GenerateJsonSchema.java index d059db7..1dd03f8 100644 --- a/dump-schema/src/test/java/GenerateJsonSchema.java +++ b/dump-schema/src/test/java/GenerateJsonSchema.java @@ -1,6 +1,5 @@ import java.io.IOException; -import eu.dnetlib.dhp.oa.model.community.CommunityResult; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.core.JsonProcessingException; @@ -10,6 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.ExecCreateSchemas; +import eu.dnetlib.dhp.oa.model.community.CommunityResult; import eu.dnetlib.dhp.oa.model.graph.Datasource; import eu.dnetlib.dhp.oa.model.graph.GraphResult; import eu.dnetlib.dhp.oa.model.graph.Organization; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index fef3359..996b4e2 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -8,6 +8,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -71,7 +72,8 @@ public class SparkUpdateProjectInfo implements Serializable { String preparedInfoPath) { Dataset result = Utils.readPath(spark, inputPath, CommunityResult.class); Dataset resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class); - Dataset tmp = result + + result .joinWith( resultProject, result.col("id").equalTo(resultProject.col("resultId")), "left") @@ -79,9 +81,7 @@ public class SparkUpdateProjectInfo implements Serializable { CommunityResult r = value._1(); Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; - }, Encoders.bean(CommunityResult.class)); - long count = tmp.count(); - tmp + }, Encoders.bean(CommunityResult.class)) .map( (MapFunction) cr -> new ObjectMapper().writeValueAsString(cr), Encoders.STRING()) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index a5b9575..41e3f15 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -88,7 +88,10 @@ public class SparkDumpFunderResults implements Serializable { } else { String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase(); if (fName.equalsIgnoreCase("ec")) { - if (p.getId().contains("h2020")) { + if(p.getId().contains("he")){ + fName += "_HE"; + } + else if 
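The SparkUpdateProjectInfo change above is more than cosmetic: the old code materialized the joined dataset with an extra count() action before writing, forcing Spark to compute the join twice. The new code folds join, enrichment and serialization into one chain. A condensed sketch of the resulting flow, assuming the CommunityResult and ResultProject beans and the Utils.readPath helper from this module (the final write call is not visible in the hunk, so gzip-compressed text output matching the sibling jobs is an assumption):

    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SaveMode;
    import org.apache.spark.sql.SparkSession;
    import scala.Tuple2;

    // Left join keeps results with no project association; the first map enriches
    // the ones that matched, the second serializes each record to one JSON line.
    static void addProjectInfo(SparkSession spark, String inputPath, String preparedInfoPath, String outputPath) {
        Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
        Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
        result
            .joinWith(resultProject, result.col("id").equalTo(resultProject.col("resultId")), "left")
            .map((MapFunction<Tuple2<CommunityResult, ResultProject>, CommunityResult>) value -> {
                CommunityResult r = value._1();
                java.util.Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
                return r;
            }, Encoders.bean(CommunityResult.class))
            .map((MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr), Encoders.STRING())
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .text(outputPath);
    }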
(p.getId().contains("h2020")) { fName += "_H2020"; } else { fName += "_FP7"; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 476ca09..57e7f9f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -17,6 +18,8 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; @@ -99,13 +102,19 @@ public class SparkResultLinkedToProject implements Serializable { .map( t2._1(), communityMap, Constants.DUMPTYPE.FUNDER.getType()); - cr.setProjects(t2._2().getProjectsList()); + if (cr != null) { + cr.setProjects(t2._2().getProjectsList()); + } return cr; }, Encoders.bean(CommunityResult.class)) + .filter(Objects::nonNull) + .map( + (MapFunction) cr -> new ObjectMapper().writeValueAsString(cr), + Encoders.STRING()) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath); + .text(outputPath); } } diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml index 46a4d32..ffa8bcd 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml @@ -238,7 +238,7 @@ outputPath - ${workingDir}/tar + ${outputPath}/dump sourcePath @@ -279,7 +279,7 @@ outputPath - ${workingDir}/tar + ${outputPath}/dump @@ -299,7 +299,7 @@ outputPath - ${workingDir}/tar + ${outputPath}/dump sourcePath @@ -315,28 +315,28 @@ - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath} - --nameNode${nameNode} - --sourcePath${workingDir}/tar - - - - - - + - + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar + --hdfsPath${outputPath}/tar + --nameNode${nameNode} + --sourcePath${outputPath}/dump + + + + + ${wf:conf('upload') eq true} @@ -347,7 +347,7 @@ eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath} + --hdfsPath${outputPath}/tar/ --nameNode${nameNode} --accessToken${accessToken} --connectionUrl${connectionUrl} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java index 5fa9286..ce73ac6 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java @@ -456,7 +456,7 @@ public class DumpSubsetTest { getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure") .getPath()) - .saveAsTextFile(workingDir.toString() + "/dump/community_infrastructure"); + .saveAsTextFile(workingDir.toString() + "/dump/communities_infrastructures"); SparkSelectValidRelationContext .main( @@ -512,10 
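Two related fixes land in the hunks above: SparkResultLinkedToProject now tolerates a null CommunityResult from ResultMapper.map (it only sets the projects when the mapped record exists, filters the nulls out, and writes each record as a JSON string via text() instead of json()), and the main workflow stops staging under ${workingDir}/tar — the dump is written to ${outputPath}/dump, MakeTar archives it into ${outputPath}/tar, and the Zenodo upload reads from ${outputPath}/tar/. A reduced sketch of the null-safe serialization step, under the same bean assumptions as above:

    import java.util.Objects;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.apache.spark.api.java.function.FilterFunction;
    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SaveMode;

    // Records the mapper could not convert come through as null; drop them
    // before serializing, otherwise the string map would fail on them.
    static void writeNullSafe(Dataset<CommunityResult> enriched, String outputPath) {
        enriched
            .filter((FilterFunction<CommunityResult>) Objects::nonNull)
            .map((MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr), Encoders.STRING())
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .text(outputPath);
    }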
+512,12 @@ public class DumpSubsetTest { .textFile(workingDir.toString() + "/relation") .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class)); - Assertions.assertEquals(94, tmp.count()); - Assertions.assertEquals(47, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count()); - Assertions.assertEquals(36, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count()); - Assertions.assertEquals(11, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count()); + Assertions.assertEquals(102, tmp.count()); + + + Assertions.assertEquals(51, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count()); + Assertions.assertEquals(39, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count()); + Assertions.assertEquals(12, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count()); } } diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel index f198781..b56b30f 100644 --- a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo/publication_extendedmodel @@ -1,6 +1,2 @@ -{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and 
synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. 
This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"indicators":[{"id":"influence", "measure":[{"key":"score","value":"1.62759106106e-08"}]},{"id":"popularity", "measure":[{"key":"score","value":"0.22519296"}]}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} 
-{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. 
We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"indicators":[{"id":"influence", "measure":[{"key":"score","value":"1.62759106106e-08"}]},{"id":"popularity", 
"measure":[{"key":"score","value":"0.22519296"}]}],"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} - - - - +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation 
types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. 
This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} +{"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":{"id":{"scheme":"orcid","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":{"id":{"scheme":"orcid_pending","value":"0000-0001-6651-1178"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}}],"type":"publication","language":{"code":"eng","label":"English"},"country":[{"code":"IT","label":"Italy","provenance":null}],"subjects":[{"subject":{"scheme":"ACM","value":"Ecosystem Services hotspots"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Natura 
2000"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Quiet Protected Areas"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Biodiversity"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Agriculture"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Elevation"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Slope"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"Ecosystem Service trade-offs and synergies"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":" cultural services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"provisioning services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"regulating services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}},{"subject":{"scheme":"","value":"supporting services"},"provenance":{"provenance":"sysimport:crosswalk:repository","trust":"0.9"}}],"maintitle":"Ecosystem Service capacity is higher in areas of multiple designation types","subtitle":null,"description":["The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. 
The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management."],"publicationdate":"2017-01-01","publisher":"Pensoft Publishers","embargoenddate":null,"source":["One Ecosystem 2: e13718"],"format":["text/html"],"contributor":[],"coverage":[],"bestaccessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/"},"container":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null},"documentationUrl":null,"codeRepositoryUrl":null,"programmingLanguage":null,"contactperson":null,"contactgroup":null,"tool":null,"size":null,"version":null,"geolocation":null,"id":"50|fakeoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"scheme":"doi","value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","lastupdatetimestamp":1628257970612,"projects":null,"context":[{"code":"dh-ch","label":"Digital Humanities and Cultural Heritage","provenance":[{"provenance":"Inferred by OpenAIRE","trust":"0.9"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}],"instance":[{"pid":[],"alternateIdentifier":[{"scheme":"doi","value":"10.3897/oneeco.2.e13718"}],"license":null,"accessright":{"code":"c_abf2","label":"OPEN","scheme":"http://vocabularies.coar-repositories.org/documentation/access_rights/","openAccessRoute":"green"},"type":"Article","url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"articleprocessingcharge":null,"publicationdate":"2017-01-01","refereed":"peerReviewed","hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem"},"collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft"}}]} \ No newline at end of file From f738db860aafefc7e6d9a48bf53fe297d4bc58c2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 25 Jan 2023 11:52:51 +0100 Subject: [PATCH 14/26] refactoring --- .../oa/graph/dump/funderresults/SparkDumpFunderResults.java | 5 ++--- .../eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 41e3f15..60b8af1 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -88,10 +88,9 @@ public class SparkDumpFunderResults implements Serializable { } else { String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase(); if (fName.equalsIgnoreCase("ec")) { - 
if(p.getId().contains("he")){ + if (p.getId().contains("he")) { fName += "_HE"; - } - else if (p.getId().contains("h2020")) { + } else if (p.getId().contains("h2020")) { fName += "_H2020"; } else { fName += "_FP7"; diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java index ce73ac6..0b3c479 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/subset/DumpSubsetTest.java @@ -514,7 +514,6 @@ public class DumpSubsetTest { Assertions.assertEquals(102, tmp.count()); - Assertions.assertEquals(51, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count()); Assertions.assertEquals(39, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count()); Assertions.assertEquals(12, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count()); } } From b6e0c7d660e8195808c04aaa8f5db65028e70ec4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 19 Apr 2023 09:40:45 +0200 Subject: [PATCH 15/26] changed the interaction with Zenodo since the API changed --- dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java | 2 +- .../java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java | 2 +- .../java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index b56849a..cb2e29b 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -66,7 +66,7 @@ public class MakeTar implements Serializable { String pathString = p.toString(); String entity = pathString.substring(pathString.lastIndexOf("/") + 1); - MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit); + MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit, true); } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index a19a3a5..685af91 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -83,7 +83,7 @@ public class SendToZenodoHDFS implements Serializable { String name = pString.substring(pString.lastIndexOf("/") + 1); FSDataInputStream inputStream = fileSystem.open(p); - zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen()); + zenodoApiClient.uploadIS(inputStream, name); } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 99f6219..ada6c83 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -73,7 +73,7 @@ public class ZenodoUploadTest { // File f = new File("/tmp/" + community_name); FSDataInputStream inputStream = fs.open(fileStatus.getPath()); - System.out.println(client.uploadIS(inputStream, community_name, fileStatus.getLen())); + System.out.println(client.uploadIS(inputStream, community_name)); } @@ -128,7 +128,7 @@ public class ZenodoUploadTest { // File f = new File("/tmp/" + community_name); FSDataInputStream inputStream =
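For reference, the EC branch that patches 13 and 14 introduce and then reformat resolves a funder bucket from the project id with order-dependent substring checks; isolated, the logic is:

    // Mirrors the EC branch above: "he" must be tested before "h2020", and both
    // are plain substring matches, so any EC id containing "he" anywhere in the
    // id is routed to the Horizon Europe bucket.
    static String funderBucket(String projectId) {
        String fName = projectId.substring(3, projectId.indexOf("_")).toUpperCase();
        if (fName.equalsIgnoreCase("ec")) {
            if (projectId.contains("he")) {
                fName += "_HE";
            } else if (projectId.contains("h2020")) {
                fName += "_H2020";
            } else {
                fName += "_FP7";
            }
        }
        return fName;
    }

Patch 15 then adapts the callers to a changed Zenodo client API: uploadIS loses its explicit length argument, and MakeTarArchive.tarMaxSize gains a trailing boolean whose meaning is not shown in these hunks. The caller-side shape after the change looks roughly as below, assuming an initialized FileSystem and client (typed here as the ZenodoAPIClient2 that patch 16 introduces; at this point in the series it is still the shared ZenodoAPIClient):

    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocatedFileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.RemoteIterator;

    // The client now streams the content without being told its size up front.
    static void uploadAll(FileSystem fileSystem, Path dir, ZenodoAPIClient2 client) throws Exception {
        RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            String p = status.getPath().toString();
            String name = p.substring(p.lastIndexOf("/") + 1);
            try (FSDataInputStream in = fileSystem.open(status.getPath())) {
                client.uploadIS(in, name);
            }
        }
    }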
fs.open(fileStatus.getPath()); - System.out.println(client.uploadIS(inputStream, community_name, fileStatus.getLen())); + System.out.println(client.uploadIS(inputStream, community_name)); } From 5d0d14528f0c5cf7b2559352476fbac101bf51a2 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 22 Jun 2023 16:54:17 +0200 Subject: [PATCH 16/26] Added new implementation upload huge file --- .../dump/MissingConceptDoiException.java | 8 + .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 6 +- .../dhp/oa/graph/dump/ZenodoAPIClient2.java | 425 ++++++++++++++++++ .../oozie_app/config-default.xml | 30 ++ .../sx/scholexplorer/oozie_app/workflow.xml | 132 ++++++ 5 files changed, 598 insertions(+), 3 deletions(-) create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java new file mode 100644 index 0000000..0160d27 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java @@ -0,0 +1,8 @@ +package eu.dnetlib.dhp.oa.graph.dump; + + +public class MissingConceptDoiException extends Throwable { + public MissingConceptDoiException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 685af91..5b016c4 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -9,8 +9,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.api.MissingConceptDoiException; -import eu.dnetlib.dhp.common.api.ZenodoAPIClient; + + import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; public class SendToZenodoHDFS implements Serializable { @@ -53,7 +53,7 @@ public class SendToZenodoHDFS implements Serializable { RemoteIterator fileStatusListIterator = fileSystem .listFiles( new Path(hdfsPath), true); - ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token); + ZenodoAPIClient2 zenodoApiClient = new ZenodoAPIClient2(connection_url, access_token); switch (depositionType) { case NEW: zenodoApiClient.newDeposition(); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java new file mode 100644 index 0000000..4eea343 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java @@ -0,0 +1,425 @@ +package eu.dnetlib.dhp.oa.graph.dump; + +import java.io.*; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.concurrent.TimeUnit; + + +import org.apache.http.HttpHeaders; +import org.apache.http.entity.ContentType; + +import com.google.gson.Gson; + +import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; +import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; +import 
okhttp3.*; +import org.jetbrains.annotations.NotNull; + +public class ZenodoAPIClient2 implements Serializable { + + String urlString; + String bucket; + + String deposition_id; + String access_token; + + public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8"); + + private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip"); + + public String getUrlString() { + return urlString; + } + + public void setUrlString(String urlString) { + this.urlString = urlString; + } + + public String getBucket() { + return bucket; + } + + public void setBucket(String bucket) { + this.bucket = bucket; + } + + public void setDeposition_id(String deposition_id) { + this.deposition_id = deposition_id; + } + + public ZenodoAPIClient2(String urlString, String access_token) { + + this.urlString = urlString; + this.access_token = access_token; + } + + /** + * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload + * + * @return response code + * @throws IOException + */ + public int newDeposition() throws IOException { + String json = "{}"; + + URL url = new URL(urlString); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setRequestMethod("POST"); + conn.setDoOutput(true); + try (OutputStream os = conn.getOutputStream()) { + byte[] input = json.getBytes("utf-8"); + os.write(input, 0, input.length); + } + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + conn.disconnect(); + + if(!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); + this.bucket = newSubmission.getLinks().getBucket(); + this.deposition_id = newSubmission.getId(); + + return responseCode; + } + + + + public int uploadIS2(InputStream is, String fileName) throws IOException { + final String crlf = "\r\n"; + final String twoHyphens = "--"; + final String boundary = "*****"; + final URL url = new URL(bucket + "/" + fileName); + + HttpURLConnection httpUrlConnection = null; + + httpUrlConnection = (HttpURLConnection) url.openConnection(); + httpUrlConnection.setUseCaches(false); + httpUrlConnection.setDoOutput(true); + + httpUrlConnection.setRequestMethod("PUT"); + httpUrlConnection.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); + httpUrlConnection.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + httpUrlConnection.setRequestProperty("Connection", "Keep-Alive"); + httpUrlConnection.setRequestProperty("Cache-Control", "no-cache"); + httpUrlConnection.setRequestProperty( + "Content-Type", "multipart/form-data;boundary=" + boundary); + + DataOutputStream request = new DataOutputStream( + httpUrlConnection.getOutputStream()); + + request.writeBytes(twoHyphens + boundary + crlf); + request.writeBytes("Content-Disposition: form-data; name=\"" + + fileName + "\";filename=\"" + + fileName + "\"" + crlf); + request.writeBytes(crlf); + byte[] buf = new byte[8192]; + int length; + while ((length = is.read(buf)) != -1) { + request.write(buf, 0, length); + request.flush(); + } + request.flush(); + request.close(); + + int responseCode = httpUrlConnection.getResponseCode(); + if(! 
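One oddity in uploadIS2 above is worth flagging: it writes multipart/form-data framing (boundary markers, a Content-Disposition header) into the body of a PUT against the bucket URL, and sets Content-Type twice — first application/zip, then multipart/form-data. The sibling uploadIS, like the HttpClient rewrite in patch 17, assumes the bucket endpoint takes the raw file bytes on PUT, in which case the multipart framing here would end up inside the stored file. For comparison, a minimal raw streaming PUT with plain HttpURLConnection (a sketch, not this class's code):

    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;

    // Streams the body in chunks so a multi-GB dump never has to fit in memory.
    static int rawPut(String bucketUrl, String fileName, InputStream is, String accessToken) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(bucketUrl + "/" + fileName).openConnection();
        conn.setRequestMethod("PUT");
        conn.setDoOutput(true);
        conn.setChunkedStreamingMode(8192); // no Content-Length needed
        conn.setRequestProperty("Content-Type", "application/zip");
        conn.setRequestProperty("Authorization", "Bearer " + accessToken);
        byte[] buf = new byte[8192];
        int n;
        try (OutputStream os = conn.getOutputStream()) {
            while ((n = is.read(buf)) != -1) {
                os.write(buf, 0, n);
            }
        }
        return conn.getResponseCode();
    }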
checkOKStatus(responseCode)){ + throw new IOException("Unexpected code " + responseCode + getBody(httpUrlConnection)); + } + + return responseCode; + } + + /** + * Upload files in Zenodo. + * + * @param is the inputStream for the file to upload + * @param file_name the name of the file as it will appear on Zenodo + * @return the response code + */ + public int uploadIS(InputStream is, String file_name) throws IOException { + + URL url = new URL(bucket + "/" + file_name); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("PUT"); + + byte[] buf = new byte[8192]; + int length; + try (OutputStream os = conn.getOutputStream()) { + while ((length = is.read(buf)) != -1) { + os.write(buf, 0, length); + os.flush(); + } + + } + int responseCode = conn.getResponseCode(); + if(! checkOKStatus(responseCode)){ + throw new IOException("Unexpected code " + responseCode + getBody(conn)); + } + + return responseCode; + } + + @NotNull + private String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + + body = response.toString(); + + } + return body; + } + + /** + * Associates metadata information to the current deposition + * + * @param metadata the metadata + * @return response code + * @throws IOException + */ + public int sendMretadata(String metadata) throws IOException { + + URL url = new URL(urlString + "/" + deposition_id); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("PUT"); + + + try (OutputStream os = conn.getOutputStream()) { + byte[] input = metadata.getBytes("utf-8"); + os.write(input, 0, input.length); + + } + + final int responseCode = conn.getResponseCode(); + conn.disconnect(); + if(!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + getBody(conn)); + + return responseCode; + + + } + + private boolean checkOKStatus(int responseCode) { + + if(HttpURLConnection.HTTP_OK != responseCode || + HttpURLConnection.HTTP_CREATED != responseCode) + return true ; + return false; + } + + /** + * To publish the current deposition. 
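checkOKStatus above deserves a close look: the condition (HTTP_OK != responseCode || HTTP_CREATED != responseCode) is true for every possible code, since no code can equal both 200 and 201 at once, so the method always returns true and none of the if (!checkOKStatus(...)) guards in this class can ever throw — error responses are silently swallowed. Judging by the call sites, the intended predicate is the positive one (note also the sendMretadata method name, presumably meant to be sendMetadata, carried over from the original client). A corrected version, under that assumption about intent:

    import java.net.HttpURLConnection;

    // True only for the success codes the Zenodo deposition API is expected to
    // return (200/201, plus 202, which some actions such as publish may use).
    private static boolean checkOKStatus(int responseCode) {
        return responseCode == HttpURLConnection.HTTP_OK
            || responseCode == HttpURLConnection.HTTP_CREATED
            || responseCode == HttpURLConnection.HTTP_ACCEPTED;
    }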
It works for both new deposition or new version of an old deposition + * + * @return response code + * @throws IOException + */ + @Deprecated + public int publish() throws IOException { + + String json = "{}"; + + OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); + + RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); + + Request request = new Request.Builder() + .url(urlString + "/" + deposition_id + "/actions/publish") + .addHeader("Authorization", "Bearer " + access_token) + .post(body) + .build(); + + try (Response response = httpClient.newCall(request).execute()) { + + if (!response.isSuccessful()) + throw new IOException("Unexpected code " + response + response.body().string()); + + return response.code(); + + } + } + + /** + * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used + * for the new version. + * + * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last + * part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930 + * concept_rec_id = 656930 + * @return response code + * @throws IOException + * @throws MissingConceptDoiException + */ + public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException { + setDepositionId(concept_rec_id, 1); + String json = "{}"; + + URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("POST"); + + + try (OutputStream os = conn.getOutputStream()) { + byte[] input = json.getBytes("utf-8"); + os.write(input, 0, input.length); + + } + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + + conn.disconnect(); + if(!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); + String latest_draft = zenodoModel.getLinks().getLatest_draft(); + deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); + bucket = getBucket(latest_draft); + + return responseCode; + + } + + /** + * To finish uploading a version or new deposition not published + * It sets the deposition_id and the bucket to be used + * + * + * @param deposition_id the deposition id of the not yet published upload + * concept_rec_id = 656930 + * @return response code + * @throws IOException + * @throws MissingConceptDoiException + */ + public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException { + + this.deposition_id = deposition_id; + + String json = "{}"; + + URL url = new URL(urlString + "/" + deposition_id); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setRequestMethod("POST"); + conn.setDoOutput(true); + try (OutputStream os = conn.getOutputStream()) { + byte[] input = json.getBytes("utf-8"); + os.write(input, 0, input.length); + } + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + conn.disconnect(); + + if(!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); + bucket = 
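The new-version flow implemented around this point is indirect enough to summarize: setDepositionId pages through the caller's existing depositions until it finds the one whose conceptrecid matches, newVersion then POSTs to .../actions/newversion and reads links.latest_draft from the response, and the fresh deposition id is taken as the last path segment of that draft URL (the upload bucket then comes from a further GET on the draft). The id extraction, isolated:

    // The new deposition id is everything after the last '/' of the
    // latest_draft link returned by Zenodo.
    static String depositionIdFromDraft(String latestDraft) {
        return latestDraft.substring(latestDraft.lastIndexOf('/') + 1);
    }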
zenodoModel.getLinks().getBucket(); + + + return responseCode; + + } + + private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException { + + ZenodoModelList zenodoModelList = new Gson() + .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class); + + for (ZenodoModel zm : zenodoModelList) { + if (zm.getConceptrecid().equals(concept_rec_id)) { + deposition_id = zm.getId(); + return; + } + } + if (zenodoModelList.size() == 0) + throw new MissingConceptDoiException( + "The concept record id specified was missing in the list of depositions"); + setDepositionId(concept_rec_id, page + 1); + + } + + private String getPrevDepositions(String page) throws IOException { + + HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder(); + urlBuilder.addQueryParameter("page", page); + + URL url = new URL(urlBuilder.build().toString()); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + + + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + + conn.disconnect(); + if(!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + + + return body; + + + + } + + private String getBucket(String inputUurl) throws IOException { + + URL url = new URL(inputUurl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + + conn.disconnect(); + if(!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); + + return zenodoModel.getLinks().getBucket(); + + + + } + +} \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml new file mode 100644 index 0000000..d262cb6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml new file mode 100644 index 0000000..1dfbef6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml @@ -0,0 +1,132 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + accessToken + the access token used for the deposition in Zenodo + + + connectionUrl + the connection url for Zenodo + + + metadata + "" + the 
metadata associated to the deposition + + + depositionType + the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) + + + conceptRecordId + none + for new version, the id of the record for the old deposition + + + depositionId + none + the depositionId of a deposition open that has to be added content + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar + --hdfsPath${outputPath}/tar + --nameNode${nameNode} + --sourcePath${sourcePath} + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS + --hdfsPath${outputPath}/tar/ + --nameNode${nameNode} + --accessToken${accessToken} + --connectionUrl${connectionUrl} + --metadata${metadata} + --conceptRecordId${conceptRecordId} + --depositionType${depositionType} + --depositionId${depositionId} + + + + + + + + From d472050ad4ce888e9c473eb8665c65dbdb72e1d2 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 22 Jun 2023 17:43:53 +0200 Subject: [PATCH 17/26] Added new implementation upload huge file --- dump/pom.xml | 4 + .../dhp/oa/graph/dump/ZenodoAPIClient2.java | 77 +++++++++---------- 2 files changed, 39 insertions(+), 42 deletions(-) diff --git a/dump/pom.xml b/dump/pom.xml index 680768e..1600163 100644 --- a/dump/pom.xml +++ b/dump/pom.xml @@ -53,6 +53,10 @@ dump-schema 1.2.5-SNAPSHOT + + org.apache.httpcomponents + httpclient + io.github.classgraph classgraph diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java index 4eea343..5cad5e4 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java @@ -1,22 +1,23 @@ package eu.dnetlib.dhp.oa.graph.dump; -import java.io.*; -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.concurrent.TimeUnit; - - -import org.apache.http.HttpHeaders; -import org.apache.http.entity.ContentType; - import com.google.gson.Gson; - import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; import okhttp3.*; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.ContentType; +import 
org.apache.http.entity.InputStreamEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; import org.jetbrains.annotations.NotNull; +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.concurrent.TimeUnit; + public class ZenodoAPIClient2 implements Serializable { String urlString; @@ -93,48 +94,40 @@ public class ZenodoAPIClient2 implements Serializable { public int uploadIS2(InputStream is, String fileName) throws IOException { + + final String crlf = "\r\n"; final String twoHyphens = "--"; final String boundary = "*****"; - final URL url = new URL(bucket + "/" + fileName); - HttpURLConnection httpUrlConnection = null; - httpUrlConnection = (HttpURLConnection) url.openConnection(); - httpUrlConnection.setUseCaches(false); - httpUrlConnection.setDoOutput(true); + HttpPut put = new HttpPut(bucket + "/" + fileName); + + + + put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); + put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + + put.setEntity(new InputStreamEntity(is)); + + int statusCode; + try (CloseableHttpClient client = HttpClients.createDefault()){ + CloseableHttpResponse response = client.execute(put); + statusCode= response.getStatusLine().getStatusCode(); - httpUrlConnection.setRequestMethod("PUT"); - httpUrlConnection.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); - httpUrlConnection.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - httpUrlConnection.setRequestProperty("Connection", "Keep-Alive"); - httpUrlConnection.setRequestProperty("Cache-Control", "no-cache"); - httpUrlConnection.setRequestProperty( - "Content-Type", "multipart/form-data;boundary=" + boundary); - DataOutputStream request = new DataOutputStream( - httpUrlConnection.getOutputStream()); - request.writeBytes(twoHyphens + boundary + crlf); - request.writeBytes("Content-Disposition: form-data; name=\"" + - fileName + "\";filename=\"" + - fileName + "\"" + crlf); - request.writeBytes(crlf); - byte[] buf = new byte[8192]; - int length; - while ((length = is.read(buf)) != -1) { - request.write(buf, 0, length); - request.flush(); - } - request.flush(); - request.close(); - int responseCode = httpUrlConnection.getResponseCode(); - if(! checkOKStatus(responseCode)){ - throw new IOException("Unexpected code " + responseCode + getBody(httpUrlConnection)); } - return responseCode; + + + + if(! 
checkOKStatus(statusCode)){ + throw new IOException("Unexpected code " + statusCode ); + } + + return statusCode; } /** From 6ace388cff1e9fef4f6497a1ed5b5bc6e0bbd94a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 23 Jun 2023 14:16:10 +0200 Subject: [PATCH 18/26] fixed method --- .../dump/MissingConceptDoiException.java | 10 +- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 6 +- .../dhp/oa/graph/dump/ZenodoAPIClient2.java | 719 +++++++++--------- .../oozie_app/config-default.xml | 30 - .../sx/scholexplorer/oozie_app/workflow.xml | 132 ---- 5 files changed, 347 insertions(+), 550 deletions(-) delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml delete mode 100644 dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java index 0160d27..2fb3e92 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java @@ -1,8 +1,8 @@ + package eu.dnetlib.dhp.oa.graph.dump; - public class MissingConceptDoiException extends Throwable { - public MissingConceptDoiException(String message) { - super(message); - } -} \ No newline at end of file + public MissingConceptDoiException(String message) { + super(message); + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 5b016c4..77e9c92 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -9,8 +9,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import eu.dnetlib.dhp.application.ArgumentApplicationParser; - - import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; public class SendToZenodoHDFS implements Serializable { @@ -81,10 +79,8 @@ public class SendToZenodoHDFS implements Serializable { String pString = p.toString(); if (!pString.endsWith("_SUCCESS")) { String name = pString.substring(pString.lastIndexOf("/") + 1); - FSDataInputStream inputStream = fileSystem.open(p); - zenodoApiClient.uploadIS(inputStream, name); - + zenodoApiClient.uploadIS2(inputStream, name); } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java index 5cad5e4..4153f7a 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java @@ -1,418 +1,381 @@ -package eu.dnetlib.dhp.oa.graph.dump; -import com.google.gson.Gson; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; -import okhttp3.*; -import org.apache.http.HttpHeaders; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpPut; -import org.apache.http.entity.ContentType; -import org.apache.http.entity.InputStreamEntity; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.jetbrains.annotations.NotNull; +package eu.dnetlib.dhp.oa.graph.dump; import java.io.*; import java.net.HttpURLConnection; 
+import java.net.URI; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.concurrent.TimeUnit; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.client.utils.URIBuilder; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.InputStreamEntity; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; +import org.jetbrains.annotations.NotNull; + +import com.google.gson.Gson; + +import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; +import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import okhttp3.Response; + public class ZenodoAPIClient2 implements Serializable { - String urlString; - String bucket; + String urlString; + String bucket; - String deposition_id; - String access_token; + String deposition_id; + String access_token; + + public String getUrlString() { + return urlString; + } + + public void setUrlString(String urlString) { + this.urlString = urlString; + } + + public String getBucket() { + return bucket; + } + + public void setBucket(String bucket) { + this.bucket = bucket; + } + + public void setDeposition_id(String deposition_id) { + this.deposition_id = deposition_id; + } + + public ZenodoAPIClient2(String urlString, String access_token) { + + this.urlString = urlString; + this.access_token = access_token; + } + + /** + * Brand new deposition in Zenodo. 
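For orientation, the intended call sequence against this client is: create (or reopen) a deposition, stream the files into its bucket, attach the JSON metadata, and publish. A minimal usage sketch follows — the endpoint URL and token are placeholders, not real credentials, and the method names are used exactly as they are spelled in this class:

    // Hedged sketch: URL and token are placeholder values.
    ZenodoAPIClient2 client = new ZenodoAPIClient2(
        "https://sandbox.zenodo.org/api/deposit/depositions", "A_TOKEN");
    client.newDeposition();                  // sets deposition_id and the upload bucket
    try (InputStream is = new FileInputStream("dump.tar")) {
        client.uploadIS(is, "dump.tar");     // PUT the payload into the bucket
    }
    client.sendMretadata("{\"metadata\": {\"title\": \"test dump\"}}"); // name as spelled in this class
    client.publish();
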
It sets the deposition_id and the bucket where to store the files to upload + * + * @return response code + * @throws IOException + */ + public int newDeposition() throws IOException { + String json = "{}"; + + URL url = new URL(urlString); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setRequestMethod("POST"); + conn.setDoOutput(true); + try (OutputStream os = conn.getOutputStream()) { + byte[] input = json.getBytes("utf-8"); + os.write(input, 0, input.length); + } + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + conn.disconnect(); + + if (!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); + this.bucket = newSubmission.getLinks().getBucket(); + this.deposition_id = newSubmission.getId(); + + return responseCode; + } + + public int uploadIS2(InputStream is, String fileName) throws IOException { + + final String crlf = "\r\n"; + final String twoHyphens = "--"; + final String boundary = "*****"; + + HttpPut put = new HttpPut(bucket + "/" + fileName); + + put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); + put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + + put.setEntity(new InputStreamEntity(is)); + + int statusCode; + try (CloseableHttpClient client = HttpClients.createDefault()) { + CloseableHttpResponse response = client.execute(put); + statusCode = response.getStatusLine().getStatusCode(); + + } + + if (!checkOKStatus(statusCode)) { + throw new IOException("Unexpected code " + statusCode); + } + + return statusCode; + } + + public int publish() throws IOException { + String json = "{}"; + HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish"); + post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json"); + post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + post.setEntity(new StringEntity(json)); + int statusCode; + try (CloseableHttpClient client = HttpClients.createDefault()) { + CloseableHttpResponse response = client.execute(post); + statusCode = response.getStatusLine().getStatusCode(); + } + if (!checkOKStatus(statusCode)) { + throw new IOException("Unexpected code " + statusCode); + } + return statusCode; + } + + /** + * Upload files in Zenodo. 
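One detail worth noting before the HttpURLConnection-based upload below: unless fixed-length (or chunked) streaming mode is enabled, HttpURLConnection buffers the entire request body in memory before transmitting it, which is exactly what breaks multi-gigabyte dump uploads and what motivates the HttpClient and okhttp variants introduced in this series. A hedged sketch of the streaming fix — not part of this patch, and assuming the content length is known up front:

    conn.setDoOutput(true);
    conn.setRequestMethod("PUT");
    conn.setFixedLengthStreamingMode(contentLength); // stream instead of buffering the whole body
    try (OutputStream os = conn.getOutputStream()) {
        IOUtils.copyLarge(is, os); // commons-io, already used elsewhere in this module
    }
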
+ * + * @param is the inputStream for the file to upload + * @param file_name the name of the file as it will appear on Zenodo + * @return the response code + */ + public int uploadIS(InputStream is, String file_name) throws IOException { + + URL url = new URL(bucket + "/" + file_name); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("PUT"); + + byte[] buf = new byte[8192]; + int length; + try (OutputStream os = conn.getOutputStream()) { + while ((length = is.read(buf)) != -1) { + os.write(buf, 0, length); + os.flush(); + } + + } + int responseCode = conn.getResponseCode(); + if (!checkOKStatus(responseCode)) { + throw new IOException("Unexpected code " + responseCode + getBody(conn)); + } + + return responseCode; + } + + @NotNull + private String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + + body = response.toString(); + + } + return body; + } + + /** + * Associates metadata information to the current deposition + * + * @param metadata the metadata + * @return response code + * @throws IOException + */ + public int sendMretadata(String metadata) throws IOException { + + URL url = new URL(urlString + "/" + deposition_id); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("PUT"); + + try (OutputStream os = conn.getOutputStream()) { + byte[] input = metadata.getBytes("utf-8"); + os.write(input, 0, input.length); + + } + + final int responseCode = conn.getResponseCode(); + conn.disconnect(); + if (!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + getBody(conn)); + + return responseCode; + + } + + private boolean checkOKStatus(int responseCode) { + + if (HttpURLConnection.HTTP_OK != responseCode || + HttpURLConnection.HTTP_CREATED != responseCode) + return true; + return false; + } + + /** + * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used + * for the new version. + * + * @param concept_rec_id the concept record id of the deposition for which to create a new version. 
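A caveat carried into this version of the file: the checkOKStatus predicate above, `HTTP_OK != responseCode || HTTP_CREATED != responseCode`, evaluates to true for every status code (no response can equal both 200 and 201 at once), so callers guarding with `if (!checkOKStatus(...))` can never throw. A later patch in this series flips it to equality checks; the intended predicate is:

    private boolean checkOKStatus(int responseCode) {
        // true only for the success codes Zenodo returns here: 200 OK or 201 Created
        return HttpURLConnection.HTTP_OK == responseCode
            || HttpURLConnection.HTTP_CREATED == responseCode;
    }
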
It is the last + * part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930 + * concept_rec_id = 656930 + * @return response code + * @throws IOException + * @throws MissingConceptDoiException + */ + public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException { + setDepositionId(concept_rec_id, 1); + String json = "{}"; + + URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("POST"); + + try (OutputStream os = conn.getOutputStream()) { + byte[] input = json.getBytes("utf-8"); + os.write(input, 0, input.length); + + } + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + + conn.disconnect(); + if (!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); + String latest_draft = zenodoModel.getLinks().getLatest_draft(); + deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); + bucket = getBucket(latest_draft); - public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8"); + return responseCode; - private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip"); + } - public String getUrlString() { - return urlString; - } + /** + * To finish uploading a version or new deposition not published + * It sets the deposition_id and the bucket to be used + * + * + * @param deposition_id the deposition id of the not yet published upload + * concept_rec_id = 656930 + * @return response code + * @throws IOException + * @throws MissingConceptDoiException + */ + public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException { - public void setUrlString(String urlString) { - this.urlString = urlString; - } + this.deposition_id = deposition_id; - public String getBucket() { - return bucket; - } + String json = "{}"; - public void setBucket(String bucket) { - this.bucket = bucket; - } + URL url = new URL(urlString + "/" + deposition_id); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - public void setDeposition_id(String deposition_id) { - this.deposition_id = deposition_id; - } + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setRequestMethod("POST"); + conn.setDoOutput(true); + try (OutputStream os = conn.getOutputStream()) { + byte[] input = json.getBytes("utf-8"); + os.write(input, 0, input.length); + } - public ZenodoAPIClient2(String urlString, String access_token) { + String body = getBody(conn); - this.urlString = urlString; - this.access_token = access_token; - } + int responseCode = conn.getResponseCode(); + conn.disconnect(); - /** - * Brand new deposition in Zenodo. 
It sets the deposition_id and the bucket where to store the files to upload - * - * @return response code - * @throws IOException - */ - public int newDeposition() throws IOException { - String json = "{}"; + if (!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } + ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); + bucket = zenodoModel.getLinks().getBucket(); - String body = getBody(conn); + return responseCode; - int responseCode = conn.getResponseCode(); - conn.disconnect(); + } - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); + private void setDepositionId(String concept_rec_id, Integer page) throws Exception, MissingConceptDoiException { - ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); - this.bucket = newSubmission.getLinks().getBucket(); - this.deposition_id = newSubmission.getId(); + ZenodoModelList zenodoModelList = new Gson() + .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class); - return responseCode; - } + for (ZenodoModel zm : zenodoModelList) { + if (zm.getConceptrecid().equals(concept_rec_id)) { + deposition_id = zm.getId(); + return; + } + } + if (zenodoModelList.size() == 0) + throw new MissingConceptDoiException( + "The concept record id specified was missing in the list of depositions"); + setDepositionId(concept_rec_id, page + 1); + } + private String getPrevDepositions(String page) throws Exception { - public int uploadIS2(InputStream is, String fileName) throws IOException { + HttpGet get = new HttpGet(urlString); + URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build(); + get.setURI(uri); - final String crlf = "\r\n"; - final String twoHyphens = "--"; - final String boundary = "*****"; + get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + try (CloseableHttpClient client = HttpClients.createDefault()) { + CloseableHttpResponse response = client.execute(get); + final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + return body; + } + } + private String getBucket(String inputUurl) throws IOException { - HttpPut put = new HttpPut(bucket + "/" + fileName); + URL url = new URL(inputUurl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + String body = getBody(conn); + int responseCode = conn.getResponseCode(); - put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); - put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.disconnect(); + if (!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); - put.setEntity(new InputStreamEntity(is)); + ZenodoModel zenodoModel = new 
Gson().fromJson(body, ZenodoModel.class); - int statusCode; - try (CloseableHttpClient client = HttpClients.createDefault()){ - CloseableHttpResponse response = client.execute(put); - statusCode= response.getStatusLine().getStatusCode(); + return zenodoModel.getLinks().getBucket(); + } - - - } - - - - - if(! checkOKStatus(statusCode)){ - throw new IOException("Unexpected code " + statusCode ); - } - - return statusCode; - } - - /** - * Upload files in Zenodo. - * - * @param is the inputStream for the file to upload - * @param file_name the name of the file as it will appear on Zenodo - * @return the response code - */ - public int uploadIS(InputStream is, String file_name) throws IOException { - - URL url = new URL(bucket + "/" + file_name); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - byte[] buf = new byte[8192]; - int length; - try (OutputStream os = conn.getOutputStream()) { - while ((length = is.read(buf)) != -1) { - os.write(buf, 0, length); - os.flush(); - } - - } - int responseCode = conn.getResponseCode(); - if(! checkOKStatus(responseCode)){ - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - } - - return responseCode; - } - - @NotNull - private String getBody(HttpURLConnection conn) throws IOException { - String body = "{}"; - try (BufferedReader br = new BufferedReader( - new InputStreamReader(conn.getInputStream(), "utf-8"))) { - StringBuilder response = new StringBuilder(); - String responseLine = null; - while ((responseLine = br.readLine()) != null) { - response.append(responseLine.trim()); - } - - body = response.toString(); - - } - return body; - } - - /** - * Associates metadata information to the current deposition - * - * @param metadata the metadata - * @return response code - * @throws IOException - */ - public int sendMretadata(String metadata) throws IOException { - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = metadata.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - final int responseCode = conn.getResponseCode(); - conn.disconnect(); - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - - return responseCode; - - - } - - private boolean checkOKStatus(int responseCode) { - - if(HttpURLConnection.HTTP_OK != responseCode || - HttpURLConnection.HTTP_CREATED != responseCode) - return true ; - return false; - } - - /** - * To publish the current deposition. 
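The setDepositionId/getPrevDepositions pair above pages through the account's depositions recursively, one page per call, and fails once an empty page is reached without a match. The same lookup can be written as a loop, which avoids deepening the call stack for accounts with many depositions; a sketch under the same assumptions (ZenodoModelList remaining a java.util.List, as its use here implies):

    private void setDepositionId(String concept_rec_id) throws Exception, MissingConceptDoiException {
        for (int page = 1;; page++) {
            ZenodoModelList deps = new Gson()
                .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
            if (deps.size() == 0)
                throw new MissingConceptDoiException(
                    "The concept record id specified was missing in the list of depositions");
            for (ZenodoModel zm : deps)
                if (zm.getConceptrecid().equals(concept_rec_id)) {
                    deposition_id = zm.getId();
                    return;
                }
        }
    }
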
It works for both new deposition or new version of an old deposition - * - * @return response code - * @throws IOException - */ - @Deprecated - public int publish() throws IOException { - - String json = "{}"; - - OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); - - RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); - - Request request = new Request.Builder() - .url(urlString + "/" + deposition_id + "/actions/publish") - .addHeader("Authorization", "Bearer " + access_token) - .post(body) - .build(); - - try (Response response = httpClient.newCall(request).execute()) { - - if (!response.isSuccessful()) - throw new IOException("Unexpected code " + response + response.body().string()); - - return response.code(); - - } - } - - /** - * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used - * for the new version. - * - * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last - * part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930 - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException { - setDepositionId(concept_rec_id, 1); - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("POST"); - - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - String latest_draft = zenodoModel.getLinks().getLatest_draft(); - deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); - bucket = getBucket(latest_draft); - - return responseCode; - - } - - /** - * To finish uploading a version or new deposition not published - * It sets the deposition_id and the bucket to be used - * - * - * @param deposition_id the deposition id of the not yet published upload - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException { - - this.deposition_id = deposition_id; - - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - conn.disconnect(); - - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - bucket = 
zenodoModel.getLinks().getBucket(); - - - return responseCode; - - } - - private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException { - - ZenodoModelList zenodoModelList = new Gson() - .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class); - - for (ZenodoModel zm : zenodoModelList) { - if (zm.getConceptrecid().equals(concept_rec_id)) { - deposition_id = zm.getId(); - return; - } - } - if (zenodoModelList.size() == 0) - throw new MissingConceptDoiException( - "The concept record id specified was missing in the list of depositions"); - setDepositionId(concept_rec_id, page + 1); - - } - - private String getPrevDepositions(String page) throws IOException { - - HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder(); - urlBuilder.addQueryParameter("page", page); - - URL url = new URL(urlBuilder.build().toString()); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("GET"); - - - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - - - return body; - - - - } - - private String getBucket(String inputUurl) throws IOException { - - URL url = new URL(inputUurl); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("GET"); - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - - return zenodoModel.getLinks().getBucket(); - - - - } - -} \ No newline at end of file +} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml deleted file mode 100644 index d262cb6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/config-default.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - hiveMetastoreUris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - hiveJdbcUrl - jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 - - - hiveDbName - openaire - - - oozie.launcher.mapreduce.user.classpath.first - true - - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml deleted file mode 100644 index 1dfbef6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/sx/scholexplorer/oozie_app/workflow.xml +++ /dev/null @@ -1,132 +0,0 @@ - - - - - sourcePath - the source path - - - outputPath - the output path - - - accessToken - the access token used for the deposition in Zenodo - - - connectionUrl - the connection url for Zenodo - - - metadata 
- "" - the metadata associated to the deposition - - - depositionType - the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) - - - conceptRecordId - none - for new version, the id of the record for the old deposition - - - depositionId - none - the depositionId of a deposition open that has to be added content - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - oozieActionShareLibForSpark2 - oozie action sharelib for spark 2.* - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - spark 2.* extra listeners classname - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - spark 2.* sql query execution listeners classname - - - spark2YarnHistoryServerAddress - spark 2.* yarn history server address - - - spark2EventLogDir - spark 2.* event log dir location - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar - --hdfsPath${outputPath}/tar - --nameNode${nameNode} - --sourcePath${sourcePath} - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS - --hdfsPath${outputPath}/tar/ - --nameNode${nameNode} - --accessToken${accessToken} - --connectionUrl${connectionUrl} - --metadata${metadata} - --conceptRecordId${conceptRecordId} - --depositionType${depositionType} - --depositionId${depositionId} - - - - - - - - From 72ead1bd858460bd2b470598c5ac41f66bb95267 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 23 Jun 2023 14:16:15 +0200 Subject: [PATCH 19/26] added okhttp3 again --- .../dhp/oa/graph/dump/ZenodoAPIClient2.java | 36 ++++++++++++------- .../dhp/oa/graph/dump/ZenodoUploadTest.java | 25 ++++++++++++- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java index 5cad5e4..523eed5 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.oa.graph.dump; import com.google.gson.Gson; +import eu.dnetlib.dhp.common.api.InputStreamRequestBody; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; import okhttp3.*; @@ -92,16 +93,30 @@ public class ZenodoAPIClient2 implements Serializable { } + public int uploadIS3(InputStream is, String file_name, long len) throws IOException { + OkHttpClient httpClient = new OkHttpClient.Builder() + .writeTimeout(600, TimeUnit.SECONDS) + .readTimeout(600, TimeUnit.SECONDS) + .connectTimeout(600, TimeUnit.SECONDS) + .build(); + + Request request = new Request.Builder() + .url(bucket + "/" + file_name) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) + 
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) + .build(); + + try (Response response = httpClient.newCall(request).execute()) { + if (!response.isSuccessful()) + throw new IOException("Unexpected code " + response + response.body().string()); + return response.code(); + } + } public int uploadIS2(InputStream is, String fileName) throws IOException { - - final String crlf = "\r\n"; - final String twoHyphens = "--"; - final String boundary = "*****"; - - - HttpPut put = new HttpPut(bucket + "/" + fileName); + HttpPut put = new HttpPut(bucket + "/" + fileName); @@ -115,9 +130,6 @@ public class ZenodoAPIClient2 implements Serializable { CloseableHttpResponse response = client.execute(put); statusCode= response.getStatusLine().getStatusCode(); - - - } @@ -215,8 +227,8 @@ public class ZenodoAPIClient2 implements Serializable { private boolean checkOKStatus(int responseCode) { - if(HttpURLConnection.HTTP_OK != responseCode || - HttpURLConnection.HTTP_CREATED != responseCode) + if(HttpURLConnection.HTTP_OK == responseCode || + HttpURLConnection.HTTP_CREATED == responseCode) return true ; return false; } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index ada6c83..59912c6 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -1,11 +1,15 @@ package eu.dnetlib.dhp.oa.graph.dump; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -22,7 +26,7 @@ public class ZenodoUploadTest { private static String workingDir; private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; - private final String ACCESS_TOKEN = ""; + private final String ACCESS_TOKEN = "OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4"; @BeforeAll public static void beforeAll() throws IOException { @@ -150,4 +154,23 @@ public class ZenodoUploadTest { .getPath()))); } + @Test + void depositBigFile() throws MissingConceptDoiException, IOException { + ZenodoAPIClient2 client = new ZenodoAPIClient2(URL_STRING, + ACCESS_TOKEN); + + Assertions.assertEquals(201, client.newDeposition()); + + File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar"); +// File file = new File(getClass() +// .getResource("/eu/dnetlib/dhp/common/api/newVersion2") +// .getPath()); + + InputStream is = new FileInputStream(file); + + Assertions.assertEquals(200, client.uploadIS3(is, "newVersion_deposition", file.length())); + + // Assertions.assertEquals(202, client.publish()); + } + } From d746390b9f82c57362cca06038afd7dac59fd3b8 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 23 Jun 2023 15:15:09 +0200 Subject: [PATCH 20/26] new implementatiton with okhttp --- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 2 +- .../dhp/oa/graph/dump/ZenodoAPIClient2.java | 31 +++- .../dump/oozie_app/config-default.xml | 30 ++++ .../scholexplorer/dump/oozie_app/workflow.xml | 132 ++++++++++++++++++ 4 files changed, 190 insertions(+), 5 deletions(-) create mode 100644 dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/config-default.xml create mode 
100644 dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 77e9c92..0b263f9 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -80,7 +80,7 @@ public class SendToZenodoHDFS implements Serializable { if (!pString.endsWith("_SUCCESS")) { String name = pString.substring(pString.lastIndexOf("/") + 1); FSDataInputStream inputStream = fileSystem.open(p); - zenodoApiClient.uploadIS2(inputStream, name); + zenodoApiClient.uploadIS3(inputStream, name, fileSystem.getFileStatus(p).getLen()); } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java index 4153f7a..200cbf1 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java @@ -24,12 +24,10 @@ import org.jetbrains.annotations.NotNull; import com.google.gson.Gson; +import eu.dnetlib.dhp.common.api.InputStreamRequestBody; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; -import okhttp3.OkHttpClient; -import okhttp3.Request; -import okhttp3.RequestBody; -import okhttp3.Response; +import okhttp3.*; public class ZenodoAPIClient2 implements Serializable { @@ -39,6 +37,10 @@ public class ZenodoAPIClient2 implements Serializable { String deposition_id; String access_token; + public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8"); + + private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip"); + public String getUrlString() { return urlString; } @@ -194,6 +196,27 @@ public class ZenodoAPIClient2 implements Serializable { return body; } + public int uploadIS3(InputStream is, String file_name, long len) throws IOException { + OkHttpClient httpClient = new OkHttpClient.Builder() + .writeTimeout(600, TimeUnit.SECONDS) + .readTimeout(600, TimeUnit.SECONDS) + .connectTimeout(600, TimeUnit.SECONDS) + .build(); + + Request request = new Request.Builder() + .url(bucket + "/" + file_name) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) + .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) + .build(); + + try (Response response = httpClient.newCall(request).execute()) { + if (!response.isSuccessful()) + throw new IOException("Unexpected code " + response + response.body().string()); + return response.code(); + } + } + /** * Associates metadata information to the current deposition * diff --git a/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/config-default.xml new file mode 100644 index 0000000..d262cb6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + 
oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml new file mode 100644 index 0000000..5dc1573 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml @@ -0,0 +1,132 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + accessToken + the access token used for the deposition in Zenodo + + + connectionUrl + the connection url for Zenodo + + + metadata + "" + the metadata associated to the deposition + + + depositionType + the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided) + + + conceptRecordId + none + for new version, the id of the record for the old deposition + + + depositionId + none + the depositionId of a deposition open that has to be added content + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + eu.dnetlib.dhp.oa.graph.dump.MakeTar + --hdfsPath${outputPath}/tar + --nameNode${nameNode} + --sourcePath${sourcePath} + + + + + + + + + eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS + --hdfsPath${outputPath}/tar/ + --nameNode${nameNode} + --accessToken${accessToken} + --connectionUrl${connectionUrl} + --metadata${metadata} + --conceptRecordId${conceptRecordId} + --depositionType${depositionType} + --depositionId${depositionId} + + + + + + + + From 2ac5c4a9ab0c64333dfcca650378904722ac2bd6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 1 Jul 2023 11:06:41 +0200 Subject: [PATCH 21/26] moved also the model and other linked classes to the dump project --- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 4 +- .../oa/zenodoapi/InputStreamRequestBody.java | 56 +++ .../MissingConceptDoiException.java | 2 +- .../ZenodoAPIClient.java} | 351 +----------------- .../dhp/oa/zenodoapi/model/Community.java | 17 + .../dhp/oa/zenodoapi/model/Creator.java | 51 +++ .../dnetlib/dhp/oa/zenodoapi/model/File.java | 47 +++ .../dnetlib/dhp/oa/zenodoapi/model/Grant.java | 26 ++ .../dnetlib/dhp/oa/zenodoapi/model/Links.java | 95 +++++ .../dhp/oa/zenodoapi/model/Metadata.java | 156 ++++++++ .../dhp/oa/zenodoapi/model/PrereserveDoi.java | 28 ++ .../oa/zenodoapi/model/RelatedIdentifier.java | 46 +++ .../dhp/oa/zenodoapi/model/ZenodoModel.java | 121 ++++++ 
.../oa/zenodoapi/model/ZenodoModelList.java | 10 + .../scholexplorer/dump/oozie_app/workflow.xml | 2 +- .../dhp/oa/graph/dump/ZenodoUploadTest.java | 8 +- 16 files changed, 669 insertions(+), 351 deletions(-) create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java rename dump/src/main/java/eu/dnetlib/dhp/oa/{graph/dump => zenodoapi}/MissingConceptDoiException.java (78%) rename dump/src/main/java/eu/dnetlib/dhp/oa/{graph/dump/ZenodoAPIClient2.java => zenodoapi/ZenodoAPIClient.java} (51%) create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 0b263f9..ab7719b 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.dump; import java.io.Serializable; import java.util.Optional; +import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException; +import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; @@ -51,7 +53,7 @@ public class SendToZenodoHDFS implements Serializable { RemoteIterator fileStatusListIterator = fileSystem .listFiles( new Path(hdfsPath), true); - ZenodoAPIClient2 zenodoApiClient = new ZenodoAPIClient2(connection_url, access_token); + ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token); switch (depositionType) { case NEW: zenodoApiClient.newDeposition(); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java new file mode 100644 index 0000000..596440d --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java @@ -0,0 +1,56 @@ +package eu.dnetlib.dhp.oa.zenodoapi; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.IOException; +import java.io.InputStream; + +import okhttp3.MediaType; +import okhttp3.RequestBody; +import okhttp3.internal.Util; +import okio.BufferedSink; +import okio.Okio; +import okio.Source; + +public class InputStreamRequestBody extends RequestBody { + + private final InputStream inputStream; + private final MediaType mediaType; + private final long lenght; + + public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) { + + return new InputStreamRequestBody(inputStream, mediaType, len); + } + + private InputStreamRequestBody(InputStream inputStream, MediaType 
mediaType, long len) { + this.inputStream = inputStream; + this.mediaType = mediaType; + this.lenght = len; + } + + @Override + public MediaType contentType() { + return mediaType; + } + + @Override + public long contentLength() { + + return lenght; + + } + + @Override + public void writeTo(BufferedSink sink) throws IOException { + Source source = null; + try { + source = Okio.source(inputStream); + sink.writeAll(source); + } finally { + Util.closeQuietly(source); + } + } +} \ No newline at end of file diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/MissingConceptDoiException.java similarity index 78% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/MissingConceptDoiException.java index 2fb3e92..6bfdba6 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MissingConceptDoiException.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/MissingConceptDoiException.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.oa.graph.dump; +package eu.dnetlib.dhp.oa.zenodoapi; public class MissingConceptDoiException extends Throwable { public MissingConceptDoiException(String message) { diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java similarity index 51% rename from dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java rename to dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java index 36e0d1b..1f54dbe 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoAPIClient2.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java @@ -1,13 +1,6 @@ -package eu.dnetlib.dhp.oa.graph.dump; +package eu.dnetlib.dhp.oa.zenodoapi; -<<<<<<< HEAD -import com.google.gson.Gson; -import eu.dnetlib.dhp.common.api.InputStreamRequestBody; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; -import okhttp3.*; -======= import java.io.*; import java.net.HttpURLConnection; import java.net.URI; @@ -15,7 +8,8 @@ import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.concurrent.TimeUnit; ->>>>>>> 6ace388cff1e9fef4f6497a1ed5b5bc6e0bbd94a +import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModel; +import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModelList; import org.apache.http.HttpHeaders; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -30,342 +24,11 @@ import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jetbrains.annotations.NotNull; -<<<<<<< HEAD -import java.io.*; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.concurrent.TimeUnit; - -public class ZenodoAPIClient2 implements Serializable { - - String urlString; - String bucket; - - String deposition_id; - String access_token; - - public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8"); - - private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip"); - - public String getUrlString() { - return urlString; - } - - public void setUrlString(String urlString) { - this.urlString = urlString; - } - - public String getBucket() { - return bucket; - } - - public void setBucket(String bucket) { - this.bucket = bucket; - } - - public void 
setDeposition_id(String deposition_id) { - this.deposition_id = deposition_id; - } - - public ZenodoAPIClient2(String urlString, String access_token) { - - this.urlString = urlString; - this.access_token = access_token; - } - - /** - * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload - * - * @return response code - * @throws IOException - */ - public int newDeposition() throws IOException { - String json = "{}"; - - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - conn.disconnect(); - - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); - this.bucket = newSubmission.getLinks().getBucket(); - this.deposition_id = newSubmission.getId(); - - return responseCode; - } - - - public int uploadIS3(InputStream is, String file_name, long len) throws IOException { - OkHttpClient httpClient = new OkHttpClient.Builder() - .writeTimeout(600, TimeUnit.SECONDS) - .readTimeout(600, TimeUnit.SECONDS) - .connectTimeout(600, TimeUnit.SECONDS) - .build(); - - Request request = new Request.Builder() - .url(bucket + "/" + file_name) - .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers - .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) - .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) - .build(); - - try (Response response = httpClient.newCall(request).execute()) { - if (!response.isSuccessful()) - throw new IOException("Unexpected code " + response + response.body().string()); - return response.code(); - } - } - - public int uploadIS2(InputStream is, String fileName) throws IOException { - - HttpPut put = new HttpPut(bucket + "/" + fileName); - - - - put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); - put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - - put.setEntity(new InputStreamEntity(is)); - - int statusCode; - try (CloseableHttpClient client = HttpClients.createDefault()){ - CloseableHttpResponse response = client.execute(put); - statusCode= response.getStatusLine().getStatusCode(); - - } - - - - - if(! checkOKStatus(statusCode)){ - throw new IOException("Unexpected code " + statusCode ); - } - - return statusCode; - } - - /** - * Upload files in Zenodo. 
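uploadIS3 above is the variant the HDFS exporter ends up calling: okhttp with a fixed-length InputStreamRequestBody streams the payload without buffering it, provided the caller supplies the exact byte count up front. In SendToZenodoHDFS that count comes from the HDFS file status, mirroring the change applied earlier in this series:

    // Call-site sketch, as applied to SendToZenodoHDFS: the length must match the stream exactly.
    FSDataInputStream inputStream = fileSystem.open(p);
    long len = fileSystem.getFileStatus(p).getLen();
    zenodoApiClient.uploadIS3(inputStream, name, len);
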
- * - * @param is the inputStream for the file to upload - * @param file_name the name of the file as it will appear on Zenodo - * @return the response code - */ - public int uploadIS(InputStream is, String file_name) throws IOException { - - URL url = new URL(bucket + "/" + file_name); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - byte[] buf = new byte[8192]; - int length; - try (OutputStream os = conn.getOutputStream()) { - while ((length = is.read(buf)) != -1) { - os.write(buf, 0, length); - os.flush(); - } - - } - int responseCode = conn.getResponseCode(); - if(! checkOKStatus(responseCode)){ - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - } - - return responseCode; - } - - @NotNull - private String getBody(HttpURLConnection conn) throws IOException { - String body = "{}"; - try (BufferedReader br = new BufferedReader( - new InputStreamReader(conn.getInputStream(), "utf-8"))) { - StringBuilder response = new StringBuilder(); - String responseLine = null; - while ((responseLine = br.readLine()) != null) { - response.append(responseLine.trim()); - } - - body = response.toString(); - - } - return body; - } - - /** - * Associates metadata information to the current deposition - * - * @param metadata the metadata - * @return response code - * @throws IOException - */ - public int sendMretadata(String metadata) throws IOException { - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = metadata.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - final int responseCode = conn.getResponseCode(); - conn.disconnect(); - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - - return responseCode; - - - } - - private boolean checkOKStatus(int responseCode) { - - if(HttpURLConnection.HTTP_OK == responseCode || - HttpURLConnection.HTTP_CREATED == responseCode) - return true ; - return false; - } - - /** - * To publish the current deposition. It works for both new deposition or new version of an old deposition - * - * @return response code - * @throws IOException - */ - @Deprecated - public int publish() throws IOException { - - String json = "{}"; - - OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); - - RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); - - Request request = new Request.Builder() - .url(urlString + "/" + deposition_id + "/actions/publish") - .addHeader("Authorization", "Bearer " + access_token) - .post(body) - .build(); - - try (Response response = httpClient.newCall(request).execute()) { - - if (!response.isSuccessful()) - throw new IOException("Unexpected code " + response + response.body().string()); - - return response.code(); - - } - } - - /** - * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used - * for the new version. 
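	 * A hedged usage sketch, assuming a configured client and reusing the
	 * concept_rec_id from the example below:
	 *
	 *   client.newVersion("656930");   // deposition_id and bucket now point at the new draft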
- * - * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last - * part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930 - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException { - setDepositionId(concept_rec_id, 1); - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("POST"); - - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if(!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - String latest_draft = zenodoModel.getLinks().getLatest_draft(); - deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); - bucket = getBucket(latest_draft); - - return responseCode; - - } - - /** - * To finish uploading a version or new deposition not published - * It sets the deposition_id and the bucket to be used - * - * - * @param deposition_id the deposition id of the not yet published upload - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException { - - this.deposition_id = deposition_id; - - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } - - String body = getBody(conn); -======= import com.google.gson.Gson; ->>>>>>> 6ace388cff1e9fef4f6497a1ed5b5bc6e0bbd94a -import eu.dnetlib.dhp.common.api.InputStreamRequestBody; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; import okhttp3.*; -public class ZenodoAPIClient2 implements Serializable { +public class ZenodoAPIClient implements Serializable { String urlString; String bucket; @@ -397,7 +60,7 @@ public class ZenodoAPIClient2 implements Serializable { this.deposition_id = deposition_id; } - public ZenodoAPIClient2(String urlString, String access_token) { + public ZenodoAPIClient(String urlString, String access_token) { this.urlString = urlString; this.access_token = access_token; @@ -586,8 +249,8 @@ public class ZenodoAPIClient2 implements Serializable { private boolean checkOKStatus(int responseCode) { - if (HttpURLConnection.HTTP_OK != responseCode || - HttpURLConnection.HTTP_CREATED != responseCode) + if (HttpURLConnection.HTTP_OK == responseCode || + HttpURLConnection.HTTP_CREATED == responseCode) return true; return false; } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java 
b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java new file mode 100644 index 0000000..cea1b70 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java @@ -0,0 +1,17 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +public class Community { + private String identifier; + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java new file mode 100644 index 0000000..72d9cae --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java @@ -0,0 +1,51 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +public class Creator { + private String affiliation; + private String name; + private String orcid; + + public String getAffiliation() { + return affiliation; + } + + public void setAffiliation(String affiliation) { + this.affiliation = affiliation; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getOrcid() { + return orcid; + } + + public void setOrcid(String orcid) { + this.orcid = orcid; + } + + public static Creator newInstance(String name, String affiliation, String orcid) { + Creator c = new Creator(); + if (name != null) { + c.name = name; + } + if (affiliation != null) { + c.affiliation = affiliation; + } + if (orcid != null) { + c.orcid = orcid; + } + + return c; + } +} + diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java new file mode 100644 index 0000000..e478ab7 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java @@ -0,0 +1,47 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.Serializable; + +public class File implements Serializable { + private String checksum; + private String filename; + private long filesize; + private String id; + + public String getChecksum() { + return checksum; + } + + public void setChecksum(String checksum) { + this.checksum = checksum; + } + + public String getFilename() { + return filename; + } + + public void setFilename(String filename) { + this.filename = filename; + } + + public long getFilesize() { + return filesize; + } + + public void setFilesize(long filesize) { + this.filesize = filesize; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java new file mode 100644 index 0000000..98f1dae --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java @@ -0,0 +1,26 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.Serializable; + +public class Grant implements Serializable { + private String id; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public static Grant newInstance(String id) { + Grant g = new Grant(); + g.id = id; + + return g; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java 
b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java new file mode 100644 index 0000000..147f8ef --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java @@ -0,0 +1,95 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.Serializable; + +public class Links implements Serializable { + + private String bucket; + + private String discard; + + private String edit; + private String files; + private String html; + private String latest_draft; + private String latest_draft_html; + private String publish; + + private String self; + + public String getBucket() { + return bucket; + } + + public void setBucket(String bucket) { + this.bucket = bucket; + } + + public String getDiscard() { + return discard; + } + + public void setDiscard(String discard) { + this.discard = discard; + } + + public String getEdit() { + return edit; + } + + public void setEdit(String edit) { + this.edit = edit; + } + + public String getFiles() { + return files; + } + + public void setFiles(String files) { + this.files = files; + } + + public String getHtml() { + return html; + } + + public void setHtml(String html) { + this.html = html; + } + + public String getLatest_draft() { + return latest_draft; + } + + public void setLatest_draft(String latest_draft) { + this.latest_draft = latest_draft; + } + + public String getLatest_draft_html() { + return latest_draft_html; + } + + public void setLatest_draft_html(String latest_draft_html) { + this.latest_draft_html = latest_draft_html; + } + + public String getPublish() { + return publish; + } + + public void setPublish(String publish) { + this.publish = publish; + } + + public String getSelf() { + return self; + } + + public void setSelf(String self) { + this.self = self; + } +} \ No newline at end of file diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java new file mode 100644 index 0000000..163e4b3 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java @@ -0,0 +1,156 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.Serializable; +import java.util.List; + +public class Metadata implements Serializable { + + private String access_right; + private List communities; + private List creators; + private String description; + private String doi; + private List grants; + private List keywords; + private String language; + private String license; + private PrereserveDoi prereserve_doi; + private String publication_date; + private List references; + private List related_identifiers; + private String title; + private String upload_type; + private String version; + + public String getUpload_type() { + return upload_type; + } + + public void setUpload_type(String upload_type) { + this.upload_type = upload_type; + } + + public String getVersion() { + return version; + } + + public void setVersion(String version) { + this.version = version; + } + + public String getAccess_right() { + return access_right; + } + + public void setAccess_right(String access_right) { + this.access_right = access_right; + } + + public List getCommunities() { + return communities; + } + + public void setCommunities(List communities) { + this.communities = communities; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + 
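	// A hedged usage sketch: this bean is what gets serialized (e.g. with Gson,
	// already used in this module) into the deposition metadata sent to Zenodo;
	// all values below are illustrative, not part of this patch:
	//
	//   Metadata m = new Metadata();
	//   m.setUpload_type("dataset");
	//   m.setAccess_right("open");
	//   m.setTitle("Graph dump");
	//   m.setCreators(java.util.Arrays.asList(Creator.newInstance("Doe, Jane", "Example Org", null)));
	//   String json = new com.google.gson.Gson().toJson(m);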
public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public List getGrants() { + return grants; + } + + public void setGrants(List grants) { + this.grants = grants; + } + + public List getKeywords() { + return keywords; + } + + public void setKeywords(List keywords) { + this.keywords = keywords; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + + public String getLicense() { + return license; + } + + public void setLicense(String license) { + this.license = license; + } + + public PrereserveDoi getPrereserve_doi() { + return prereserve_doi; + } + + public void setPrereserve_doi(PrereserveDoi prereserve_doi) { + this.prereserve_doi = prereserve_doi; + } + + public String getPublication_date() { + return publication_date; + } + + public void setPublication_date(String publication_date) { + this.publication_date = publication_date; + } + + public List getReferences() { + return references; + } + + public void setReferences(List references) { + this.references = references; + } + + public List getRelated_identifiers() { + return related_identifiers; + } + + public void setRelated_identifiers(List related_identifiers) { + this.related_identifiers = related_identifiers; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java new file mode 100644 index 0000000..375fae1 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java @@ -0,0 +1,28 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.Serializable; + +public class PrereserveDoi implements Serializable { + private String doi; + private String recid; + + public String getDoi() { + return doi; + } + + public void setDoi(String doi) { + this.doi = doi; + } + + public String getRecid() { + return recid; + } + + public void setRecid(String recid) { + this.recid = recid; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java new file mode 100644 index 0000000..9fc7e40 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java @@ -0,0 +1,46 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.Serializable; + +public class RelatedIdentifier implements Serializable { + private String identifier; + private String relation; + private String resource_type; + private String scheme; + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public String getRelation() { + return relation; + } + + public void setRelation(String relation) { + this.relation = relation; + } + + public String getResource_type() { + return resource_type; + } + + public void setResource_type(String resource_type) { + this.resource_type = resource_type; + } + + public String getScheme() { + return scheme; + } + + public void setScheme(String scheme) 
{ + this.scheme = scheme; + } +} \ No newline at end of file diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java new file mode 100644 index 0000000..fe35b58 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java @@ -0,0 +1,121 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.io.Serializable; +import java.util.List; + +public class ZenodoModel implements Serializable { + + private String conceptrecid; + private String created; + + private List files; + private String id; + private Links links; + private Metadata metadata; + private String modified; + private String owner; + private String record_id; + private String state; + private boolean submitted; + private String title; + + public String getConceptrecid() { + return conceptrecid; + } + + public void setConceptrecid(String conceptrecid) { + this.conceptrecid = conceptrecid; + } + + public String getCreated() { + return created; + } + + public void setCreated(String created) { + this.created = created; + } + + public List getFiles() { + return files; + } + + public void setFiles(List files) { + this.files = files; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Links getLinks() { + return links; + } + + public void setLinks(Links links) { + this.links = links; + } + + public Metadata getMetadata() { + return metadata; + } + + public void setMetadata(Metadata metadata) { + this.metadata = metadata; + } + + public String getModified() { + return modified; + } + + public void setModified(String modified) { + this.modified = modified; + } + + public String getOwner() { + return owner; + } + + public void setOwner(String owner) { + this.owner = owner; + } + + public String getRecord_id() { + return record_id; + } + + public void setRecord_id(String record_id) { + this.record_id = record_id; + } + + public String getState() { + return state; + } + + public void setState(String state) { + this.state = state; + } + + public boolean isSubmitted() { + return submitted; + } + + public void setSubmitted(boolean submitted) { + this.submitted = submitted; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } +} \ No newline at end of file diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java new file mode 100644 index 0000000..8ba6ac2 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java @@ -0,0 +1,10 @@ +package eu.dnetlib.dhp.oa.zenodoapi.model; + +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ +import java.util.ArrayList; + +public class ZenodoModelList extends ArrayList { +} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml index 5dc1573..6662da0 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/sx/scholexplorer/dump/oozie_app/workflow.xml @@ -92,7 +92,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java 
b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 59912c6..06723e7 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.junit.jupiter.api.Assertions; @@ -17,7 +18,6 @@ import org.junit.jupiter.api.Test; import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.MissingConceptDoiException; -import eu.dnetlib.dhp.common.api.ZenodoAPIClient; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; @Disabled @@ -55,7 +55,7 @@ public class ZenodoUploadTest { .getPath()), new Path(workingDir + "/zenodo/dh-ch/dh-ch")); - ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, + eu.dnetlib.dhp.common.api.ZenodoAPIClient client = new eu.dnetlib.dhp.common.api.ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); client.newDeposition(); @@ -92,7 +92,7 @@ public class ZenodoUploadTest { @Test void testNewVersion() throws IOException, MissingConceptDoiException { - ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, + eu.dnetlib.dhp.common.api.ZenodoAPIClient client = new eu.dnetlib.dhp.common.api.ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); client.newVersion("656628"); @@ -156,7 +156,7 @@ public class ZenodoUploadTest { @Test void depositBigFile() throws MissingConceptDoiException, IOException { - ZenodoAPIClient2 client = new ZenodoAPIClient2(URL_STRING, + ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); Assertions.assertEquals(201, client.newDeposition()); From 3fba247c38e1f39780fb0e4e7b799c72292d6459 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 1 Jul 2023 11:07:41 +0200 Subject: [PATCH 22/26] refactoring --- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 4 +- .../oa/zenodoapi/InputStreamRequestBody.java | 66 ++--- .../dhp/oa/zenodoapi/ZenodoAPIClient.java | 4 +- .../dhp/oa/zenodoapi/model/Community.java | 15 +- .../dhp/oa/zenodoapi/model/Creator.java | 70 +++--- .../dnetlib/dhp/oa/zenodoapi/model/File.java | 62 ++--- .../dnetlib/dhp/oa/zenodoapi/model/Grant.java | 30 ++- .../dnetlib/dhp/oa/zenodoapi/model/Links.java | 134 +++++----- .../dhp/oa/zenodoapi/model/Metadata.java | 230 +++++++++--------- .../dhp/oa/zenodoapi/model/PrereserveDoi.java | 34 +-- .../oa/zenodoapi/model/RelatedIdentifier.java | 64 ++--- .../dhp/oa/zenodoapi/model/ZenodoModel.java | 176 +++++++------- .../oa/zenodoapi/model/ZenodoModelList.java | 6 + .../dhp/oa/graph/dump/ZenodoUploadTest.java | 4 +- 14 files changed, 477 insertions(+), 422 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index ab7719b..32031eb 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -4,14 +4,14 @@ package eu.dnetlib.dhp.oa.graph.dump; import java.io.Serializable; import java.util.Optional; -import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException; -import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import 
eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; +import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException; +import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; public class SendToZenodoHDFS implements Serializable { diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java index 596440d..ecbcac6 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -16,41 +22,41 @@ import okio.Source; public class InputStreamRequestBody extends RequestBody { - private final InputStream inputStream; - private final MediaType mediaType; - private final long lenght; + private final InputStream inputStream; + private final MediaType mediaType; + private final long lenght; - public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) { + public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) { - return new InputStreamRequestBody(inputStream, mediaType, len); - } + return new InputStreamRequestBody(inputStream, mediaType, len); + } - private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) { - this.inputStream = inputStream; - this.mediaType = mediaType; - this.lenght = len; - } + private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) { + this.inputStream = inputStream; + this.mediaType = mediaType; + this.lenght = len; + } - @Override - public MediaType contentType() { - return mediaType; - } + @Override + public MediaType contentType() { + return mediaType; + } - @Override - public long contentLength() { + @Override + public long contentLength() { - return lenght; + return lenght; - } + } - @Override - public void writeTo(BufferedSink sink) throws IOException { - Source source = null; - try { - source = Okio.source(inputStream); - sink.writeAll(source); - } finally { - Util.closeQuietly(source); - } - } -} \ No newline at end of file + @Override + public void writeTo(BufferedSink sink) throws IOException { + Source source = null; + try { + source = Okio.source(inputStream); + sink.writeAll(source); + } finally { + Util.closeQuietly(source); + } + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java index 1f54dbe..0da7d72 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java @@ -8,8 +8,6 @@ import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.concurrent.TimeUnit; -import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModel; -import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModelList; import org.apache.http.HttpHeaders; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -26,6 +24,8 @@ import org.jetbrains.annotations.NotNull; import com.google.gson.Gson; +import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModel; +import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModelList; import okhttp3.*; public class ZenodoAPIClient implements Serializable { diff --git 
a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java index cea1b70..008737b 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Community.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; /** @@ -5,13 +6,13 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @Date 01/07/23 */ public class Community { - private String identifier; + private String identifier; - public String getIdentifier() { - return identifier; - } + public String getIdentifier() { + return identifier; + } - public void setIdentifier(String identifier) { - this.identifier = identifier; - } + public void setIdentifier(String identifier) { + this.identifier = identifier; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java index 72d9cae..9e6eab8 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Creator.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; /** @@ -5,47 +6,46 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @Date 01/07/23 */ public class Creator { - private String affiliation; - private String name; - private String orcid; + private String affiliation; + private String name; + private String orcid; - public String getAffiliation() { - return affiliation; - } + public String getAffiliation() { + return affiliation; + } - public void setAffiliation(String affiliation) { - this.affiliation = affiliation; - } + public void setAffiliation(String affiliation) { + this.affiliation = affiliation; + } - public String getName() { - return name; - } + public String getName() { + return name; + } - public void setName(String name) { - this.name = name; - } + public void setName(String name) { + this.name = name; + } - public String getOrcid() { - return orcid; - } + public String getOrcid() { + return orcid; + } - public void setOrcid(String orcid) { - this.orcid = orcid; - } + public void setOrcid(String orcid) { + this.orcid = orcid; + } - public static Creator newInstance(String name, String affiliation, String orcid) { - Creator c = new Creator(); - if (name != null) { - c.name = name; - } - if (affiliation != null) { - c.affiliation = affiliation; - } - if (orcid != null) { - c.orcid = orcid; - } + public static Creator newInstance(String name, String affiliation, String orcid) { + Creator c = new Creator(); + if (name != null) { + c.name = name; + } + if (affiliation != null) { + c.affiliation = affiliation; + } + if (orcid != null) { + c.orcid = orcid; + } - return c; - } + return c; + } } - diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java index e478ab7..d5cd439 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -7,41 +13,41 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; import java.io.Serializable; public class File implements Serializable { - private String checksum; - private String filename; - private long filesize; - private String id; + private String checksum; + private String 
filename; + private long filesize; + private String id; - public String getChecksum() { - return checksum; - } + public String getChecksum() { + return checksum; + } - public void setChecksum(String checksum) { - this.checksum = checksum; - } + public void setChecksum(String checksum) { + this.checksum = checksum; + } - public String getFilename() { - return filename; - } + public String getFilename() { + return filename; + } - public void setFilename(String filename) { - this.filename = filename; - } + public void setFilename(String filename) { + this.filename = filename; + } - public long getFilesize() { - return filesize; - } + public long getFilesize() { + return filesize; + } - public void setFilesize(long filesize) { - this.filesize = filesize; - } + public void setFilesize(long filesize) { + this.filesize = filesize; + } - public String getId() { - return id; - } + public String getId() { + return id; + } - public void setId(String id) { - this.id = id; - } + public void setId(String id) { + this.id = id; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java index 98f1dae..530371f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -7,20 +13,20 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; import java.io.Serializable; public class Grant implements Serializable { - private String id; + private String id; - public String getId() { - return id; - } + public String getId() { + return id; + } - public void setId(String id) { - this.id = id; - } + public void setId(String id) { + this.id = id; + } - public static Grant newInstance(String id) { - Grant g = new Grant(); - g.id = id; + public static Grant newInstance(String id) { + Grant g = new Grant(); + g.id = id; - return g; - } + return g; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java index 147f8ef..7c07aa2 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -8,88 +14,88 @@ import java.io.Serializable; public class Links implements Serializable { - private String bucket; + private String bucket; - private String discard; + private String discard; - private String edit; - private String files; - private String html; - private String latest_draft; - private String latest_draft_html; - private String publish; + private String edit; + private String files; + private String html; + private String latest_draft; + private String latest_draft_html; + private String publish; - private String self; + private String self; - public String getBucket() { - return bucket; - } + public String getBucket() { + return bucket; + } - public void setBucket(String bucket) { - this.bucket = bucket; - } + public void setBucket(String bucket) { + this.bucket = bucket; + } - public String getDiscard() { - return discard; - } + public String getDiscard() { + return discard; + } - public void setDiscard(String discard) { - this.discard = discard; - 
} + public void setDiscard(String discard) { + this.discard = discard; + } - public String getEdit() { - return edit; - } + public String getEdit() { + return edit; + } - public void setEdit(String edit) { - this.edit = edit; - } + public void setEdit(String edit) { + this.edit = edit; + } - public String getFiles() { - return files; - } + public String getFiles() { + return files; + } - public void setFiles(String files) { - this.files = files; - } + public void setFiles(String files) { + this.files = files; + } - public String getHtml() { - return html; - } + public String getHtml() { + return html; + } - public void setHtml(String html) { - this.html = html; - } + public void setHtml(String html) { + this.html = html; + } - public String getLatest_draft() { - return latest_draft; - } + public String getLatest_draft() { + return latest_draft; + } - public void setLatest_draft(String latest_draft) { - this.latest_draft = latest_draft; - } + public void setLatest_draft(String latest_draft) { + this.latest_draft = latest_draft; + } - public String getLatest_draft_html() { - return latest_draft_html; - } + public String getLatest_draft_html() { + return latest_draft_html; + } - public void setLatest_draft_html(String latest_draft_html) { - this.latest_draft_html = latest_draft_html; - } + public void setLatest_draft_html(String latest_draft_html) { + this.latest_draft_html = latest_draft_html; + } - public String getPublish() { - return publish; - } + public String getPublish() { + return publish; + } - public void setPublish(String publish) { - this.publish = publish; - } + public void setPublish(String publish) { + this.publish = publish; + } - public String getSelf() { - return self; - } + public String getSelf() { + return self; + } - public void setSelf(String self) { - this.self = self; - } -} \ No newline at end of file + public void setSelf(String self) { + this.self = self; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java index 163e4b3..dc9d293 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -9,148 +15,148 @@ import java.util.List; public class Metadata implements Serializable { - private String access_right; - private List communities; - private List creators; - private String description; - private String doi; - private List grants; - private List keywords; - private String language; - private String license; - private PrereserveDoi prereserve_doi; - private String publication_date; - private List references; - private List related_identifiers; - private String title; - private String upload_type; - private String version; + private String access_right; + private List communities; + private List creators; + private String description; + private String doi; + private List grants; + private List keywords; + private String language; + private String license; + private PrereserveDoi prereserve_doi; + private String publication_date; + private List references; + private List related_identifiers; + private String title; + private String upload_type; + private String version; - public String getUpload_type() { - return upload_type; - } + public String getUpload_type() { + return upload_type; + } - public void 
setUpload_type(String upload_type) { - this.upload_type = upload_type; - } + public void setUpload_type(String upload_type) { + this.upload_type = upload_type; + } - public String getVersion() { - return version; - } + public String getVersion() { + return version; + } - public void setVersion(String version) { - this.version = version; - } + public void setVersion(String version) { + this.version = version; + } - public String getAccess_right() { - return access_right; - } + public String getAccess_right() { + return access_right; + } - public void setAccess_right(String access_right) { - this.access_right = access_right; - } + public void setAccess_right(String access_right) { + this.access_right = access_right; + } - public List getCommunities() { - return communities; - } + public List getCommunities() { + return communities; + } - public void setCommunities(List communities) { - this.communities = communities; - } + public void setCommunities(List communities) { + this.communities = communities; + } - public List getCreators() { - return creators; - } + public List getCreators() { + return creators; + } - public void setCreators(List creators) { - this.creators = creators; - } + public void setCreators(List creators) { + this.creators = creators; + } - public String getDescription() { - return description; - } + public String getDescription() { + return description; + } - public void setDescription(String description) { - this.description = description; - } + public void setDescription(String description) { + this.description = description; + } - public String getDoi() { - return doi; - } + public String getDoi() { + return doi; + } - public void setDoi(String doi) { - this.doi = doi; - } + public void setDoi(String doi) { + this.doi = doi; + } - public List getGrants() { - return grants; - } + public List getGrants() { + return grants; + } - public void setGrants(List grants) { - this.grants = grants; - } + public void setGrants(List grants) { + this.grants = grants; + } - public List getKeywords() { - return keywords; - } + public List getKeywords() { + return keywords; + } - public void setKeywords(List keywords) { - this.keywords = keywords; - } + public void setKeywords(List keywords) { + this.keywords = keywords; + } - public String getLanguage() { - return language; - } + public String getLanguage() { + return language; + } - public void setLanguage(String language) { - this.language = language; - } + public void setLanguage(String language) { + this.language = language; + } - public String getLicense() { - return license; - } + public String getLicense() { + return license; + } - public void setLicense(String license) { - this.license = license; - } + public void setLicense(String license) { + this.license = license; + } - public PrereserveDoi getPrereserve_doi() { - return prereserve_doi; - } + public PrereserveDoi getPrereserve_doi() { + return prereserve_doi; + } - public void setPrereserve_doi(PrereserveDoi prereserve_doi) { - this.prereserve_doi = prereserve_doi; - } + public void setPrereserve_doi(PrereserveDoi prereserve_doi) { + this.prereserve_doi = prereserve_doi; + } - public String getPublication_date() { - return publication_date; - } + public String getPublication_date() { + return publication_date; + } - public void setPublication_date(String publication_date) { - this.publication_date = publication_date; - } + public void setPublication_date(String publication_date) { + this.publication_date = publication_date; + } - public List getReferences() { - return 
references; - } + public List getReferences() { + return references; + } - public void setReferences(List references) { - this.references = references; - } + public void setReferences(List references) { + this.references = references; + } - public List getRelated_identifiers() { - return related_identifiers; - } + public List getRelated_identifiers() { + return related_identifiers; + } - public void setRelated_identifiers(List related_identifiers) { - this.related_identifiers = related_identifiers; - } + public void setRelated_identifiers(List related_identifiers) { + this.related_identifiers = related_identifiers; + } - public String getTitle() { - return title; - } + public String getTitle() { + return title; + } - public void setTitle(String title) { - this.title = title; - } + public void setTitle(String title) { + this.title = title; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java index 375fae1..d96e825 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -7,22 +13,22 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; import java.io.Serializable; public class PrereserveDoi implements Serializable { - private String doi; - private String recid; + private String doi; + private String recid; - public String getDoi() { - return doi; - } + public String getDoi() { + return doi; + } - public void setDoi(String doi) { - this.doi = doi; - } + public void setDoi(String doi) { + this.doi = doi; + } - public String getRecid() { - return recid; - } + public String getRecid() { + return recid; + } - public void setRecid(String recid) { - this.recid = recid; - } + public void setRecid(String recid) { + this.recid = recid; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java index 9fc7e40..3c25691 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -7,40 +13,40 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; import java.io.Serializable; public class RelatedIdentifier implements Serializable { - private String identifier; - private String relation; - private String resource_type; - private String scheme; + private String identifier; + private String relation; + private String resource_type; + private String scheme; - public String getIdentifier() { - return identifier; - } + public String getIdentifier() { + return identifier; + } - public void setIdentifier(String identifier) { - this.identifier = identifier; - } + public void setIdentifier(String identifier) { + this.identifier = identifier; + } - public String getRelation() { - return relation; - } + public String getRelation() { + return relation; + } - public void setRelation(String relation) { - this.relation = relation; - } + public void setRelation(String relation) { + this.relation = relation; + } - public String getResource_type() { - return 
resource_type; - } + public String getResource_type() { + return resource_type; + } - public void setResource_type(String resource_type) { - this.resource_type = resource_type; - } + public void setResource_type(String resource_type) { + this.resource_type = resource_type; + } - public String getScheme() { - return scheme; - } + public String getScheme() { + return scheme; + } - public void setScheme(String scheme) { - this.scheme = scheme; - } -} \ No newline at end of file + public void setScheme(String scheme) { + this.scheme = scheme; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java index fe35b58..3ec6d76 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 @@ -9,113 +15,113 @@ import java.util.List; public class ZenodoModel implements Serializable { - private String conceptrecid; - private String created; + private String conceptrecid; + private String created; - private List files; - private String id; - private Links links; - private Metadata metadata; - private String modified; - private String owner; - private String record_id; - private String state; - private boolean submitted; - private String title; + private List files; + private String id; + private Links links; + private Metadata metadata; + private String modified; + private String owner; + private String record_id; + private String state; + private boolean submitted; + private String title; - public String getConceptrecid() { - return conceptrecid; - } + public String getConceptrecid() { + return conceptrecid; + } - public void setConceptrecid(String conceptrecid) { - this.conceptrecid = conceptrecid; - } + public void setConceptrecid(String conceptrecid) { + this.conceptrecid = conceptrecid; + } - public String getCreated() { - return created; - } + public String getCreated() { + return created; + } - public void setCreated(String created) { - this.created = created; - } + public void setCreated(String created) { + this.created = created; + } - public List getFiles() { - return files; - } + public List getFiles() { + return files; + } - public void setFiles(List files) { - this.files = files; - } + public void setFiles(List files) { + this.files = files; + } - public String getId() { - return id; - } + public String getId() { + return id; + } - public void setId(String id) { - this.id = id; - } + public void setId(String id) { + this.id = id; + } - public Links getLinks() { - return links; - } + public Links getLinks() { + return links; + } - public void setLinks(Links links) { - this.links = links; - } + public void setLinks(Links links) { + this.links = links; + } - public Metadata getMetadata() { - return metadata; - } + public Metadata getMetadata() { + return metadata; + } - public void setMetadata(Metadata metadata) { - this.metadata = metadata; - } + public void setMetadata(Metadata metadata) { + this.metadata = metadata; + } - public String getModified() { - return modified; - } + public String getModified() { + return modified; + } - public void setModified(String modified) { - this.modified = modified; - } + public void setModified(String modified) { + this.modified = modified; + } - public String getOwner() { - return owner; - } + 
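	// A hedged deserialization sketch: this bean mirrors the deposition JSON
	// returned by the Zenodo endpoints, so a response body can be unpacked along
	// these lines (as ZenodoAPIClient already does):
	//
	//   ZenodoModel zm = new Gson().fromJson(body, ZenodoModel.class);
	//   String bucket = zm.getLinks().getBucket();   // upload target for the files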
public String getOwner() { + return owner; + } - public void setOwner(String owner) { - this.owner = owner; - } + public void setOwner(String owner) { + this.owner = owner; + } - public String getRecord_id() { - return record_id; - } + public String getRecord_id() { + return record_id; + } - public void setRecord_id(String record_id) { - this.record_id = record_id; - } + public void setRecord_id(String record_id) { + this.record_id = record_id; + } - public String getState() { - return state; - } + public String getState() { + return state; + } - public void setState(String state) { - this.state = state; - } + public void setState(String state) { + this.state = state; + } - public boolean isSubmitted() { - return submitted; - } + public boolean isSubmitted() { + return submitted; + } - public void setSubmitted(boolean submitted) { - this.submitted = submitted; - } + public void setSubmitted(boolean submitted) { + this.submitted = submitted; + } - public String getTitle() { - return title; - } + public String getTitle() { + return title; + } - public void setTitle(String title) { - this.title = title; - } -} \ No newline at end of file + public void setTitle(String title) { + this.title = title; + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java index 8ba6ac2..8042880 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java @@ -1,5 +1,11 @@ + package eu.dnetlib.dhp.oa.zenodoapi.model; +/** + * @author miriam.baglioni + * @Date 01/07/23 + */ + /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 06723e7..766ab3d 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; -import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.junit.jupiter.api.Assertions; @@ -19,6 +18,7 @@ import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.MissingConceptDoiException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; @Disabled public class ZenodoUploadTest { @@ -157,7 +157,7 @@ public class ZenodoUploadTest { @Test void depositBigFile() throws MissingConceptDoiException, IOException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, - ACCESS_TOKEN); + ACCESS_TOKEN); Assertions.assertEquals(201, client.newDeposition()); From 29b81bef265dbc661eabd11828da8f8102756f51 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 1 Jul 2023 11:54:48 +0200 Subject: [PATCH 23/26] refactoring --- .../java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java | 1 - dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java | 1 - dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java | 1 - dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java | 1 - .../main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java | 1 - .../java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java | 1 - .../eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java | 1 - 
.../main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java | 1 - .../java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java | 1 - 9 files changed, 9 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java index ecbcac6..0f37da0 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/InputStreamRequestBody.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java index d5cd439..540bc7c 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/File.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java index 530371f..3a9ddd2 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Grant.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java index 7c07aa2..e2c3a74 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Links.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java index dc9d293..0aaf9b6 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/Metadata.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java index d96e825..a38b338 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/PrereserveDoi.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java index 3c25691..d128993 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/RelatedIdentifier.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * 
@Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java index 3ec6d76..1a0a1a9 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModel.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java index 8042880..858eff8 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/model/ZenodoModelList.java @@ -5,7 +5,6 @@ package eu.dnetlib.dhp.oa.zenodoapi.model; * @author miriam.baglioni * @Date 01/07/23 */ - /** * @author miriam.baglioni * @Date 01/07/23 From b26fb928383a9780ca3c6bf6289821fe240ee1fd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Sat, 1 Jul 2023 12:38:18 +0200 Subject: [PATCH 24/26] changed the pom dependency of a different schema --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7f650b4..47aa521 100644 --- a/pom.xml +++ b/pom.xml @@ -102,7 +102,8 @@ 5.6.1 3.5 11.0.2 - [2.12.1] + + [2.13.1-patched] \ No newline at end of file From ab791fe424739099d0f0ab0fbc88eb0098e7f075 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 7 Jul 2023 18:12:17 +0200 Subject: [PATCH 25/26] [master] update reference to ZenodoAPI --- .../dhp/oa/graph/dump/ZenodoUploadTest.java | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 766ab3d..41501a5 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -7,6 +7,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; +import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.junit.jupiter.api.Assertions; @@ -16,9 +18,8 @@ import org.junit.jupiter.api.Test; import com.google.gson.Gson; -import eu.dnetlib.dhp.common.api.MissingConceptDoiException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient; + @Disabled public class ZenodoUploadTest { @@ -55,7 +56,7 @@ public class ZenodoUploadTest { .getPath()), new Path(workingDir + "/zenodo/dh-ch/dh-ch")); - eu.dnetlib.dhp.common.api.ZenodoAPIClient client = new eu.dnetlib.dhp.common.api.ZenodoAPIClient(URL_STRING, + ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); client.newDeposition(); @@ -90,9 +91,9 @@ public class ZenodoUploadTest { } @Test - void testNewVersion() throws IOException, MissingConceptDoiException { + void testNewVersion() throws Exception, MissingConceptDoiException { - eu.dnetlib.dhp.common.api.ZenodoAPIClient client = new eu.dnetlib.dhp.common.api.ZenodoAPIClient(URL_STRING, + ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); client.newVersion("656628"); @@ -140,6 +141,26 @@ public class ZenodoUploadTest { } + 
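	// A minimal draft-upload sketch in the same style as the surrounding tests,
	// assuming the URL_STRING/ACCESS_TOKEN constants above; the local path is
	// illustrative, and like the other tests this would only run against a live
	// Zenodo sandbox:
	@Test
	void draftUploadSketch() throws Exception {
		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN);
		Assertions.assertEquals(201, client.newDeposition());   // 201 Created, as asserted elsewhere
		File file = new File("/tmp/dump-part.tar");
		try (InputStream is = new FileInputStream(file)) {
			Assertions.assertEquals(200, client.uploadIS3(is, "dump-part.tar", file.length()));
		}
	}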
@Test + void testNewVersion2() throws Exception, MissingConceptDoiException { + + ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, + ACCESS_TOKEN); + + client.newVersion("1210237"); + + File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar"); +// File file = new File(getClass() +// .getResource("/eu/dnetlib/dhp/common/api/newVersion2") +// .getPath()); + + InputStream is = new FileInputStream(file); + + Assertions.assertEquals(200, client.uploadIS3(is, "newVersion_deposition", file.length())); + + // Assertions.assertEquals(202, client.publish()); + + } @Test void readCommunityMap() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); From abc30756e48b8d8a14f570dd28bc61c9c0cad1d8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 7 Jul 2023 18:42:41 +0200 Subject: [PATCH 26/26] [ZenodoAPIClient] migrate newDeposition, newVersion and getPrevDepositions to OkHttp, commenting out the HttpURLConnection/Apache HttpClient versions --- .../dhp/oa/zenodoapi/ZenodoAPIClient.java | 281 ++++++++++++------ .../dhp/oa/graph/dump/ZenodoUploadTest.java | 5 +- 2 files changed, 186 insertions(+), 100 deletions(-) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java index 0da7d72..1b3bb7a 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/zenodoapi/ZenodoAPIClient.java @@ -18,7 +18,7 @@ import org.apache.http.entity.ContentType; import org.apache.http.entity.InputStreamEntity; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; +//import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jetbrains.annotations.NotNull; @@ -72,78 +72,110 @@ public class ZenodoAPIClient implements Serializable { * @return response code * @throws IOException */ +// public int newDeposition() throws IOException { +// String json = "{}"; +// +// URL url = new URL(urlString); +// HttpURLConnection conn = (HttpURLConnection) url.openConnection(); +// conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); +// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); +// conn.setRequestMethod("POST"); +// conn.setDoOutput(true); +// try (OutputStream os = conn.getOutputStream()) { +// byte[] input = json.getBytes("utf-8"); +// os.write(input, 0, input.length); +// } +// +// String body = getBody(conn); +// +// int responseCode = conn.getResponseCode(); +// conn.disconnect(); +// +// if (!checkOKStatus(responseCode)) +// throw new IOException("Unexpected code " + responseCode + body); +// +// ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); +// this.bucket = newSubmission.getLinks().getBucket(); +// this.deposition_id = newSubmission.getId(); +// +// return responseCode; +// } + public int newDeposition() throws IOException { String json = "{}"; + OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); + + RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); + + Request request = new Request.Builder() + .url(urlString) + .addHeader("Content-Type", "application/json") // add request headers + .addHeader("Authorization", "Bearer " + access_token) + .post(body) + .build(); + + try (Response response = httpClient.newCall(request).execute()) { + + if (!response.isSuccessful()) + throw new IOException("Unexpected code " + response + response.body().string()); + + // Get
response body + json = response.body().string(); + + ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class); + this.bucket = newSubmission.getLinks().getBucket(); + this.deposition_id = newSubmission.getId(); + + return response.code(); - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); } - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - conn.disconnect(); - - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); - this.bucket = newSubmission.getLinks().getBucket(); - this.deposition_id = newSubmission.getId(); - - return responseCode; + } - public int uploadIS2(InputStream is, String fileName) throws IOException { - final String crlf = "\r\n"; - final String twoHyphens = "--"; - final String boundary = "*****"; +// public int uploadIS2(InputStream is, String fileName) throws IOException { +// +// final String crlf = "\r\n"; +// final String twoHyphens = "--"; +// final String boundary = "*****"; +// +// HttpPut put = new HttpPut(bucket + "/" + fileName); +// +// put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); +// put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); +// +// put.setEntity(new InputStreamEntity(is)); +// +// int statusCode; +// try (CloseableHttpClient client = HttpClients.createDefault()) { +// CloseableHttpResponse response = client.execute(put); +// statusCode = response.getStatusLine().getStatusCode(); +// +// } +// +// if (!checkOKStatus(statusCode)) { +// throw new IOException("Unexpected code " + statusCode); +// } +// +// return statusCode; +// } - HttpPut put = new HttpPut(bucket + "/" + fileName); - - put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); - put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - - put.setEntity(new InputStreamEntity(is)); - - int statusCode; - try (CloseableHttpClient client = HttpClients.createDefault()) { - CloseableHttpResponse response = client.execute(put); - statusCode = response.getStatusLine().getStatusCode(); - - } - - if (!checkOKStatus(statusCode)) { - throw new IOException("Unexpected code " + statusCode); - } - - return statusCode; - } - public int publish() throws IOException { - String json = "{}"; - HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish"); - post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json"); - post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - post.setEntity(new StringEntity(json)); - int statusCode; - try (CloseableHttpClient client = HttpClients.createDefault()) { - CloseableHttpResponse response = client.execute(post); - statusCode = response.getStatusLine().getStatusCode(); - } - if (!checkOKStatus(statusCode)) { - throw new IOException("Unexpected code " + statusCode); - } - return statusCode; - } +// public int publish() throws IOException { +// String json = "{}"; +// HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish"); +//
post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json"); +// post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); +// post.setEntity(new StringEntity(json)); +// int statusCode; +// try (CloseableHttpClient client = HttpClients.createDefault()) { +// CloseableHttpResponse response = client.execute(post); +// statusCode = response.getStatusLine().getStatusCode(); +// } +// if (!checkOKStatus(statusCode)) { +// throw new IOException("Unexpected code " + statusCode); +// } +// return statusCode; +// } /** * Upload files in Zenodo. @@ -266,38 +298,65 @@ public class ZenodoAPIClient implements Serializable { * @throws IOException * @throws MissingConceptDoiException */ +// public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException { +// setDepositionId(concept_rec_id, 1); +// String json = "{}"; +// +// URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); +// HttpURLConnection conn = (HttpURLConnection) url.openConnection(); +// +// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); +// conn.setDoOutput(true); +// conn.setRequestMethod("POST"); +// +// try (OutputStream os = conn.getOutputStream()) { +// byte[] input = json.getBytes("utf-8"); +// os.write(input, 0, input.length); +// +// } +// +// String body = getBody(conn); +// +// int responseCode = conn.getResponseCode(); +// +// conn.disconnect(); +// if (!checkOKStatus(responseCode)) +// throw new IOException("Unexpected code " + responseCode + body); +// +// ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); +// String latest_draft = zenodoModel.getLinks().getLatest_draft(); +// deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); +// bucket = getBucket(latest_draft); +// +// return responseCode; +// +// } public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException { setDepositionId(concept_rec_id, 1); String json = "{}"; - URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("POST"); + RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); + Request request = new Request.Builder() + .url(urlString + "/" + deposition_id + "/actions/newversion") + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) + .post(body) + .build(); + + try (Response response = httpClient.newCall(request).execute()) { + + if (!response.isSuccessful()) + throw new IOException("Unexpected code " + response + response.body().string()); + + ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class); + String latest_draft = zenodoModel.getLinks().getLatest_draft(); + deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); + bucket = getBucket(latest_draft); + return response.code(); } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - String latest_draft = 
zenodoModel.getLinks().getLatest_draft(); - deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); - bucket = getBucket(latest_draft); - - return responseCode; - } /** @@ -361,20 +420,46 @@ public class ZenodoAPIClient implements Serializable { } - private String getPrevDepositions(String page) throws Exception { +// private String getPrevDepositions(String page) throws Exception { +// +// HttpGet get = new HttpGet(urlString); +// URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build(); +// +// get.setURI(uri); +// +// get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); +// get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); +// try (CloseableHttpClient client = HttpClients.createDefault()) { +// CloseableHttpResponse response = client.execute(get); +// final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); +// return body; +// } +// } - HttpGet get = new HttpGet(urlString); - URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build(); + private String getPrevDepositions(String page) throws IOException { - get.setURI(uri); + OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); + + HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder(); + urlBuilder.addQueryParameter("page", page); + String url = urlBuilder.build().toString(); + + Request request = new Request.Builder() + .url(url) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) + .get() + .build(); + + try (Response response = httpClient.newCall(request).execute()) { + + if (!response.isSuccessful()) + throw new IOException("Unexpected code " + response + response.body().string()); + + return response.body().string(); - get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - try (CloseableHttpClient client = HttpClients.createDefault()) { - CloseableHttpResponse response = client.execute(get); - final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); - return body; } + } private String getBucket(String inputUurl) throws IOException { diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 41501a5..f119274 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -86,7 +86,8 @@ public class ZenodoUploadTest { System.out.println(client.sendMretadata(metadata)); - System.out.println(client.publish()); + + //System.out.println(client.publish()); } @@ -137,7 +138,7 @@ public class ZenodoUploadTest { } - System.out.println(client.publish()); + //System.out.println(client.publish()); }
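Usage note: the snippet below is a minimal, self-contained sketch of how the migrated OkHttp-based client is driven end to end, assembled only from the signatures visible in the patches above (the two-argument ZenodoAPIClient constructor, newVersion(String), uploadIS3(InputStream, String, long) and publish()). The class name, endpoint, token, concept record id and file path are illustrative placeholders, not values from this series; publish() is left commented out here because the tests above disable it as well.

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;

public class ZenodoNewVersionSketch {

	public static void main(String[] args) throws Exception {
		// Placeholder endpoint and token; the Zenodo sandbox instance is the
		// safer target for a dry run.
		ZenodoAPIClient client = new ZenodoAPIClient(
			"https://sandbox.zenodo.org/api/deposit/depositions", "<ACCESS_TOKEN>");

		// Open a draft deposition for a new version of an existing concept
		// record (the id is a placeholder).
		client.newVersion("123456");

		// Stream the dump archive into the deposition bucket; uploadIS3 takes
		// the input stream, the target file name and the content length.
		File dump = new File("/tmp/publication.tar");
		try (InputStream is = new FileInputStream(dump)) {
			int code = client.uploadIS3(is, dump.getName(), dump.length());
			System.out.println("upload returned HTTP " + code);
		}

		// Kept commented out, as in the tests above, so the draft can be
		// reviewed on Zenodo before it is made public.
		// client.publish();
	}
}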