From 346ed65e2cec87a37210f42447a7ade9b828506f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 20 Oct 2020 16:59:55 +0200 Subject: [PATCH] added upload to zenodo node --- .../dhp-graph-provision-scholexplorer/pom.xml | 4 + .../dhp/export/zenodo/SendToZenodoHDFS.java | 80 +++++++++++++++++++ .../eu/dnetlib/dhp/export/upload_zenodo.json | 45 +++++++++++ .../sx/zenodo/oozie_app/config-default.xml | 6 ++ .../dnetlib/sx/zenodo/oozie_app/workflow.xml | 22 ++++- 5 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/zenodo/SendToZenodoHDFS.java create mode 100644 dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/upload_zenodo.json diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml index 05ca7d4ce..b287e9c88 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/pom.xml @@ -62,6 +62,10 @@ dhp-schemas ${project.version} + + org.apache.httpcomponents + httpmime + org.elasticsearch diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/zenodo/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/zenodo/SendToZenodoHDFS.java new file mode 100644 index 000000000..1dcbf6ccc --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/java/eu/dnetlib/dhp/export/zenodo/SendToZenodoHDFS.java @@ -0,0 +1,80 @@ + +package eu.dnetlib.dhp.export.zenodo; + +import java.io.Serializable; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.api.MissingConceptDoiException; +import eu.dnetlib.dhp.common.api.ZenodoAPIClient; + +public class SendToZenodoHDFS implements Serializable { + + private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class); + + public static void main(final String[] args) throws Exception, MissingConceptDoiException { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SendToZenodoHDFS.class + .getResourceAsStream( + "/eu/dnetlib/dhp/export/upload_zenodo.json"))); + + parser.parseArgument(args); + + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("nameNode"); + final String access_token = parser.get("accessToken"); + final String connection_url = parser.get("connectionUrl"); + final String metadata = parser.get("metadata"); + final Boolean newDeposition = Boolean.valueOf(parser.get("newDeposition")); + final String concept_rec_id = Optional + .ofNullable(parser.get("conceptRecordId")) + .orElse(null); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + RemoteIterator fileStatusListIterator = fileSystem + .listFiles( + new Path(hdfsPath), true); + ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token); + if (newDeposition) { + zenodoApiClient.newDeposition(); + } else { + if (concept_rec_id == null) { + throw new MissingConceptDoiException("No concept record id has been provided"); + } + zenodoApiClient.newVersion(concept_rec_id); + } + + while (fileStatusListIterator.hasNext()) { + LocatedFileStatus fileStatus = fileStatusListIterator.next(); + + Path p = fileStatus.getPath(); + String p_string = p.toString(); + if (!p_string.endsWith("_SUCCESS")) { + // String tmp = p_string.substring(0, p_string.lastIndexOf("/")); + String name = p_string.substring(p_string.lastIndexOf("/") + 1); + log.info("Sending information for community: " + name); + FSDataInputStream inputStream = fileSystem.open(p); + zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen()); + + } + + } + + zenodoApiClient.sendMretadata(metadata); + zenodoApiClient.publish(); + + } + +} diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/upload_zenodo.json b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/upload_zenodo.json new file mode 100644 index 000000000..66676005e --- /dev/null +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/dhp/export/upload_zenodo.json @@ -0,0 +1,45 @@ + +[ + { + "paramName":"nd", + "paramLongName":"newDeposition", + "paramDescription": "if it is a new deposition (true) or a new version (false)", + "paramRequired": true + }, + { + "paramName":"cri", + "paramLongName":"conceptRecordId", + "paramDescription": "The id of the concept record for a new version", + "paramRequired": false + }, + { + "paramName":"hdfsp", + "paramLongName":"hdfsPath", + "paramDescription": "the path of the folder tofind files to send to Zenodo", + "paramRequired": true + }, + { + "paramName": "nn", + "paramLongName": "nameNode", + "paramDescription": "the name node", + "paramRequired": true + }, + { + "paramName": "at", + "paramLongName": "accessToken", + "paramDescription": "the access token for the deposition", + "paramRequired": false + }, + { + "paramName":"cu", + "paramLongName":"connectionUrl", + "paramDescription": "the url to connect to deposit", + "paramRequired": false + }, + { + "paramName":"m", + "paramLongName":"metadata", + "paramDescription": "metadata associated to the deposition", + "paramRequired": false + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/config-default.xml index 59e5c059f..3b9aaca2a 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/config-default.xml @@ -39,4 +39,10 @@ spark2SqlQueryExecutionListeners "com.cloudera.spark.lineage.NavigatorQueryListener" + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/workflow.xml index ec536de1f..6d7056503 100644 --- a/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision-scholexplorer/src/main/resources/eu/dnetlib/sx/zenodo/oozie_app/workflow.xml @@ -8,9 +8,13 @@ targetPath the target path + + metadata + the metadata + - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -29,5 +33,21 @@ + + + + eu.dnetlib.dhp.export.zenodo.SendToZenodoHDFS + --hdfsPath/user/dnet.scholexplorer/scholix/provision/scholix.tar/scholix-2020-10-16.tar + --nameNode${nameNode} + --accessTokenb6ddrY6b77WxcDEevn9gqVE5sL5sDNjdUijt75W3o7cQo5vpFFI48dMiu8Gv + --connectionUrlhttps://zenodo.org/api/deposit/depositions + --metadata${metadata} + --conceptRecordId1200252 + --newDepositionfalse + + + + + \ No newline at end of file