added upload to zenodo node

This commit is contained in:
Sandro La Bruzzo 2020-10-20 16:59:55 +02:00
parent 271b4db450
commit 346ed65e2c
5 changed files with 156 additions and 1 deletions

View File

@ -62,6 +62,10 @@
<artifactId>dhp-schemas</artifactId> <artifactId>dhp-schemas</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.elasticsearch</groupId> <groupId>org.elasticsearch</groupId>

View File

@ -0,0 +1,80 @@
package eu.dnetlib.dhp.export.zenodo;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
public class SendToZenodoHDFS implements Serializable {
private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
SendToZenodoHDFS.class
.getResourceAsStream(
"/eu/dnetlib/dhp/export/upload_zenodo.json")));
parser.parseArgument(args);
final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("nameNode");
final String access_token = parser.get("accessToken");
final String connection_url = parser.get("connectionUrl");
final String metadata = parser.get("metadata");
final Boolean newDeposition = Boolean.valueOf(parser.get("newDeposition"));
final String concept_rec_id = Optional
.ofNullable(parser.get("conceptRecordId"))
.orElse(null);
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles(
new Path(hdfsPath), true);
ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token);
if (newDeposition) {
zenodoApiClient.newDeposition();
} else {
if (concept_rec_id == null) {
throw new MissingConceptDoiException("No concept record id has been provided");
}
zenodoApiClient.newVersion(concept_rec_id);
}
while (fileStatusListIterator.hasNext()) {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath();
String p_string = p.toString();
if (!p_string.endsWith("_SUCCESS")) {
// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
log.info("Sending information for community: " + name);
FSDataInputStream inputStream = fileSystem.open(p);
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
}
}
zenodoApiClient.sendMretadata(metadata);
zenodoApiClient.publish();
}
}

View File

@ -0,0 +1,45 @@
[
{
"paramName":"nd",
"paramLongName":"newDeposition",
"paramDescription": "if it is a new deposition (true) or a new version (false)",
"paramRequired": true
},
{
"paramName":"cri",
"paramLongName":"conceptRecordId",
"paramDescription": "The id of the concept record for a new version",
"paramRequired": false
},
{
"paramName":"hdfsp",
"paramLongName":"hdfsPath",
"paramDescription": "the path of the folder tofind files to send to Zenodo",
"paramRequired": true
},
{
"paramName": "nn",
"paramLongName": "nameNode",
"paramDescription": "the name node",
"paramRequired": true
},
{
"paramName": "at",
"paramLongName": "accessToken",
"paramDescription": "the access token for the deposition",
"paramRequired": false
},
{
"paramName":"cu",
"paramLongName":"connectionUrl",
"paramDescription": "the url to connect to deposit",
"paramRequired": false
},
{
"paramName":"m",
"paramLongName":"metadata",
"paramDescription": "metadata associated to the deposition",
"paramRequired": false
}
]

View File

@ -39,4 +39,10 @@
<name>spark2SqlQueryExecutionListeners</name> <name>spark2SqlQueryExecutionListeners</name>
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value> <value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
</property> </property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration> </configuration>

View File

@ -8,9 +8,13 @@
<name>targetPath</name> <name>targetPath</name>
<description>the target path</description> <description>the target path</description>
</property> </property>
<property>
<name>metadata</name>
<description>the metadata</description>
</property>
</parameters> </parameters>
<start to="MakeTar"/> <start to="send_zenodo"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -29,5 +33,21 @@
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.export.zenodo.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>/user/dnet.scholexplorer/scholix/provision/scholix.tar/scholix-2020-10-16.tar</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>b6ddrY6b77WxcDEevn9gqVE5sL5sDNjdUijt75W3o7cQo5vpFFI48dMiu8Gv</arg>
<arg>--connectionUrl</arg><arg>https://zenodo.org/api/deposit/depositions</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>1200252</arg>
<arg>--newDeposition</arg><arg>false</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/> <end name="End"/>
</workflow-app> </workflow-app>