forked from D-Net/dnet-hadoop
added upload to zenodo node
This commit is contained in:
parent
271b4db450
commit
346ed65e2c
|
@ -62,6 +62,10 @@
|
||||||
<artifactId>dhp-schemas</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpmime</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.elasticsearch</groupId>
|
<groupId>org.elasticsearch</groupId>
|
||||||
|
|
|
@ -0,0 +1,80 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.export.zenodo;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.*;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
|
||||||
|
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
|
||||||
|
|
||||||
|
public class SendToZenodoHDFS implements Serializable {
|
||||||
|
|
||||||
|
private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
SendToZenodoHDFS.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/export/upload_zenodo.json")));
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
|
final String access_token = parser.get("accessToken");
|
||||||
|
final String connection_url = parser.get("connectionUrl");
|
||||||
|
final String metadata = parser.get("metadata");
|
||||||
|
final Boolean newDeposition = Boolean.valueOf(parser.get("newDeposition"));
|
||||||
|
final String concept_rec_id = Optional
|
||||||
|
.ofNullable(parser.get("conceptRecordId"))
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
|
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||||
|
.listFiles(
|
||||||
|
new Path(hdfsPath), true);
|
||||||
|
ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token);
|
||||||
|
if (newDeposition) {
|
||||||
|
zenodoApiClient.newDeposition();
|
||||||
|
} else {
|
||||||
|
if (concept_rec_id == null) {
|
||||||
|
throw new MissingConceptDoiException("No concept record id has been provided");
|
||||||
|
}
|
||||||
|
zenodoApiClient.newVersion(concept_rec_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (fileStatusListIterator.hasNext()) {
|
||||||
|
LocatedFileStatus fileStatus = fileStatusListIterator.next();
|
||||||
|
|
||||||
|
Path p = fileStatus.getPath();
|
||||||
|
String p_string = p.toString();
|
||||||
|
if (!p_string.endsWith("_SUCCESS")) {
|
||||||
|
// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
|
||||||
|
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||||
|
log.info("Sending information for community: " + name);
|
||||||
|
FSDataInputStream inputStream = fileSystem.open(p);
|
||||||
|
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
zenodoApiClient.sendMretadata(metadata);
|
||||||
|
zenodoApiClient.publish();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName":"nd",
|
||||||
|
"paramLongName":"newDeposition",
|
||||||
|
"paramDescription": "if it is a new deposition (true) or a new version (false)",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"cri",
|
||||||
|
"paramLongName":"conceptRecordId",
|
||||||
|
"paramDescription": "The id of the concept record for a new version",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"hdfsp",
|
||||||
|
"paramLongName":"hdfsPath",
|
||||||
|
"paramDescription": "the path of the folder to find files to send to Zenodo",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "nn",
|
||||||
|
"paramLongName": "nameNode",
|
||||||
|
"paramDescription": "the name node",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "at",
|
||||||
|
"paramLongName": "accessToken",
|
||||||
|
"paramDescription": "the access token for the deposition",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"cu",
|
||||||
|
"paramLongName":"connectionUrl",
|
||||||
|
"paramDescription": "the url to connect to deposit",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName":"m",
|
||||||
|
"paramLongName":"metadata",
|
||||||
|
"paramDescription": "metadata associated to the deposition",
|
||||||
|
"paramRequired": false
|
||||||
|
}
|
||||||
|
]
|
|
@ -39,4 +39,10 @@
|
||||||
<name>spark2SqlQueryExecutionListeners</name>
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
<value>"com.cloudera.spark.lineage.NavigatorQueryListener"</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
</configuration>
|
</configuration>
|
|
@ -8,9 +8,13 @@
|
||||||
<name>targetPath</name>
|
<name>targetPath</name>
|
||||||
<description>the target path</description>
|
<description>the target path</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>metadata</name>
|
||||||
|
<description>the metadata</description>
|
||||||
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="MakeTar"/>
|
<start to="send_zenodo"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
@ -29,5 +33,21 @@
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
||||||
|
<action name="send_zenodo">
    <java>
        <main-class>eu.dnetlib.dhp.export.zenodo.SendToZenodoHDFS</main-class>
        <arg>--hdfsPath</arg><arg>/user/dnet.scholexplorer/scholix/provision/scholix.tar/scholix-2020-10-16.tar</arg>
        <arg>--nameNode</arg><arg>${nameNode}</arg>
        <!-- The Zenodo access token is a secret: it must be supplied as the 'accessToken'
             workflow property at submission time and never committed to the repository. -->
        <arg>--accessToken</arg><arg>${accessToken}</arg>
        <arg>--connectionUrl</arg><arg>https://zenodo.org/api/deposit/depositions</arg>
        <arg>--metadata</arg><arg>${metadata}</arg>
        <arg>--conceptRecordId</arg><arg>1200252</arg>
        <arg>--newDeposition</arg><arg>false</arg>
    </java>
    <ok to="End"/>
    <error to="Kill"/>
</action>
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
</workflow-app>
|
</workflow-app>
|
Loading…
Reference in New Issue