This commit is contained in:
Miriam Baglioni 2023-06-30 19:06:15 +02:00
commit b9d4d67c72
5 changed files with 347 additions and 226 deletions

View File

@ -1,8 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump;
public class MissingConceptDoiException extends Throwable {
public MissingConceptDoiException(String message) {
super(message);
}
}
public MissingConceptDoiException(String message) {
super(message);
}
}

View File

@ -9,8 +9,6 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
public class SendToZenodoHDFS implements Serializable {
@ -81,10 +79,8 @@ public class SendToZenodoHDFS implements Serializable {
String pString = p.toString();
if (!pString.endsWith("_SUCCESS")) {
String name = pString.substring(pString.lastIndexOf("/") + 1);
FSDataInputStream inputStream = fileSystem.open(p);
zenodoApiClient.uploadIS(inputStream, name);
zenodoApiClient.uploadIS2(inputStream, name);
}
}

View File

@ -1,19 +1,36 @@
package eu.dnetlib.dhp.oa.graph.dump;
<<<<<<< HEAD
import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.InputStreamRequestBody;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
import okhttp3.*;
=======
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;
>>>>>>> 6ace388cff1e9fef4f6497a1ed5b5bc6e0bbd94a
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jetbrains.annotations.NotNull;
<<<<<<< HEAD
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
@ -339,92 +356,362 @@ public class ZenodoAPIClient2 implements Serializable {
}
String body = getBody(conn);
=======
import com.google.gson.Gson;
>>>>>>> 6ace388cff1e9fef4f6497a1ed5b5bc6e0bbd94a
int responseCode = conn.getResponseCode();
conn.disconnect();
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
if(!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
public class ZenodoAPIClient2 implements Serializable {
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket();
String urlString;
String bucket;
String deposition_id;
String access_token;
return responseCode;
public String getUrlString() {
return urlString;
}
}
public void setUrlString(String urlString) {
this.urlString = urlString;
}
private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
public String getBucket() {
return bucket;
}
ZenodoModelList zenodoModelList = new Gson()
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
public void setBucket(String bucket) {
this.bucket = bucket;
}
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
deposition_id = zm.getId();
return;
}
}
if (zenodoModelList.size() == 0)
throw new MissingConceptDoiException(
"The concept record id specified was missing in the list of depositions");
setDepositionId(concept_rec_id, page + 1);
public void setDeposition_id(String deposition_id) {
this.deposition_id = deposition_id;
}
}
public ZenodoAPIClient2(String urlString, String access_token) {
private String getPrevDepositions(String page) throws IOException {
this.urlString = urlString;
this.access_token = access_token;
}
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
urlBuilder.addQueryParameter("page", page);
/**
* Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
*
* @return response code
* @throws IOException
*/
public int newDeposition() throws IOException {
String json = "{}";
URL url = new URL(urlBuilder.build().toString());
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("GET");
URL url = new URL(urlString);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setRequestMethod("POST");
conn.setDoOutput(true);
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
}
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
String body = getBody(conn);
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
int responseCode = conn.getResponseCode();
ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
this.bucket = newSubmission.getLinks().getBucket();
this.deposition_id = newSubmission.getId();
conn.disconnect();
if(!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
return responseCode;
}
public int uploadIS2(InputStream is, String fileName) throws IOException {
final String crlf = "\r\n";
final String twoHyphens = "--";
final String boundary = "*****";
return body;
HttpPut put = new HttpPut(bucket + "/" + fileName);
put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip");
put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
put.setEntity(new InputStreamEntity(is));
}
int statusCode;
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(put);
statusCode = response.getStatusLine().getStatusCode();
private String getBucket(String inputUurl) throws IOException {
}
URL url = new URL(inputUurl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("GET");
if (!checkOKStatus(statusCode)) {
throw new IOException("Unexpected code " + statusCode);
}
String body = getBody(conn);
return statusCode;
}
int responseCode = conn.getResponseCode();
public int publish() throws IOException {
String json = "{}";
HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json");
post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
post.setEntity(new StringEntity(json));
int statusCode;
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(post);
statusCode = response.getStatusLine().getStatusCode();
}
if (!checkOKStatus(statusCode)) {
throw new IOException("Unexpected code " + statusCode);
}
return statusCode;
}
conn.disconnect();
if(!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
/**
* Upload files in Zenodo.
*
* @param is the inputStream for the file to upload
* @param file_name the name of the file as it will appear on Zenodo
* @return the response code
*/
public int uploadIS(InputStream is, String file_name) throws IOException {
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
URL url = new URL(bucket + "/" + file_name);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("PUT");
return zenodoModel.getLinks().getBucket();
byte[] buf = new byte[8192];
int length;
try (OutputStream os = conn.getOutputStream()) {
while ((length = is.read(buf)) != -1) {
os.write(buf, 0, length);
os.flush();
}
}
int responseCode = conn.getResponseCode();
if (!checkOKStatus(responseCode)) {
throw new IOException("Unexpected code " + responseCode + getBody(conn));
}
return responseCode;
}
}
@NotNull
private String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
StringBuilder response = new StringBuilder();
String responseLine = null;
while ((responseLine = br.readLine()) != null) {
response.append(responseLine.trim());
}
}
body = response.toString();
}
return body;
}
/**
* Associates metadata information to the current deposition
*
* @param metadata the metadata
* @return response code
* @throws IOException
*/
public int sendMretadata(String metadata) throws IOException {
URL url = new URL(urlString + "/" + deposition_id);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("PUT");
try (OutputStream os = conn.getOutputStream()) {
byte[] input = metadata.getBytes("utf-8");
os.write(input, 0, input.length);
}
final int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + getBody(conn));
return responseCode;
}
private boolean checkOKStatus(int responseCode) {
if (HttpURLConnection.HTTP_OK != responseCode ||
HttpURLConnection.HTTP_CREATED != responseCode)
return true;
return false;
}
/**
* To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
* for the new version.
*
* @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
* part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
* concept_rec_id = 656930
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
setDepositionId(concept_rec_id, 1);
String json = "{}";
URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("POST");
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
}
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return responseCode;
}
/**
* To finish uploading a version or new deposition not published
* It sets the deposition_id and the bucket to be used
*
*
* @param deposition_id the deposition id of the not yet published upload
* concept_rec_id = 656930
* @return response code
* @throws IOException
* @throws MissingConceptDoiException
*/
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
this.deposition_id = deposition_id;
String json = "{}";
URL url = new URL(urlString + "/" + deposition_id);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setRequestMethod("POST");
conn.setDoOutput(true);
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
}
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket();
return responseCode;
}
private void setDepositionId(String concept_rec_id, Integer page) throws Exception, MissingConceptDoiException {
ZenodoModelList zenodoModelList = new Gson()
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
deposition_id = zm.getId();
return;
}
}
if (zenodoModelList.size() == 0)
throw new MissingConceptDoiException(
"The concept record id specified was missing in the list of depositions");
setDepositionId(concept_rec_id, page + 1);
}
private String getPrevDepositions(String page) throws Exception {
HttpGet get = new HttpGet(urlString);
URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
get.setURI(uri);
get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
try (CloseableHttpClient client = HttpClients.createDefault()) {
CloseableHttpResponse response = client.execute(get);
final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
return body;
}
}
private String getBucket(String inputUurl) throws IOException {
URL url = new URL(inputUurl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("GET");
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
return zenodoModel.getLinks().getBucket();
}
}

View File

@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,132 +0,0 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>accessToken</name>
<description>the access token used for the deposition in Zenodo</description>
</property>
<property>
<name>connectionUrl</name>
<description>the connection url for Zenodo</description>
</property>
<property>
<name>metadata</name>
<value>""</value>
<description> the metadata associated to the deposition</description>
</property>
<property>
<name>depositionType</name>
<description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
</property>
<property>
<name>conceptRecordId</name>
<value>none</value>
<description>for new version, the id of the record for the old deposition</description>
</property>
<property>
<name>depositionId</name>
<value>none</value>
<description>the depositionId of a deposition open that has to be added content</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="only_upload"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
</java>
<ok to="send_zenodo"/>
<error to="Kill"/>
</action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/tar/</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
<arg>--depositionType</arg><arg>${depositionType}</arg>
<arg>--depositionId</arg><arg>${depositionId}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>