forked from D-Net/dnet-hadoop

merging from bulkTag branch

commit 5f1ed61c1f
@@ -1,53 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import okhttp3.MediaType;
-import okhttp3.RequestBody;
-import okhttp3.internal.Util;
-import okio.BufferedSink;
-import okio.Okio;
-import okio.Source;
-
-public class InputStreamRequestBody extends RequestBody {
-
-	private final InputStream inputStream;
-	private final MediaType mediaType;
-	private final long lenght;
-
-	public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
-
-		return new InputStreamRequestBody(inputStream, mediaType, len);
-	}
-
-	private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
-		this.inputStream = inputStream;
-		this.mediaType = mediaType;
-		this.lenght = len;
-	}
-
-	@Override
-	public MediaType contentType() {
-		return mediaType;
-	}
-
-	@Override
-	public long contentLength() {
-
-		return lenght;
-
-	}
-
-	@Override
-	public void writeTo(BufferedSink sink) throws IOException {
-		Source source = null;
-		try {
-			source = Okio.source(inputStream);
-			sink.writeAll(source);
-		} finally {
-			Util.closeQuietly(source);
-		}
-	}
-}
@@ -1,8 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-public class MissingConceptDoiException extends Throwable {
-	public MissingConceptDoiException(String message) {
-		super(message);
-	}
-}
@@ -1,365 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.*;
-import java.io.IOException;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.http.HttpHeaders;
-import org.apache.http.entity.ContentType;
-import org.jetbrains.annotations.NotNull;
-
-import com.google.gson.Gson;
-
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
-import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
-import okhttp3.*;
-
-public class ZenodoAPIClient implements Serializable {
-
-	String urlString;
-	String bucket;
-
-	String deposition_id;
-	String access_token;
-
-	public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
-
-	private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
-
-	public String getUrlString() {
-		return urlString;
-	}
-
-	public void setUrlString(String urlString) {
-		this.urlString = urlString;
-	}
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public void setDeposition_id(String deposition_id) {
-		this.deposition_id = deposition_id;
-	}
-
-	public ZenodoAPIClient(String urlString, String access_token) {
-
-		this.urlString = urlString;
-		this.access_token = access_token;
-	}
-
-	/**
-	 * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int newDeposition() throws IOException {
-		String json = "{}";
-
-		URL url = new URL(urlString);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
-		this.bucket = newSubmission.getLinks().getBucket();
-		this.deposition_id = newSubmission.getId();
-
-		return responseCode;
-	}
-
-	/**
-	 * Upload files in Zenodo.
-	 *
-	 * @param is the inputStream for the file to upload
-	 * @param file_name the name of the file as it will appear on Zenodo
-	 * @return the response code
-	 */
-	public int uploadIS(InputStream is, String file_name) throws IOException {
-
-		URL url = new URL(bucket + "/" + file_name);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		byte[] buf = new byte[8192];
-		int length;
-		try (OutputStream os = conn.getOutputStream()) {
-			while ((length = is.read(buf)) != -1) {
-				os.write(buf, 0, length);
-			}
-
-		}
-		int responseCode = conn.getResponseCode();
-		if (!checkOKStatus(responseCode)) {
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-		}
-
-		return responseCode;
-	}
-
-	@NotNull
-	private String getBody(HttpURLConnection conn) throws IOException {
-		String body = "{}";
-		try (BufferedReader br = new BufferedReader(
-			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
-			StringBuilder response = new StringBuilder();
-			String responseLine = null;
-			while ((responseLine = br.readLine()) != null) {
-				response.append(responseLine.trim());
-			}
-
-			body = response.toString();
-
-		}
-		return body;
-	}
-
-	/**
-	 * Associates metadata information to the current deposition
-	 *
-	 * @param metadata the metadata
-	 * @return response code
-	 * @throws IOException
-	 */
-	public int sendMretadata(String metadata) throws IOException {
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("PUT");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = metadata.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		final int responseCode = conn.getResponseCode();
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + getBody(conn));
-
-		return responseCode;
-
-	}
-
-	private boolean checkOKStatus(int responseCode) {
-
-		if (HttpURLConnection.HTTP_OK != responseCode ||
-			HttpURLConnection.HTTP_CREATED != responseCode)
-			return true;
-		return false;
-	}
-
-	/**
-	 * To publish the current deposition. It works for both new deposition or new version of an old deposition
-	 *
-	 * @return response code
-	 * @throws IOException
-	 */
-	@Deprecated
-	public int publish() throws IOException {
-
-		String json = "{}";
-
-		OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
-
-		RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
-
-		Request request = new Request.Builder()
-			.url(urlString + "/" + deposition_id + "/actions/publish")
-			.addHeader("Authorization", "Bearer " + access_token)
-			.post(body)
-			.build();
-
-		try (Response response = httpClient.newCall(request).execute()) {
-
-			if (!response.isSuccessful())
-				throw new IOException("Unexpected code " + response + response.body().string());
-
-			return response.code();
-
-		}
-	}
-
-	/**
-	 * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used
-	 * for the new version.
-	 *
-	 * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last
-	 *            part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 * @throws IOException
-	 * @throws MissingConceptDoiException
-	 */
-	public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
-		setDepositionId(concept_rec_id, 1);
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("POST");
-
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		String latest_draft = zenodoModel.getLinks().getLatest_draft();
-		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
-		bucket = getBucket(latest_draft);
-
-		return responseCode;
-
-	}
-
-	/**
-	 * To finish uploading a version or new deposition not published
-	 * It sets the deposition_id and the bucket to be used
-	 *
-	 *
-	 * @param deposition_id the deposition id of the not yet published upload
-	 *            concept_rec_id = 656930
-	 * @return response code
-	 * @throws IOException
-	 * @throws MissingConceptDoiException
-	 */
-	public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
-
-		this.deposition_id = deposition_id;
-
-		String json = "{}";
-
-		URL url = new URL(urlString + "/" + deposition_id);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setRequestMethod("POST");
-		conn.setDoOutput(true);
-		try (OutputStream os = conn.getOutputStream()) {
-			byte[] input = json.getBytes("utf-8");
-			os.write(input, 0, input.length);
-		}
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-		conn.disconnect();
-
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-		bucket = zenodoModel.getLinks().getBucket();
-
-		return responseCode;
-
-	}
-
-	private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
-
-		ZenodoModelList zenodoModelList = new Gson()
-			.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
-
-		for (ZenodoModel zm : zenodoModelList) {
-			if (zm.getConceptrecid().equals(concept_rec_id)) {
-				deposition_id = zm.getId();
-				return;
-			}
-		}
-		if (zenodoModelList.size() == 0)
-			throw new MissingConceptDoiException(
-				"The concept record id specified was missing in the list of depositions");
-		setDepositionId(concept_rec_id, page + 1);
-
-	}
-
-	private String getPrevDepositions(String page) throws IOException {
-
-		HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
-		urlBuilder.addQueryParameter("page", page);
-
-		URL url = new URL(urlBuilder.build().toString());
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		return body;
-
-	}
-
-	private String getBucket(String inputUurl) throws IOException {
-
-		URL url = new URL(inputUurl);
-		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-		conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-		conn.setDoOutput(true);
-		conn.setRequestMethod("GET");
-
-		String body = getBody(conn);
-
-		int responseCode = conn.getResponseCode();
-
-		conn.disconnect();
-		if (!checkOKStatus(responseCode))
-			throw new IOException("Unexpected code " + responseCode + body);
-
-		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-
-		return zenodoModel.getLinks().getBucket();
-
-	}
-
-}
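For context, the removed client was driven in a fixed sequence (create or reopen a deposition, stream the payload into the bucket, attach metadata, publish), as exercised by the ZenodoAPIClientTest removed further down. A minimal usage sketch, with placeholder token and file names:

ZenodoAPIClient client = new ZenodoAPIClient(
	"https://sandbox.zenodo.org/api/deposit/depositions", "<ACCESS_TOKEN>");
client.newDeposition(); // sets deposition_id and the upload bucket on the client
try (InputStream is = new FileInputStream("dump.json.gz")) {
	client.uploadIS(is, "dump.json.gz"); // PUT <bucket>/dump.json.gz
}
client.sendMretadata("{}"); // JSON deposition metadata; the method-name typo is the original API's
client.publish(); // POST <urlString>/<deposition_id>/actions/publish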
@@ -1,14 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Community {
-	private String identifier;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-}
@@ -1,47 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Creator {
-	private String affiliation;
-	private String name;
-	private String orcid;
-
-	public String getAffiliation() {
-		return affiliation;
-	}
-
-	public void setAffiliation(String affiliation) {
-		this.affiliation = affiliation;
-	}
-
-	public String getName() {
-		return name;
-	}
-
-	public void setName(String name) {
-		this.name = name;
-	}
-
-	public String getOrcid() {
-		return orcid;
-	}
-
-	public void setOrcid(String orcid) {
-		this.orcid = orcid;
-	}
-
-	public static Creator newInstance(String name, String affiliation, String orcid) {
-		Creator c = new Creator();
-		if (name != null) {
-			c.name = name;
-		}
-		if (affiliation != null) {
-			c.affiliation = affiliation;
-		}
-		if (orcid != null) {
-			c.orcid = orcid;
-		}
-
-		return c;
-	}
-}
@@ -1,44 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class File implements Serializable {
-	private String checksum;
-	private String filename;
-	private long filesize;
-	private String id;
-
-	public String getChecksum() {
-		return checksum;
-	}
-
-	public void setChecksum(String checksum) {
-		this.checksum = checksum;
-	}
-
-	public String getFilename() {
-		return filename;
-	}
-
-	public void setFilename(String filename) {
-		this.filename = filename;
-	}
-
-	public long getFilesize() {
-		return filesize;
-	}
-
-	public void setFilesize(long filesize) {
-		this.filesize = filesize;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-}
@@ -1,23 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Grant implements Serializable {
-	private String id;
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public static Grant newInstance(String id) {
-		Grant g = new Grant();
-		g.id = id;
-
-		return g;
-	}
-}
@@ -1,92 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Links implements Serializable {
-
-	private String bucket;
-
-	private String discard;
-
-	private String edit;
-	private String files;
-	private String html;
-	private String latest_draft;
-	private String latest_draft_html;
-	private String publish;
-
-	private String self;
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public String getDiscard() {
-		return discard;
-	}
-
-	public void setDiscard(String discard) {
-		this.discard = discard;
-	}
-
-	public String getEdit() {
-		return edit;
-	}
-
-	public void setEdit(String edit) {
-		this.edit = edit;
-	}
-
-	public String getFiles() {
-		return files;
-	}
-
-	public void setFiles(String files) {
-		this.files = files;
-	}
-
-	public String getHtml() {
-		return html;
-	}
-
-	public void setHtml(String html) {
-		this.html = html;
-	}
-
-	public String getLatest_draft() {
-		return latest_draft;
-	}
-
-	public void setLatest_draft(String latest_draft) {
-		this.latest_draft = latest_draft;
-	}
-
-	public String getLatest_draft_html() {
-		return latest_draft_html;
-	}
-
-	public void setLatest_draft_html(String latest_draft_html) {
-		this.latest_draft_html = latest_draft_html;
-	}
-
-	public String getPublish() {
-		return publish;
-	}
-
-	public void setPublish(String publish) {
-		this.publish = publish;
-	}
-
-	public String getSelf() {
-		return self;
-	}
-
-	public void setSelf(String self) {
-		this.self = self;
-	}
-}
@@ -1,153 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class Metadata implements Serializable {
-
-	private String access_right;
-	private List<Community> communities;
-	private List<Creator> creators;
-	private String description;
-	private String doi;
-	private List<Grant> grants;
-	private List<String> keywords;
-	private String language;
-	private String license;
-	private PrereserveDoi prereserve_doi;
-	private String publication_date;
-	private List<String> references;
-	private List<RelatedIdentifier> related_identifiers;
-	private String title;
-	private String upload_type;
-	private String version;
-
-	public String getUpload_type() {
-		return upload_type;
-	}
-
-	public void setUpload_type(String upload_type) {
-		this.upload_type = upload_type;
-	}
-
-	public String getVersion() {
-		return version;
-	}
-
-	public void setVersion(String version) {
-		this.version = version;
-	}
-
-	public String getAccess_right() {
-		return access_right;
-	}
-
-	public void setAccess_right(String access_right) {
-		this.access_right = access_right;
-	}
-
-	public List<Community> getCommunities() {
-		return communities;
-	}
-
-	public void setCommunities(List<Community> communities) {
-		this.communities = communities;
-	}
-
-	public List<Creator> getCreators() {
-		return creators;
-	}
-
-	public void setCreators(List<Creator> creators) {
-		this.creators = creators;
-	}
-
-	public String getDescription() {
-		return description;
-	}
-
-	public void setDescription(String description) {
-		this.description = description;
-	}
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public List<Grant> getGrants() {
-		return grants;
-	}
-
-	public void setGrants(List<Grant> grants) {
-		this.grants = grants;
-	}
-
-	public List<String> getKeywords() {
-		return keywords;
-	}
-
-	public void setKeywords(List<String> keywords) {
-		this.keywords = keywords;
-	}
-
-	public String getLanguage() {
-		return language;
-	}
-
-	public void setLanguage(String language) {
-		this.language = language;
-	}
-
-	public String getLicense() {
-		return license;
-	}
-
-	public void setLicense(String license) {
-		this.license = license;
-	}
-
-	public PrereserveDoi getPrereserve_doi() {
-		return prereserve_doi;
-	}
-
-	public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
-		this.prereserve_doi = prereserve_doi;
-	}
-
-	public String getPublication_date() {
-		return publication_date;
-	}
-
-	public void setPublication_date(String publication_date) {
-		this.publication_date = publication_date;
-	}
-
-	public List<String> getReferences() {
-		return references;
-	}
-
-	public void setReferences(List<String> references) {
-		this.references = references;
-	}
-
-	public List<RelatedIdentifier> getRelated_identifiers() {
-		return related_identifiers;
-	}
-
-	public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
-		this.related_identifiers = related_identifiers;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
@@ -1,25 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class PrereserveDoi implements Serializable {
-	private String doi;
-	private String recid;
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public String getRecid() {
-		return recid;
-	}
-
-	public void setRecid(String recid) {
-		this.recid = recid;
-	}
-}
@@ -1,43 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class RelatedIdentifier implements Serializable {
-	private String identifier;
-	private String relation;
-	private String resource_type;
-	private String scheme;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-
-	public String getRelation() {
-		return relation;
-	}
-
-	public void setRelation(String relation) {
-		this.relation = relation;
-	}
-
-	public String getResource_type() {
-		return resource_type;
-	}
-
-	public void setResource_type(String resource_type) {
-		this.resource_type = resource_type;
-	}
-
-	public String getScheme() {
-		return scheme;
-	}
-
-	public void setScheme(String scheme) {
-		this.scheme = scheme;
-	}
-}
@@ -1,118 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class ZenodoModel implements Serializable {
-
-	private String conceptrecid;
-	private String created;
-
-	private List<File> files;
-	private String id;
-	private Links links;
-	private Metadata metadata;
-	private String modified;
-	private String owner;
-	private String record_id;
-	private String state;
-	private boolean submitted;
-	private String title;
-
-	public String getConceptrecid() {
-		return conceptrecid;
-	}
-
-	public void setConceptrecid(String conceptrecid) {
-		this.conceptrecid = conceptrecid;
-	}
-
-	public String getCreated() {
-		return created;
-	}
-
-	public void setCreated(String created) {
-		this.created = created;
-	}
-
-	public List<File> getFiles() {
-		return files;
-	}
-
-	public void setFiles(List<File> files) {
-		this.files = files;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public Links getLinks() {
-		return links;
-	}
-
-	public void setLinks(Links links) {
-		this.links = links;
-	}
-
-	public Metadata getMetadata() {
-		return metadata;
-	}
-
-	public void setMetadata(Metadata metadata) {
-		this.metadata = metadata;
-	}
-
-	public String getModified() {
-		return modified;
-	}
-
-	public void setModified(String modified) {
-		this.modified = modified;
-	}
-
-	public String getOwner() {
-		return owner;
-	}
-
-	public void setOwner(String owner) {
-		this.owner = owner;
-	}
-
-	public String getRecord_id() {
-		return record_id;
-	}
-
-	public void setRecord_id(String record_id) {
-		this.record_id = record_id;
-	}
-
-	public String getState() {
-		return state;
-	}
-
-	public void setState(String state) {
-		this.state = state;
-	}
-
-	public boolean isSubmitted() {
-		return submitted;
-	}
-
-	public void setSubmitted(boolean submitted) {
-		this.submitted = submitted;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
@@ -1,7 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.util.ArrayList;
-
-public class ZenodoModelList extends ArrayList<ZenodoModel> {
-}
@@ -1,109 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-
-@Disabled
-class ZenodoAPIClientTest {
-
-	private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
-	private final String ACCESS_TOKEN = "";
-
-	private final String CONCEPT_REC_ID = "657113";
-
-	private final String depositionId = "674915";
-
-	@Test
-	void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewDeposition() throws IOException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(201, client.newDeposition());
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionNewName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionOldName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-}
@@ -1,6 +1,24 @@
 package eu.dnetlib.pace.util;
 
+/*
+ * Diff Match and Patch
+ * Copyright 2018 The diff-match-patch Authors.
+ * https://github.com/google/diff-match-patch
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 /*
  * Diff Match and Patch
  * Copyright 2018 The diff-match-patch Authors.
@@ -0,0 +1,83 @@
+
+package eu.dnetlib.dhp.api;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+public class QueryCommunityAPI {
+	private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";
+	private static final String BETA_BASE_URL = "https://beta.services.openaire.eu/openaire/";
+
+	private static String get(String geturl) throws IOException {
+		URL url = new URL(geturl);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		conn.setDoOutput(true);
+		conn.setRequestMethod("GET");
+
+		int responseCode = conn.getResponseCode();
+		String body = getBody(conn);
+		conn.disconnect();
+		if (responseCode != HttpURLConnection.HTTP_OK)
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		return body;
+	}
+
+	public static String communities(boolean production) throws IOException {
+		if (production)
+			return get(PRODUCTION_BASE_URL + "community/communities");
+		return get(BETA_BASE_URL + "community/communities");
+	}
+
+	public static String community(String id, boolean production) throws IOException {
+		if (production)
+			return get(PRODUCTION_BASE_URL + "community/" + id);
+		return get(BETA_BASE_URL + "community/" + id);
+	}
+
+	public static String communityDatasource(String id, boolean production) throws IOException {
+		if (production)
+			return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
+		return get(BETA_BASE_URL + "community/" + id + "/contentproviders");
+
+	}
+
+	public static String communityPropagationOrganization(String id, boolean production) throws IOException {
+		if (production)
+			return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
+		return get(BETA_BASE_URL + "community/" + id + "/propagationOrganizations");
+	}
+
+	public static String communityProjects(String id, String page, String size, boolean production) throws IOException {
+		if (production)
+			return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
+		return get(BETA_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
+	}
+
+	@NotNull
+	private static String getBody(HttpURLConnection conn) throws IOException {
+		String body = "{}";
+		try (BufferedReader br = new BufferedReader(
+			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
+			StringBuilder response = new StringBuilder();
+			String responseLine = null;
+			while ((responseLine = br.readLine()) != null) {
+				response.append(responseLine.trim());
+			}
+
+			body = response.toString();
+
+		}
+		return body;
+	}
+
+}
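A quick sketch of how these endpoints pair with the model classes added below; Utils (next hunk) does exactly this with its shared ObjectMapper, and CommunitySummary is a List of CommunityModel:

ObjectMapper mapper = new ObjectMapper();
// true = production API, false = beta API
CommunitySummary communities = mapper
	.readValue(QueryCommunityAPI.communities(true), CommunitySummary.class);
for (CommunityModel cm : communities) {
	System.out.println(cm.getId() + " / " + cm.getType() + " / " + cm.getStatus());
}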
@@ -0,0 +1,166 @@
+
+package eu.dnetlib.dhp.api;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.amazonaws.util.StringUtils;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Maps;
+
+import eu.dnetlib.dhp.api.model.*;
+import eu.dnetlib.dhp.bulktag.community.Community;
+import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
+import eu.dnetlib.dhp.bulktag.community.Provider;
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
+
+/**
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+public class Utils implements Serializable {
+	private static final ObjectMapper MAPPER = new ObjectMapper();
+	private static final VerbResolver resolver = VerbResolverFactory.newInstance();
+
+	private static final Logger log = LoggerFactory.getLogger(Utils.class);
+
+	public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException {
+		final Map<String, Community> communities = Maps.newHashMap();
+		List<Community> validCommunities = new ArrayList<>();
+		getValidCommunities(production)
+			.forEach(community -> {
+				try {
+					CommunityModel cm = MAPPER
+						.readValue(QueryCommunityAPI.community(community.getId(), production), CommunityModel.class);
+					validCommunities.add(getCommunity(cm));
+				} catch (IOException e) {
+					throw new RuntimeException(e);
+				}
+			});
+		validCommunities.forEach(community -> {
+			try {
+				DatasourceList dl = MAPPER
+					.readValue(
+						QueryCommunityAPI.communityDatasource(community.getId(), production), DatasourceList.class);
+				community.setProviders(dl.stream().map(d -> {
+					if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
+						return null;
+					Provider p = new Provider();
+					p.setOpenaireId("10|" + d.getOpenaireId());
+					p.setSelectionConstraints(d.getSelectioncriteria());
+					if (p.getSelectionConstraints() != null)
+						p.getSelectionConstraints().setSelection(resolver);
+					return p;
+				})
+					.filter(Objects::nonNull)
+					.collect(Collectors.toList()));
+			} catch (IOException e) {
+				throw new RuntimeException(e);
+			}
+		});
+
+		validCommunities.forEach(community -> {
+			if (community.isValid())
+				communities.put(community.getId(), community);
+		});
+		return new CommunityConfiguration(communities);
+	}
+
+	private static Community getCommunity(CommunityModel cm) {
+		Community c = new Community();
+		c.setId(cm.getId());
+		c.setZenodoCommunities(cm.getOtherZenodoCommunities());
+		if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
+			c.getZenodoCommunities().add(cm.getZenodoCommunity());
+		c.setSubjects(cm.getSubjects());
+		c.getSubjects().addAll(cm.getFos());
+		c.getSubjects().addAll(cm.getSdg());
+		if (cm.getAdvancedConstraints() != null) {
+			c.setConstraints(cm.getAdvancedConstraints());
+			c.getConstraints().setSelection(resolver);
+		}
+		if (cm.getRemoveConstraints() != null) {
+			c.setRemoveConstraints(cm.getRemoveConstraints());
+			c.getRemoveConstraints().setSelection(resolver);
+		}
+		return c;
+	}
+
+	public static List<CommunityModel> getValidCommunities(boolean production) throws IOException {
+		return MAPPER
+			.readValue(QueryCommunityAPI.communities(production), CommunitySummary.class)
+			.stream()
+			.filter(
+				community -> !community.getStatus().equals("hidden") &&
+					(community.getType().equals("ri") || community.getType().equals("community")))
+			.collect(Collectors.toList());
+	}
+
+	/**
+	 * it returns for each organization the list of associated communities
+	 */
+	public static CommunityEntityMap getCommunityOrganization(boolean production) throws IOException {
+		CommunityEntityMap organizationMap = new CommunityEntityMap();
+		getValidCommunities(production)
+			.forEach(community -> {
+				String id = community.getId();
+				try {
+					List<String> associatedOrgs = MAPPER
+						.readValue(
+							QueryCommunityAPI.communityPropagationOrganization(id, production), OrganizationList.class);
+					associatedOrgs.forEach(o -> {
+						if (!organizationMap
+							.keySet()
+							.contains(
+								"20|" + o))
+							organizationMap.put("20|" + o, new ArrayList<>());
+						organizationMap.get("20|" + o).add(community.getId());
+					});
+				} catch (IOException e) {
+					throw new RuntimeException(e);
+				}
+			});
+
+		return organizationMap;
+	}
+
+	public static CommunityEntityMap getCommunityProjects(boolean production) throws IOException {
+		CommunityEntityMap projectMap = new CommunityEntityMap();
+		getValidCommunities(production)
+			.forEach(community -> {
+				int page = -1;
+				int size = 100;
+				ContentModel cm = new ContentModel();
+				do {
+					page++;
+					try {
+						cm = MAPPER
+							.readValue(
+								QueryCommunityAPI
+									.communityProjects(
+										community.getId(), String.valueOf(page), String.valueOf(size), production),
+								ContentModel.class);
+						if (cm.getContent().size() > 0) {
+							cm.getContent().forEach(p -> {
+								if (!projectMap.keySet().contains("40|" + p.getOpenaireId()))
+									projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>());
+								projectMap.get("40|" + p.getOpenaireId()).add(community.getId());
+							});
+						}
+					} catch (IOException e) {
+						throw new RuntimeException(e);
+					}
+				} while (!cm.getLast());
+			});
+		return projectMap;
+	}
+}
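Taken together, these are the three entry points that replace the former information-service lookup; a minimal sketch of a caller (the flag selects the production or beta API):

// Community configuration for bulk tagging
CommunityConfiguration cc = Utils.getCommunityConfiguration(true);

// organization -> community ids, keys prefixed with "20|"
CommunityEntityMap orgMap = Utils.getCommunityOrganization(true);

// project -> community ids, keys prefixed with "40|", fetched 100 projects per page
CommunityEntityMap projectMap = Utils.getCommunityProjects(true);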
@@ -0,0 +1,43 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
+
+@JsonAutoDetect
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class CommunityContentprovider {
+	private String openaireId;
+	private SelectionConstraints selectioncriteria;
+
+	private String enabled;
+
+	public String getEnabled() {
+		return enabled;
+	}
+
+	public void setEnabled(String enabled) {
+		this.enabled = enabled;
+	}
+
+	public String getOpenaireId() {
+		return openaireId;
+	}
+
+	public void setOpenaireId(final String openaireId) {
+		this.openaireId = openaireId;
+	}
+
+	public SelectionConstraints getSelectioncriteria() {
+
+		return this.selectioncriteria;
+	}
+
+	public void setSelectioncriteria(SelectionConstraints selectioncriteria) {
+		this.selectioncriteria = selectioncriteria;
+
+	}
+}
@@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+public class CommunityEntityMap extends HashMap<String, List<String>> {
+
+	public CommunityEntityMap() {
+		super();
+	}
+
+	public List<String> get(String key) {
+
+		if (super.get(key) == null) {
+			return new ArrayList<>();
+		}
+		return super.get(key);
+	}
+}
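The overridden get never returns null: a missing key yields a fresh empty list. Note that the empty list is not stored in the map, which is why Utils guards its inserts with keySet().contains(...) before put. For example:

CommunityEntityMap map = new CommunityEntityMap();
map.get("20|unknownOrg").size(); // 0, no NullPointerException
map.get("20|unknownOrg").add("x"); // modifies a throwaway list; the map stays empty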
@@ -0,0 +1,108 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
+
+/**
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class CommunityModel implements Serializable {
+	private String id;
+	private String type;
+	private String status;
+
+	private String zenodoCommunity;
+	private List<String> subjects;
+	private List<String> otherZenodoCommunities;
+	private List<String> fos;
+	private List<String> sdg;
+	private SelectionConstraints advancedConstraints;
+	private SelectionConstraints removeConstraints;
+
+	public String getZenodoCommunity() {
+		return zenodoCommunity;
+	}
+
+	public void setZenodoCommunity(String zenodoCommunity) {
+		this.zenodoCommunity = zenodoCommunity;
+	}
+
+	public List<String> getSubjects() {
+		return subjects;
+	}
+
+	public void setSubjects(List<String> subjects) {
+		this.subjects = subjects;
+	}
+
+	public List<String> getOtherZenodoCommunities() {
+		return otherZenodoCommunities;
+	}
+
+	public void setOtherZenodoCommunities(List<String> otherZenodoCommunities) {
+		this.otherZenodoCommunities = otherZenodoCommunities;
+	}
+
+	public List<String> getFos() {
+		return fos;
+	}
+
+	public void setFos(List<String> fos) {
+		this.fos = fos;
+	}
+
+	public List<String> getSdg() {
+		return sdg;
+	}
+
+	public void setSdg(List<String> sdg) {
+		this.sdg = sdg;
+	}
+
+	public SelectionConstraints getRemoveConstraints() {
+		return removeConstraints;
+	}
+
+	public void setRemoveConstraints(SelectionConstraints removeConstraints) {
+		this.removeConstraints = removeConstraints;
+	}
+
+	public SelectionConstraints getAdvancedConstraints() {
+		return advancedConstraints;
+	}
+
+	public void setAdvancedConstraints(SelectionConstraints advancedConstraints) {
+		this.advancedConstraints = advancedConstraints;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public String getType() {
+		return type;
+	}
+
+	public void setType(String type) {
+		this.type = type;
+	}
+
+	public String getStatus() {
+		return status;
+	}
+
+	public void setStatus(String status) {
+		this.status = status;
+	}
+}
@@ -0,0 +1,15 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+/**
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
+	public CommunitySummary() {
+		super();
+	}
+}
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class ContentModel implements Serializable {
+	private List<ProjectModel> content;
+	private Integer totalPages;
+	private Boolean last;
+	private Integer number;
+
+	public List<ProjectModel> getContent() {
+		return content;
+	}
+
+	public void setContent(List<ProjectModel> content) {
+		this.content = content;
+	}
+
+	public Integer getTotalPages() {
+		return totalPages;
+	}
+
+	public void setTotalPages(Integer totalPages) {
+		this.totalPages = totalPages;
+	}
+
+	public Boolean getLast() {
+		return last;
+	}
+
+	public void setLast(Boolean last) {
+		this.last = last;
+	}
+
+	public Integer getNumber() {
+		return number;
+	}
+
+	public void setNumber(Integer number) {
+		this.number = number;
+	}
+}
@@ -0,0 +1,13 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+import eu.dnetlib.dhp.api.model.CommunityContentprovider;
+
+public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
+	public DatasourceList() {
+		super();
+	}
+}
@@ -0,0 +1,16 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+/**
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+public class OrganizationList extends ArrayList<String> implements Serializable {
+
+	public OrganizationList() {
+		super();
+	}
+}
@@ -0,0 +1,24 @@
+
+package eu.dnetlib.dhp.api.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class ProjectModel implements Serializable {
+
+	private String openaireId;
+
+	public String getOpenaireId() {
+		return openaireId;
+	}
+
+	public void setOpenaireId(String openaireId) {
+		this.openaireId = openaireId;
+	}
+}
@@ -9,7 +9,6 @@ import java.util.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.FilterFunction;
-import org.apache.spark.api.java.function.ForeachFunction;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -21,8 +20,11 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.gson.Gson;
 
+import eu.dnetlib.dhp.api.Utils;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.bulktag.community.*;
+import eu.dnetlib.dhp.schema.common.EntityType;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
 import eu.dnetlib.dhp.schema.oaf.Datasource;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@@ -54,50 +56,38 @@ public class SparkBulkTagJob
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		Boolean isTest = Optional
-			.ofNullable(parser.get("isTest"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.FALSE);
-		log.info("isTest: {} ", isTest);
-
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);

 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

+		final boolean production = Boolean.valueOf(parser.get("production"));
+		log.info("production: {}", production);
+
 		ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
 		log.info("pathMap: {}", new Gson().toJson(protoMappingParams));

-		final String resultClassName = parser.get("resultTableName");
-		log.info("resultTableName: {}", resultClassName);
-
-		final Boolean saveGraph = Optional
-			.ofNullable(parser.get("saveGraph"))
-			.map(Boolean::valueOf)
-			.orElse(Boolean.TRUE);
-		log.info("saveGraph: {}", saveGraph);
-
-		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
-
 		SparkConf conf = new SparkConf();
 		CommunityConfiguration cc;

-		String taggingConf = parser.get("taggingConf");
+		String taggingConf = Optional
+			.ofNullable(parser.get("taggingConf"))
+			.map(String::valueOf)
+			.orElse(null);

-		if (isTest) {
+		if (taggingConf != null) {
 			cc = CommunityConfigurationFactory.newInstance(taggingConf);
 		} else {
-			cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl"));
+			cc = Utils.getCommunityConfiguration(production);
 		}

 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
-				removeOutputDir(spark, outputPath);
 				extendCommunityConfigurationForEOSC(spark, inputPath, cc);
-				execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
+				execBulkTag(spark, inputPath, outputPath, protoMappingParams, cc);
 			});
 	}
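The hunk above retires the isTest/isLookUpUrl/resultTableName plumbing: a single nullable taggingConf argument now doubles as the test switch, and the community configuration otherwise comes from Utils.getCommunityConfiguration(production). A self-contained sketch of the null-as-absent pattern, with a hypothetical get() helper standing in for parser.get("taggingConf"):

import java.util.Optional;

public class TaggingConfSwitch {
	// Stand-in for parser.get("taggingConf"): null when the argument is absent.
	static String get(String[] args) {
		return args.length > 0 ? args[0] : null;
	}

	public static void main(String[] args) {
		String taggingConf = Optional
			.ofNullable(get(args))
			.map(String::valueOf)
			.orElse(null);
		if (taggingConf != null) {
			System.out.println("test run: build configuration from the inline XML");
		} else {
			System.out.println("regular run: fetch configuration from the service");
		}
	}
}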
@@ -106,10 +96,7 @@ public class SparkBulkTagJob

 		Dataset<String> datasources = readPath(
 			spark, inputPath
-				.substring(
-					0,
-					inputPath.lastIndexOf("/"))
-				+ "/datasource",
+				+ "datasource",
 			Datasource.class)
 				.filter((FilterFunction<Datasource>) ds -> isOKDatasource(ds))
 				.map((MapFunction<Datasource, String>) ds -> ds.getId(), Encoders.STRING());
@@ -117,10 +104,10 @@ public class SparkBulkTagJob
 		Map<String, List<Pair<String, SelectionConstraints>>> dsm = cc.getEoscDatasourceMap();

 		for (String ds : datasources.collectAsList()) {
-			final String dsId = ds.substring(3);
-			if (!dsm.containsKey(dsId)) {
+			// final String dsId = ds.substring(3);
+			if (!dsm.containsKey(ds)) {
 				ArrayList<Pair<String, SelectionConstraints>> eoscList = new ArrayList<>();
-				dsm.put(dsId, eoscList);
+				dsm.put(ds, eoscList);
 			}
 		}

@@ -142,22 +129,30 @@ public class SparkBulkTagJob
 		String inputPath,
 		String outputPath,
 		ProtoMap protoMappingParams,
-		Class<R> resultClazz,
 		CommunityConfiguration communityConfiguration) {

-		ResultTagger resultTagger = new ResultTagger();
-		readPath(spark, inputPath, resultClazz)
-			.map(patchResult(), Encoders.bean(resultClazz))
-			.filter(Objects::nonNull)
-			.map(
-				(MapFunction<R, R>) value -> resultTagger
-					.enrichContextCriteria(
-						value, communityConfiguration, protoMappingParams),
-				Encoders.bean(resultClazz))
-			.write()
-			.mode(SaveMode.Overwrite)
-			.option("compression", "gzip")
-			.json(outputPath);
+		ModelSupport.entityTypes
+			.keySet()
+			.parallelStream()
+			.filter(e -> ModelSupport.isResult(e))
+			.forEach(e -> {
+				removeOutputDir(spark, outputPath + e.name());
+				ResultTagger resultTagger = new ResultTagger();
+				Class<R> resultClazz = ModelSupport.entityTypes.get(e);
+				readPath(spark, inputPath + e.name(), resultClazz)
+					.map(patchResult(), Encoders.bean(resultClazz))
+					.filter(Objects::nonNull)
+					.map(
+						(MapFunction<R, R>) value -> resultTagger
+							.enrichContextCriteria(
+								value, communityConfiguration, protoMappingParams),
+						Encoders.bean(resultClazz))
+					.write()
+					.mode(SaveMode.Overwrite)
+					.option("compression", "gzip")
+					.json(outputPath + e.name());
+			});

 	}

 	public static <R> Dataset<R> readPath(
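The refactor above replaces four single-type Spark jobs with one job that walks every result type itself. A self-contained sketch of the iteration pattern, with a plain map standing in for ModelSupport.entityTypes (the real class maps EntityType constants to OAF model classes) and a set standing in for ModelSupport.isResult:

import java.util.Map;
import java.util.Set;

public class EntityTypeLoop {
	// Hypothetical stand-in for ModelSupport.entityTypes.
	static final Map<String, Class<?>> ENTITY_TYPES = Map.of(
		"publication", Object.class,
		"dataset", Object.class,
		"otherresearchproduct", Object.class,
		"software", Object.class,
		"datasource", Object.class);

	// Hypothetical stand-in for ModelSupport.isResult(e).
	static final Set<String> RESULT_TYPES = Set.of(
		"publication", "dataset", "otherresearchproduct", "software");

	public static void main(String[] args) {
		ENTITY_TYPES
			.keySet()
			.parallelStream()
			.filter(RESULT_TYPES::contains) // non-result types such as datasource are skipped
			.forEach(e -> System.out.println("tagging " + e + " -> <outputPath>/" + e));
	}
}

Each type now cleans and writes its own <outputPath>/<entitytype> folder, which is what lets the Oozie workflow further down collapse its four per-type actions into a single one.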
@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;

 import com.google.gson.Gson;

@@ -13,7 +14,7 @@ public class Community implements Serializable {
 	private String id;
 	private List<String> subjects = new ArrayList<>();
 	private List<Provider> providers = new ArrayList<>();
-	private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
+	private List<String> zenodoCommunities = new ArrayList<>();
 	private SelectionConstraints constraints = new SelectionConstraints();
 	private SelectionConstraints removeConstraints = new SelectionConstraints();

@@ -26,7 +27,7 @@ public class Community implements Serializable {
 		return !getSubjects().isEmpty()
 			|| !getProviders().isEmpty()
 			|| !getZenodoCommunities().isEmpty()
-			|| getConstraints().getCriteria() != null;
+			|| (Optional.ofNullable(getConstraints()).isPresent() && getConstraints().getCriteria() != null);
 	}

 	public String getId() {

@@ -53,11 +54,11 @@ public class Community implements Serializable {
 		this.providers = providers;
 	}

-	public List<ZenodoCommunity> getZenodoCommunities() {
+	public List<String> getZenodoCommunities() {
 		return zenodoCommunities;
 	}

-	public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
+	public void setZenodoCommunities(List<String> zenodoCommunities) {
 		this.zenodoCommunities = zenodoCommunities;
 	}
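Two independent fixes ride along in Community: the Zenodo communities shrink from full ZenodoCommunity objects to bare id strings, and isValid() stops assuming the constraints container is non-null. A small sketch of the null-safe guard, using a minimal hypothetical Constraints holder in place of SelectionConstraints:

import java.util.List;
import java.util.Optional;

public class NullSafeCriteria {
	// Minimal stand-in for SelectionConstraints.
	static class Constraints {
		List<String> criteria; // may legitimately be null when nothing was configured

		List<String> getCriteria() {
			return criteria;
		}
	}

	static boolean hasCriteria(Constraints c) {
		// The guard adopted in Community.isValid(): check the container first.
		return Optional.ofNullable(c).isPresent() && c.getCriteria() != null;
	}

	public static void main(String[] args) {
		System.out.println(hasCriteria(null)); // false instead of a NullPointerException
		System.out.println(hasCriteria(new Constraints())); // false: criteria unset
	}
}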
@@ -81,7 +81,7 @@ public class CommunityConfiguration implements Serializable {
 		this.removeConstraintsMap = removeConstraintsMap;
 	}

-	CommunityConfiguration(final Map<String, Community> communities) {
+	public CommunityConfiguration(final Map<String, Community> communities) {
 		this.communities = communities;
 		init();
 	}

@@ -117,10 +117,10 @@ public class CommunityConfiguration implements Serializable {
 			add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap);
 		}
 		// get zenodo communities
-		for (ZenodoCommunity zc : c.getZenodoCommunities()) {
+		for (String zc : c.getZenodoCommunities()) {
 			add(
-				zc.getZenodoCommunityId(),
-				new Pair<>(id, zc.getSelCriteria()),
+				zc,
+				new Pair<>(id, null),
 				zenodocommunityMap);
 		}
 		selectionConstraintsMap.put(id, c.getConstraints());
@@ -143,16 +143,16 @@ public class CommunityConfigurationFactory {
 		return providerList;
 	}

-	private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
+	private static List<String> parseZenodoCommunities(final Node node) {

 		final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
-		final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
+		final List<String> zenodoCommunityList = new ArrayList<>();
 		for (Node n : list) {
-			ZenodoCommunity zc = new ZenodoCommunity();
-			zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText());
-			zc.setSelCriteria(n.selectSingleNode("./selcriteria"));
+			// ZenodoCommunity zc = new ZenodoCommunity();
+			// zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText());
+			// zc.setSelCriteria(n.selectSingleNode("./selcriteria"));

-			zenodoCommunityList.add(zc);
+			zenodoCommunityList.add(n.selectSingleNode("./zenodoid").getText());
 		}

 		log.info("size of the zenodo community list " + zenodoCommunityList.size());
@@ -4,6 +4,8 @@ package eu.dnetlib.dhp.bulktag.community;
 import java.io.Serializable;
 import java.lang.reflect.InvocationTargetException;

+import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
+
 import eu.dnetlib.dhp.bulktag.criteria.Selection;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

@@ -11,7 +13,8 @@ public class Constraint implements Serializable {
 	private String verb;
 	private String field;
 	private String value;
-	// private String element;
+
+	@JsonIgnore
 	private Selection selection;

 	public String getVerb() {

@@ -38,10 +41,7 @@ public class Constraint implements Serializable {
 		this.value = value;
 	}

-	public void setSelection(Selection sel) {
-		selection = sel;
-	}
-
+	@JsonIgnore
 	public void setSelection(VerbResolver resolver)
 		throws InvocationTargetException, NoSuchMethodException, InstantiationException,
 		IllegalAccessException {

@@ -52,11 +52,4 @@ public class Constraint implements Serializable {
 		return selection.apply(metadata);
 	}

-	// public String getElement() {
-	// return element;
-	// }
-	//
-	// public void setElement(String element) {
-	// this.element = element;
-	// }
 }
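Marking selection with @JsonIgnore (here via the htrace-shaded Jackson annotation) keeps the reflectively resolved Selection object out of the constraint's JSON round trips; it is rebuilt from the verb and value through the VerbResolver overload instead. A sketch of the effect, using the standard com.fasterxml.jackson annotation rather than the shaded one:

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.databind.ObjectMapper;

public class IgnoreDemo {
	public static class Constraint {
		public String verb = "contains";
		public String field = "subject";

		@JsonIgnore // runtime-only state, rebuilt from the verb via the resolver
		public Object selection = new Object();
	}

	public static void main(String[] args) throws Exception {
		// selection is excluded: {"verb":"contains","field":"subject"}
		System.out.println(new ObjectMapper().writeValueAsString(new Constraint()));
	}
}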
@@ -1,34 +0,0 @@
-
-package eu.dnetlib.dhp.bulktag.community;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.dom4j.DocumentException;
-import org.xml.sax.SAXException;
-
-import com.google.common.base.Joiner;
-
-import eu.dnetlib.dhp.utils.ISLookupClientFactory;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-
-public class QueryInformationSystem {
-
-	public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
-		throws ISLookUpException, DocumentException, SAXException, IOException {
-		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
-		final List<String> res = isLookUp
-			.quickSearchProfile(
-				IOUtils
-					.toString(
-						QueryInformationSystem.class
-							.getResourceAsStream(
-								"/eu/dnetlib/dhp/bulktag/query.xq")));
-
-		final String xmlConf = "<communities>" + Joiner.on(" ").join(res) + "</communities>";
-
-		return CommunityConfigurationFactory.newInstance(xmlConf);
-	}
-}
@@ -85,16 +85,18 @@ public class ResultTagger implements Serializable {
 		conf
 			.getRemoveConstraintsMap()
 			.keySet()
-			.forEach(communityId -> {
-				if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
-					conf
-						.getRemoveConstraintsMap()
-						.get(communityId)
-						.getCriteria()
-						.stream()
-						.anyMatch(crit -> crit.verifyCriteria(param)))
-					removeCommunities.add(communityId);
-			});
+			.forEach(
+				communityId -> {
+					if (conf.getRemoveConstraintsMap().keySet().contains(communityId) &&
+						conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null &&
+						conf
+							.getRemoveConstraintsMap()
+							.get(communityId)
+							.getCriteria()
+							.stream()
+							.anyMatch(crit -> crit.verifyCriteria(param)))
+						removeCommunities.add(communityId);
+				});

 		// communities contains all the communities to be added as context for the result
 		final Set<String> communities = new HashSet<>();

@@ -124,10 +126,10 @@ public class ResultTagger implements Serializable {
 		if (Objects.nonNull(result.getInstance())) {
 			for (Instance i : result.getInstance()) {
 				if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) {
-					collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|"));
+					collfrom.add(i.getCollectedfrom().getKey());
 				}
 				if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) {
-					hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|"));
+					hostdby.add(i.getHostedby().getKey());
 				}

 			}
@@ -7,11 +7,13 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;

+import com.fasterxml.jackson.annotation.JsonAutoDetect;
 import com.google.gson.Gson;
 import com.google.gson.reflect.TypeToken;

 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

+@JsonAutoDetect
 public class SelectionConstraints implements Serializable {
 	private List<Constraints> criteria;

@@ -1,10 +1,5 @@
 [
-	{
-		"paramName":"is",
-		"paramLongName":"isLookUpUrl",
-		"paramDescription": "URL of the isLookUp Service",
-		"paramRequired": true
-	},
 	{
 		"paramName":"s",
 		"paramLongName":"sourcePath",

@@ -17,12 +12,7 @@
 		"paramDescription": "the json path associated to each selection field",
 		"paramRequired": true
 	},
-	{
-		"paramName":"tn",
-		"paramLongName":"resultTableName",
-		"paramDescription": "the name of the result table we are currently working on",
-		"paramRequired": true
-	},
 	{
 		"paramName": "out",
 		"paramLongName": "outputPath",

@@ -35,17 +25,19 @@
 		"paramDescription": "true if the spark session is managed, false otherwise",
 		"paramRequired": false
 	},
-	{
-		"paramName": "test",
-		"paramLongName": "isTest",
-		"paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded",
-		"paramRequired": false
-	},
 	{
 		"paramName": "tg",
 		"paramLongName": "taggingConf",
 		"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
 		"paramRequired": false
+	},
+	{
+		"paramName": "p",
+		"paramLongName": "production",
+		"paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed",
+		"paramRequired": true
 	}
 ]
@@ -45,10 +45,14 @@
 	</property>
 	<property>
 		<name>sparkExecutorMemory</name>
-		<value>6G</value>
+		<value>5G</value>
 	</property>
 	<property>
-		<name>sparkExecutorCores</name>
-		<value>1</value>
+		<name>memoryOverhead</name>
+		<value>3g</value>
+	</property>
+	<property>
+		<name>partitions</name>
+		<value>3284</value>
 	</property>
 </configuration>
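The new memoryOverhead and partitions properties feed the spark.executor.memoryOverhead and spark.sql.shuffle.partitions options added to the workflow's spark-opts below. As a rough illustration, here are the same two knobs set programmatically, assuming spark-core on the classpath; the values are copied from the defaults above, and whether they suit another cluster is a tuning question:

import org.apache.spark.SparkConf;

public class TuningSketch {
	public static void main(String[] args) {
		// Programmatic equivalent of the two new spark-opts entries.
		SparkConf conf = new SparkConf()
			.set("spark.executor.memoryOverhead", "3g") // off-heap headroom per executor
			.set("spark.sql.shuffle.partitions", "3284"); // shuffle parallelism
		System.out.println(conf.toDebugString());
	}
}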
@@ -4,10 +4,6 @@
 		<name>sourcePath</name>
 		<description>the source path</description>
 	</property>
-	<property>
-		<name>isLookUpUrl</name>
-		<description>the isLookup service endpoint</description>
-	</property>
 	<property>
 		<name>pathMap</name>
 		<description>the json path associated to each selection field</description>
@@ -102,16 +98,9 @@
 		<error to="Kill"/>
 	</action>

-	<join name="copy_wait" to="fork_exec_bulktag"/>
+	<join name="copy_wait" to="exec_bulktag"/>

-	<fork name="fork_exec_bulktag">
-		<path start="bulktag_publication"/>
-		<path start="bulktag_dataset"/>
-		<path start="bulktag_otherresearchproduct"/>
-		<path start="bulktag_software"/>
-	</fork>
-
-	<action name="bulktag_publication">
+	<action name="exec_bulktag">
 		<spark xmlns="uri:oozie:spark-action:0.2">
 			<master>yarn-cluster</master>
 			<mode>cluster</mode>

@@ -122,104 +111,23 @@
 				--num-executors=${sparkExecutorNumber}
 				--executor-memory=${sparkExecutorMemory}
 				--executor-cores=${sparkExecutorCores}
+				--conf spark.executor.memoryOverhead=${memoryOverhead}
+				--conf spark.sql.shuffle.partitions=${partitions}
 				--driver-memory=${sparkDriverMemory}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
 				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
 			</spark-opts>
-			<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
-			<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
-			<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
+			<arg>--sourcePath</arg><arg>${sourcePath}/</arg>
+			<arg>--outputPath</arg><arg>${outputPath}/</arg>
 			<arg>--pathMap</arg><arg>${pathMap}</arg>
-			<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
+			<arg>--production</arg><arg>${production}</arg>
 		</spark>
-		<ok to="wait"/>
+		<ok to="End"/>
 		<error to="Kill"/>
 	</action>
-
-	<action name="bulktag_dataset">
-		<spark xmlns="uri:oozie:spark-action:0.2">
-			<master>yarn-cluster</master>
-			<mode>cluster</mode>
-			<name>bulkTagging-dataset</name>
-			<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
-			<jar>dhp-enrichment-${projectVersion}.jar</jar>
-			<spark-opts>
-				--num-executors=${sparkExecutorNumber}
-				--executor-memory=${sparkExecutorMemory}
-				--executor-cores=${sparkExecutorCores}
-				--driver-memory=${sparkDriverMemory}
-				--conf spark.extraListeners=${spark2ExtraListeners}
-				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-			</spark-opts>
-			<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
-			<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
-			<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
-			<arg>--pathMap</arg><arg>${pathMap}</arg>
-			<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
-		</spark>
-		<ok to="wait"/>
-		<error to="Kill"/>
-	</action>
-
-	<action name="bulktag_otherresearchproduct">
-		<spark xmlns="uri:oozie:spark-action:0.2">
-			<master>yarn-cluster</master>
-			<mode>cluster</mode>
-			<name>bulkTagging-orp</name>
-			<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
-			<jar>dhp-enrichment-${projectVersion}.jar</jar>
-			<spark-opts>
-				--num-executors=${sparkExecutorNumber}
-				--executor-memory=${sparkExecutorMemory}
-				--executor-cores=${sparkExecutorCores}
-				--driver-memory=${sparkDriverMemory}
-				--conf spark.extraListeners=${spark2ExtraListeners}
-				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-			</spark-opts>
-			<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
-			<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
-			<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
-			<arg>--pathMap</arg><arg>${pathMap}</arg>
-			<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
-		</spark>
-		<ok to="wait"/>
-		<error to="Kill"/>
-	</action>
-
-	<action name="bulktag_software">
-		<spark xmlns="uri:oozie:spark-action:0.2">
-			<master>yarn-cluster</master>
-			<mode>cluster</mode>
-			<name>bulkTagging-software</name>
-			<class>eu.dnetlib.dhp.bulktag.SparkBulkTagJob</class>
-			<jar>dhp-enrichment-${projectVersion}.jar</jar>
-			<spark-opts>
-				--num-executors=${sparkExecutorNumber}
-				--executor-memory=${sparkExecutorMemory}
-				--executor-cores=${sparkExecutorCores}
-				--driver-memory=${sparkDriverMemory}
-				--conf spark.extraListeners=${spark2ExtraListeners}
-				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
-				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
-				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-			</spark-opts>
-			<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
-			<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
-			<arg>--outputPath</arg><arg>${outputPath}/software</arg>
-			<arg>--pathMap</arg><arg>${pathMap}</arg>
-			<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
-		</spark>
-		<ok to="wait"/>
-		<error to="Kill"/>
-	</action>
-
-	<join name="wait" to="End"/>
-
 	<end name="End"/>
@@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.HashMap;
 import java.util.List;

 import org.apache.commons.io.FileUtils;

@@ -98,14 +99,11 @@ public class BulkTagJobTest {
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath",
-					getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
+					getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -133,19 +131,16 @@ public class BulkTagJobTest {
 	@Test
 	void bulktagBySubjectNoPreviousContextTest() throws Exception {
 		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext")
+			.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/")
 			.getPath();
 		final String pathMap = BulkTagJobTest.pathMap;
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -230,19 +225,19 @@ public class BulkTagJobTest {
 	void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance")
+				"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/")
 			.getPath();
 		final String pathMap = BulkTagJobTest.pathMap;
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -311,18 +306,18 @@ public class BulkTagJobTest {
 	@Test
 	void bulktagByDatasourceTest() throws Exception {
 		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource")
+			.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
-					"-outputPath", workingDir.toString() + "/publication",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -384,25 +379,25 @@ public class BulkTagJobTest {
 	void bulktagByZenodoCommunityTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity")
+				"/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
-					"-outputPath", workingDir.toString() + "/orp",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

 		JavaRDD<OtherResearchProduct> tmp = sc
-			.textFile(workingDir.toString() + "/orp")
+			.textFile(workingDir.toString() + "/otherresearchproduct")
 			.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class));

 		Assertions.assertEquals(10, tmp.count());

@@ -505,18 +500,18 @@ public class BulkTagJobTest {
 	@Test
 	void bulktagBySubjectDatasourceTest() throws Exception {
 		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource")
+			.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -636,14 +631,14 @@ public class BulkTagJobTest {
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath",
-					getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(),
+					getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(),
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
-					"-outputPath", workingDir.toString() + "/software",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -732,18 +727,18 @@ public class BulkTagJobTest {

 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
+				"/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -774,19 +769,19 @@ public class BulkTagJobTest {
 	void bulkTagOtherJupyter() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct")
+				"/eu/dnetlib/dhp/eosctag/jupyter/")
 			.getPath();

 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
-					"-outputPath", workingDir.toString() + "/otherresearchproduct",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -829,18 +824,18 @@ public class BulkTagJobTest {
 	public void bulkTagDatasetJupyter() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/jupyter/dataset")
+				"/eu/dnetlib/dhp/eosctag/jupyter/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -878,18 +873,18 @@ public class BulkTagJobTest {

 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/jupyter/software")
+				"/eu/dnetlib/dhp/eosctag/jupyter/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
-					"-outputPath", workingDir.toString() + "/software",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -1096,18 +1091,18 @@ public class BulkTagJobTest {
 	void galaxyOtherTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct")
+				"/eu/dnetlib/dhp/eosctag/galaxy/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
-					"-outputPath", workingDir.toString() + "/otherresearchproduct",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -1214,18 +1209,18 @@ public class BulkTagJobTest {
 	void galaxySoftwareTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/galaxy/software")
+				"/eu/dnetlib/dhp/eosctag/galaxy/")
 			.getPath();
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
-					"-outputPath", workingDir.toString() + "/software",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -1333,19 +1328,19 @@ public class BulkTagJobTest {
 	void twitterDatasetTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/twitter/dataset")
+				"/eu/dnetlib/dhp/eosctag/twitter/")
 			.getPath();

 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -1373,19 +1368,19 @@ public class BulkTagJobTest {
 	void twitterOtherTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct")
+				"/eu/dnetlib/dhp/eosctag/twitter/")
 			.getPath();

 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
-					"-outputPath", workingDir.toString() + "/otherresearchproduct",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -1418,19 +1413,19 @@ public class BulkTagJobTest {
 	void twitterSoftwareTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/eosctag/twitter/software")
+				"/eu/dnetlib/dhp/eosctag/twitter/")
 			.getPath();

 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
-					"-outputPath", workingDir.toString() + "/software",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -1455,19 +1450,19 @@ public class BulkTagJobTest {
 	void EoscContextTagTest() throws Exception {
 		final String sourcePath = getClass()
 			.getResource(
-				"/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json")
+				"/eu/dnetlib/dhp/bulktag/eosc/dataset/")
 			.getPath();

 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath", sourcePath,
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});

@@ -1533,16 +1528,16 @@ public class BulkTagJobTest {
 		SparkBulkTagJob
 			.main(
 				new String[] {
-					"-isTest", Boolean.TRUE.toString(),
 					"-isSparkSessionManaged", Boolean.FALSE.toString(),
 					"-sourcePath",
 					getClass()
-						.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
+						.getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/")
 						.getPath(),
 					"-taggingConf", taggingConf,
-					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
-					"-outputPath", workingDir.toString() + "/dataset",
-					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-outputPath", workingDir.toString() + "/",
 					"-pathMap", pathMap
 				});
 		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

@@ -1568,4 +1563,42 @@ public class BulkTagJobTest {

 	}

+	@Test
+	void newConfTest() throws Exception {
+		final String pathMap = BulkTagJobTest.pathMap;
+		SparkBulkTagJob
+			.main(
+				new String[] {
+
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath",
+					getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
+					"-taggingConf", taggingConf,
+
+					"-outputPath", workingDir.toString() + "/",
+					"-production", Boolean.TRUE.toString(),
+					"-pathMap", pathMap
+				});
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		JavaRDD<Dataset> tmp = sc
+			.textFile(workingDir.toString() + "/dataset")
+			.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
+
+		Assertions.assertEquals(10, tmp.count());
+		org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
+
+		verificationDataset.createOrReplaceTempView("dataset");
+
+		String query = "select id, MyT.id community "
+			+ "from dataset "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
+
+		Assertions.assertEquals(0, spark.sql(query).count());
+	}
+
 }
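The added newConfTest drives the job through the new -production switch and then checks, via Spark SQL, that no context entry carries the bulktagging inference provenance. A standalone sketch of that verification query, assuming spark-sql on the classpath and a hypothetical path holding a JSON dump of tagged results:

import org.apache.spark.sql.SparkSession;

public class ContextQuerySketch {
	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder()
			.appName("context-check")
			.master("local[*]")
			.getOrCreate();
		// Hypothetical output location of a tagged dataset dump.
		spark.read().json("/tmp/tagged/dataset").createOrReplaceTempView("dataset");
		// Same shape as the assertion query in newConfTest: count contexts whose
		// data info was inferred by the bulk tagging step.
		long n = spark
			.sql("select id, MyT.id community from dataset "
				+ "lateral view explode(context) c as MyT "
				+ "lateral view explode(MyT.datainfo) d as MyD "
				+ "where MyD.inferenceprovenance = 'bulktagging'")
			.count();
		System.out.println(n);
		spark.stop();
	}
}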
@@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
 		sc.setVerb("not_contains");
 		sc.setField("contributor");
 		sc.setValue("DARIAH");
-		sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue()));
+		sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
 		String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
 		Assertions.assertFalse(sc.verifyCriteria(metadata));
 	}