diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java deleted file mode 100644 index c127783e5..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java +++ /dev/null @@ -1,53 +0,0 @@ - -package eu.dnetlib.dhp.common.api; - -import java.io.IOException; -import java.io.InputStream; - -import okhttp3.MediaType; -import okhttp3.RequestBody; -import okhttp3.internal.Util; -import okio.BufferedSink; -import okio.Okio; -import okio.Source; - -public class InputStreamRequestBody extends RequestBody { - - private final InputStream inputStream; - private final MediaType mediaType; - private final long lenght; - - public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) { - - return new InputStreamRequestBody(inputStream, mediaType, len); - } - - private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) { - this.inputStream = inputStream; - this.mediaType = mediaType; - this.lenght = len; - } - - @Override - public MediaType contentType() { - return mediaType; - } - - @Override - public long contentLength() { - - return lenght; - - } - - @Override - public void writeTo(BufferedSink sink) throws IOException { - Source source = null; - try { - source = Okio.source(inputStream); - sink.writeAll(source); - } finally { - Util.closeQuietly(source); - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java deleted file mode 100644 index b75872eb4..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java +++ /dev/null @@ -1,8 +0,0 @@ - -package eu.dnetlib.dhp.common.api; - -public class MissingConceptDoiException extends Throwable { - public MissingConceptDoiException(String message) { - super(message); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java deleted file mode 100644 index 544da78f5..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ /dev/null @@ -1,365 +0,0 @@ - -package eu.dnetlib.dhp.common.api; - -import java.io.*; -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.concurrent.TimeUnit; - -import org.apache.http.HttpHeaders; -import org.apache.http.entity.ContentType; -import org.jetbrains.annotations.NotNull; - -import com.google.gson.Gson; - -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; -import okhttp3.*; - -public class ZenodoAPIClient implements Serializable { - - String urlString; - String bucket; - - String deposition_id; - String access_token; - - public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8"); - - private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip"); - - public String getUrlString() { - return urlString; - } - - public void setUrlString(String urlString) { - this.urlString = urlString; - } - - public String getBucket() { - return bucket; - } - - public void setBucket(String bucket) { - this.bucket = bucket; - } - - public void setDeposition_id(String deposition_id) { - this.deposition_id = deposition_id; - } - - public ZenodoAPIClient(String urlString, String access_token) { - - this.urlString = urlString; - this.access_token = access_token; - } - - /** - * Brand new deposition in Zenodo. It sets the deposition_id and the bucket where to store the files to upload - * - * @return response code - * @throws IOException - */ - public int newDeposition() throws IOException { - String json = "{}"; - - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - conn.disconnect(); - - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); - this.bucket = newSubmission.getLinks().getBucket(); - this.deposition_id = newSubmission.getId(); - - return responseCode; - } - - /** - * Upload files in Zenodo. - * - * @param is the inputStream for the file to upload - * @param file_name the name of the file as it will appear on Zenodo - * @return the response code - */ - public int uploadIS(InputStream is, String file_name) throws IOException { - - URL url = new URL(bucket + "/" + file_name); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - byte[] buf = new byte[8192]; - int length; - try (OutputStream os = conn.getOutputStream()) { - while ((length = is.read(buf)) != -1) { - os.write(buf, 0, length); - } - - } - int responseCode = conn.getResponseCode(); - if (!checkOKStatus(responseCode)) { - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - } - - return responseCode; - } - - @NotNull - private String getBody(HttpURLConnection conn) throws IOException { - String body = "{}"; - try (BufferedReader br = new BufferedReader( - new InputStreamReader(conn.getInputStream(), "utf-8"))) { - StringBuilder response = new StringBuilder(); - String responseLine = null; - while ((responseLine = br.readLine()) != null) { - response.append(responseLine.trim()); - } - - body = response.toString(); - - } - return body; - } - - /** - * Associates metadata information to the current deposition - * - * @param metadata the metadata - * @return response code - * @throws IOException - */ - public int sendMretadata(String metadata) throws IOException { - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = metadata.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - final int responseCode = conn.getResponseCode(); - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - - return responseCode; - - } - - private boolean checkOKStatus(int responseCode) { - - if (HttpURLConnection.HTTP_OK != responseCode || - HttpURLConnection.HTTP_CREATED != responseCode) - return true; - return false; - } - - /** - * To publish the current deposition. It works for both new deposition or new version of an old deposition - * - * @return response code - * @throws IOException - */ - @Deprecated - public int publish() throws IOException { - - String json = "{}"; - - OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); - - RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); - - Request request = new Request.Builder() - .url(urlString + "/" + deposition_id + "/actions/publish") - .addHeader("Authorization", "Bearer " + access_token) - .post(body) - .build(); - - try (Response response = httpClient.newCall(request).execute()) { - - if (!response.isSuccessful()) - throw new IOException("Unexpected code " + response + response.body().string()); - - return response.code(); - - } - } - - /** - * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used - * for the new version. - * - * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last - * part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930 - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException { - setDepositionId(concept_rec_id, 1); - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("POST"); - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - String latest_draft = zenodoModel.getLinks().getLatest_draft(); - deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); - bucket = getBucket(latest_draft); - - return responseCode; - - } - - /** - * To finish uploading a version or new deposition not published - * It sets the deposition_id and the bucket to be used - * - * - * @param deposition_id the deposition id of the not yet published upload - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException { - - this.deposition_id = deposition_id; - - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - conn.disconnect(); - - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - bucket = zenodoModel.getLinks().getBucket(); - - return responseCode; - - } - - private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException { - - ZenodoModelList zenodoModelList = new Gson() - .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class); - - for (ZenodoModel zm : zenodoModelList) { - if (zm.getConceptrecid().equals(concept_rec_id)) { - deposition_id = zm.getId(); - return; - } - } - if (zenodoModelList.size() == 0) - throw new MissingConceptDoiException( - "The concept record id specified was missing in the list of depositions"); - setDepositionId(concept_rec_id, page + 1); - - } - - private String getPrevDepositions(String page) throws IOException { - - HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder(); - urlBuilder.addQueryParameter("page", page); - - URL url = new URL(urlBuilder.build().toString()); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("GET"); - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - return body; - - } - - private String getBucket(String inputUurl) throws IOException { - - URL url = new URL(inputUurl); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("GET"); - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - - return zenodoModel.getLinks().getBucket(); - - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java deleted file mode 100644 index a02224383..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java +++ /dev/null @@ -1,14 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -public class Community { - private String identifier; - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java deleted file mode 100644 index c14af55b6..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -public class Creator { - private String affiliation; - private String name; - private String orcid; - - public String getAffiliation() { - return affiliation; - } - - public void setAffiliation(String affiliation) { - this.affiliation = affiliation; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getOrcid() { - return orcid; - } - - public void setOrcid(String orcid) { - this.orcid = orcid; - } - - public static Creator newInstance(String name, String affiliation, String orcid) { - Creator c = new Creator(); - if (name != null) { - c.name = name; - } - if (affiliation != null) { - c.affiliation = affiliation; - } - if (orcid != null) { - c.orcid = orcid; - } - - return c; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java deleted file mode 100644 index 509f444b9..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java +++ /dev/null @@ -1,44 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.io.Serializable; - -public class File implements Serializable { - private String checksum; - private String filename; - private long filesize; - private String id; - - public String getChecksum() { - return checksum; - } - - public void setChecksum(String checksum) { - this.checksum = checksum; - } - - public String getFilename() { - return filename; - } - - public void setFilename(String filename) { - this.filename = filename; - } - - public long getFilesize() { - return filesize; - } - - public void setFilesize(long filesize) { - this.filesize = filesize; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java deleted file mode 100644 index 476f1d9d8..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java +++ /dev/null @@ -1,23 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.io.Serializable; - -public class Grant implements Serializable { - private String id; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public static Grant newInstance(String id) { - Grant g = new Grant(); - g.id = id; - - return g; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java deleted file mode 100644 index bdf8e5d2c..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java +++ /dev/null @@ -1,92 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.io.Serializable; - -public class Links implements Serializable { - - private String bucket; - - private String discard; - - private String edit; - private String files; - private String html; - private String latest_draft; - private String latest_draft_html; - private String publish; - - private String self; - - public String getBucket() { - return bucket; - } - - public void setBucket(String bucket) { - this.bucket = bucket; - } - - public String getDiscard() { - return discard; - } - - public void setDiscard(String discard) { - this.discard = discard; - } - - public String getEdit() { - return edit; - } - - public void setEdit(String edit) { - this.edit = edit; - } - - public String getFiles() { - return files; - } - - public void setFiles(String files) { - this.files = files; - } - - public String getHtml() { - return html; - } - - public void setHtml(String html) { - this.html = html; - } - - public String getLatest_draft() { - return latest_draft; - } - - public void setLatest_draft(String latest_draft) { - this.latest_draft = latest_draft; - } - - public String getLatest_draft_html() { - return latest_draft_html; - } - - public void setLatest_draft_html(String latest_draft_html) { - this.latest_draft_html = latest_draft_html; - } - - public String getPublish() { - return publish; - } - - public void setPublish(String publish) { - this.publish = publish; - } - - public String getSelf() { - return self; - } - - public void setSelf(String self) { - this.self = self; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java deleted file mode 100644 index b161adb9b..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java +++ /dev/null @@ -1,153 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.io.Serializable; -import java.util.List; - -public class Metadata implements Serializable { - - private String access_right; - private List communities; - private List creators; - private String description; - private String doi; - private List grants; - private List keywords; - private String language; - private String license; - private PrereserveDoi prereserve_doi; - private String publication_date; - private List references; - private List related_identifiers; - private String title; - private String upload_type; - private String version; - - public String getUpload_type() { - return upload_type; - } - - public void setUpload_type(String upload_type) { - this.upload_type = upload_type; - } - - public String getVersion() { - return version; - } - - public void setVersion(String version) { - this.version = version; - } - - public String getAccess_right() { - return access_right; - } - - public void setAccess_right(String access_right) { - this.access_right = access_right; - } - - public List getCommunities() { - return communities; - } - - public void setCommunities(List communities) { - this.communities = communities; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public String getDoi() { - return doi; - } - - public void setDoi(String doi) { - this.doi = doi; - } - - public List getGrants() { - return grants; - } - - public void setGrants(List grants) { - this.grants = grants; - } - - public List getKeywords() { - return keywords; - } - - public void setKeywords(List keywords) { - this.keywords = keywords; - } - - public String getLanguage() { - return language; - } - - public void setLanguage(String language) { - this.language = language; - } - - public String getLicense() { - return license; - } - - public void setLicense(String license) { - this.license = license; - } - - public PrereserveDoi getPrereserve_doi() { - return prereserve_doi; - } - - public void setPrereserve_doi(PrereserveDoi prereserve_doi) { - this.prereserve_doi = prereserve_doi; - } - - public String getPublication_date() { - return publication_date; - } - - public void setPublication_date(String publication_date) { - this.publication_date = publication_date; - } - - public List getReferences() { - return references; - } - - public void setReferences(List references) { - this.references = references; - } - - public List getRelated_identifiers() { - return related_identifiers; - } - - public void setRelated_identifiers(List related_identifiers) { - this.related_identifiers = related_identifiers; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java deleted file mode 100644 index aa088ef31..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java +++ /dev/null @@ -1,25 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.io.Serializable; - -public class PrereserveDoi implements Serializable { - private String doi; - private String recid; - - public String getDoi() { - return doi; - } - - public void setDoi(String doi) { - this.doi = doi; - } - - public String getRecid() { - return recid; - } - - public void setRecid(String recid) { - this.recid = recid; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java deleted file mode 100644 index 15a349636..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java +++ /dev/null @@ -1,43 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.io.Serializable; - -public class RelatedIdentifier implements Serializable { - private String identifier; - private String relation; - private String resource_type; - private String scheme; - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public String getRelation() { - return relation; - } - - public void setRelation(String relation) { - this.relation = relation; - } - - public String getResource_type() { - return resource_type; - } - - public void setResource_type(String resource_type) { - this.resource_type = resource_type; - } - - public String getScheme() { - return scheme; - } - - public void setScheme(String scheme) { - this.scheme = scheme; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java deleted file mode 100644 index 9843ea0f9..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java +++ /dev/null @@ -1,118 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.io.Serializable; -import java.util.List; - -public class ZenodoModel implements Serializable { - - private String conceptrecid; - private String created; - - private List files; - private String id; - private Links links; - private Metadata metadata; - private String modified; - private String owner; - private String record_id; - private String state; - private boolean submitted; - private String title; - - public String getConceptrecid() { - return conceptrecid; - } - - public void setConceptrecid(String conceptrecid) { - this.conceptrecid = conceptrecid; - } - - public String getCreated() { - return created; - } - - public void setCreated(String created) { - this.created = created; - } - - public List getFiles() { - return files; - } - - public void setFiles(List files) { - this.files = files; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public Links getLinks() { - return links; - } - - public void setLinks(Links links) { - this.links = links; - } - - public Metadata getMetadata() { - return metadata; - } - - public void setMetadata(Metadata metadata) { - this.metadata = metadata; - } - - public String getModified() { - return modified; - } - - public void setModified(String modified) { - this.modified = modified; - } - - public String getOwner() { - return owner; - } - - public void setOwner(String owner) { - this.owner = owner; - } - - public String getRecord_id() { - return record_id; - } - - public void setRecord_id(String record_id) { - this.record_id = record_id; - } - - public String getState() { - return state; - } - - public void setState(String state) { - this.state = state; - } - - public boolean isSubmitted() { - return submitted; - } - - public void setSubmitted(boolean submitted) { - this.submitted = submitted; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java deleted file mode 100644 index b3b150714..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java +++ /dev/null @@ -1,7 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -import java.util.ArrayList; - -public class ZenodoModelList extends ArrayList { -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java deleted file mode 100644 index 92c1dcda3..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ /dev/null @@ -1,109 +0,0 @@ - -package eu.dnetlib.dhp.common.api; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -@Disabled -class ZenodoAPIClientTest { - - private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; - private final String ACCESS_TOKEN = ""; - - private final String CONCEPT_REC_ID = "657113"; - - private final String depositionId = "674915"; - - @Test - void testUploadOldDeposition() throws IOException, MissingConceptDoiException { - ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, - ACCESS_TOKEN); - Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); - - File file = new File(getClass() - .getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz") - .getPath()); - - InputStream is = new FileInputStream(file); - - Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz")); - - String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json")); - - Assertions.assertEquals(200, client.sendMretadata(metadata)); - - Assertions.assertEquals(202, client.publish()); - - } - - @Test - void testNewDeposition() throws IOException { - - ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, - ACCESS_TOKEN); - Assertions.assertEquals(201, client.newDeposition()); - - File file = new File(getClass() - .getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz") - .getPath()); - - InputStream is = new FileInputStream(file); - - Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz")); - - String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json")); - - Assertions.assertEquals(200, client.sendMretadata(metadata)); - - Assertions.assertEquals(202, client.publish()); - - } - - @Test - void testNewVersionNewName() throws IOException, MissingConceptDoiException { - - ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, - ACCESS_TOKEN); - - Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID)); - - File file = new File(getClass() - .getResource("/eu/dnetlib/dhp/common/api/newVersion") - .getPath()); - - InputStream is = new FileInputStream(file); - - Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition")); - - Assertions.assertEquals(202, client.publish()); - - } - - @Test - void testNewVersionOldName() throws IOException, MissingConceptDoiException { - - ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, - ACCESS_TOKEN); - - Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID)); - - File file = new File(getClass() - .getResource("/eu/dnetlib/dhp/common/api/newVersion2") - .getPath()); - - InputStream is = new FileInputStream(file); - - Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition")); - - Assertions.assertEquals(202, client.publish()); - - } - -} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java index 84d49bd5c..12c96500e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java @@ -1,6 +1,24 @@ package eu.dnetlib.pace.util; +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + /* * Diff Match and Patch * Copyright 2018 The diff-match-patch Authors. diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java new file mode 100644 index 000000000..262ca0290 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java @@ -0,0 +1,83 @@ + +package eu.dnetlib.dhp.api; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; + +import org.jetbrains.annotations.NotNull; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class QueryCommunityAPI { + private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/"; + private static final String BETA_BASE_URL = "https://beta.services.openaire.eu/openaire/"; + + private static String get(String geturl) throws IOException { + URL url = new URL(geturl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + + int responseCode = conn.getResponseCode(); + String body = getBody(conn); + conn.disconnect(); + if (responseCode != HttpURLConnection.HTTP_OK) + throw new IOException("Unexpected code " + responseCode + body); + + return body; + } + + public static String communities(boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/communities"); + return get(BETA_BASE_URL + "community/communities"); + } + + public static String community(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id); + return get(BETA_BASE_URL + "community/" + id); + } + + public static String communityDatasource(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); + return (BETA_BASE_URL + "community/" + id + "/contentproviders"); + + } + + public static String communityPropagationOrganization(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); + return get(BETA_BASE_URL + "community/" + id + "/propagationOrganizations"); + } + + public static String communityProjects(String id, String page, String size, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + return get(BETA_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + } + + @NotNull + private static String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + + body = response.toString(); + + } + return body; + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java new file mode 100644 index 000000000..c1dda1ddc --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -0,0 +1,166 @@ + +package eu.dnetlib.dhp.api; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.amazonaws.util.StringUtils; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; + +import eu.dnetlib.dhp.api.model.*; +import eu.dnetlib.dhp.bulktag.community.Community; +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import eu.dnetlib.dhp.bulktag.community.Provider; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class Utils implements Serializable { + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final VerbResolver resolver = VerbResolverFactory.newInstance(); + + private static final Logger log = LoggerFactory.getLogger(Utils.class); + + public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException { + final Map communities = Maps.newHashMap(); + List validCommunities = new ArrayList<>(); + getValidCommunities(production) + .forEach(community -> { + try { + CommunityModel cm = MAPPER + .readValue(QueryCommunityAPI.community(community.getId(), production), CommunityModel.class); + validCommunities.add(getCommunity(cm)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + validCommunities.forEach(community -> { + try { + DatasourceList dl = MAPPER + .readValue( + QueryCommunityAPI.communityDatasource(community.getId(), production), DatasourceList.class); + community.setProviders(dl.stream().map(d -> { + if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) + return null; + Provider p = new Provider(); + p.setOpenaireId("10|" + d.getOpenaireId()); + p.setSelectionConstraints(d.getSelectioncriteria()); + if (p.getSelectionConstraints() != null) + p.getSelectionConstraints().setSelection(resolver); + return p; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList())); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + validCommunities.forEach(community -> { + if (community.isValid()) + communities.put(community.getId(), community); + }); + return new CommunityConfiguration(communities); + } + + private static Community getCommunity(CommunityModel cm) { + Community c = new Community(); + c.setId(cm.getId()); + c.setZenodoCommunities(cm.getOtherZenodoCommunities()); + if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity())) + c.getZenodoCommunities().add(cm.getZenodoCommunity()); + c.setSubjects(cm.getSubjects()); + c.getSubjects().addAll(cm.getFos()); + c.getSubjects().addAll(cm.getSdg()); + if (cm.getAdvancedConstraints() != null) { + c.setConstraints(cm.getAdvancedConstraints()); + c.getConstraints().setSelection(resolver); + } + if (cm.getRemoveConstraints() != null) { + c.setRemoveConstraints(cm.getRemoveConstraints()); + c.getRemoveConstraints().setSelection(resolver); + } + return c; + } + + public static List getValidCommunities(boolean production) throws IOException { + return MAPPER + .readValue(QueryCommunityAPI.communities(production), CommunitySummary.class) + .stream() + .filter( + community -> !community.getStatus().equals("hidden") && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + } + + /** + * it returns for each organization the list of associated communities + */ + public static CommunityEntityMap getCommunityOrganization(boolean production) throws IOException { + CommunityEntityMap organizationMap = new CommunityEntityMap(); + getValidCommunities(production) + .forEach(community -> { + String id = community.getId(); + try { + List associatedOrgs = MAPPER + .readValue( + QueryCommunityAPI.communityPropagationOrganization(id, production), OrganizationList.class); + associatedOrgs.forEach(o -> { + if (!organizationMap + .keySet() + .contains( + "20|" + o)) + organizationMap.put("20|" + o, new ArrayList<>()); + organizationMap.get("20|" + o).add(community.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + return organizationMap; + } + + public static CommunityEntityMap getCommunityProjects(boolean production) throws IOException { + CommunityEntityMap projectMap = new CommunityEntityMap(); + getValidCommunities(production) + .forEach(community -> { + int page = -1; + int size = 100; + ContentModel cm = new ContentModel(); + do { + page++; + try { + cm = MAPPER + .readValue( + QueryCommunityAPI + .communityProjects( + community.getId(), String.valueOf(page), String.valueOf(size), production), + ContentModel.class); + if (cm.getContent().size() > 0) { + cm.getContent().forEach(p -> { + if (!projectMap.keySet().contains("40|" + p.getOpenaireId())) + projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>()); + projectMap.get("40|" + p.getOpenaireId()).add(community.getId()); + }); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + }); + return projectMap; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java new file mode 100644 index 000000000..9fab5a80c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.api.model; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; + +@JsonAutoDetect +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityContentprovider { + private String openaireId; + private SelectionConstraints selectioncriteria; + + private String enabled; + + public String getEnabled() { + return enabled; + } + + public void setEnabled(String enabled) { + this.enabled = enabled; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(final String openaireId) { + this.openaireId = openaireId; + } + + public SelectionConstraints getSelectioncriteria() { + + return this.selectioncriteria; + } + + public void setSelectioncriteria(SelectionConstraints selectioncriteria) { + this.selectioncriteria = selectioncriteria; + + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java new file mode 100644 index 000000000..ca3eb2857 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java @@ -0,0 +1,21 @@ + +package eu.dnetlib.dhp.api.model; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +public class CommunityEntityMap extends HashMap> { + + public CommunityEntityMap() { + super(); + } + + public List get(String key) { + + if (super.get(key) == null) { + return new ArrayList<>(); + } + return super.get(key); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java new file mode 100644 index 000000000..745e7efc2 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java @@ -0,0 +1,108 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityModel implements Serializable { + private String id; + private String type; + private String status; + + private String zenodoCommunity; + private List subjects; + private List otherZenodoCommunities; + private List fos; + private List sdg; + private SelectionConstraints advancedConstraints; + private SelectionConstraints removeConstraints; + + public String getZenodoCommunity() { + return zenodoCommunity; + } + + public void setZenodoCommunity(String zenodoCommunity) { + this.zenodoCommunity = zenodoCommunity; + } + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public List getOtherZenodoCommunities() { + return otherZenodoCommunities; + } + + public void setOtherZenodoCommunities(List otherZenodoCommunities) { + this.otherZenodoCommunities = otherZenodoCommunities; + } + + public List getFos() { + return fos; + } + + public void setFos(List fos) { + this.fos = fos; + } + + public List getSdg() { + return sdg; + } + + public void setSdg(List sdg) { + this.sdg = sdg; + } + + public SelectionConstraints getRemoveConstraints() { + return removeConstraints; + } + + public void setRemoveConstraints(SelectionConstraints removeConstraints) { + this.removeConstraints = removeConstraints; + } + + public SelectionConstraints getAdvancedConstraints() { + return advancedConstraints; + } + + public void setAdvancedConstraints(SelectionConstraints advancedConstraints) { + this.advancedConstraints = advancedConstraints; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java new file mode 100644 index 000000000..a0541f7ee --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java @@ -0,0 +1,15 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class CommunitySummary extends ArrayList implements Serializable { + public CommunitySummary() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java new file mode 100644 index 000000000..469709f59 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ContentModel implements Serializable { + private List content; + private Integer totalPages; + private Boolean last; + private Integer number; + + public List getContent() { + return content; + } + + public void setContent(List content) { + this.content = content; + } + + public Integer getTotalPages() { + return totalPages; + } + + public void setTotalPages(Integer totalPages) { + this.totalPages = totalPages; + } + + public Boolean getLast() { + return last; + } + + public void setLast(Boolean last) { + this.last = last; + } + + public Integer getNumber() { + return number; + } + + public void setNumber(Integer number) { + this.number = number; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java new file mode 100644 index 000000000..30d0241c3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +import eu.dnetlib.dhp.api.model.CommunityContentprovider; + +public class DatasourceList extends ArrayList implements Serializable { + public DatasourceList() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java new file mode 100644 index 000000000..3c81ad179 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class OrganizationList extends ArrayList implements Serializable { + + public OrganizationList() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java new file mode 100644 index 000000000..3495d6a63 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java @@ -0,0 +1,24 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ProjectModel implements Serializable { + + private String openaireId; + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index fc3882b73..15712ad66 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -9,7 +9,6 @@ import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -21,8 +20,11 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; +import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @@ -54,50 +56,38 @@ public class SparkBulkTagJob { .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - Boolean isTest = Optional - .ofNullable(parser.get("isTest")) - .map(Boolean::valueOf) - .orElse(Boolean.FALSE); - log.info("isTest: {} ", isTest); - final String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final boolean production = Boolean.valueOf(parser.get("production")); + log.info("production: {}", production); + ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class); log.info("pathMap: {}", new Gson().toJson(protoMappingParams)); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - final Boolean saveGraph = Optional - .ofNullable(parser.get("saveGraph")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("saveGraph: {}", saveGraph); - - Class resultClazz = (Class) Class.forName(resultClassName); - SparkConf conf = new SparkConf(); CommunityConfiguration cc; - String taggingConf = parser.get("taggingConf"); + String taggingConf = Optional + .ofNullable(parser.get("taggingConf")) + .map(String::valueOf) + .orElse(null); - if (isTest) { + if (taggingConf != null) { cc = CommunityConfigurationFactory.newInstance(taggingConf); } else { - cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl")); + cc = Utils.getCommunityConfiguration(production); } runWithSparkSession( conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); extendCommunityConfigurationForEOSC(spark, inputPath, cc); - execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc); + execBulkTag(spark, inputPath, outputPath, protoMappingParams, cc); }); } @@ -106,10 +96,7 @@ public class SparkBulkTagJob { Dataset datasources = readPath( spark, inputPath - .substring( - 0, - inputPath.lastIndexOf("/")) - + "/datasource", + + "datasource", Datasource.class) .filter((FilterFunction) ds -> isOKDatasource(ds)) .map((MapFunction) ds -> ds.getId(), Encoders.STRING()); @@ -117,10 +104,10 @@ public class SparkBulkTagJob { Map>> dsm = cc.getEoscDatasourceMap(); for (String ds : datasources.collectAsList()) { - final String dsId = ds.substring(3); - if (!dsm.containsKey(dsId)) { + // final String dsId = ds.substring(3); + if (!dsm.containsKey(ds)) { ArrayList> eoscList = new ArrayList<>(); - dsm.put(dsId, eoscList); + dsm.put(ds, eoscList); } } @@ -142,22 +129,30 @@ public class SparkBulkTagJob { String inputPath, String outputPath, ProtoMap protoMappingParams, - Class resultClazz, CommunityConfiguration communityConfiguration) { - ResultTagger resultTagger = new ResultTagger(); - readPath(spark, inputPath, resultClazz) - .map(patchResult(), Encoders.bean(resultClazz)) - .filter(Objects::nonNull) - .map( - (MapFunction) value -> resultTagger - .enrichContextCriteria( - value, communityConfiguration, protoMappingParams), - Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + ModelSupport.entityTypes + .keySet() + .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + .forEach(e -> { + removeOutputDir(spark, outputPath + e.name()); + ResultTagger resultTagger = new ResultTagger(); + Class resultClazz = ModelSupport.entityTypes.get(e); + readPath(spark, inputPath + e.name(), resultClazz) + .map(patchResult(), Encoders.bean(resultClazz)) + .filter(Objects::nonNull) + .map( + (MapFunction) value -> resultTagger + .enrichContextCriteria( + value, communityConfiguration, protoMappingParams), + Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + }); + } public static Dataset readPath( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index b44376e22..9cd3a8f82 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import com.google.gson.Gson; @@ -13,7 +14,7 @@ public class Community implements Serializable { private String id; private List subjects = new ArrayList<>(); private List providers = new ArrayList<>(); - private List zenodoCommunities = new ArrayList<>(); + private List zenodoCommunities = new ArrayList<>(); private SelectionConstraints constraints = new SelectionConstraints(); private SelectionConstraints removeConstraints = new SelectionConstraints(); @@ -26,7 +27,7 @@ public class Community implements Serializable { return !getSubjects().isEmpty() || !getProviders().isEmpty() || !getZenodoCommunities().isEmpty() - || getConstraints().getCriteria() != null; + || (Optional.ofNullable(getConstraints()).isPresent() && getConstraints().getCriteria() != null); } public String getId() { @@ -53,11 +54,11 @@ public class Community implements Serializable { this.providers = providers; } - public List getZenodoCommunities() { + public List getZenodoCommunities() { return zenodoCommunities; } - public void setZenodoCommunities(List zenodoCommunities) { + public void setZenodoCommunities(List zenodoCommunities) { this.zenodoCommunities = zenodoCommunities; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index e061ccd5e..a658c7ff5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -81,7 +81,7 @@ public class CommunityConfiguration implements Serializable { this.removeConstraintsMap = removeConstraintsMap; } - CommunityConfiguration(final Map communities) { + public CommunityConfiguration(final Map communities) { this.communities = communities; init(); } @@ -117,10 +117,10 @@ public class CommunityConfiguration implements Serializable { add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap); } // get zenodo communities - for (ZenodoCommunity zc : c.getZenodoCommunities()) { + for (String zc : c.getZenodoCommunities()) { add( - zc.getZenodoCommunityId(), - new Pair<>(id, zc.getSelCriteria()), + zc, + new Pair<>(id, null), zenodocommunityMap); } selectionConstraintsMap.put(id, c.getConstraints()); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 7b9e03ef6..955ca3856 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -143,16 +143,16 @@ public class CommunityConfigurationFactory { return providerList; } - private static List parseZenodoCommunities(final Node node) { + private static List parseZenodoCommunities(final Node node) { final List list = node.selectNodes("./zenodocommunities/zenodocommunity"); - final List zenodoCommunityList = new ArrayList<>(); + final List zenodoCommunityList = new ArrayList<>(); for (Node n : list) { - ZenodoCommunity zc = new ZenodoCommunity(); - zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText()); - zc.setSelCriteria(n.selectSingleNode("./selcriteria")); +// ZenodoCommunity zc = new ZenodoCommunity(); +// zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText()); +// zc.setSelCriteria(n.selectSingleNode("./selcriteria")); - zenodoCommunityList.add(zc); + zenodoCommunityList.add(n.selectSingleNode("./zenodoid").getText()); } log.info("size of the zenodo community list " + zenodoCommunityList.size()); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index ed58cc14d..13d29b940 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; import java.lang.reflect.InvocationTargetException; +import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore; + import eu.dnetlib.dhp.bulktag.criteria.Selection; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; @@ -11,7 +13,8 @@ public class Constraint implements Serializable { private String verb; private String field; private String value; -// private String element; + + @JsonIgnore private Selection selection; public String getVerb() { @@ -38,10 +41,7 @@ public class Constraint implements Serializable { this.value = value; } - public void setSelection(Selection sel) { - selection = sel; - } - + @JsonIgnore public void setSelection(VerbResolver resolver) throws InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException { @@ -52,11 +52,4 @@ public class Constraint implements Serializable { return selection.apply(metadata); } -// public String getElement() { -// return element; -// } -// -// public void setElement(String element) { -// this.element = element; -// } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java deleted file mode 100644 index 5fe3cf81f..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.bulktag.community; - -import java.io.IOException; -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.dom4j.DocumentException; -import org.xml.sax.SAXException; - -import com.google.common.base.Joiner; - -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - - public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException, SAXException, IOException { - ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - final List res = isLookUp - .quickSearchProfile( - IOUtils - .toString( - QueryInformationSystem.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/query.xq"))); - - final String xmlConf = "" + Joiner.on(" ").join(res) + ""; - - return CommunityConfigurationFactory.newInstance(xmlConf); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 5f62c10f4..3b231a52d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -85,16 +85,18 @@ public class ResultTagger implements Serializable { conf .getRemoveConstraintsMap() .keySet() - .forEach(communityId -> { - if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && - conf - .getRemoveConstraintsMap() - .get(communityId) - .getCriteria() - .stream() - .anyMatch(crit -> crit.verifyCriteria(param))) - removeCommunities.add(communityId); - }); + .forEach( + communityId -> { + if (conf.getRemoveConstraintsMap().keySet().contains(communityId) && + conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && + conf + .getRemoveConstraintsMap() + .get(communityId) + .getCriteria() + .stream() + .anyMatch(crit -> crit.verifyCriteria(param))) + removeCommunities.add(communityId); + }); // communities contains all the communities to be added as context for the result final Set communities = new HashSet<>(); @@ -124,10 +126,10 @@ public class ResultTagger implements Serializable { if (Objects.nonNull(result.getInstance())) { for (Instance i : result.getInstance()) { if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { - collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); + collfrom.add(i.getCollectedfrom().getKey()); } if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { - hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); + hostdby.add(i.getHostedby().getKey()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index c7dcce812..57cc658fc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -7,11 +7,13 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +@JsonAutoDetect public class SelectionConstraints implements Serializable { private List criteria; diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json index a8be7c32e..dbe2d088f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json @@ -1,10 +1,5 @@ [ - { - "paramName":"is", - "paramLongName":"isLookUpUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, + { "paramName":"s", "paramLongName":"sourcePath", @@ -17,12 +12,7 @@ "paramDescription": "the json path associated to each selection field", "paramRequired": true }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, + { "paramName": "out", "paramLongName": "outputPath", @@ -35,17 +25,19 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false }, - { - "paramName": "test", - "paramLongName": "isTest", - "paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded", - "paramRequired": false - }, + { "paramName": "tg", "paramLongName": "taggingConf", "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", "paramRequired": false + }, + + { + "paramName": "p", + "paramLongName": "production", + "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml index fe82ae194..c92f559f9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml @@ -45,10 +45,14 @@ sparkExecutorMemory - 6G + 5G - sparkExecutorCores - 1 + memoryOverhead + 3g + + + partitions + 3284 \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml index b868e4c72..e3ca72493 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml @@ -4,10 +4,6 @@ sourcePath the source path - - isLookUpUrl - the isLookup service endpoint - pathMap the json path associated to each selection field @@ -102,16 +98,9 @@ - + - - - - - - - - + yarn-cluster cluster @@ -122,104 +111,23 @@ --num-executors=${sparkExecutorNumber} --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} + --conf spark.executor.memoryOverhead=${memoryOverhead} + --conf spark.sql.shuffle.partitions=${partitions} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication + --sourcePath${sourcePath}/ + --outputPath${outputPath}/ --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} + --production${production} - + - - - yarn-cluster - cluster - bulkTagging-dataset - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn-cluster - cluster - bulkTagging-orp - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/otherresearchproduct - --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn-cluster - cluster - bulkTagging-software - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} - - - - - - diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 660a55472..7cbbcaafb 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashMap; import java.util.List; import org.apache.commons.io.FileUtils; @@ -98,14 +99,11 @@ public class BulkTagJobTest { SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(), "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap }); @@ -133,19 +131,16 @@ public class BulkTagJobTest { @Test void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/") .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap }); @@ -230,19 +225,19 @@ public class BulkTagJobTest { void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/") .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -311,18 +306,18 @@ public class BulkTagJobTest { @Test void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", workingDir.toString() + "/publication", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -384,25 +379,25 @@ public class BulkTagJobTest { void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") + "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/orp", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/orp") + .textFile(workingDir.toString() + "/otherresearchproduct") .map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)); Assertions.assertEquals(10, tmp.count()); @@ -505,18 +500,18 @@ public class BulkTagJobTest { @Test void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -636,14 +631,14 @@ public class BulkTagJobTest { SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(), + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(), "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -732,18 +727,18 @@ public class BulkTagJobTest { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -774,19 +769,19 @@ public class BulkTagJobTest { void bulkTagOtherJupyter() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct") + "/eu/dnetlib/dhp/eosctag/jupyter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/otherresearchproduct", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -829,18 +824,18 @@ public class BulkTagJobTest { public void bulkTagDatasetJupyter() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/jupyter/dataset") + "/eu/dnetlib/dhp/eosctag/jupyter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -878,18 +873,18 @@ public class BulkTagJobTest { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/jupyter/software") + "/eu/dnetlib/dhp/eosctag/jupyter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1096,18 +1091,18 @@ public class BulkTagJobTest { void galaxyOtherTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct") + "/eu/dnetlib/dhp/eosctag/galaxy/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/otherresearchproduct", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1214,18 +1209,18 @@ public class BulkTagJobTest { void galaxySoftwareTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/galaxy/software") + "/eu/dnetlib/dhp/eosctag/galaxy/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1333,19 +1328,19 @@ public class BulkTagJobTest { void twitterDatasetTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/twitter/dataset") + "/eu/dnetlib/dhp/eosctag/twitter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1373,19 +1368,19 @@ public class BulkTagJobTest { void twitterOtherTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct") + "/eu/dnetlib/dhp/eosctag/twitter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/otherresearchproduct", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1418,19 +1413,19 @@ public class BulkTagJobTest { void twitterSoftwareTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/twitter/software") + "/eu/dnetlib/dhp/eosctag/twitter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1455,19 +1450,19 @@ public class BulkTagJobTest { void EoscContextTagTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json") + "/eu/dnetlib/dhp/bulktag/eosc/dataset/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1533,16 +1528,16 @@ public class BulkTagJobTest { SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/") .getPath(), "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1568,4 +1563,42 @@ public class BulkTagJobTest { } + @Test + void newConfTest() throws Exception { + final String pathMap = BulkTagJobTest.pathMap; + SparkBulkTagJob + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(), + "-taggingConf", taggingConf, + + "-outputPath", workingDir.toString() + "/", + "-production", Boolean.TRUE.toString(), + "-pathMap", pathMap + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + + verificationDataset.createOrReplaceTempView("dataset"); + + String query = "select id, MyT.id community " + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + Assertions.assertEquals(0, spark.sql(query).count()); + } + } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index c8fd62c8e..5f0b1d7f1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest { sc.setVerb("not_contains"); sc.setField("contributor"); sc.setValue("DARIAH"); - sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue())); + sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue())); String metadata = "This work has been partially supported by DARIAH-EU infrastructure"; Assertions.assertFalse(sc.verifyCriteria(metadata)); }