From 9fc8ebe98b6e95faf27cd162ebf836f21447b168 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 19 Apr 2023 09:32:13 +0200 Subject: [PATCH 01/11] refactoring --- .../dhp/common/api/ZenodoAPIClient.java | 38 +++++++------------ 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index 2aeccfcf2..544da78f5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -9,13 +9,13 @@ import java.util.concurrent.TimeUnit; import org.apache.http.HttpHeaders; import org.apache.http.entity.ContentType; +import org.jetbrains.annotations.NotNull; import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; import okhttp3.*; -import org.jetbrains.annotations.NotNull; public class ZenodoAPIClient implements Serializable { @@ -80,7 +80,7 @@ public class ZenodoAPIClient implements Serializable { int responseCode = conn.getResponseCode(); conn.disconnect(); - if(!checkOKStatus(responseCode)) + if (!checkOKStatus(responseCode)) throw new IOException("Unexpected code " + responseCode + body); ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); @@ -115,7 +115,7 @@ public class ZenodoAPIClient implements Serializable { } int responseCode = conn.getResponseCode(); - if(! checkOKStatus(responseCode)){ + if (!checkOKStatus(responseCode)) { throw new IOException("Unexpected code " + responseCode + getBody(conn)); } @@ -126,7 +126,7 @@ public class ZenodoAPIClient implements Serializable { private String getBody(HttpURLConnection conn) throws IOException { String body = "{}"; try (BufferedReader br = new BufferedReader( - new InputStreamReader(conn.getInputStream(), "utf-8"))) { + new InputStreamReader(conn.getInputStream(), "utf-8"))) { StringBuilder response = new StringBuilder(); String responseLine = null; while ((responseLine = br.readLine()) != null) { @@ -155,7 +155,6 @@ public class ZenodoAPIClient implements Serializable { conn.setDoOutput(true); conn.setRequestMethod("PUT"); - try (OutputStream os = conn.getOutputStream()) { byte[] input = metadata.getBytes("utf-8"); os.write(input, 0, input.length); @@ -164,19 +163,18 @@ public class ZenodoAPIClient implements Serializable { final int responseCode = conn.getResponseCode(); conn.disconnect(); - if(!checkOKStatus(responseCode)) + if (!checkOKStatus(responseCode)) throw new IOException("Unexpected code " + responseCode + getBody(conn)); return responseCode; - } - private boolean checkOKStatus(int responseCode) { + private boolean checkOKStatus(int responseCode) { - if(HttpURLConnection.HTTP_OK != responseCode || - HttpURLConnection.HTTP_CREATED != responseCode) - return true ; + if (HttpURLConnection.HTTP_OK != responseCode || + HttpURLConnection.HTTP_CREATED != responseCode) + return true; return false; } @@ -233,7 +231,6 @@ public class ZenodoAPIClient implements Serializable { conn.setDoOutput(true); conn.setRequestMethod("POST"); - try (OutputStream os = conn.getOutputStream()) { byte[] input = json.getBytes("utf-8"); os.write(input, 0, input.length); @@ -245,7 +242,7 @@ public class ZenodoAPIClient implements Serializable { int responseCode = conn.getResponseCode(); conn.disconnect(); - if(!checkOKStatus(responseCode)) + if (!checkOKStatus(responseCode)) throw new 
IOException("Unexpected code " + responseCode + body); ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); @@ -290,13 +287,12 @@ public class ZenodoAPIClient implements Serializable { int responseCode = conn.getResponseCode(); conn.disconnect(); - if(!checkOKStatus(responseCode)) + if (!checkOKStatus(responseCode)) throw new IOException("Unexpected code " + responseCode + body); ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); bucket = zenodoModel.getLinks().getBucket(); - return responseCode; } @@ -331,22 +327,16 @@ public class ZenodoAPIClient implements Serializable { conn.setDoOutput(true); conn.setRequestMethod("GET"); - - String body = getBody(conn); int responseCode = conn.getResponseCode(); conn.disconnect(); - if(!checkOKStatus(responseCode)) + if (!checkOKStatus(responseCode)) throw new IOException("Unexpected code " + responseCode + body); - - return body; - - } private String getBucket(String inputUurl) throws IOException { @@ -363,15 +353,13 @@ public class ZenodoAPIClient implements Serializable { int responseCode = conn.getResponseCode(); conn.disconnect(); - if(!checkOKStatus(responseCode)) + if (!checkOKStatus(responseCode)) throw new IOException("Unexpected code " + responseCode + body); ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); return zenodoModel.getLinks().getBucket(); - - } } From d9506035e47b951331d26ddf5ab484b9179d825d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 9 Jun 2023 12:05:02 +0200 Subject: [PATCH 02/11] [ZenodoApi] gone back to okhttp3 to send the payload. --- .../dhp/common/api/ZenodoAPIClient.java | 76 +++++++++++++------ .../dhp/common/api/ZenodoAPIClientTest.java | 33 ++++++-- 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index 544da78f5..a69624e3b 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -90,36 +90,68 @@ public class ZenodoAPIClient implements Serializable { return responseCode; } +// /** +// * Upload files in Zenodo. +// * +// * @param is the inputStream for the file to upload +// * @param file_name the name of the file as it will appear on Zenodo +// * @return the response code +// */ +// public int uploadIS(InputStream is, String file_name) throws IOException { +// +// URL url = new URL(bucket + "/" + file_name); +// HttpURLConnection conn = (HttpURLConnection) url.openConnection(); +// conn.setChunkedStreamingMode(8192); +// conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); +// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); +// conn.setDoOutput(true); +// conn.setRequestMethod("PUT"); +// conn.setRequestProperty("connection", "close"); +// +// byte[] buf = new byte[8192]; +// int length; +// try (OutputStream os = conn.getOutputStream()) { +// +// while ((length = is.read(buf)) != -1) { +// os.write(buf, 0, length); +// } +// +// } +// int responseCode = conn.getResponseCode(); +// if (!checkOKStatus(responseCode)) { +// throw new IOException("Unexpected code " + responseCode + getBody(conn)); +// } +// +// return responseCode; +// } + /** * Upload files in Zenodo. 
* * @param is the inputStream for the file to upload * @param file_name the name of the file as it will appear on Zenodo + * @param len the size of the file * @return the response code */ - public int uploadIS(InputStream is, String file_name) throws IOException { + public int uploadIS(InputStream is, String file_name, long len) throws IOException { + OkHttpClient httpClient = new OkHttpClient.Builder() + .writeTimeout(600, TimeUnit.SECONDS) + .readTimeout(600, TimeUnit.SECONDS) + .connectTimeout(600, TimeUnit.SECONDS) + .build(); - URL url = new URL(bucket + "/" + file_name); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - byte[] buf = new byte[8192]; - int length; - try (OutputStream os = conn.getOutputStream()) { - while ((length = is.read(buf)) != -1) { - os.write(buf, 0, length); - } + Request request = new Request.Builder() + .url(bucket + "/" + file_name) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) + .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) + .build(); + try (Response response = httpClient.newCall(request).execute()) { + if (!response.isSuccessful()) + throw new IOException("Unexpected code " + response + response.body().string()); + return response.code(); } - int responseCode = conn.getResponseCode(); - if (!checkOKStatus(responseCode)) { - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - } - - return responseCode; } @NotNull @@ -172,8 +204,8 @@ public class ZenodoAPIClient implements Serializable { private boolean checkOKStatus(int responseCode) { - if (HttpURLConnection.HTTP_OK != responseCode || - HttpURLConnection.HTTP_CREATED != responseCode) + if (HttpURLConnection.HTTP_OK == responseCode || + HttpURLConnection.HTTP_CREATED == responseCode) return true; return false; } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java index 92c1dcda3..87b68617b 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java @@ -15,7 +15,7 @@ import org.junit.jupiter.api.Test; class ZenodoAPIClientTest { private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; - private final String ACCESS_TOKEN = ""; + private final String ACCESS_TOKEN = "OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4"; private final String CONCEPT_REC_ID = "657113"; @@ -33,7 +33,7 @@ class ZenodoAPIClientTest { InputStream is = new FileInputStream(file); - Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz")); + Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length())); String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json")); @@ -51,18 +51,18 @@ class ZenodoAPIClientTest { Assertions.assertEquals(201, client.newDeposition()); File file = new File(getClass() - .getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz") + .getResource("/eu/dnetlib/dhp/common/api/newVersion") .getPath()); InputStream is = new FileInputStream(file); - Assertions.assertEquals(200, client.uploadIS(is, 
"COVID-19.json.gz")); + Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length())); String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json")); Assertions.assertEquals(200, client.sendMretadata(metadata)); - Assertions.assertEquals(202, client.publish()); + // Assertions.assertEquals(202, client.publish()); } @@ -80,7 +80,7 @@ class ZenodoAPIClientTest { InputStream is = new FileInputStream(file); - Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition")); + Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length())); Assertions.assertEquals(202, client.publish()); @@ -100,10 +100,29 @@ class ZenodoAPIClientTest { InputStream is = new FileInputStream(file); - Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition")); + Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length())); Assertions.assertEquals(202, client.publish()); } + @Test + void depositBigFile() throws MissingConceptDoiException, IOException { + ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, + ACCESS_TOKEN); + + Assertions.assertEquals(201, client.newDeposition()); + + File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar"); +// File file = new File(getClass() +// .getResource("/eu/dnetlib/dhp/common/api/newVersion2") +// .getPath()); + + InputStream is = new FileInputStream(file); + + Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length())); + + //Assertions.assertEquals(202, client.publish()); + } + } From e4b27182d0c55ef2dd566e7d0f12da2b6e32a6b1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 21 Jun 2023 11:15:53 +0200 Subject: [PATCH 03/11] [master] refactoring --- .../dhp/common/api/ZenodoAPIClient.java | 18 +++++++++--------- .../dhp/common/api/ZenodoAPIClientTest.java | 6 +++--- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 3 +-- .../provision/IndexRecordTransformerTest.java | 4 +++- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index a69624e3b..0164bd249 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -135,17 +135,17 @@ public class ZenodoAPIClient implements Serializable { */ public int uploadIS(InputStream is, String file_name, long len) throws IOException { OkHttpClient httpClient = new OkHttpClient.Builder() - .writeTimeout(600, TimeUnit.SECONDS) - .readTimeout(600, TimeUnit.SECONDS) - .connectTimeout(600, TimeUnit.SECONDS) - .build(); + .writeTimeout(600, TimeUnit.SECONDS) + .readTimeout(600, TimeUnit.SECONDS) + .connectTimeout(600, TimeUnit.SECONDS) + .build(); Request request = new Request.Builder() - .url(bucket + "/" + file_name) - .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers - .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) - .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) - .build(); + .url(bucket + "/" + file_name) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) + .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) + .build(); try (Response response = httpClient.newCall(request).execute()) { if 
(!response.isSuccessful()) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java index 87b68617b..15ca81d17 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java @@ -62,7 +62,7 @@ class ZenodoAPIClientTest { Assertions.assertEquals(200, client.sendMretadata(metadata)); - // Assertions.assertEquals(202, client.publish()); + // Assertions.assertEquals(202, client.publish()); } @@ -109,7 +109,7 @@ class ZenodoAPIClientTest { @Test void depositBigFile() throws MissingConceptDoiException, IOException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, - ACCESS_TOKEN); + ACCESS_TOKEN); Assertions.assertEquals(201, client.newDeposition()); @@ -122,7 +122,7 @@ class ZenodoAPIClientTest { Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length())); - //Assertions.assertEquals(202, client.publish()); + // Assertions.assertEquals(202, client.publish()); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index bfd6d461d..55b49ee4f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1010,7 +1010,7 @@ class MappersTest { @Test void testD4Science() throws IOException { final String xml = IOUtils - .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science.xml"))); + .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science.xml"))); final List actual = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); assertNotNull(actual); assertFalse(actual.isEmpty()); @@ -1023,7 +1023,6 @@ class MappersTest { } - private void assertValidId(final String id) { // System.out.println(id); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 74f203cbf..b5a5b5f0d 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -136,9 +136,11 @@ public class IndexRecordTransformerTest { @Test public void testForEdithDemoCovid() throws IOException, TransformerException { - final String record = IOUtils.toString(getClass().getResourceAsStream("edith-demo/10.3390-pr9111967-covid.xml")); + final String record = IOUtils + .toString(getClass().getResourceAsStream("edith-demo/10.3390-pr9111967-covid.xml")); testRecordTransformation(record); } + @Test public void testForEdithDemoEthics() throws IOException, TransformerException { final String record = IOUtils.toString(getClass().getResourceAsStream("edith-demo/10.2196-33081-ethics.xml")); From 8621377917082245383934e41b96ccb63e8e49ea Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jun 2023 19:02:44 +0200 Subject: [PATCH 04/11] [UsageCount] fixed typo in attribute name for datasource table --- .../dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java index 9b444c6fa..e62b80f6a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/usagestats/SparkAtomicActionUsageJob.java @@ -75,7 +75,7 @@ public class SparkAtomicActionUsageJob implements Serializable { removeOutputDir(spark, outputPath); prepareData(dbname, spark, workingPath + "/usageDb", "usage_stats", "result_id"); prepareData(dbname, spark, workingPath + "/projectDb", "project_stats", "id"); - prepareData(dbname, spark, workingPath + "/datasourceDb", "datasource_stats", "repositor_id"); + prepareData(dbname, spark, workingPath + "/datasourceDb", "datasource_stats", "repository_id"); writeActionSet(spark, workingPath, outputPath); }); } From 69dac916590e98327ba7f7a13c1946bc0423b3ff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Oct 2023 15:45:52 +0200 Subject: [PATCH 05/11] adding the new code to use the API instead of the Information Service --- .../eu/dnetlib/dhp/api/QueryCommunityAPI.java | 83 +++++++++ .../main/java/eu/dnetlib/dhp/api/Utils.java | 169 ++++++++++++++++++ .../api/model/CommunityContentprovider.java | 43 +++++ .../dhp/api/model/CommunityEntityMap.java | 21 +++ .../dnetlib/dhp/api/model/CommunityModel.java | 108 +++++++++++ .../dhp/api/model/CommunitySummary.java | 15 ++ .../dnetlib/dhp/api/model/ContentModel.java | 51 ++++++ .../dnetlib/dhp/api/model/DatasourceList.java | 13 ++ .../dhp/api/model/OrganizationList.java | 16 ++ .../dnetlib/dhp/api/model/ProjectModel.java | 24 +++ .../community/QueryInformationSystem.java | 34 ---- .../dhp/api/QueryCommunityAPITest.java | 9 + 12 files changed, 552 insertions(+), 34 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java new file mode 100644 index 000000000..262ca0290 --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java @@ -0,0 +1,83 @@ + +package eu.dnetlib.dhp.api; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; + +import org.jetbrains.annotations.NotNull; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class QueryCommunityAPI { + private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/"; + private static final String BETA_BASE_URL = "https://beta.services.openaire.eu/openaire/"; + + private static String get(String geturl) throws IOException { + URL url = new URL(geturl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + + int responseCode = conn.getResponseCode(); + String body = getBody(conn); + conn.disconnect(); + if (responseCode != HttpURLConnection.HTTP_OK) + throw new IOException("Unexpected code " + responseCode + body); + + return body; + } + + public static String communities(boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/communities"); + return get(BETA_BASE_URL + "community/communities"); + } + + public static String community(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id); + return get(BETA_BASE_URL + "community/" + id); + } + + public static String communityDatasource(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); + return get(BETA_BASE_URL + "community/" + id + "/contentproviders"); + + } + + public static String communityPropagationOrganization(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); + return get(BETA_BASE_URL + "community/" + id + "/propagationOrganizations"); + } + + public static String communityProjects(String id, String page, String size, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + return get(BETA_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + } + + @NotNull + private static String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + + body = response.toString(); + + } + return body; + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java new file mode 100644 index 000000000..43d5e7e98 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -0,0 +1,169 @@ + +package eu.dnetlib.dhp.api; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import javax.management.Query; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.amazonaws.util.StringUtils; +import
com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; + +import eu.dnetlib.dhp.api.model.*; +import eu.dnetlib.dhp.bulktag.community.Community; +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import eu.dnetlib.dhp.bulktag.community.Provider; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; +import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class Utils implements Serializable { + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final VerbResolver resolver = VerbResolverFactory.newInstance(); + + private static final Logger log = LoggerFactory.getLogger(Utils.class); + + public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException { + final Map communities = Maps.newHashMap(); + List validCommunities = new ArrayList<>(); + getValidCommunities(production) + .forEach(community -> { + try { + CommunityModel cm = MAPPER + .readValue(QueryCommunityAPI.community(community.getId(), production), CommunityModel.class); + validCommunities.add(getCommunity(cm)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + validCommunities.forEach(community -> { + try { + DatasourceList dl = MAPPER + .readValue( + QueryCommunityAPI.communityDatasource(community.getId(), production), DatasourceList.class); + community.setProviders(dl.stream().map(d -> { + if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) + return null; + Provider p = new Provider(); + p.setOpenaireId("10|" + d.getOpenaireId()); + p.setSelectionConstraints(d.getSelectioncriteria()); + if (p.getSelectionConstraints() != null) + p.getSelectionConstraints().setSelection(resolver); + return p; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList())); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + validCommunities.forEach(community -> { + if (community.isValid()) + communities.put(community.getId(), community); + }); + return new CommunityConfiguration(communities); + } + + private static Community getCommunity(CommunityModel cm) { + Community c = new Community(); + c.setId(cm.getId()); + c.setZenodoCommunities(cm.getOtherZenodoCommunities()); + if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity())) + c.getZenodoCommunities().add(cm.getZenodoCommunity()); + c.setSubjects(cm.getSubjects()); + c.getSubjects().addAll(cm.getFos()); + c.getSubjects().addAll(cm.getSdg()); + if (cm.getAdvancedConstraints() != null) { + c.setConstraints(cm.getAdvancedConstraints()); + c.getConstraints().setSelection(resolver); + } + if (cm.getRemoveConstraints() != null) { + c.setRemoveConstraints(cm.getRemoveConstraints()); + c.getRemoveConstraints().setSelection(resolver); + } + return c; + } + + public static List getValidCommunities(boolean production) throws IOException { + return MAPPER + .readValue(QueryCommunityAPI.communities(production), CommunitySummary.class) + .stream() + .filter( + community -> !community.getStatus().equals("hidden") && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + } + + /** + * it returns for each organization the list of associated communities + */ + public static CommunityEntityMap getCommunityOrganization(boolean production) throws IOException { + CommunityEntityMap organizationMap = new 
CommunityEntityMap(); + getValidCommunities(production) + .forEach(community -> { + String id = community.getId(); + try { + List associatedOrgs = MAPPER + .readValue( + QueryCommunityAPI.communityPropagationOrganization(id, production), OrganizationList.class); + associatedOrgs.forEach(o -> { + if (!organizationMap + .keySet() + .contains( + "20|" + o)) + organizationMap.put("20|" + o, new ArrayList<>()); + organizationMap.get("20|" + o).add(community.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + return organizationMap; + } + + public static CommunityEntityMap getCommunityProjects(boolean production) throws IOException { + CommunityEntityMap projectMap = new CommunityEntityMap(); + getValidCommunities(production) + .forEach(community -> { + int page = -1; + int size = 100; + ContentModel cm = new ContentModel(); + do { + page++; + try { + cm = MAPPER + .readValue( + QueryCommunityAPI + .communityProjects( + community.getId(), String.valueOf(page), String.valueOf(size), production), + ContentModel.class); + if (cm.getContent().size() > 0) { + cm.getContent().forEach(p -> { + if (!projectMap.keySet().contains("40|" + p.getOpenaireId())) + projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>()); + projectMap.get("40|" + p.getOpenaireId()).add(community.getId()); + }); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + }); + return projectMap; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java new file mode 100644 index 000000000..9fab5a80c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.api.model; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; + +@JsonAutoDetect +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityContentprovider { + private String openaireId; + private SelectionConstraints selectioncriteria; + + private String enabled; + + public String getEnabled() { + return enabled; + } + + public void setEnabled(String enabled) { + this.enabled = enabled; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(final String openaireId) { + this.openaireId = openaireId; + } + + public SelectionConstraints getSelectioncriteria() { + + return this.selectioncriteria; + } + + public void setSelectioncriteria(SelectionConstraints selectioncriteria) { + this.selectioncriteria = selectioncriteria; + + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java new file mode 100644 index 000000000..ca3eb2857 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java @@ -0,0 +1,21 @@ + +package eu.dnetlib.dhp.api.model; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +public class CommunityEntityMap extends HashMap> { + + public CommunityEntityMap() { + super(); + } + + public List get(String key) { + + if (super.get(key) == null) { + return new ArrayList<>(); + } + return 
super.get(key); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java new file mode 100644 index 000000000..745e7efc2 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java @@ -0,0 +1,108 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityModel implements Serializable { + private String id; + private String type; + private String status; + + private String zenodoCommunity; + private List subjects; + private List otherZenodoCommunities; + private List fos; + private List sdg; + private SelectionConstraints advancedConstraints; + private SelectionConstraints removeConstraints; + + public String getZenodoCommunity() { + return zenodoCommunity; + } + + public void setZenodoCommunity(String zenodoCommunity) { + this.zenodoCommunity = zenodoCommunity; + } + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public List getOtherZenodoCommunities() { + return otherZenodoCommunities; + } + + public void setOtherZenodoCommunities(List otherZenodoCommunities) { + this.otherZenodoCommunities = otherZenodoCommunities; + } + + public List getFos() { + return fos; + } + + public void setFos(List fos) { + this.fos = fos; + } + + public List getSdg() { + return sdg; + } + + public void setSdg(List sdg) { + this.sdg = sdg; + } + + public SelectionConstraints getRemoveConstraints() { + return removeConstraints; + } + + public void setRemoveConstraints(SelectionConstraints removeConstraints) { + this.removeConstraints = removeConstraints; + } + + public SelectionConstraints getAdvancedConstraints() { + return advancedConstraints; + } + + public void setAdvancedConstraints(SelectionConstraints advancedConstraints) { + this.advancedConstraints = advancedConstraints; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java new file mode 100644 index 000000000..a0541f7ee --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java @@ -0,0 +1,15 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class CommunitySummary extends ArrayList implements Serializable { + public CommunitySummary() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java new file mode 100644 index 000000000..469709f59 --- /dev/null +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ContentModel implements Serializable { + private List content; + private Integer totalPages; + private Boolean last; + private Integer number; + + public List getContent() { + return content; + } + + public void setContent(List content) { + this.content = content; + } + + public Integer getTotalPages() { + return totalPages; + } + + public void setTotalPages(Integer totalPages) { + this.totalPages = totalPages; + } + + public Boolean getLast() { + return last; + } + + public void setLast(Boolean last) { + this.last = last; + } + + public Integer getNumber() { + return number; + } + + public void setNumber(Integer number) { + this.number = number; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java new file mode 100644 index 000000000..30d0241c3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +import eu.dnetlib.dhp.api.model.CommunityContentprovider; + +public class DatasourceList extends ArrayList implements Serializable { + public DatasourceList() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java new file mode 100644 index 000000000..3c81ad179 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class OrganizationList extends ArrayList implements Serializable { + + public OrganizationList() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java new file mode 100644 index 000000000..3495d6a63 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java @@ -0,0 +1,24 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ProjectModel implements Serializable { + + private String openaireId; + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java deleted file mode 100644 index 5fe3cf81f..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java 
+++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.bulktag.community; - -import java.io.IOException; -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.dom4j.DocumentException; -import org.xml.sax.SAXException; - -import com.google.common.base.Joiner; - -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - - public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException, SAXException, IOException { - ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - final List res = isLookUp - .quickSearchProfile( - IOUtils - .toString( - QueryInformationSystem.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/query.xq"))); - - final String xmlConf = "" + Joiner.on(" ").join(res) + ""; - - return CommunityConfigurationFactory.newInstance(xmlConf); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java new file mode 100644 index 000000000..03084301e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java @@ -0,0 +1,9 @@ +package eu.dnetlib.dhp.api;/** + + * @author miriam.baglioni + + * @Date 20/10/23 + + */ +public class QueryCommunityAPITest { +} From 18bfff8af303ae1ecc4e5eb2d5118d64b3d2e56a Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Oct 2023 15:47:03 +0200 Subject: [PATCH 06/11] adding test classes and modifying test for bulktag --- .../dhp/api/QueryCommunityAPITest.java | 115 +++++++++- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 215 ++++++++++-------- .../CommunityConfigurationFactoryTest.java | 2 +- 3 files changed, 236 insertions(+), 96 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java index 03084301e..0cdf0f50b 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java @@ -1,9 +1,116 @@ -package eu.dnetlib.dhp.api;/** +package eu.dnetlib.dhp.api; + + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.api.model.CommunityEntityMap; +import eu.dnetlib.dhp.api.model.CommunityModel; +import eu.dnetlib.dhp.api.model.CommunitySummary; +import eu.dnetlib.dhp.api.model.DatasourceList; +import eu.dnetlib.dhp.bulktag.community.Community; +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; + +/** * @author miriam.baglioni - - * @Date 20/10/23 - + * @Date 06/10/23 */ public class QueryCommunityAPITest { + + @Test + void communityList() throws Exception { + String body = QueryCommunityAPI.communities(true); + new ObjectMapper() + .readValue(body, CommunitySummary.class) + .forEach(p -> { + try { + System.out.println(new ObjectMapper().writeValueAsString(p)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + void community() throws Exception { + String id = "dh-ch"; + String body = 
QueryCommunityAPI.community(id, true); + System.out + .println( + new ObjectMapper() + .writeValueAsString( + new ObjectMapper() + .readValue(body, CommunityModel.class))); + } + + @Test + void communityDatasource() throws Exception { + String id = "dh-ch"; + String body = QueryCommunityAPI.communityDatasource(id, true); + new ObjectMapper() + .readValue(body, DatasourceList.class) + .forEach(ds -> { + try { + System.out.println(new ObjectMapper().writeValueAsString(ds)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }); + ; + } + + @Test + void validCommunities() throws Exception { + CommunityConfiguration cc = Utils.getCommunityConfiguration(true); + System.out.println(cc.getCommunities().keySet()); + Community community = cc.getCommunities().get("aurora"); + Assertions.assertEquals(0, community.getSubjects().size()); + Assertions.assertEquals(null, community.getConstraints()); + Assertions.assertEquals(null, community.getRemoveConstraints()); + Assertions.assertEquals(2, community.getZenodoCommunities().size()); + Assertions + .assertTrue( + community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network"))); + Assertions + .assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck"))); + Assertions.assertEquals(35, community.getProviders().size()); + Assertions + .assertEquals( + 35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count()); + + } + + @Test + void eutopiaCommunityConfiguration() throws Exception { + CommunityConfiguration cc = Utils.getCommunityConfiguration(true); + System.out.println(cc.getCommunities().keySet()); + Community community = cc.getCommunities().get("eutopia"); + community.getProviders().forEach(p -> System.out.println(p.getOpenaireId())); + } + + @Test + void getCommunityProjects() throws Exception { + CommunityEntityMap projectMap = Utils.getCommunityProjects(true); + + Assertions + .assertTrue( + projectMap + .keySet() + .stream() + .allMatch(k -> k.startsWith("40|"))); + + System.out.println(projectMap); + } + + @Test + void getCommunityOrganizations() throws Exception { + CommunityEntityMap organizationMap = Utils.getCommunityOrganization(true); + Assertions.assertTrue(organizationMap.keySet().stream().allMatch(k -> k.startsWith("20|"))); + + } + } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 660a55472..7cbbcaafb 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashMap; import java.util.List; import org.apache.commons.io.FileUtils; @@ -98,14 +99,11 @@ public class BulkTagJobTest { SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(), "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", 
- "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap }); @@ -133,19 +131,16 @@ public class BulkTagJobTest { @Test void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext/") .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap }); @@ -230,19 +225,19 @@ public class BulkTagJobTest { void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/") .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -311,18 +306,18 @@ public class BulkTagJobTest { @Test void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication", - "-outputPath", workingDir.toString() + "/publication", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -384,25 +379,25 @@ public class BulkTagJobTest { void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") + "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/orp", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/orp") + .textFile(workingDir.toString() + "/otherresearchproduct") .map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)); Assertions.assertEquals(10, tmp.count()); @@ -505,18 +500,18 @@ public class 
BulkTagJobTest { @Test void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -636,14 +631,14 @@ public class BulkTagJobTest { SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/software_10.json.gz").getPath(), + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/software/").getPath(), "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -732,18 +727,18 @@ public class BulkTagJobTest { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -774,19 +769,19 @@ public class BulkTagJobTest { void bulkTagOtherJupyter() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/jupyter/otherresearchproduct") + "/eu/dnetlib/dhp/eosctag/jupyter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/otherresearchproduct", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -829,18 +824,18 @@ public class BulkTagJobTest { public void bulkTagDatasetJupyter() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/jupyter/dataset") + "/eu/dnetlib/dhp/eosctag/jupyter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -878,18 +873,18 @@ public class BulkTagJobTest { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/jupyter/software") + 
"/eu/dnetlib/dhp/eosctag/jupyter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1096,18 +1091,18 @@ public class BulkTagJobTest { void galaxyOtherTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/galaxy/otherresearchproduct") + "/eu/dnetlib/dhp/eosctag/galaxy/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/otherresearchproduct", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1214,18 +1209,18 @@ public class BulkTagJobTest { void galaxySoftwareTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/galaxy/software") + "/eu/dnetlib/dhp/eosctag/galaxy/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1333,19 +1328,19 @@ public class BulkTagJobTest { void twitterDatasetTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/twitter/dataset") + "/eu/dnetlib/dhp/eosctag/twitter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1373,19 +1368,19 @@ public class BulkTagJobTest { void twitterOtherTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/twitter/otherresearchproduct") + "/eu/dnetlib/dhp/eosctag/twitter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", - "-outputPath", workingDir.toString() + "/otherresearchproduct", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1418,19 +1413,19 @@ public class BulkTagJobTest { void twitterSoftwareTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/eosctag/twitter/software") + "/eu/dnetlib/dhp/eosctag/twitter/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", 
sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software", - "-outputPath", workingDir.toString() + "/software", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1455,19 +1450,19 @@ public class BulkTagJobTest { void EoscContextTagTest() throws Exception { final String sourcePath = getClass() .getResource( - "/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json") + "/eu/dnetlib/dhp/bulktag/eosc/dataset/") .getPath(); SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); @@ -1533,16 +1528,16 @@ public class BulkTagJobTest { SparkBulkTagJob .main( new String[] { - "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints/") .getPath(), "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + + "-outputPath", workingDir.toString() + "/", + "-pathMap", pathMap }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1568,4 +1563,42 @@ public class BulkTagJobTest { } + @Test + void newConfTest() throws Exception { + final String pathMap = BulkTagJobTest.pathMap; + SparkBulkTagJob + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(), + "-taggingConf", taggingConf, + + "-outputPath", workingDir.toString() + "/", + "-production", Boolean.TRUE.toString(), + "-pathMap", pathMap + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + + verificationDataset.createOrReplaceTempView("dataset"); + + String query = "select id, MyT.id community " + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + Assertions.assertEquals(0, spark.sql(query).count()); + } + } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index c8fd62c8e..5f0b1d7f1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest { sc.setVerb("not_contains"); sc.setField("contributor"); sc.setValue("DARIAH"); - 
sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(), sc.getValue())); + sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue())); String metadata = "This work has been partially supported by DARIAH-EU infrastructure"; Assertions.assertFalse(sc.verifyCriteria(metadata)); } From 34358afe756d2ea602a1aa7e58969b2adbb204cf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Oct 2023 15:48:27 +0200 Subject: [PATCH 07/11] modified resource file, workflow and default-config. Added 3g of memory overhead and specified the shuffle partitions in the wf configuration. Removed the multiple instantiations in the wf because of the different implementation of the spark job --- .../dhp/bulktag/input_bulkTag_parameters.json | 28 ++--- .../dhp/bulktag/oozie_app/config-default.xml | 10 +- .../dhp/bulktag/oozie_app/workflow.xml | 108 ++---------------- 3 files changed, 25 insertions(+), 121 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json index a8be7c32e..dbe2d088f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json @@ -1,10 +1,5 @@ [ - { - "paramName":"is", - "paramLongName":"isLookUpUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, + { "paramName":"s", "paramLongName":"sourcePath", @@ -17,12 +12,7 @@ "paramDescription": "the json path associated to each selection field", "paramRequired": true }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, + { "paramName": "out", "paramLongName": "outputPath", @@ -35,17 +25,19 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false }, - { - "paramName": "test", - "paramLongName": "isTest", - "paramDescription": "Parameter intended for testing purposes only. True if the reun is relatesd to a test and so the taggingConf parameter should be loaded", - "paramRequired": false - }, + { "paramName": "tg", "paramLongName": "taggingConf", "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. Intended to be removed", "paramRequired": false + }, + + { + "paramName": "p", + "paramLongName": "production", + "paramDescription": "this parameter is intended for testing purposes only. It is a possible tagging configuration obtained via the XQUERY. 
Intended to be removed", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml index fe82ae194..c92f559f9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml @@ -45,10 +45,14 @@ sparkExecutorMemory - 6G + 5G - sparkExecutorCores - 1 + memoryOverhead + 3g + + + partitions + 3284 \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml index b868e4c72..4b81c58e4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml @@ -4,10 +4,6 @@ sourcePath the source path - - isLookUpUrl - the isLookup service endpoint - pathMap the json path associated to each selection field @@ -102,16 +98,9 @@ - + - - - - - - - - + yarn-cluster cluster @@ -122,104 +111,23 @@ --num-executors=${sparkExecutorNumber} --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} + --conf spark.executor.memoryOverhead=${memeoryOverhead} + --conf spark.sql.shuffle.partitions=${partitions} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePath${sourcePath}/publication - --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication + --sourcePath${sourcePath}/ + --outputPath${outputPath}/ --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} + --production${production} - + - - - yarn-cluster - cluster - bulkTagging-dataset - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/dataset - --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} - - - - - - - - yarn-cluster - cluster - bulkTagging-orp - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/otherresearchproduct - 
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} - - - - - - yarn-cluster - cluster - bulkTagging-software - eu.dnetlib.dhp.bulktag.SparkBulkTagJob - dhp-enrichment-${projectVersion}.jar - - --num-executors=${sparkExecutorNumber} - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/software - --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - --pathMap${pathMap} - --isLookUpUrl${isLookUpUrl} - - - - - - From f206ff42d6ebec290ce3cf9c3222c10a02313ce6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Oct 2023 15:49:41 +0200 Subject: [PATCH 08/11] modified code to use the API. Removed parameters that are no longer needed. Rewrote the code to exploit the parallel stream on the entity types --- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 83 +++---- .../dhp/bulktag/community/Community.java | 9 +- .../community/CommunityConfiguration.java | 8 +- .../CommunityConfigurationFactory.java | 12 +- .../dhp/bulktag/community/Constraint.java | 17 +- .../dhp/bulktag/community/ResultTagger.java | 26 +- .../community/SelectionConstraints.java | 2 + .../PrepareResultCommunity2.java | 225 ++++++++++++++++++ 8 files changed, 301 insertions(+), 81 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 3186ed5c0..68c740dd5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -9,7 +9,6 @@ import java.util.*; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -21,8 +20,11 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; +import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; +import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Result; @@ -53,50 +55,38 @@ public class SparkBulkTagJob { .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - Boolean isTest = Optional - .ofNullable(parser.get("isTest")) - .map(Boolean::valueOf) - .orElse(Boolean.FALSE); - log.info("isTest: {} ", isTest); - final String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final boolean production = 
Boolean.valueOf(parser.get("production")); + log.info("production: {}", production); + ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class); log.info("pathMap: {}", new Gson().toJson(protoMappingParams)); - final String resultClassName = parser.get("resultTableName"); - log.info("resultTableName: {}", resultClassName); - - final Boolean saveGraph = Optional - .ofNullable(parser.get("saveGraph")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("saveGraph: {}", saveGraph); - - Class resultClazz = (Class) Class.forName(resultClassName); - SparkConf conf = new SparkConf(); CommunityConfiguration cc; - String taggingConf = parser.get("taggingConf"); + String taggingConf = Optional + .ofNullable(parser.get("taggingConf")) + .map(String::valueOf) + .orElse(null); - if (isTest) { + if (taggingConf != null) { cc = CommunityConfigurationFactory.newInstance(taggingConf); } else { - cc = QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl")); + cc = Utils.getCommunityConfiguration(production); } runWithSparkSession( conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); extendCommunityConfigurationForEOSC(spark, inputPath, cc); - execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc); + execBulkTag(spark, inputPath, outputPath, protoMappingParams, cc); }); } @@ -105,10 +95,7 @@ public class SparkBulkTagJob { Dataset datasources = readPath( spark, inputPath - .substring( - 0, - inputPath.lastIndexOf("/")) - + "/datasource", + + "datasource", Datasource.class) .filter((FilterFunction) ds -> isOKDatasource(ds)) .map((MapFunction) ds -> ds.getId(), Encoders.STRING()); @@ -116,10 +103,10 @@ public class SparkBulkTagJob { Map>> dsm = cc.getEoscDatasourceMap(); for (String ds : datasources.collectAsList()) { - final String dsId = ds.substring(3); - if (!dsm.containsKey(dsId)) { + // final String dsId = ds.substring(3); + if (!dsm.containsKey(ds)) { ArrayList> eoscList = new ArrayList<>(); - dsm.put(dsId, eoscList); + dsm.put(ds, eoscList); } } @@ -141,22 +128,30 @@ public class SparkBulkTagJob { String inputPath, String outputPath, ProtoMap protoMappingParams, - Class resultClazz, CommunityConfiguration communityConfiguration) { - ResultTagger resultTagger = new ResultTagger(); - readPath(spark, inputPath, resultClazz) - .map(patchResult(), Encoders.bean(resultClazz)) - .filter(Objects::nonNull) - .map( - (MapFunction) value -> resultTagger - .enrichContextCriteria( - value, communityConfiguration, protoMappingParams), - Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); + ModelSupport.entityTypes + .keySet() + .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + .forEach(e -> { + removeOutputDir(spark, outputPath + e.name()); + ResultTagger resultTagger = new ResultTagger(); + Class resultClazz = ModelSupport.entityTypes.get(e); + readPath(spark, inputPath + e.name(), resultClazz) + .map(patchResult(), Encoders.bean(resultClazz)) + .filter(Objects::nonNull) + .map( + (MapFunction) value -> resultTagger + .enrichContextCriteria( + value, communityConfiguration, protoMappingParams), + Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + }); + } public static Dataset readPath( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java 
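
The structural change in the job above is the fan-out: instead of one Spark action per result table, a single job iterates the result entity types with a parallel stream and runs the same tagging routine against the type-specific subfolders. Below is a minimal self-contained sketch of that pattern, under the assumption that plain strings can stand in for the project's ModelSupport entity-type map; the tagging work itself is mocked, and all names are illustrative:

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class ParallelFanOutSketch {

	public static void main(String[] args) {
		// The four OpenAIRE result types handled by the bulk tagging job.
		List<String> entityTypes = List.of("publication", "dataset", "otherresearchproduct", "software");

		// Thread-safe sink, since parallelStream() runs the passes concurrently.
		Map<String, String> written = new ConcurrentHashMap<>();

		// One pass per entity type, in parallel: each reads sourcePath + type and
		// writes outputPath + type, mirroring the execBulkTag loop above.
		entityTypes
			.parallelStream()
			.forEach(type -> written.put("/outputPath/" + type, "tagged " + type));

		written.forEach((path, what) -> System.out.println(path + " <- " + what));
	}
}
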
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index b44376e22..9cd3a8f82 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import com.google.gson.Gson; @@ -13,7 +14,7 @@ public class Community implements Serializable { private String id; private List subjects = new ArrayList<>(); private List providers = new ArrayList<>(); - private List zenodoCommunities = new ArrayList<>(); + private List zenodoCommunities = new ArrayList<>(); private SelectionConstraints constraints = new SelectionConstraints(); private SelectionConstraints removeConstraints = new SelectionConstraints(); @@ -26,7 +27,7 @@ public class Community implements Serializable { return !getSubjects().isEmpty() || !getProviders().isEmpty() || !getZenodoCommunities().isEmpty() - || getConstraints().getCriteria() != null; + || (Optional.ofNullable(getConstraints()).isPresent() && getConstraints().getCriteria() != null); } public String getId() { @@ -53,11 +54,11 @@ public class Community implements Serializable { this.providers = providers; } - public List getZenodoCommunities() { + public List getZenodoCommunities() { return zenodoCommunities; } - public void setZenodoCommunities(List zenodoCommunities) { + public void setZenodoCommunities(List zenodoCommunities) { this.zenodoCommunities = zenodoCommunities; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index e061ccd5e..a658c7ff5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -81,7 +81,7 @@ public class CommunityConfiguration implements Serializable { this.removeConstraintsMap = removeConstraintsMap; } - CommunityConfiguration(final Map communities) { + public CommunityConfiguration(final Map communities) { this.communities = communities; init(); } @@ -117,10 +117,10 @@ public class CommunityConfiguration implements Serializable { add(d.getOpenaireId(), new Pair<>(id, d.getSelectionConstraints()), datasourceMap); } // get zenodo communities - for (ZenodoCommunity zc : c.getZenodoCommunities()) { + for (String zc : c.getZenodoCommunities()) { add( - zc.getZenodoCommunityId(), - new Pair<>(id, zc.getSelCriteria()), + zc, + new Pair<>(id, null), zenodocommunityMap); } selectionConstraintsMap.put(id, c.getConstraints()); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 7b9e03ef6..955ca3856 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -143,16 +143,16 @@ public class CommunityConfigurationFactory { return providerList; } - private static List parseZenodoCommunities(final Node node) { + 
private static List parseZenodoCommunities(final Node node) { final List list = node.selectNodes("./zenodocommunities/zenodocommunity"); - final List zenodoCommunityList = new ArrayList<>(); + final List zenodoCommunityList = new ArrayList<>(); for (Node n : list) { - ZenodoCommunity zc = new ZenodoCommunity(); - zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText()); - zc.setSelCriteria(n.selectSingleNode("./selcriteria")); +// ZenodoCommunity zc = new ZenodoCommunity(); +// zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText()); +// zc.setSelCriteria(n.selectSingleNode("./selcriteria")); - zenodoCommunityList.add(zc); + zenodoCommunityList.add(n.selectSingleNode("./zenodoid").getText()); } log.info("size of the zenodo community list " + zenodoCommunityList.size()); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index ed58cc14d..48d9be7cd 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; import java.lang.reflect.InvocationTargetException; +import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore; + import eu.dnetlib.dhp.bulktag.criteria.Selection; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; @@ -11,7 +13,8 @@ public class Constraint implements Serializable { private String verb; private String field; private String value; -// private String element; + + @JsonIgnore private Selection selection; public String getVerb() { @@ -38,10 +41,8 @@ public class Constraint implements Serializable { this.value = value; } - public void setSelection(Selection sel) { - selection = sel; - } + @JsonIgnore public void setSelection(VerbResolver resolver) throws InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException { @@ -52,11 +53,5 @@ public class Constraint implements Serializable { return selection.apply(metadata); } -// public String getElement() { -// return element; -// } -// -// public void setElement(String element) { -// this.element = element; -// } + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 5f62c10f4..3b231a52d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -85,16 +85,18 @@ public class ResultTagger implements Serializable { conf .getRemoveConstraintsMap() .keySet() - .forEach(communityId -> { - if (conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && - conf - .getRemoveConstraintsMap() - .get(communityId) - .getCriteria() - .stream() - .anyMatch(crit -> crit.verifyCriteria(param))) - removeCommunities.add(communityId); - }); + .forEach( + communityId -> { + if (conf.getRemoveConstraintsMap().keySet().contains(communityId) && + conf.getRemoveConstraintsMap().get(communityId).getCriteria() != null && + conf + .getRemoveConstraintsMap() + .get(communityId) + .getCriteria() + .stream() + .anyMatch(crit -> crit.verifyCriteria(param))) + removeCommunities.add(communityId); + }); // 
communities contains all the communities to be added as context for the result final Set communities = new HashSet<>(); @@ -124,10 +126,10 @@ public class ResultTagger implements Serializable { if (Objects.nonNull(result.getInstance())) { for (Instance i : result.getInstance()) { if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { - collfrom.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); + collfrom.add(i.getCollectedfrom().getKey()); } if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { - hostdby.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); + hostdby.add(i.getHostedby().getKey()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index c7dcce812..57cc658fc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -7,11 +7,13 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +@JsonAutoDetect public class SelectionConstraints implements Serializable { private List criteria; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java new file mode 100644 index 000000000..96523c502 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java @@ -0,0 +1,225 @@ + +package eu.dnetlib.dhp.resulttocommunityfromorganization; + +/** + * @author miriam.baglioni + * @Date 16/10/23 + */ +/** + * @author miriam.baglioni + * @Date 16/10/23 + */ +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.PropagationConstant.OBJECT_MAPPER; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.ObjectWritable; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.*; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.api.Utils; +import eu.dnetlib.dhp.api.model.CommunityEntityMap; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; + +public class PrepareResultCommunity2 { + + private static final Logger log = LoggerFactory + 
.getLogger(eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet.class + .getResourceAsStream( + "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final boolean production = Boolean.valueOf(parser.get("production")); + log.info("production: {}", production); + + final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(production); + log.info("organizationMap: {}", new Gson().toJson(organizationMap)); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + removeOutputDir(spark, outputPath); + prepareInfo(spark, inputPath, outputPath, organizationMap); + }); + } + + private static void prepareInfo( + SparkSession spark, + String inputPath, + String outputPath, + CommunityEntityMap organizationMap) { + + final StructType structureSchema = new StructType() + .add( + "dataInfo", new StructType() + .add("deletedbyinference", DataTypes.BooleanType) + .add("invisible", DataTypes.BooleanType)) + .add("source", DataTypes.StringType) + .add("target", DataTypes.StringType) + .add("relClass", DataTypes.StringType); + + readPath(spark, inputPath, Relation.class) + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset resultOrganization = spark + .read() + .schema(Encoders.bean(Relation.class).schema()) + .json(inputPath) + .filter( + "dataInfo.deletedbyinference != true " + + "and relClass == '" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'") + .select( + new Column("source").as("resultId"), + new Column("target").as("organizationId")); + + resultOrganization + .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + + Dataset organizationOrganization = spark + .read() + .schema(structureSchema) + .json(inputPath) + .filter( + "dataInfo.deletedbyinference != true " + + "and relClass == '" + ModelConstants.MERGES + "'") + .select( + new Column("source").as("masterOrganization"), + new Column("target").as("duplicateOrganization")); + + resultOrganization + .joinWith( + organizationOrganization, resultOrganization + .col("organizationId") + .equalTo(organizationOrganization.col("masterOrganization")), + "left") + .groupByKey( + (MapFunction, String>) t2 -> (String) t2._1().getAs("resultId"), Encoders.STRING()) + .mapGroups((MapGroupsFunction, ResultCommunityList>) (k, v) -> { + ResultCommunityList rcl = new ResultCommunityList(); + rcl.setResultId(k); + ArrayList cl = new ArrayList<>(); + Tuple2 first = v.next(); + cl.addAll(organizationMap.get(first._1().getAs("organizationId"))); + if (Optional.ofNullable(first._2()).isPresent()) { + cl.addAll(organizationMap.get(first._2().getAs(("duplicateOrganization")))); + } + v.forEachRemaining(o -> cl.addAll(organizationMap.get(o._2().getAs("duplicateOrganization")))); + if (cl.size() == 0) + return 
null; + rcl.setCommunityList(new ArrayList<>(cl.stream().distinct().collect(Collectors.toList()))); + return rcl; + }, Encoders.bean(ResultCommunityList.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + +// Dataset relation = readPath(spark, inputPath, Relation.class) +// .filter((FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() ); +// +// relation.createOrReplaceTempView("relation"); +// +// String query = "SELECT result_organization.source resultId, result_organization.target orgId, org_set merges " +// + "FROM (SELECT source, target " +// + " FROM relation " +// + " AND lower(relClass) = '" +// + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() +// + "') result_organization " +// + "LEFT JOIN (SELECT source, collect_set(target) org_set " +// + " FROM relation " +// + " AND lower(relClass) = '" +// + ModelConstants.MERGES.toLowerCase() +// + "' " +// + " GROUP BY source) organization_organization " +// + "ON result_organization.target = organization_organization.source "; +// +// Dataset result_organizationset = spark +// .sql(query) +// .as(Encoders.bean(ResultOrganizations.class)); +// +// result_organizationset +// .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) +// .filter(Objects::nonNull) +// .toJavaRDD() +// .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) +// .reduceByKey((a, b) -> { +// ArrayList cl = a.getCommunityList(); +// b.getCommunityList().stream().forEach(s -> { +// if (!cl.contains(s)) { +// cl.add(s); +// } +// }); +// a.setCommunityList(cl); +// return a; +// }) +// .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) +// .saveAsTextFile(outputPath, GzipCodec.class); + } + + private static MapFunction mapResultCommunityFn( + CommunityEntityMap organizationMap) { + return value -> { + String rId = value.getResultId(); + Optional> orgs = Optional.ofNullable(value.getMerges()); + String oTarget = value.getOrgId(); + Set communitySet = new HashSet<>(); + if (organizationMap.containsKey(oTarget)) { + communitySet.addAll(organizationMap.get(oTarget)); + } + if (orgs.isPresent()) + for (String oId : orgs.get()) { + if (organizationMap.containsKey(oId)) { + communitySet.addAll(organizationMap.get(oId)); + } + } + if (!communitySet.isEmpty()) { + ResultCommunityList rcl = new ResultCommunityList(); + rcl.setResultId(rId); + ArrayList communityList = new ArrayList<>(); + communityList.addAll(communitySet); + rcl.setCommunityList(communityList); + return rcl; + } + return null; + }; + } +} From 70b78a40c7b6fb1a9ec38c4bab5f13e1cc74850e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Oct 2023 15:50:49 +0200 Subject: [PATCH 09/11] removed file from different propagation --- .../PrepareResultCommunity2.java | 225 ------------------ 1 file changed, 225 deletions(-) delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java deleted file mode 100644 index 96523c502..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunity2.java +++ /dev/null @@ -1,225 +0,0 @@ - -package 
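
For the record, the core of the PrepareResultCommunity2 file removed here is the aggregation shown in the previous patch: per result, take the communities of every affiliated organization plus those of the duplicate organizations merged into it, and keep the distinct union. A minimal plain-Java sketch of that logic follows, with toy in-memory maps standing in for the relation datasets and the community API; all identifiers and values below are illustrative:

import java.util.*;

public class CommunityUnionSketch {

	public static void main(String[] args) {
		// result -> affiliated organizations (hasAuthorInstitution relations)
		Map<String, List<String>> resultOrg = Map.of("r1", List.of("orgA"));
		// master organization -> duplicates it merges (merges relations)
		Map<String, List<String>> orgMerges = Map.of("orgA", List.of("orgB"));
		// organization -> communities (the CommunityEntityMap from the API)
		Map<String, List<String>> orgCommunities = Map.of(
			"orgA", List.of("covid-19"),
			"orgB", List.of("dh-ch"));

		Map<String, List<String>> resultCommunities = new HashMap<>();
		resultOrg.forEach((result, orgs) -> {
			// Union of the communities of each organization and of its duplicates,
			// distinct values only: the same shape as the mapGroups step above.
			Set<String> communities = new LinkedHashSet<>();
			for (String org : orgs) {
				communities.addAll(orgCommunities.getOrDefault(org, List.of()));
				for (String dup : orgMerges.getOrDefault(org, List.of()))
					communities.addAll(orgCommunities.getOrDefault(dup, List.of()));
			}
			if (!communities.isEmpty())
				resultCommunities.put(result, new ArrayList<>(communities));
		});

		System.out.println(resultCommunities); // {r1=[covid-19, dh-ch]}
	}
}
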
eu.dnetlib.dhp.resulttocommunityfromorganization; - -/** - * @author miriam.baglioni - * @Date 16/10/23 - */ -/** - * @author miriam.baglioni - * @Date 16/10/23 - */ -import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.PropagationConstant.OBJECT_MAPPER; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - -import java.util.*; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.ObjectWritable; -import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.*; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.StructType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; - -import eu.dnetlib.dhp.api.Utils; -import eu.dnetlib.dhp.api.model.CommunityEntityMap; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Relation; -import scala.Tuple2; - -public class PrepareResultCommunity2 { - - private static final Logger log = LoggerFactory - .getLogger(eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet.class); - - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - eu.dnetlib.dhp.resulttocommunityfromorganization.PrepareResultCommunitySet.class - .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); - - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); - - Boolean isSparkSessionManaged = isSparkSessionManaged(parser); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - - String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); - - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); - - final boolean production = Boolean.valueOf(parser.get("production")); - log.info("production: {}", production); - - final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(production); - log.info("organizationMap: {}", new Gson().toJson(organizationMap)); - - SparkConf conf = new SparkConf(); - - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - prepareInfo(spark, inputPath, outputPath, organizationMap); - }); - } - - private static void prepareInfo( - SparkSession spark, - String inputPath, - String outputPath, - CommunityEntityMap organizationMap) { - - final StructType structureSchema = new StructType() - .add( - "dataInfo", new StructType() - .add("deletedbyinference", DataTypes.BooleanType) - .add("invisible", DataTypes.BooleanType)) - .add("source", DataTypes.StringType) - .add("target", DataTypes.StringType) - .add("relClass", DataTypes.StringType); - - readPath(spark, inputPath, Relation.class) - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); - - Dataset resultOrganization = spark - .read() - .schema(Encoders.bean(Relation.class).schema()) - .json(inputPath) - .filter( - 
"dataInfo.deletedbyinference != true " + - "and relClass == '" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'") - .select( - new Column("source").as("resultId"), - new Column("target").as("organizationId")); - - resultOrganization - .foreach((ForeachFunction) r -> System.out.println(new ObjectMapper().writeValueAsString(r))); - - Dataset organizationOrganization = spark - .read() - .schema(structureSchema) - .json(inputPath) - .filter( - "dataInfo.deletedbyinference != true " + - "and relClass == '" + ModelConstants.MERGES + "'") - .select( - new Column("source").as("masterOrganization"), - new Column("target").as("duplicateOrganization")); - - resultOrganization - .joinWith( - organizationOrganization, resultOrganization - .col("organizationId") - .equalTo(organizationOrganization.col("masterOrganization")), - "left") - .groupByKey( - (MapFunction, String>) t2 -> (String) t2._1().getAs("resultId"), Encoders.STRING()) - .mapGroups((MapGroupsFunction, ResultCommunityList>) (k, v) -> { - ResultCommunityList rcl = new ResultCommunityList(); - rcl.setResultId(k); - ArrayList cl = new ArrayList<>(); - Tuple2 first = v.next(); - cl.addAll(organizationMap.get(first._1().getAs("organizationId"))); - if (Optional.ofNullable(first._2()).isPresent()) { - cl.addAll(organizationMap.get(first._2().getAs(("duplicateOrganization")))); - } - v.forEachRemaining(o -> cl.addAll(organizationMap.get(o._2().getAs("duplicateOrganization")))); - if (cl.size() == 0) - return null; - rcl.setCommunityList(new ArrayList<>(cl.stream().distinct().collect(Collectors.toList()))); - return rcl; - }, Encoders.bean(ResultCommunityList.class)) - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath); - -// Dataset relation = readPath(spark, inputPath, Relation.class) -// .filter((FilterFunction) r -> !r.getDataInfo().getDeletedbyinference() ); -// -// relation.createOrReplaceTempView("relation"); -// -// String query = "SELECT result_organization.source resultId, result_organization.target orgId, org_set merges " -// + "FROM (SELECT source, target " -// + " FROM relation " -// + " AND lower(relClass) = '" -// + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() -// + "') result_organization " -// + "LEFT JOIN (SELECT source, collect_set(target) org_set " -// + " FROM relation " -// + " AND lower(relClass) = '" -// + ModelConstants.MERGES.toLowerCase() -// + "' " -// + " GROUP BY source) organization_organization " -// + "ON result_organization.target = organization_organization.source "; -// -// Dataset result_organizationset = spark -// .sql(query) -// .as(Encoders.bean(ResultOrganizations.class)); -// -// result_organizationset -// .map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class)) -// .filter(Objects::nonNull) -// .toJavaRDD() -// .mapToPair(value -> new Tuple2<>(value.getResultId(), value)) -// .reduceByKey((a, b) -> { -// ArrayList cl = a.getCommunityList(); -// b.getCommunityList().stream().forEach(s -> { -// if (!cl.contains(s)) { -// cl.add(s); -// } -// }); -// a.setCommunityList(cl); -// return a; -// }) -// .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) -// .saveAsTextFile(outputPath, GzipCodec.class); - } - - private static MapFunction mapResultCommunityFn( - CommunityEntityMap organizationMap) { - return value -> { - String rId = value.getResultId(); - Optional> orgs = Optional.ofNullable(value.getMerges()); - String oTarget = value.getOrgId(); - Set communitySet = new HashSet<>(); - if 
(organizationMap.containsKey(oTarget)) { - communitySet.addAll(organizationMap.get(oTarget)); - } - if (orgs.isPresent()) - for (String oId : orgs.get()) { - if (organizationMap.containsKey(oId)) { - communitySet.addAll(organizationMap.get(oId)); - } - } - if (!communitySet.isEmpty()) { - ResultCommunityList rcl = new ResultCommunityList(); - rcl.setResultId(rId); - ArrayList communityList = new ArrayList<>(); - communityList.addAll(communitySet); - rcl.setCommunityList(communityList); - return rcl; - } - return null; - }; - } -} From 5c5a195e97b8a7711cce04854e834e3f63073e3e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 23 Oct 2023 11:26:17 +0200 Subject: [PATCH 10/11] refactoring and fixing issue on property name --- .../main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java | 2 -- .../resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml | 2 +- .../src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index 48d9be7cd..13d29b940 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -41,7 +41,6 @@ public class Constraint implements Serializable { this.value = value; } - @JsonIgnore public void setSelection(VerbResolver resolver) throws InvocationTargetException, NoSuchMethodException, InstantiationException, @@ -53,5 +52,4 @@ public class Constraint implements Serializable { return selection.apply(metadata); } - } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml index 4b81c58e4..e3ca72493 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml @@ -111,7 +111,7 @@ --num-executors=${sparkExecutorNumber} --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} - --conf spark.executor.memoryOverhead=${memeoryOverhead} + --conf spark.executor.memoryOverhead=${memoryOverhead} --conf spark.sql.shuffle.partitions=${partitions} --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java index 0cdf0f50b..e3c2a5f45 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.api; - import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; From 0097f4e64bd79e67781c4d681dd8d91d70ab8938 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 26 Oct 2023 09:38:09 +0200 Subject: [PATCH 11/11] Removed Query community testing. 
Removed package from common related to the interaction with Zenodo since it was moved to the dump-project --- .../common/api/InputStreamRequestBody.java | 53 --- .../api/MissingConceptDoiException.java | 8 - .../dhp/common/api/ZenodoAPIClient.java | 365 ------------------ .../dhp/common/api/zenodo/Community.java | 14 - .../dhp/common/api/zenodo/Creator.java | 47 --- .../dnetlib/dhp/common/api/zenodo/File.java | 44 --- .../dnetlib/dhp/common/api/zenodo/Grant.java | 23 -- .../dnetlib/dhp/common/api/zenodo/Links.java | 92 ----- .../dhp/common/api/zenodo/Metadata.java | 153 -------- .../dhp/common/api/zenodo/PrereserveDoi.java | 25 -- .../common/api/zenodo/RelatedIdentifier.java | 43 --- .../dhp/common/api/zenodo/ZenodoModel.java | 118 ------ .../common/api/zenodo/ZenodoModelList.java | 7 - .../dhp/common/api/ZenodoAPIClientTest.java | 128 ------ .../eu/dnetlib/pace/util/DiffPatchMatch.java | 1 - .../main/java/eu/dnetlib/dhp/api/Utils.java | 3 - .../dhp/api/QueryCommunityAPITest.java | 115 ------ 17 files changed, 1239 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java delete mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java deleted file mode 100644 index c127783e5..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java +++ /dev/null @@ -1,53 +0,0 @@ - -package eu.dnetlib.dhp.common.api; - -import java.io.IOException; -import java.io.InputStream; - -import okhttp3.MediaType; -import okhttp3.RequestBody; -import okhttp3.internal.Util; -import okio.BufferedSink; -import okio.Okio; -import okio.Source; - -public class InputStreamRequestBody extends RequestBody { - - private final InputStream inputStream; - private final MediaType mediaType; - private final long lenght; - - public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) { - - return new InputStreamRequestBody(inputStream, mediaType, len); - } - - private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) { - this.inputStream = inputStream; - 
this.mediaType = mediaType; - this.lenght = len; - } - - @Override - public MediaType contentType() { - return mediaType; - } - - @Override - public long contentLength() { - - return lenght; - - } - - @Override - public void writeTo(BufferedSink sink) throws IOException { - Source source = null; - try { - source = Okio.source(inputStream); - sink.writeAll(source); - } finally { - Util.closeQuietly(source); - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java deleted file mode 100644 index b75872eb4..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java +++ /dev/null @@ -1,8 +0,0 @@ - -package eu.dnetlib.dhp.common.api; - -public class MissingConceptDoiException extends Throwable { - public MissingConceptDoiException(String message) { - super(message); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java deleted file mode 100644 index 544da78f5..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ /dev/null @@ -1,365 +0,0 @@ - -package eu.dnetlib.dhp.common.api; - -import java.io.*; -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.concurrent.TimeUnit; - -import org.apache.http.HttpHeaders; -import org.apache.http.entity.ContentType; -import org.jetbrains.annotations.NotNull; - -import com.google.gson.Gson; - -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; -import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; -import okhttp3.*; - -public class ZenodoAPIClient implements Serializable { - - String urlString; - String bucket; - - String deposition_id; - String access_token; - - public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8"); - - private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip"); - - public String getUrlString() { - return urlString; - } - - public void setUrlString(String urlString) { - this.urlString = urlString; - } - - public String getBucket() { - return bucket; - } - - public void setBucket(String bucket) { - this.bucket = bucket; - } - - public void setDeposition_id(String deposition_id) { - this.deposition_id = deposition_id; - } - - public ZenodoAPIClient(String urlString, String access_token) { - - this.urlString = urlString; - this.access_token = access_token; - } - - /** - * Brand new deposition in Zenodo. 
It sets the deposition_id and the bucket where to store the files to upload - * - * @return response code - * @throws IOException - */ - public int newDeposition() throws IOException { - String json = "{}"; - - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - conn.disconnect(); - - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); - this.bucket = newSubmission.getLinks().getBucket(); - this.deposition_id = newSubmission.getId(); - - return responseCode; - } - - /** - * Upload files in Zenodo. - * - * @param is the inputStream for the file to upload - * @param file_name the name of the file as it will appear on Zenodo - * @return the response code - */ - public int uploadIS(InputStream is, String file_name) throws IOException { - - URL url = new URL(bucket + "/" + file_name); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip"); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - byte[] buf = new byte[8192]; - int length; - try (OutputStream os = conn.getOutputStream()) { - while ((length = is.read(buf)) != -1) { - os.write(buf, 0, length); - } - - } - int responseCode = conn.getResponseCode(); - if (!checkOKStatus(responseCode)) { - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - } - - return responseCode; - } - - @NotNull - private String getBody(HttpURLConnection conn) throws IOException { - String body = "{}"; - try (BufferedReader br = new BufferedReader( - new InputStreamReader(conn.getInputStream(), "utf-8"))) { - StringBuilder response = new StringBuilder(); - String responseLine = null; - while ((responseLine = br.readLine()) != null) { - response.append(responseLine.trim()); - } - - body = response.toString(); - - } - return body; - } - - /** - * Associates metadata information to the current deposition - * - * @param metadata the metadata - * @return response code - * @throws IOException - */ - public int sendMretadata(String metadata) throws IOException { - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("PUT"); - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = metadata.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - final int responseCode = conn.getResponseCode(); - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + getBody(conn)); - - return responseCode; - - } - - private boolean checkOKStatus(int responseCode) { - - if (HttpURLConnection.HTTP_OK != responseCode || - 
HttpURLConnection.HTTP_CREATED != responseCode) - return true; - return false; - } - - /** - * To publish the current deposition. It works for both new deposition or new version of an old deposition - * - * @return response code - * @throws IOException - */ - @Deprecated - public int publish() throws IOException { - - String json = "{}"; - - OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); - - RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); - - Request request = new Request.Builder() - .url(urlString + "/" + deposition_id + "/actions/publish") - .addHeader("Authorization", "Bearer " + access_token) - .post(body) - .build(); - - try (Response response = httpClient.newCall(request).execute()) { - - if (!response.isSuccessful()) - throw new IOException("Unexpected code " + response + response.body().string()); - - return response.code(); - - } - } - - /** - * To create a new version of an already published deposition. It sets the deposition_id and the bucket to be used - * for the new version. - * - * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is the last - * part of the url for the DOI Zenodo suggests to use to cite all versions: DOI: 10.xxx/zenodo.656930 - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException { - setDepositionId(concept_rec_id, 1); - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion"); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("POST"); - - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - String latest_draft = zenodoModel.getLinks().getLatest_draft(); - deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1); - bucket = getBucket(latest_draft); - - return responseCode; - - } - - /** - * To finish uploading a version or new deposition not published - * It sets the deposition_id and the bucket to be used - * - * - * @param deposition_id the deposition id of the not yet published upload - * concept_rec_id = 656930 - * @return response code - * @throws IOException - * @throws MissingConceptDoiException - */ - public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException { - - this.deposition_id = deposition_id; - - String json = "{}"; - - URL url = new URL(urlString + "/" + deposition_id); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setRequestMethod("POST"); - conn.setDoOutput(true); - try (OutputStream os = conn.getOutputStream()) { - byte[] input = json.getBytes("utf-8"); - os.write(input, 0, input.length); - } - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - conn.disconnect(); - - if (!checkOKStatus(responseCode)) - throw new 
IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - bucket = zenodoModel.getLinks().getBucket(); - - return responseCode; - - } - - private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException { - - ZenodoModelList zenodoModelList = new Gson() - .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class); - - for (ZenodoModel zm : zenodoModelList) { - if (zm.getConceptrecid().equals(concept_rec_id)) { - deposition_id = zm.getId(); - return; - } - } - if (zenodoModelList.size() == 0) - throw new MissingConceptDoiException( - "The concept record id specified was missing in the list of depositions"); - setDepositionId(concept_rec_id, page + 1); - - } - - private String getPrevDepositions(String page) throws IOException { - - HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder(); - urlBuilder.addQueryParameter("page", page); - - URL url = new URL(urlBuilder.build().toString()); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("GET"); - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - return body; - - } - - private String getBucket(String inputUurl) throws IOException { - - URL url = new URL(inputUurl); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); - conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); - conn.setDoOutput(true); - conn.setRequestMethod("GET"); - - String body = getBody(conn); - - int responseCode = conn.getResponseCode(); - - conn.disconnect(); - if (!checkOKStatus(responseCode)) - throw new IOException("Unexpected code " + responseCode + body); - - ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); - - return zenodoModel.getLinks().getBucket(); - - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java deleted file mode 100644 index a02224383..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java +++ /dev/null @@ -1,14 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -public class Community { - private String identifier; - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java deleted file mode 100644 index c14af55b6..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.common.api.zenodo; - -public class Creator { - private String affiliation; - private String name; - private String orcid; - - public String getAffiliation() { - return affiliation; - } - - public void setAffiliation(String affiliation) { - this.affiliation = affiliation; - } - - public String getName() { - return 
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
deleted file mode 100644
index a02224383..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
+++ /dev/null
@@ -1,14 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Community {
-	private String identifier;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
deleted file mode 100644
index c14af55b6..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
+++ /dev/null
@@ -1,47 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-public class Creator {
-	private String affiliation;
-	private String name;
-	private String orcid;
-
-	public String getAffiliation() {
-		return affiliation;
-	}
-
-	public void setAffiliation(String affiliation) {
-		this.affiliation = affiliation;
-	}
-
-	public String getName() {
-		return name;
-	}
-
-	public void setName(String name) {
-		this.name = name;
-	}
-
-	public String getOrcid() {
-		return orcid;
-	}
-
-	public void setOrcid(String orcid) {
-		this.orcid = orcid;
-	}
-
-	public static Creator newInstance(String name, String affiliation, String orcid) {
-		Creator c = new Creator();
-		if (name != null) {
-			c.name = name;
-		}
-		if (affiliation != null) {
-			c.affiliation = affiliation;
-		}
-		if (orcid != null) {
-			c.orcid = orcid;
-		}
-
-		return c;
-	}
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
deleted file mode 100644
index 509f444b9..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
+++ /dev/null
@@ -1,44 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class File implements Serializable {
-	private String checksum;
-	private String filename;
-	private long filesize;
-	private String id;
-
-	public String getChecksum() {
-		return checksum;
-	}
-
-	public void setChecksum(String checksum) {
-		this.checksum = checksum;
-	}
-
-	public String getFilename() {
-		return filename;
-	}
-
-	public void setFilename(String filename) {
-		this.filename = filename;
-	}
-
-	public long getFilesize() {
-		return filesize;
-	}
-
-	public void setFilesize(long filesize) {
-		this.filesize = filesize;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
deleted file mode 100644
index 476f1d9d8..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
+++ /dev/null
@@ -1,23 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Grant implements Serializable {
-	private String id;
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public static Grant newInstance(String id) {
-		Grant g = new Grant();
-		g.id = id;
-
-		return g;
-	}
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
deleted file mode 100644
index bdf8e5d2c..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
+++ /dev/null
@@ -1,92 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class Links implements Serializable {
-
-	private String bucket;
-
-	private String discard;
-
-	private String edit;
-	private String files;
-	private String html;
-	private String latest_draft;
-	private String latest_draft_html;
-	private String publish;
-
-	private String self;
-
-	public String getBucket() {
-		return bucket;
-	}
-
-	public void setBucket(String bucket) {
-		this.bucket = bucket;
-	}
-
-	public String getDiscard() {
-		return discard;
-	}
-
-	public void setDiscard(String discard) {
-		this.discard = discard;
-	}
-
-	public String getEdit() {
-		return edit;
-	}
-
-	public void setEdit(String edit) {
-		this.edit = edit;
-	}
-
-	public String getFiles() {
-		return files;
-	}
-
-	public void setFiles(String files) {
-		this.files = files;
-	}
-
-	public String getHtml() {
-		return html;
-	}
-
-	public void setHtml(String html) {
-		this.html = html;
-	}
-
-	public String getLatest_draft() {
-		return latest_draft;
-	}
-
-	public void setLatest_draft(String latest_draft) {
-		this.latest_draft = latest_draft;
-	}
-
-	public String getLatest_draft_html() {
-		return latest_draft_html;
-	}
-
-	public void setLatest_draft_html(String latest_draft_html) {
-		this.latest_draft_html = latest_draft_html;
-	}
-
-	public String getPublish() {
-		return publish;
-	}
-
-	public void setPublish(String publish) {
-		this.publish = publish;
-	}
-
-	public String getSelf() {
-		return self;
-	}
-
-	public void setSelf(String self) {
-		this.self = self;
-	}
-}
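
The removed Links POJO mirrors the links object Zenodo returns on a deposition; its snake_case field names match the JSON keys one-to-one, which is why it needed no Gson annotations. The bucket link matters most to the client: file content is PUT to <bucket>/<file_name>. A minimal sketch of that binding, illustrative only and not part of the patch; the class name and the abridged payload are hypothetical.

import com.google.gson.Gson;

public class LinksBindingDemo {

	// Local stand-in for the removed Links class, reduced to three fields.
	static class Links {
		String bucket;
		String publish;
		String latest_draft;
	}

	public static void main(String[] args) {
		String json = "{\"bucket\": \"https://sandbox.zenodo.org/api/files/some-bucket-id\","
			+ "\"publish\": \"https://sandbox.zenodo.org/api/deposit/depositions/123/actions/publish\","
			+ "\"latest_draft\": \"https://sandbox.zenodo.org/api/deposit/depositions/123\"}";
		// Gson maps the snake_case JSON keys straight onto the field names.
		Links links = new Gson().fromJson(json, Links.class);
		System.out.println(links.bucket);
	}
}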
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
deleted file mode 100644
index b161adb9b..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
+++ /dev/null
@@ -1,153 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class Metadata implements Serializable {
-
-	private String access_right;
-	private List<Community> communities;
-	private List<Creator> creators;
-	private String description;
-	private String doi;
-	private List<Grant> grants;
-	private List<String> keywords;
-	private String language;
-	private String license;
-	private PrereserveDoi prereserve_doi;
-	private String publication_date;
-	private List<String> references;
-	private List<RelatedIdentifier> related_identifiers;
-	private String title;
-	private String upload_type;
-	private String version;
-
-	public String getUpload_type() {
-		return upload_type;
-	}
-
-	public void setUpload_type(String upload_type) {
-		this.upload_type = upload_type;
-	}
-
-	public String getVersion() {
-		return version;
-	}
-
-	public void setVersion(String version) {
-		this.version = version;
-	}
-
-	public String getAccess_right() {
-		return access_right;
-	}
-
-	public void setAccess_right(String access_right) {
-		this.access_right = access_right;
-	}
-
-	public List<Community> getCommunities() {
-		return communities;
-	}
-
-	public void setCommunities(List<Community> communities) {
-		this.communities = communities;
-	}
-
-	public List<Creator> getCreators() {
-		return creators;
-	}
-
-	public void setCreators(List<Creator> creators) {
-		this.creators = creators;
-	}
-
-	public String getDescription() {
-		return description;
-	}
-
-	public void setDescription(String description) {
-		this.description = description;
-	}
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public List<Grant> getGrants() {
-		return grants;
-	}
-
-	public void setGrants(List<Grant> grants) {
-		this.grants = grants;
-	}
-
-	public List<String> getKeywords() {
-		return keywords;
-	}
-
-	public void setKeywords(List<String> keywords) {
-		this.keywords = keywords;
-	}
-
-	public String getLanguage() {
-		return language;
-	}
-
-	public void setLanguage(String language) {
-		this.language = language;
-	}
-
-	public String getLicense() {
-		return license;
-	}
-
-	public void setLicense(String license) {
-		this.license = license;
-	}
-
-	public PrereserveDoi getPrereserve_doi() {
-		return prereserve_doi;
-	}
-
-	public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
-		this.prereserve_doi = prereserve_doi;
-	}
-
-	public String getPublication_date() {
-		return publication_date;
-	}
-
-	public void setPublication_date(String publication_date) {
-		this.publication_date = publication_date;
-	}
-
-	public List<String> getReferences() {
-		return references;
-	}
-
-	public void setReferences(List<String> references) {
-		this.references = references;
-	}
-
-	public List<RelatedIdentifier> getRelated_identifiers() {
-		return related_identifiers;
-	}
-
-	public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
-		this.related_identifiers = related_identifiers;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
deleted file mode 100644
index aa088ef31..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
+++ /dev/null
@@ -1,25 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class PrereserveDoi implements Serializable {
-	private String doi;
-	private String recid;
-
-	public String getDoi() {
-		return doi;
-	}
-
-	public void setDoi(String doi) {
-		this.doi = doi;
-	}
-
-	public String getRecid() {
-		return recid;
-	}
-
-	public void setRecid(String recid) {
-		this.recid = recid;
-	}
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
deleted file mode 100644
index 15a349636..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
+++ /dev/null
@@ -1,43 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-
-public class RelatedIdentifier implements Serializable {
-	private String identifier;
-	private String relation;
-	private String resource_type;
-	private String scheme;
-
-	public String getIdentifier() {
-		return identifier;
-	}
-
-	public void setIdentifier(String identifier) {
-		this.identifier = identifier;
-	}
-
-	public String getRelation() {
-		return relation;
-	}
-
-	public void setRelation(String relation) {
-		this.relation = relation;
-	}
-
-	public String getResource_type() {
-		return resource_type;
-	}
-
-	public void setResource_type(String resource_type) {
-		this.resource_type = resource_type;
-	}
-
-	public String getScheme() {
-		return scheme;
-	}
-
-	public void setScheme(String scheme) {
-		this.scheme = scheme;
-	}
-}
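
Taken together, these POJOs composed the deposit metadata document the client sent to Zenodo. A minimal sketch of that composition follows; it is illustrative only and compiles against the tree before this patch, since the patch removes the classes it uses. The title, creator, and grant values are placeholders, and the wrapping of the serialized object under a top-level "metadata" key is an assumption based on Zenodo's deposit API conventions, to be verified against the current API docs.

import java.util.Arrays;
import java.util.Collections;

import com.google.gson.Gson;

import eu.dnetlib.dhp.common.api.zenodo.Creator;
import eu.dnetlib.dhp.common.api.zenodo.Grant;
import eu.dnetlib.dhp.common.api.zenodo.Metadata;

public class MetadataPayloadDemo {

	public static void main(String[] args) {
		Metadata metadata = new Metadata();
		metadata.setTitle("Example dump");
		metadata.setUpload_type("dataset");
		metadata.setAccess_right("open");
		// The null orcid is skipped by the newInstance factory, so the field
		// is simply absent from the serialized JSON.
		metadata.setCreators(
			Arrays.asList(Creator.newInstance("Doe, Jane", "Example University", null)));
		metadata.setGrants(Collections.singletonList(Grant.newInstance("000000")));

		// The snake_case field names serialize straight to the JSON keys Zenodo expects.
		String payload = "{\"metadata\":" + new Gson().toJson(metadata) + "}";
		System.out.println(payload);
	}
}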
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
deleted file mode 100644
index 9843ea0f9..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
+++ /dev/null
@@ -1,118 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.io.Serializable;
-import java.util.List;
-
-public class ZenodoModel implements Serializable {
-
-	private String conceptrecid;
-	private String created;
-
-	private List<File> files;
-	private String id;
-	private Links links;
-	private Metadata metadata;
-	private String modified;
-	private String owner;
-	private String record_id;
-	private String state;
-	private boolean submitted;
-	private String title;
-
-	public String getConceptrecid() {
-		return conceptrecid;
-	}
-
-	public void setConceptrecid(String conceptrecid) {
-		this.conceptrecid = conceptrecid;
-	}
-
-	public String getCreated() {
-		return created;
-	}
-
-	public void setCreated(String created) {
-		this.created = created;
-	}
-
-	public List<File> getFiles() {
-		return files;
-	}
-
-	public void setFiles(List<File> files) {
-		this.files = files;
-	}
-
-	public String getId() {
-		return id;
-	}
-
-	public void setId(String id) {
-		this.id = id;
-	}
-
-	public Links getLinks() {
-		return links;
-	}
-
-	public void setLinks(Links links) {
-		this.links = links;
-	}
-
-	public Metadata getMetadata() {
-		return metadata;
-	}
-
-	public void setMetadata(Metadata metadata) {
-		this.metadata = metadata;
-	}
-
-	public String getModified() {
-		return modified;
-	}
-
-	public void setModified(String modified) {
-		this.modified = modified;
-	}
-
-	public String getOwner() {
-		return owner;
-	}
-
-	public void setOwner(String owner) {
-		this.owner = owner;
-	}
-
-	public String getRecord_id() {
-		return record_id;
-	}
-
-	public void setRecord_id(String record_id) {
-		this.record_id = record_id;
-	}
-
-	public String getState() {
-		return state;
-	}
-
-	public void setState(String state) {
-		this.state = state;
-	}
-
-	public boolean isSubmitted() {
-		return submitted;
-	}
-
-	public void setSubmitted(boolean submitted) {
-		this.submitted = submitted;
-	}
-
-	public String getTitle() {
-		return title;
-	}
-
-	public void setTitle(String title) {
-		this.title = title;
-	}
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
deleted file mode 100644
index b3b150714..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
+++ /dev/null
@@ -1,7 +0,0 @@
-
-package eu.dnetlib.dhp.common.api.zenodo;
-
-import java.util.ArrayList;
-
-public class ZenodoModelList extends ArrayList<ZenodoModel> {
-}
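
The otherwise-empty ZenodoModelList class exists for one reason: by extending ArrayList<ZenodoModel>, it bakes the element type into its generic superclass signature, so new Gson().fromJson(body, ZenodoModelList.class) yields correctly typed elements without a TypeToken. A sketch of the equivalent without the dedicated subclass, illustrative only and compiling against the pre-patch tree:

import java.util.List;

import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;

import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;

public class ListBindingDemo {

	// Without a List subclass, the element type must be supplied explicitly,
	// because List<ZenodoModel>.class does not exist due to type erasure.
	static List<ZenodoModel> parse(String body) {
		return new Gson().fromJson(body, new TypeToken<List<ZenodoModel>>() {
		}.getType());
	}
}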
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
deleted file mode 100644
index 5dbe3b75b..000000000
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
+++ /dev/null
@@ -1,128 +0,0 @@
-
-package eu.dnetlib.dhp.common.api;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-
-@Disabled
-class ZenodoAPIClientTest {
-
-	private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
-	private final String ACCESS_TOKEN = "OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4";
-
-	private final String CONCEPT_REC_ID = "657113";
-
-	private final String depositionId = "674915";
-
-	@Test
-	void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewDeposition() throws IOException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-		Assertions.assertEquals(201, client.newDeposition());
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-//		Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
-
-		String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
-
-		Assertions.assertEquals(200, client.sendMretadata(metadata));
-
-		// Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionNewName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void testNewVersionOldName() throws IOException, MissingConceptDoiException {
-
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
-
-		File file = new File(getClass()
-			.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
-			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		Assertions.assertEquals(202, client.publish());
-
-	}
-
-	@Test
-	void depositBigFile() throws MissingConceptDoiException, IOException {
-		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
-			ACCESS_TOKEN);
-
-		Assertions.assertEquals(201, client.newDeposition());
-
-		File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar");
-//		File file = new File(getClass()
-//			.getResource("/eu/dnetlib/dhp/common/api/newVersion2")
-//			.getPath());
-
-		InputStream is = new FileInputStream(file);
-
-		Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
-
-		// Assertions.assertEquals(202, client.publish());
-	}
-
-}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
index 12c96500e..cfd9acd70 100644
--- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/DiffPatchMatch.java
@@ -18,7 +18,6 @@ package eu.dnetlib.pace.util;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 /*
  * Diff Match and Patch
  * Copyright 2018 The diff-match-patch Authors.
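
The removed tests document the client's expected status codes along its happy path: newDeposition or newVersion returns 201, uploadIS and sendMretadata return 200, publish returns 202. As a reference for the final step only, here is a minimal sketch of the publish action using okhttp3, the client library this patch series returned to; it is illustrative, not part of the patch, and the URL, deposition id, and token are placeholders. The empty JSON body reflects how Zenodo's publish action is typically invoked.

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

public class PublishDemo {

	public static void main(String[] args) throws IOException {
		String token = "REPLACE_ME";
		String publishUrl = "https://sandbox.zenodo.org/api/deposit/depositions/674915/actions/publish";

		OkHttpClient client = new OkHttpClient.Builder()
			.writeTimeout(600, TimeUnit.SECONDS)
			.build();

		// POST with an empty JSON body triggers the publish action.
		RequestBody body = RequestBody.create(MediaType.parse("application/json"), "");
		Request request = new Request.Builder()
			.url(publishUrl)
			.addHeader("Authorization", "Bearer " + token)
			.post(body)
			.build();

		try (Response response = client.newCall(request).execute()) {
			// The removed tests asserted 202 (Accepted) at this point.
			System.out.println(response.code());
		}
	}
}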
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
index 43d5e7e98..c1dda1ddc 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java
@@ -9,8 +9,6 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.stream.Collectors;
 
-import javax.management.Query;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -24,7 +22,6 @@ import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
 import eu.dnetlib.dhp.bulktag.community.Provider;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
 import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
-import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob;
 
 /**
  * @author miriam.baglioni
c.equals("aurora-universities-network"))); - Assertions - .assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck"))); - Assertions.assertEquals(35, community.getProviders().size()); - Assertions - .assertEquals( - 35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count()); - - } - - @Test - void eutopiaCommunityConfiguration() throws Exception { - CommunityConfiguration cc = Utils.getCommunityConfiguration(true); - System.out.println(cc.getCommunities().keySet()); - Community community = cc.getCommunities().get("eutopia"); - community.getProviders().forEach(p -> System.out.println(p.getOpenaireId())); - } - - @Test - void getCommunityProjects() throws Exception { - CommunityEntityMap projectMap = Utils.getCommunityProjects(true); - - Assertions - .assertTrue( - projectMap - .keySet() - .stream() - .allMatch(k -> k.startsWith("40|"))); - - System.out.println(projectMap); - } - - @Test - void getCommunityOrganizations() throws Exception { - CommunityEntityMap organizationMap = Utils.getCommunityOrganization(true); - Assertions.assertTrue(organizationMap.keySet().stream().allMatch(k -> k.startsWith("20|"))); - - } - -}