diff --git a/dhp-build/dhp-code-style/src/main/resources/eclipse/formatter_dnet.xml b/dhp-build/dhp-code-style/src/main/resources/eclipse/formatter_dnet.xml
index fcba2c4b2..e4d85bf39 100644
--- a/dhp-build/dhp-code-style/src/main/resources/eclipse/formatter_dnet.xml
+++ b/dhp-build/dhp-code-style/src/main/resources/eclipse/formatter_dnet.xml
@@ -19,7 +19,7 @@
-
+
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index 0819a8bd2..1dc3208b5 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -87,6 +87,11 @@
 <groupId>org.postgresql</groupId>
 <artifactId>postgresql</artifactId>
+
+<dependency>
+ <groupId>com.squareup.okhttp3</groupId>
+ <artifactId>okhttp</artifactId>
+</dependency>
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
new file mode 100644
index 000000000..c3f393436
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/InputStreamRequestBody.java
@@ -0,0 +1,53 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import okhttp3.MediaType;
+import okhttp3.RequestBody;
+import okhttp3.internal.Util;
+import okio.BufferedSink;
+import okio.Okio;
+import okio.Source;
+
+public class InputStreamRequestBody extends RequestBody {
+
+ private InputStream inputStream;
+ private MediaType mediaType;
+ private long length;
+
+ public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
+
+ return new InputStreamRequestBody(inputStream, mediaType, len);
+ }
+
+ private InputStreamRequestBody(InputStream inputStream, MediaType mediaType, long len) {
+ this.inputStream = inputStream;
+ this.mediaType = mediaType;
+ this.length = len;
+ }
+
+ @Override
+ public MediaType contentType() {
+ return mediaType;
+ }
+
+ @Override
+ public long contentLength() {
+
+ return length;
+
+ }
+
+ @Override
+ public void writeTo(BufferedSink sink) throws IOException {
+ Source source = null;
+ try {
+ source = Okio.source(inputStream);
+ sink.writeAll(source);
+ } finally {
+ Util.closeQuietly(source);
+ }
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
new file mode 100644
index 000000000..b75872eb4
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/MissingConceptDoiException.java
@@ -0,0 +1,8 @@
+
+package eu.dnetlib.dhp.common.api;
+
+public class MissingConceptDoiException extends Throwable {
+ public MissingConceptDoiException(String message) {
+ super(message);
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
new file mode 100644
index 000000000..f2dd4f0ac
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@@ -0,0 +1,264 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.*;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
+import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
+import okhttp3.*;
+
+public class ZenodoAPIClient implements Serializable {
+
+ String urlString;
+ String bucket;
+
+ String deposition_id;
+ String access_token;
+
+ public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");
+
+ private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
+
+ public String getUrlString() {
+ return urlString;
+ }
+
+ public void setUrlString(String urlString) {
+ this.urlString = urlString;
+ }
+
+ public String getBucket() {
+ return bucket;
+ }
+
+ public void setBucket(String bucket) {
+ this.bucket = bucket;
+ }
+
+ public void setDeposition_id(String deposition_id) {
+ this.deposition_id = deposition_id;
+ }
+
+ public ZenodoAPIClient(String urlString, String access_token) throws IOException {
+
+ this.urlString = urlString;
+ this.access_token = access_token;
+ }
+
+ /**
+ * Creates a brand new deposition in Zenodo. It sets the deposition_id and the bucket to be used to store the files to upload
+ * @return response code
+ * @throws IOException
+ */
+ public int newDeposition() throws IOException {
+ String json = "{}";
+ OkHttpClient httpClient = new OkHttpClient();
+
+ RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, json);
+
+ Request request = new Request.Builder()
+ .url(urlString)
+ .addHeader("Content-Type", "application/json") // add request headers
+ .addHeader("Authorization", "Bearer " + access_token)
+ .post(body)
+ .build();
+
+ try (Response response = httpClient.newCall(request).execute()) {
+
+ if (!response.isSuccessful())
+ throw new IOException("Unexpected code " + response + response.body().string());
+
+ // Get response body
+ json = response.body().string();
+
+ ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
+ this.bucket = newSubmission.getLinks().getBucket();
+ this.deposition_id = newSubmission.getId();
+
+ return response.code();
+
+ }
+
+ }
+
+ /**
+ * Uploads a file to the bucket of the current deposition in Zenodo.
+ * @param is the inputStream for the file to upload
+ * @param file_name the name of the file as it will appear on Zenodo
+ * @param len the size of the file
+ * @return the response code
+ */
+ public int uploadIS(InputStream is, String file_name, long len) throws IOException {
+ OkHttpClient httpClient = new OkHttpClient();
+
+ Request request = new Request.Builder()
+ .url(bucket + "/" + file_name)
+ .addHeader("Content-Type", "application/zip") // add request headers
+ .addHeader("Authorization", "Bearer " + access_token)
+ .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
+ .build();
+
+ try (Response response = httpClient.newCall(request).execute()) {
+ if (!response.isSuccessful())
+ throw new IOException("Unexpected code " + response + response.body().string());
+ return response.code();
+ }
+ }
+
+ /**
+ * Associates metadata information with the current deposition
+ * @param metadata the metadata
+ * @return response code
+ * @throws IOException
+ */
+ public int sendMetadata(String metadata) throws IOException {
+
+ OkHttpClient httpClient = new OkHttpClient();
+
+ RequestBody body = RequestBody.create(MEDIA_TYPE_JSON, metadata);
+
+ Request request = new Request.Builder()
+ .url(urlString + "/" + deposition_id)
+ .addHeader("Content-Type", "application/json") // add request headers
+ .addHeader("Authorization", "Bearer " + access_token)
+ .put(body)
+ .build();
+
+ try (Response response = httpClient.newCall(request).execute()) {
+
+ if (!response.isSuccessful())
+ throw new IOException("Unexpected code " + response + response.body().string());
+
+ return response.code();
+
+ }
+
+ }
+
+ /**
+ * Publishes the current deposition. It works both for a new deposition and for a new version of an old deposition
+ * @return response code
+ * @throws IOException
+ */
+ public int publish() throws IOException {
+
+ String json = "{}";
+
+ OkHttpClient httpClient = new OkHttpClient();
+
+ Request request = new Request.Builder()
+ .url(urlString + "/" + deposition_id + "/actions/publish")
+ .addHeader("Authorization", "Bearer " + access_token)
+ .post(RequestBody.create(MEDIA_TYPE_JSON, json))
+ .build();
+
+ try (Response response = httpClient.newCall(request).execute()) {
+
+ if (!response.isSuccessful())
+ throw new IOException("Unexpected code " + response + response.body().string());
+
+ return response.code();
+
+ }
+ }
+
+ /**
+ * To create a new version of an already published deposition.
+ * It sets the deposition_id and the bucket to be used for the new version.
+ * @param concept_rec_id the concept record id of the deposition for which to create a new version. It is
+ * the last part of the url for the DOI Zenodo suggests to use to cite all versions:
+ * DOI: 10.xxx/zenodo.656930 concept_rec_id = 656930
+ * @return response code
+ * @throws IOException
+ * @throws MissingConceptDoiException
+ */
+ public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
+ setDepositionId(concept_rec_id);
+ String json = "{}";
+
+ OkHttpClient httpClient = new OkHttpClient();
+
+ Request request = new Request.Builder()
+ .url(urlString + "/" + deposition_id + "/actions/newversion")
+ .addHeader("Authorization", "Bearer " + access_token)
+ .post(RequestBody.create(MEDIA_TYPE_JSON, json))
+ .build();
+
+ try (Response response = httpClient.newCall(request).execute()) {
+
+ if (!response.isSuccessful())
+ throw new IOException("Unexpected code " + response + response.body().string());
+
+ ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
+ String latest_draft = zenodoModel.getLinks().getLatest_draft();
+ deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
+ bucket = getBucket(latest_draft);
+ return response.code();
+
+ }
+ }
+
+ private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
+
+ ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
+
+ for (ZenodoModel zm : zenodoModelList) {
+ if (zm.getConceptrecid().equals(concept_rec_id)) {
+ deposition_id = zm.getId();
+ return;
+ }
+ }
+
+ throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
+
+ }
+
+ private String getPrevDepositions() throws IOException {
+ OkHttpClient httpClient = new OkHttpClient();
+
+ Request request = new Request.Builder()
+ .url(urlString)
+ .addHeader("Content-Type", "application/json") // add request headers
+ .addHeader("Authorization", "Bearer " + access_token)
+ .get()
+ .build();
+
+ try (Response response = httpClient.newCall(request).execute()) {
+
+ if (!response.isSuccessful())
+ throw new IOException("Unexpected code " + response + response.body().string());
+
+ return response.body().string();
+
+ }
+
+ }
+
+ private String getBucket(String url) throws IOException {
+ OkHttpClient httpClient = new OkHttpClient();
+
+ Request request = new Request.Builder()
+ .url(url)
+ .addHeader("Content-Type", "application/json") // add request headers
+ .addHeader("Authorization", "Bearer " + access_token)
+ .get()
+ .build();
+
+ try (Response response = httpClient.newCall(request).execute()) {
+
+ if (!response.isSuccessful())
+ throw new IOException("Unexpected code " + response + response.body().string());
+
+ // Get response body
+ ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
+
+ return zenodoModel.getLinks().getBucket();
+
+ }
+
+ }
+
+}
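The intended call sequence, also exercised by the test added below, is: create a deposition (or a new version of an existing one), upload the file(s) to the returned bucket, attach the metadata, and publish. A minimal usage sketch follows; the token, file name and metadata are placeholders, while the sandbox URL is the one used in the test:

    // Sketch only; see ZenodoAPIClientTest below for the actual invocation.
    ZenodoAPIClient client = new ZenodoAPIClient(
        "https://sandbox.zenodo.org/api/deposit/depositions", "<ACCESS_TOKEN>"); // placeholder token
    client.newDeposition(); // sets deposition_id and the upload bucket
    try (InputStream is = new FileInputStream("dump.json.gz")) { // hypothetical file
        client.uploadIS(is, "dump.json.gz", new File("dump.json.gz").length());
    }
    client.sendMetadata("{\"metadata\":{\"title\":\"...\",\"upload_type\":\"dataset\"}}"); // placeholder metadata
    client.publish();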
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
new file mode 100644
index 000000000..a02224383
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Community.java
@@ -0,0 +1,14 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+public class Community {
+ private String identifier;
+
+ public String getIdentifier() {
+ return identifier;
+ }
+
+ public void setIdentifier(String identifier) {
+ this.identifier = identifier;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
new file mode 100644
index 000000000..c03762693
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java
@@ -0,0 +1,47 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+public class Creator {
+ private String affiliation;
+ private String name;
+ private String orcid;
+
+ public String getAffiliation() {
+ return affiliation;
+ }
+
+ public void setAffiliation(String affiliation) {
+ this.affiliation = affiliation;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getOrcid() {
+ return orcid;
+ }
+
+ public void setOrcid(String orcid) {
+ this.orcid = orcid;
+ }
+
+ public static Creator newInstance(String name, String affiliation, String orcid) {
+ Creator c = new Creator();
+ if (name != null) {
+ c.name = name;
+ }
+ if (affiliation != null) {
+ c.affiliation = affiliation;
+ }
+ if (orcid != null) {
+ c.orcid = orcid;
+ }
+
+ return c;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
new file mode 100644
index 000000000..c7428de7d
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java
@@ -0,0 +1,58 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+import net.minidev.json.annotate.JsonIgnore;
+
+public class File implements Serializable {
+ private String checksum;
+ private String filename;
+ private long filesize;
+ private String id;
+
+ // @JsonIgnore
+ // private Links links;
+
+ public String getChecksum() {
+ return checksum;
+ }
+
+ public void setChecksum(String checksum) {
+ this.checksum = checksum;
+ }
+
+ public String getFilename() {
+ return filename;
+ }
+
+ public void setFilename(String filename) {
+ this.filename = filename;
+ }
+
+ public long getFilesize() {
+ return filesize;
+ }
+
+ public void setFilesize(long filesize) {
+ this.filesize = filesize;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+// @JsonIgnore
+// public Links getLinks() {
+// return links;
+// }
+//
+// @JsonIgnore
+// public void setLinks(Links links) {
+// this.links = links;
+// }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
new file mode 100644
index 000000000..476f1d9d8
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Grant.java
@@ -0,0 +1,23 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+public class Grant implements Serializable {
+ private String id;
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public static Grant newInstance(String id) {
+ Grant g = new Grant();
+ g.id = id;
+
+ return g;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
new file mode 100644
index 000000000..bdf8e5d2c
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Links.java
@@ -0,0 +1,92 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+public class Links implements Serializable {
+
+ private String bucket;
+
+ private String discard;
+
+ private String edit;
+ private String files;
+ private String html;
+ private String latest_draft;
+ private String latest_draft_html;
+ private String publish;
+
+ private String self;
+
+ public String getBucket() {
+ return bucket;
+ }
+
+ public void setBucket(String bucket) {
+ this.bucket = bucket;
+ }
+
+ public String getDiscard() {
+ return discard;
+ }
+
+ public void setDiscard(String discard) {
+ this.discard = discard;
+ }
+
+ public String getEdit() {
+ return edit;
+ }
+
+ public void setEdit(String edit) {
+ this.edit = edit;
+ }
+
+ public String getFiles() {
+ return files;
+ }
+
+ public void setFiles(String files) {
+ this.files = files;
+ }
+
+ public String getHtml() {
+ return html;
+ }
+
+ public void setHtml(String html) {
+ this.html = html;
+ }
+
+ public String getLatest_draft() {
+ return latest_draft;
+ }
+
+ public void setLatest_draft(String latest_draft) {
+ this.latest_draft = latest_draft;
+ }
+
+ public String getLatest_draft_html() {
+ return latest_draft_html;
+ }
+
+ public void setLatest_draft_html(String latest_draft_html) {
+ this.latest_draft_html = latest_draft_html;
+ }
+
+ public String getPublish() {
+ return publish;
+ }
+
+ public void setPublish(String publish) {
+ this.publish = publish;
+ }
+
+ public String getSelf() {
+ return self;
+ }
+
+ public void setSelf(String self) {
+ this.self = self;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
new file mode 100644
index 000000000..b161adb9b
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Metadata.java
@@ -0,0 +1,153 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+import java.util.List;
+
+public class Metadata implements Serializable {
+
+ private String access_right;
+ private List<Community> communities;
+ private List<Creator> creators;
+ private String description;
+ private String doi;
+ private List<Grant> grants;
+ private List<String> keywords;
+ private String language;
+ private String license;
+ private PrereserveDoi prereserve_doi;
+ private String publication_date;
+ private List<String> references;
+ private List<RelatedIdentifier> related_identifiers;
+ private String title;
+ private String upload_type;
+ private String version;
+
+ public String getUpload_type() {
+ return upload_type;
+ }
+
+ public void setUpload_type(String upload_type) {
+ this.upload_type = upload_type;
+ }
+
+ public String getVersion() {
+ return version;
+ }
+
+ public void setVersion(String version) {
+ this.version = version;
+ }
+
+ public String getAccess_right() {
+ return access_right;
+ }
+
+ public void setAccess_right(String access_right) {
+ this.access_right = access_right;
+ }
+
+ public List<Community> getCommunities() {
+ return communities;
+ }
+
+ public void setCommunities(List<Community> communities) {
+ this.communities = communities;
+ }
+
+ public List<Creator> getCreators() {
+ return creators;
+ }
+
+ public void setCreators(List<Creator> creators) {
+ this.creators = creators;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+
+ public void setDescription(String description) {
+ this.description = description;
+ }
+
+ public String getDoi() {
+ return doi;
+ }
+
+ public void setDoi(String doi) {
+ this.doi = doi;
+ }
+
+ public List<Grant> getGrants() {
+ return grants;
+ }
+
+ public void setGrants(List<Grant> grants) {
+ this.grants = grants;
+ }
+
+ public List<String> getKeywords() {
+ return keywords;
+ }
+
+ public void setKeywords(List<String> keywords) {
+ this.keywords = keywords;
+ }
+
+ public String getLanguage() {
+ return language;
+ }
+
+ public void setLanguage(String language) {
+ this.language = language;
+ }
+
+ public String getLicense() {
+ return license;
+ }
+
+ public void setLicense(String license) {
+ this.license = license;
+ }
+
+ public PrereserveDoi getPrereserve_doi() {
+ return prereserve_doi;
+ }
+
+ public void setPrereserve_doi(PrereserveDoi prereserve_doi) {
+ this.prereserve_doi = prereserve_doi;
+ }
+
+ public String getPublication_date() {
+ return publication_date;
+ }
+
+ public void setPublication_date(String publication_date) {
+ this.publication_date = publication_date;
+ }
+
+ public List<String> getReferences() {
+ return references;
+ }
+
+ public void setReferences(List<String> references) {
+ this.references = references;
+ }
+
+ public List<RelatedIdentifier> getRelated_identifiers() {
+ return related_identifiers;
+ }
+
+ public void setRelated_identifiers(List<RelatedIdentifier> related_identifiers) {
+ this.related_identifiers = related_identifiers;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public void setTitle(String title) {
+ this.title = title;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
new file mode 100644
index 000000000..aa088ef31
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/PrereserveDoi.java
@@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+public class PrereserveDoi implements Serializable {
+ private String doi;
+ private String recid;
+
+ public String getDoi() {
+ return doi;
+ }
+
+ public void setDoi(String doi) {
+ this.doi = doi;
+ }
+
+ public String getRecid() {
+ return recid;
+ }
+
+ public void setRecid(String recid) {
+ this.recid = recid;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
new file mode 100644
index 000000000..15a349636
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/RelatedIdentifier.java
@@ -0,0 +1,43 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+
+public class RelatedIdentifier implements Serializable {
+ private String identifier;
+ private String relation;
+ private String resource_type;
+ private String scheme;
+
+ public String getIdentifier() {
+ return identifier;
+ }
+
+ public void setIdentifier(String identifier) {
+ this.identifier = identifier;
+ }
+
+ public String getRelation() {
+ return relation;
+ }
+
+ public void setRelation(String relation) {
+ this.relation = relation;
+ }
+
+ public String getResource_type() {
+ return resource_type;
+ }
+
+ public void setResource_type(String resource_type) {
+ this.resource_type = resource_type;
+ }
+
+ public String getScheme() {
+ return scheme;
+ }
+
+ public void setScheme(String scheme) {
+ this.scheme = scheme;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
new file mode 100644
index 000000000..9843ea0f9
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModel.java
@@ -0,0 +1,118 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.io.Serializable;
+import java.util.List;
+
+public class ZenodoModel implements Serializable {
+
+ private String conceptrecid;
+ private String created;
+
+ private List<File> files;
+ private String id;
+ private Links links;
+ private Metadata metadata;
+ private String modified;
+ private String owner;
+ private String record_id;
+ private String state;
+ private boolean submitted;
+ private String title;
+
+ public String getConceptrecid() {
+ return conceptrecid;
+ }
+
+ public void setConceptrecid(String conceptrecid) {
+ this.conceptrecid = conceptrecid;
+ }
+
+ public String getCreated() {
+ return created;
+ }
+
+ public void setCreated(String created) {
+ this.created = created;
+ }
+
+ public List<File> getFiles() {
+ return files;
+ }
+
+ public void setFiles(List<File> files) {
+ this.files = files;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public Links getLinks() {
+ return links;
+ }
+
+ public void setLinks(Links links) {
+ this.links = links;
+ }
+
+ public Metadata getMetadata() {
+ return metadata;
+ }
+
+ public void setMetadata(Metadata metadata) {
+ this.metadata = metadata;
+ }
+
+ public String getModified() {
+ return modified;
+ }
+
+ public void setModified(String modified) {
+ this.modified = modified;
+ }
+
+ public String getOwner() {
+ return owner;
+ }
+
+ public void setOwner(String owner) {
+ this.owner = owner;
+ }
+
+ public String getRecord_id() {
+ return record_id;
+ }
+
+ public void setRecord_id(String record_id) {
+ this.record_id = record_id;
+ }
+
+ public String getState() {
+ return state;
+ }
+
+ public void setState(String state) {
+ this.state = state;
+ }
+
+ public boolean isSubmitted() {
+ return submitted;
+ }
+
+ public void setSubmitted(boolean submitted) {
+ this.submitted = submitted;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public void setTitle(String title) {
+ this.title = title;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
new file mode 100644
index 000000000..b3b150714
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/ZenodoModelList.java
@@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.common.api.zenodo;
+
+import java.util.ArrayList;
+
+public class ZenodoModelList extends ArrayList<ZenodoModel> {
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java
index 97fe4b9d8..9552eb2b3 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java
@@ -1,15 +1,22 @@
package eu.dnetlib.dhp.utils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
+import java.util.Map;
+
+import javax.xml.ws.BindingProvider;
+
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class ISLookupClientFactory {
- private static final Log log = LogFactory.getLog(ISLookupClientFactory.class);
+ private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
+
+ private static int requestTimeout = 60000 * 10;
+ private static int connectTimeout = 60000 * 10;
public static ISLookUpService getLookUpService(final String isLookupUrl) {
return getServiceStub(ISLookUpService.class, isLookupUrl);
@@ -21,6 +28,25 @@ public class ISLookupClientFactory {
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
jaxWsProxyFactory.setServiceClass(clazz);
jaxWsProxyFactory.setAddress(endpoint);
- return (T) jaxWsProxyFactory.create();
+
+ final T service = (T) jaxWsProxyFactory.create();
+
+ if (service instanceof BindingProvider) {
+ log
+ .info(
+ "setting timeouts for {} to requestTimeout: {}, connectTimeout: {}",
+ BindingProvider.class.getName(), requestTimeout, connectTimeout);
+
+ Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
+
+ requestContext.put("com.sun.xml.internal.ws.request.timeout", requestTimeout);
+ requestContext.put("com.sun.xml.internal.ws.connect.timeout", connectTimeout);
+ requestContext.put("com.sun.xml.ws.request.timeout", requestTimeout);
+ requestContext.put("com.sun.xml.ws.connect.timeout", connectTimeout);
+ requestContext.put("javax.xml.ws.client.receiveTimeout", requestTimeout);
+ requestContext.put("javax.xml.ws.client.connectionTimeout", connectTimeout);
+ }
+
+ return service;
}
}
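Callers obtain the proxy exactly as before; the ten-minute request/connect timeouts added above are applied transparently whenever the generated stub implements BindingProvider. A minimal sketch, where the lookup endpoint is a placeholder and not taken from this patch:

    // Hedged usage sketch; replace the URL with the actual IS lookup endpoint.
    ISLookUpService isLookUp = ISLookupClientFactory
        .getLookUpService("http://<is-host>/is/services/isLookUp?wsdl");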
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
new file mode 100644
index 000000000..f961d6748
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
@@ -0,0 +1,85 @@
+
+package eu.dnetlib.dhp.common.api;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+@Disabled
+public class ZenodoAPIClientTest {
+
+ private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
+ private final String ACCESS_TOKEN = "";
+
+ private final String CONCEPT_REC_ID = "657113";
+
+ @Test
+ public void testNewDeposition() throws IOException {
+
+ ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+ ACCESS_TOKEN);
+ Assertions.assertEquals(201, client.newDeposition());
+
+ File file = new File(getClass()
+ .getResource("/eu/dnetlib/dhp/common/api/COVID-19.json.gz")
+ .getPath());
+
+ InputStream is = new FileInputStream(file);
+
+ Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
+
+ String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
+
+ Assertions.assertEquals(200, client.sendMetadata(metadata));
+
+ Assertions.assertEquals(202, client.publish());
+
+ }
+
+ @Test
+ public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
+
+ ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+ ACCESS_TOKEN);
+
+ Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
+
+ File file = new File(getClass()
+ .getResource("/eu/dnetlib/dhp/common/api/newVersion")
+ .getPath());
+
+ InputStream is = new FileInputStream(file);
+
+ Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
+
+ Assertions.assertEquals(202, client.publish());
+
+ }
+
+ @Test
+ public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
+
+ ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+ ACCESS_TOKEN);
+
+ Assertions.assertEquals(201, client.newVersion(CONCEPT_REC_ID));
+
+ File file = new File(getClass()
+ .getResource("/eu/dnetlib/dhp/common/api/newVersion2")
+ .getPath());
+
+ InputStream is = new FileInputStream(file);
+
+ Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
+
+ Assertions.assertEquals(202, client.publish());
+
+ }
+
+}
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/metadata.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/metadata.json
new file mode 100644
index 000000000..eab3ae278
--- /dev/null
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/metadata.json
@@ -0,0 +1 @@
+{"metadata":{"access_right":"open","communities":[{"identifier":"openaire-research-graph"}],"creators":[{"affiliation":"ISTI - CNR","name":"Bardi, Alessia","orcid":"0000-0002-1112-1292"},{"affiliation":"eifl", "name":"Kuchma, Iryna"},{"affiliation":"BIH", "name":"Brobov, Evgeny"},{"affiliation":"GIDIF RBM", "name":"Truccolo, Ivana"},{"affiliation":"unesp", "name":"Monteiro, Elizabete"},{"affiliation":"und", "name":"Casalegno, Carlotta"},{"affiliation":"CARL ABRC", "name":"Clary, Erin"},{"affiliation":"The University of Edimburgh", "name":"Romanowski, Andrew"},{"affiliation":"ISTI - CNR", "name":"Pavone, Gina"},{"affiliation":"ISTI - CNR", "name":"Artini, Michele"},{"affiliation":"ISTI - CNR","name":"Atzori, Claudio","orcid":"0000-0001-9613-6639"},{"affiliation":"University of Bielefeld","name":"Bäcker, Amelie","orcid":"0000-0001-6015-2063"},{"affiliation":"ISTI - CNR","name":"Baglioni, Miriam","orcid":"0000-0002-2273-9004"},{"affiliation":"University of Bielefeld","name":"Czerniak, Andreas","orcid":"0000-0003-3883-4169"},{"affiliation":"ISTI - CNR","name":"De Bonis, Michele"},{"affiliation":"Athena Research and Innovation Centre","name":"Dimitropoulos, Harry"},{"affiliation":"Athena Research and Innovation Centre","name":"Foufoulas, Ioannis"},{"affiliation":"University of Warsaw","name":"Horst, Marek"},{"affiliation":"Athena Research and Innovation Centre","name":"Iatropoulou, Katerina"},{"affiliation":"University of Warsaw","name":"Jacewicz, Przemyslaw"},{"affiliation":"Athena Research and Innovation Centre","name":"Kokogiannaki, Argiro", "orcid":"0000-0002-3880-0244"},{"affiliation":"ISTI - CNR","name":"La Bruzzo, Sandro","orcid":"0000-0003-2855-1245"},{"affiliation":"ISTI - CNR","name":"Lazzeri, Emma"},{"affiliation":"University of Bielefeld","name":"Löhden, Aenne"},{"affiliation":"ISTI - CNR","name":"Manghi, Paolo","orcid":"0000-0001-7291-3210"},{"affiliation":"ISTI - CNR","name":"Mannocci, Andrea","orcid":"0000-0002-5193-7851"},{"affiliation":"Athena Research and Innovation Center","name":"Manola, Natalia"},{"affiliation":"ISTI - CNR","name":"Ottonello, Enrico"},{"affiliation":"University of Bielefeld","name":"Shirrwagen, Jochen"}],"description":"\\u003cp\\u003eThis dump provides access to the metadata records of publications, research data, software and projects that may be relevant to the Corona Virus Disease (COVID-19) fight. The dump contains records of the OpenAIRE COVID-19 Gateway (https://covid-19.openaire.eu/), identified via full-text mining and inference techniques applied to the OpenAIRE Research Graph (https://explore.openaire.eu/). The Graph is one of the largest Open Access collections of metadata records and links between publications, datasets, software, projects, funders, and organizations, aggregating 12,000+ scientific data sources world-wide, among which the Covid-19 data sources Zenodo COVID-19 Community, WHO (World Health Organization), BIP! FInder for COVID-19, Protein Data Bank, Dimensions, scienceOpen, and RSNA. \\u003cp\\u003eThe dump consists of a gzip file containing one json per line. Each json is compliant to the schema available at https://doi.org/10.5281/zenodo.3974226\\u003c/p\\u003e ","title":"OpenAIRE Covid-19 publications, datasets, software and projects metadata.","upload_type":"dataset","version":"1.0"}}
\ No newline at end of file
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newDeposition b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newDeposition
new file mode 100644
index 000000000..df7dfb041
--- /dev/null
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newDeposition
@@ -0,0 +1 @@
+This is a test for a new deposition
\ No newline at end of file
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newVersion b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newVersion
new file mode 100644
index 000000000..eb8ec9ef2
--- /dev/null
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newVersion
@@ -0,0 +1 @@
+This is a test for a new version of an old deposition
\ No newline at end of file
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newVersion2 b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newVersion2
new file mode 100644
index 000000000..914a2442c
--- /dev/null
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/common/api/newVersion2
@@ -0,0 +1,2 @@
+This is a test for a new version of an old deposition. This should replace the other new version. I expect to have only two
+files in the deposition
\ No newline at end of file
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
index bf48605d2..b27fc9267 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@@ -7,6 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class ModelConstants {
+ public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
index 7d8be81ac..b5bca2e93 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
@@ -79,6 +79,15 @@ public class ModelSupport {
entityIdPrefix.put("result", "50");
}
+ public static final Map<String, String> idPrefixEntity = Maps.newHashMap();
+
+ static {
+ idPrefixEntity.put("10", "datasource");
+ idPrefixEntity.put("20", "organization");
+ idPrefixEntity.put("40", "project");
+ idPrefixEntity.put("50", "result");
+ }
+
public static final Map relationInverseMap = Maps.newHashMap();
static {
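The new idPrefixEntity map is the inverse of entityIdPrefix, so the entity type can be resolved from the two-digit prefix of an OpenAIRE identifier. A small sketch with a hypothetical identifier:

    // Hypothetical OpenAIRE id; the first two characters encode the entity type.
    String oafId = "50|dedup_wf_001::0123456789abcdef";
    String entityType = ModelSupport.idPrefixEntity.get(oafId.substring(0, 2)); // -> "result"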
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/APC.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/APC.java
new file mode 100644
index 000000000..7f5dcb397
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/APC.java
@@ -0,0 +1,29 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+/**
+ * Used to refer to the Article Processing Charge information. Not dumped in this release. It contains two parameters: -
+ * currency of type String to store the currency of the APC - amount of type String to store the charged amount
+ */
+public class APC implements Serializable {
+ private String currency;
+ private String amount;
+
+ public String getCurrency() {
+ return currency;
+ }
+
+ public void setCurrency(String currency) {
+ this.currency = currency;
+ }
+
+ public String getAmount() {
+ return amount;
+ }
+
+ public void setAmount(String amount) {
+ this.amount = amount;
+ }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java
new file mode 100644
index 000000000..f28c544f6
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java
@@ -0,0 +1,31 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+/**
+ * AccessRight. Used to represent the result access rights. It extends the eu.dnetlib.dhp.schema.dump.oaf.Qualifier
+ * element with a parameter scheme of type String to store the scheme. Values for this element are found against the
+ * COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
+ * the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
+ * COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
+ * access right scheme
+ */
+public class AccessRight extends Qualifier {
+
+ private String scheme;
+
+ public String getScheme() {
+ return scheme;
+ }
+
+ public void setScheme(String scheme) {
+ this.scheme = scheme;
+ }
+
+ public static AccessRight newInstance(String code, String label, String scheme) {
+ AccessRight ar = new AccessRight();
+ ar.setCode(code);
+ ar.setLabel(label);
+ ar.setScheme(scheme);
+ return ar;
+ }
+}
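A hedged example of how the element is expected to be built; the COAR code, label and scheme values below are assumptions, not taken from this patch:

    // Assumed COAR values: c_abf2 is the COAR access-right code commonly associated with open access.
    AccessRight openAccess = AccessRight
        .newInstance("c_abf2", "open access", "http://vocabularies.coar-repositories.org/documentation/access_rights/");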
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Author.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Author.java
new file mode 100644
index 000000000..34920bcf7
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Author.java
@@ -0,0 +1,73 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
+ * name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
+ * type String to store the family name of the author. The value for this parameter corresponds to
+ * eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
+ * this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank of
+ * the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
+ * rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
+ * moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
+ * eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
+ * instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
+ * instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
+ * dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
+ */
+public class Author implements Serializable {
+
+ private String fullname;
+
+ private String name;
+
+ private String surname;
+
+ private Integer rank;
+
+ private Pid pid;
+
+ public String getFullname() {
+ return fullname;
+ }
+
+ public void setFullname(String fullname) {
+ this.fullname = fullname;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getSurname() {
+ return surname;
+ }
+
+ public void setSurname(String surname) {
+ this.surname = surname;
+ }
+
+ public Integer getRank() {
+ return rank;
+ }
+
+ public void setRank(Integer rank) {
+ this.rank = rank;
+ }
+
+ public Pid getPid() {
+ return pid;
+ }
+
+ public void setPid(Pid pid) {
+ this.pid = pid;
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java
new file mode 100644
index 000000000..8699528ca
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java
@@ -0,0 +1,136 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+/**
+ * To store information about the conference or journal where the result has been presented or published. It contains
+ * eleven parameters: - name of type String to store the name of the journal or conference. It corresponds to the
+ * parameter name of eu.dnetlib.dhp.schema.oaf.Journal - issnPrinted of type String to store the journal printed issn.
+ * It corresponds to the parameter issnPrinted of eu.dnetlib.dhp.schema.oaf.Journal - issnOnline of type String to store
+ * the journal online issn. It corresponds to the parameter issnOnline of eu.dnetlib.dhp.schema.oaf.Journal -
+ * issnLinking of type String to store the journal linking issn. It corresponds to the parameter issnLinking of
+ * eu.dnetlib.dhp.schema.oaf.Journal - ep of type String to store the end page. It corresponds to the parameter ep of
+ * eu.dnetlib.dhp.schema.oaf.Journal - iss of type String to store the journal issue. It corresponds to the parameter
+ * iss of eu.dnetlib.dhp.schema.oaf.Journal - sp of type String to store the start page. It corresponds to the parameter
+ * sp of eu.dnetlib.dhp.schema.oaf.Journal - vol of type String to store the Volume. It corresponds to the parameter vol
+ * of eu.dnetlib.dhp.schema.oaf.Journal - edition of type String to store the edition of the journal or conference
+ * proceeding. It corresponds to the parameter edition of eu.dnetlib.dhp.schema.oaf.Journal - conferenceplace of type
+ * String to store the place of the conference. It corresponds to the parameter conferenceplace of
+ * eu.dnetlib.dhp.schema.oaf.Journal - conferencedate of type String to store the date of the conference. It corresponds
+ * to the parameter conferencedate of eu.dnetlib.dhp.schema.oaf.Journal
+ */
+public class Container implements Serializable {
+
+ private String name;
+
+ private String issnPrinted;
+
+ private String issnOnline;
+
+ private String issnLinking;
+
+ private String ep;
+
+ private String iss;
+
+ private String sp;
+
+ private String vol;
+
+ private String edition;
+
+ private String conferenceplace;
+
+ private String conferencedate;
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getIssnPrinted() {
+ return issnPrinted;
+ }
+
+ public void setIssnPrinted(String issnPrinted) {
+ this.issnPrinted = issnPrinted;
+ }
+
+ public String getIssnOnline() {
+ return issnOnline;
+ }
+
+ public void setIssnOnline(String issnOnline) {
+ this.issnOnline = issnOnline;
+ }
+
+ public String getIssnLinking() {
+ return issnLinking;
+ }
+
+ public void setIssnLinking(String issnLinking) {
+ this.issnLinking = issnLinking;
+ }
+
+ public String getEp() {
+ return ep;
+ }
+
+ public void setEp(String ep) {
+ this.ep = ep;
+ }
+
+ public String getIss() {
+ return iss;
+ }
+
+ public void setIss(String iss) {
+ this.iss = iss;
+ }
+
+ public String getSp() {
+ return sp;
+ }
+
+ public void setSp(String sp) {
+ this.sp = sp;
+ }
+
+ public String getVol() {
+ return vol;
+ }
+
+ public void setVol(String vol) {
+ this.vol = vol;
+ }
+
+ public String getEdition() {
+ return edition;
+ }
+
+ public void setEdition(String edition) {
+ this.edition = edition;
+ }
+
+ public String getConferenceplace() {
+ return conferenceplace;
+ }
+
+ public void setConferenceplace(String conferenceplace) {
+ this.conferenceplace = conferenceplace;
+ }
+
+ public String getConferencedate() {
+ return conferencedate;
+ }
+
+ public void setConferencedate(String conferencedate) {
+ this.conferencedate = conferencedate;
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java
new file mode 100644
index 000000000..cad7b8b5c
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java
@@ -0,0 +1,38 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+/**
+ * To represent the information described by a scheme and a value in that scheme (i.e. pid). It has two parameters: -
+ * scheme of type String to store the scheme - value of type String to store the value in that scheme
+ */
+public class ControlledField implements Serializable {
+ private String scheme;
+ private String value;
+
+ public String getScheme() {
+ return scheme;
+ }
+
+ public void setScheme(String scheme) {
+ this.scheme = scheme;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ public void setValue(String value) {
+ this.value = value;
+ }
+
+ public static ControlledField newInstance(String scheme, String value) {
+ ControlledField cf = new ControlledField();
+
+ cf.setScheme(scheme);
+ cf.setValue(value);
+
+ return cf;
+ }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Country.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Country.java
new file mode 100644
index 000000000..3ab4d90fe
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Country.java
@@ -0,0 +1,37 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+/**
+ * Represents the country associated with this result. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
+ * provenance parameter of type eu.dnetlib.dhp.schema.dump.oaf.Provenance. The country is not mapped if its value in the
+ * result represented in the internal format is Unknown. The values for this element correspond to: - code corresponds
+ * to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
+ * eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
+ * dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
+ * datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
+ */
+public class Country extends Qualifier {
+
+ private Provenance provenance;
+
+ public Provenance getProvenance() {
+ return provenance;
+ }
+
+ public void setProvenance(Provenance provenance) {
+ this.provenance = provenance;
+ }
+
+ public static Country newInstance(String code, String label, Provenance provenance) {
+ Country c = new Country();
+ c.setProvenance(provenance);
+ c.setCode(code);
+ c.setLabel(label);
+ return c;
+ }
+
+ public static Country newInstance(String code, String label, String provenance, String trust) {
+ return newInstance(code, label, Provenance.newInstance(provenance, trust));
+ }
+
+}
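A minimal sketch of the mapping described above; code and label come from the internal Country qualifier, while the provenance and trust values here are purely illustrative:

    // Illustrative values; in the dump they are taken from the result's Country qualifier and its dataInfo.
    Country italy = Country.newInstance("IT", "Italy", "Harvested", "0.9");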
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Funder.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Funder.java
new file mode 100644
index 000000000..16cab22cc
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Funder.java
@@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+public class Funder implements Serializable {
+ private String shortName;
+
+ private String name;
+
+ private String jurisdiction;
+
+ public String getJurisdiction() {
+ return jurisdiction;
+ }
+
+ public void setJurisdiction(String jurisdiction) {
+ this.jurisdiction = jurisdiction;
+ }
+
+ public String getShortName() {
+ return shortName;
+ }
+
+ public void setShortName(String shortName) {
+ this.shortName = shortName;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/GeoLocation.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/GeoLocation.java
new file mode 100644
index 000000000..6bd891bbd
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/GeoLocation.java
@@ -0,0 +1,53 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+/**
+ * Represents the geolocation information. It has three parameters: - point of type String to store the point
+ * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box of type String to store the box
+ * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
+ * information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
+ */
+public class GeoLocation implements Serializable {
+
+ private String point;
+
+ private String box;
+
+ private String place;
+
+ public String getPoint() {
+ return point;
+ }
+
+ public void setPoint(String point) {
+ this.point = point;
+ }
+
+ public String getBox() {
+ return box;
+ }
+
+ public void setBox(String box) {
+ this.box = box;
+ }
+
+ public String getPlace() {
+ return place;
+ }
+
+ public void setPlace(String place) {
+ this.place = place;
+ }
+
+ @JsonIgnore
+ public boolean isBlank() {
+ return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Instance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Instance.java
new file mode 100644
index 000000000..4a09f5a86
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Instance.java
@@ -0,0 +1,107 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
+ * versions are two manifestations of the same research result. It has the following parameters: - license of type
+ * String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
+ * dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
+ * type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
+ * (dnet:publication_resource). It corresponds to the instancetype.classname of the instance to be mapped - hostedby of
+ * type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance can be
+ * viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and - key corresponds
+ * to hostedby.key - value corresponds to hostedby.value - url of type List<String> to store the locations where the instance
+ * is accessible. It corresponds to url of the instance to be dumped - collectedfrom of type
+ * eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
+ * collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
+ * collectedfrom.key - value corresponds to collectedfrom.value - publicationdate of type String to store the
+ * publication date of the instance (dateofacceptance) - refereed of type String to store information about the
+ * review status of the instance. Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed'. It corresponds to
+ * refereed.classname of the instance to be dumped
+ */
+public class Instance implements Serializable {
+
+ private String license;
+
+ private AccessRight accessright;
+
+ private String type;
+
+ private KeyValue hostedby;
+
+ private List<String> url;
+
+ private KeyValue collectedfrom;
+
+ private String publicationdate;// dateofacceptance;
+
+ private String refereed; // peer-review status
+
+ public String getLicense() {
+ return license;
+ }
+
+ public void setLicense(String license) {
+ this.license = license;
+ }
+
+ public AccessRight getAccessright() {
+ return accessright;
+ }
+
+ public void setAccessright(AccessRight accessright) {
+ this.accessright = accessright;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public KeyValue getHostedby() {
+ return hostedby;
+ }
+
+ public void setHostedby(KeyValue hostedby) {
+ this.hostedby = hostedby;
+ }
+
+ public List<String> getUrl() {
+ return url;
+ }
+
+ public void setUrl(List<String> url) {
+ this.url = url;
+ }
+
+ public KeyValue getCollectedfrom() {
+ return collectedfrom;
+ }
+
+ public void setCollectedfrom(KeyValue collectedfrom) {
+ this.collectedfrom = collectedfrom;
+ }
+
+ public String getPublicationdate() {
+ return publicationdate;
+ }
+
+ public void setPublicationdate(String publicationdate) {
+ this.publicationdate = publicationdate;
+ }
+
+ public String getRefereed() {
+ return refereed;
+ }
+
+ public void setRefereed(String refereed) {
+ this.refereed = refereed;
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java
new file mode 100644
index 000000000..849aa4d3c
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java
@@ -0,0 +1,48 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+/**
+ * To represent the information described by a key and a value. It has two parameters: - key to store the key (generally
+ * the OpenAIRE id for some entity) - value to store the value (generally the OpenAIRE name for the key)
+ */
+public class KeyValue implements Serializable {
+
+ private String key;
+
+ private String value;
+
+ public String getKey() {
+ return key;
+ }
+
+ public void setKey(String key) {
+ this.key = key;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ public void setValue(String value) {
+ this.value = value;
+ }
+
+ public static KeyValue newInstance(String key, String value) {
+ KeyValue inst = new KeyValue();
+ inst.key = key;
+ inst.value = value;
+ return inst;
+ }
+
+ @JsonIgnore
+ public boolean isBlank() {
+ return StringUtils.isBlank(key) && StringUtils.isBlank(value);
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Pid.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Pid.java
new file mode 100644
index 000000000..786ddb1d7
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Pid.java
@@ -0,0 +1,45 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+/**
+ * To represent the generic persistent identifier. It has two parameters: - id of type
+ * eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the scheme and value of the Persistent Identifier. -
+ * provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
+ */
+public class Pid implements Serializable {
+ private ControlledField id;
+ private Provenance provenance;
+
+ public ControlledField getId() {
+ return id;
+ }
+
+ public void setId(ControlledField pid) {
+ this.id = pid;
+ }
+
+ public Provenance getProvenance() {
+ return provenance;
+ }
+
+ public void setProvenance(Provenance provenance) {
+ this.provenance = provenance;
+ }
+
+ public static Pid newInstance(ControlledField pid, Provenance provenance) {
+ Pid p = new Pid();
+ p.id = pid;
+ p.provenance = provenance;
+
+ return p;
+ }
+
+ public static Pid newInstance(ControlledField pid) {
+ Pid p = new Pid();
+ p.id = pid;
+
+ return p;
+ }
+}
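A hedged example combining ControlledField and Provenance for an ORCID pid; all values are illustrative:

    // Illustrative ORCID pid; scheme, value, provenance and trust are example data only.
    Pid orcid = Pid
        .newInstance(
            ControlledField.newInstance("orcid", "0000-0002-1825-0097"),
            Provenance.newInstance("Harvested", "0.9"));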
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Project.java
new file mode 100644
index 000000000..f23d5a670
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Project.java
@@ -0,0 +1,45 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+public class Project implements Serializable {
+ protected String id;// OpenAIRE id
+ protected String code;
+
+ protected String acronym;
+
+ protected String title;
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public String getCode() {
+ return code;
+ }
+
+ public void setCode(String code) {
+ this.code = code;
+ }
+
+ public String getAcronym() {
+ return acronym;
+ }
+
+ public void setAcronym(String acronym) {
+ this.acronym = acronym;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public void setTitle(String title) {
+ this.title = title;
+ }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Provenance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Provenance.java
new file mode 100644
index 000000000..28fb3aaa6
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Provenance.java
@@ -0,0 +1,41 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+/**
+ * Indicates the process that produced (or provided) the information, and the trust associated to the information. It
+ * has two parameters: - provenance of type String to store the provenance of the information, - trust of type String to
+ * store the trust associated to the information
+ */
+public class Provenance implements Serializable {
+ private String provenance;
+ private String trust;
+
+ public String getProvenance() {
+ return provenance;
+ }
+
+ public void setProvenance(String provenance) {
+ this.provenance = provenance;
+ }
+
+ public String getTrust() {
+ return trust;
+ }
+
+ public void setTrust(String trust) {
+ this.trust = trust;
+ }
+
+ public static Provenance newInstance(String provenance, String trust) {
+ Provenance p = new Provenance();
+ p.provenance = provenance;
+ p.trust = trust;
+ return p;
+ }
+
+ public String toString() {
+ return provenance + trust;
+ }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java
new file mode 100644
index 000000000..348c22b31
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java
@@ -0,0 +1,42 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+/**
+ * To represent the information described by a code and a value. It has two parameters: - code to store the code
+ * (generally the classid of the eu.dnetlib.dhp.schema.oaf.Qualifier element) - label to store the label (generally the
+ * classname of the eu.dnetlib.dhp.schema.oaf.Qualifier element)
+ */
+public class Qualifier implements Serializable {
+
+ private String code; // the classid in the Qualifier
+ private String label; // the classname in the Qualifier
+
+ public String getCode() {
+ return code;
+ }
+
+ public void setCode(String code) {
+ this.code = code;
+ }
+
+ public String getLabel() {
+ return label;
+ }
+
+ public void setLabel(String label) {
+ this.label = label;
+ }
+
+ public static Qualifier newInstance(String code, String value) {
+ Qualifier qualifier = new Qualifier();
+ qualifier.setCode(code);
+ qualifier.setLabel(value);
+ return qualifier;
+ }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java
new file mode 100644
index 000000000..97ee72259
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java
@@ -0,0 +1,391 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+import java.util.List;
+
+import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
+
+/**
+ * To represent the dumped result. It will be extended in the dump for Research Communities - Research
+ * Initiative/Infrastructures. It has the following parameters: - author of type
+ * List to describe the authors of a result. For each author in the result
+ * represented in the internal model one author in the external model is produced. - type of type String to represent
+ * the category of the result. Possible values are publication, dataset, software, other. It corresponds to
+ * resulttype.classname of the dumped result - language of type eu.dnetlib.dhp.schema.dump.oaf.Qualifier to store
+ * information about the language of the result. It is dumped as - code corresponds to language.classid - value
+ * corresponds to language.classname - country of type List to store the country
+ * list to which the result is associated. For each country in the result represented in the internal model one country
+ * in the external model is produced - subjects of type List to store the subjects for
+ * the result. For each subject in the result represented in the internal model one subject in the external model is
+ * produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
+ * title in the result to be dumped having classid equal to "main title" - subtitle of type String to store the subtitle
+ * of the result. It corresponds to the value of the first title in the result to be dumped having classid equal to
+ * "subtitle" - description of type List to store the description of the result. It corresponds to the list of
+ * description.value in the result represented in the internal model - publicationdate of type String to store the
+ * publication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
+ * publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
+ * represented in the internal model - embargoenddate of type String to store the embargo end date. It corresponds to
+ * embargoenddate.value of the result represented in the internal model - source of type List See definition of
+ * Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
+ * model - format of type List It corresponds to the list of format.value in the result represented in the
+ * internal model - contributor of type List to represent contributors for this result. It corresponds to the
+ * list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds
+ * to the list of coverage.value in the result represented in the internal model - bestaccessright of type
+ * eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store information about the most open access right associated to the
+ * manifestations of this research results. It corresponds to the same parameter in the result represented in the
+ * internal model - instance of type List to store all the instances associated
+ * to the result. It corresponds to the same parameter in the result represented in the internal model - container of
+ * type eu.dnetlib.dhp.schema.dump.oaf.Container (only for results of type publication). It corresponds to the parameter
+ * journal of the result represented in the internal model - documentationUrl of type List (only for results of
+ * type software) to store the URLs to the software documentation. It corresponds to the list of documentationUrl.value
+ * of the result represented in the internal model - codeRepositoryUrl of type String (only for results of type
+ * software) to store the URL to the repository with the source code. It corresponds to codeRepositoryUrl.value of the
+ * result represented in the internal model - programmingLanguage of type String (only for results of type software) to
+ * store the programming language. It corresponds to programmingLanguaga.classid of the result represented in the
+ * internal model - contactperson of type List (only for results of type other) to store the contact person for
+ * this result. It corresponds to the list of contactperson.value of the result represented in the internal model -
+ * contactgroup of type List (only for results of type other) to store the information for the contact group. It
+ * corresponds to the list of contactgroup.value of the result represented in the internal model - tool of type
+ * List (only for results of type other) to store information about tools useful for the interpretation and/or
+ * re-use of the research product. It corresponds to the list of tool.value in the result represented in the internal
+ * model - size of type String (only for results of type dataset) to store the size of the dataset. It corresponds to
+ * size.value in the result represented in the internal model - version of type String (only for results of type
+ * dataset) to store the version. It corresponds to version.value of the result represented in the internal model -
+ * geolocation of type List (only for results of type dataset) to store
+ * geolocation information. For each geolocation element in the result represented in the internal model a GeoLocation
+ * in the external model is produced - id of type String to store the OpenAIRE id of the result. It corresponds to the
+ * id of the result represented in the internal model - originalId of type List to store the original ids of the
+ * result. It corresponds to the originalId of the result represented in the internal model - pid of type
+ * List to store the persistent identifiers for the result. For each pid
+ * in the results represented in the internal model one pid in the external model is produced. The value correspondence
+ * is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model - value corresponds
+ * to the pid.value of the result represented in the internal model - dateofcollection of type String to store
+ * information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
+ * represented in the internal model - lastupdatetimestamp of type Long to store the timestamp of the last update of
+ * the record. It corresponds to lastupdatetimestamp of the record represented in the internal model
+ */
+public class Result implements Serializable {
+
+ private List author;
+
+ // resulttype allows subclassing results into publications | datasets | software
+ private String type; // resulttype
+
+ // common fields
+ private Qualifier language;
+
+ private List country;
+
+ private List subjects;
+
+ private String maintitle;
+
+ private String subtitle;
+
+ private List description;
+
+ private String publicationdate; // dateofacceptance;
+
+ private String publisher;
+
+ private String embargoenddate;
+
+ private List source;
+
+ private List format;
+
+ private List contributor;
+
+ private List coverage;
+
+ private AccessRight bestaccessright;
+
+ private List instance;
+
+ private Container container;// Journal
+
+ private List documentationUrl; // software
+
+ private String codeRepositoryUrl; // software
+
+ private String programmingLanguage; // software
+
+ private List contactperson; // orp
+
+ private List contactgroup; // orp
+
+ private List tool; // orp
+
+ private String size; // dataset
+
+ private String version; // dataset
+
+ private List geolocation; // dataset
+
+ private String id;
+
+ private List originalId;
+
+ private List pid;
+
+ private String dateofcollection;
+
+ private Long lastupdatetimestamp;
+
+ public Long getLastupdatetimestamp() {
+ return lastupdatetimestamp;
+ }
+
+ public void setLastupdatetimestamp(Long lastupdatetimestamp) {
+ this.lastupdatetimestamp = lastupdatetimestamp;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public List getOriginalId() {
+ return originalId;
+ }
+
+ public void setOriginalId(List originalId) {
+ this.originalId = originalId;
+ }
+
+ public List getPid() {
+ return pid;
+ }
+
+ public void setPid(List pid) {
+ this.pid = pid;
+ }
+
+ public String getDateofcollection() {
+ return dateofcollection;
+ }
+
+ public void setDateofcollection(String dateofcollection) {
+ this.dateofcollection = dateofcollection;
+ }
+
+ public List getAuthor() {
+ return author;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public Container getContainer() {
+ return container;
+ }
+
+ public void setContainer(Container container) {
+ this.container = container;
+ }
+
+ public void setAuthor(List author) {
+ this.author = author;
+ }
+
+ public Qualifier getLanguage() {
+ return language;
+ }
+
+ public void setLanguage(Qualifier language) {
+ this.language = language;
+ }
+
+ public List getCountry() {
+ return country;
+ }
+
+ public void setCountry(List country) {
+ this.country = country;
+ }
+
+ public List getSubjects() {
+ return subjects;
+ }
+
+ public void setSubjects(List subjects) {
+ this.subjects = subjects;
+ }
+
+ public String getMaintitle() {
+ return maintitle;
+ }
+
+ public void setMaintitle(String maintitle) {
+ this.maintitle = maintitle;
+ }
+
+ public String getSubtitle() {
+ return subtitle;
+ }
+
+ public void setSubtitle(String subtitle) {
+ this.subtitle = subtitle;
+ }
+
+ public List getDescription() {
+ return description;
+ }
+
+ public void setDescription(List description) {
+ this.description = description;
+ }
+
+ public String getPublicationdate() {
+ return publicationdate;
+ }
+
+ public void setPublicationdate(String publicationdate) {
+ this.publicationdate = publicationdate;
+ }
+
+ public String getPublisher() {
+ return publisher;
+ }
+
+ public void setPublisher(String publisher) {
+ this.publisher = publisher;
+ }
+
+ public String getEmbargoenddate() {
+ return embargoenddate;
+ }
+
+ public void setEmbargoenddate(String embargoenddate) {
+ this.embargoenddate = embargoenddate;
+ }
+
+ public List getSource() {
+ return source;
+ }
+
+ public void setSource(List source) {
+ this.source = source;
+ }
+
+ public List getFormat() {
+ return format;
+ }
+
+ public void setFormat(List format) {
+ this.format = format;
+ }
+
+ public List getContributor() {
+ return contributor;
+ }
+
+ public void setContributor(List contributor) {
+ this.contributor = contributor;
+ }
+
+ public List getCoverage() {
+ return coverage;
+ }
+
+ public void setCoverage(List coverage) {
+ this.coverage = coverage;
+ }
+
+ public AccessRight getBestaccessright() {
+ return bestaccessright;
+ }
+
+ public void setBestaccessright(AccessRight bestaccessright) {
+ this.bestaccessright = bestaccessright;
+ }
+
+ public List getInstance() {
+ return instance;
+ }
+
+ public void setInstance(List instance) {
+ this.instance = instance;
+ }
+
+ public List getDocumentationUrl() {
+ return documentationUrl;
+ }
+
+ public void setDocumentationUrl(List documentationUrl) {
+ this.documentationUrl = documentationUrl;
+ }
+
+ public String getCodeRepositoryUrl() {
+ return codeRepositoryUrl;
+ }
+
+ public void setCodeRepositoryUrl(String codeRepositoryUrl) {
+ this.codeRepositoryUrl = codeRepositoryUrl;
+ }
+
+ public String getProgrammingLanguage() {
+ return programmingLanguage;
+ }
+
+ public void setProgrammingLanguage(String programmingLanguage) {
+ this.programmingLanguage = programmingLanguage;
+ }
+
+ public List getContactperson() {
+ return contactperson;
+ }
+
+ public void setContactperson(List contactperson) {
+ this.contactperson = contactperson;
+ }
+
+ public List getContactgroup() {
+ return contactgroup;
+ }
+
+ public void setContactgroup(List contactgroup) {
+ this.contactgroup = contactgroup;
+ }
+
+ public List getTool() {
+ return tool;
+ }
+
+ public void setTool(List tool) {
+ this.tool = tool;
+ }
+
+ public String getSize() {
+ return size;
+ }
+
+ public void setSize(String size) {
+ this.size = size;
+ }
+
+ public String getVersion() {
+ return version;
+ }
+
+ public void setVersion(String version) {
+ this.version = version;
+ }
+
+ public List getGeolocation() {
+ return geolocation;
+ }
+
+ public void setGeolocation(List geolocation) {
+ this.geolocation = geolocation;
+ }
+}
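For orientation, the following is an illustrative sketch (not part of the patch) of how a dumped Result could be populated and serialized by a consumer of this schema. The class name ResultExample, all literal values, and the use of Jackson's ObjectMapper are assumptions for the sketch; only the Result and Qualifier classes and their setters come from the code above.

    import java.util.Arrays;

    import com.fasterxml.jackson.databind.ObjectMapper;

    import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
    import eu.dnetlib.dhp.schema.dump.oaf.Result;

    public class ResultExample {
        public static void main(String[] args) throws Exception {
            // hypothetical values, used only to show the shape of a dumped record
            Result r = new Result();
            r.setId("50|doi_________::0000000000000000000000000000000a");
            r.setType("publication");
            r.setMaintitle("An example title");
            r.setLanguage(Qualifier.newInstance("eng", "English"));
            r.setDescription(Arrays.asList("A short abstract."));
            r.setPublicationdate("2020-01-01");

            // prints the JSON representation of the dumped record
            System.out.println(new ObjectMapper().writeValueAsString(r));
        }
    }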
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Subject.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Subject.java
new file mode 100644
index 000000000..5c4bbef3c
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Subject.java
@@ -0,0 +1,34 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+
+/**
+ * To represent keywords associated to the result. It has two parameters: - subject of type
+ * eu.dnetlib.dhp.schema.dump.oaf.ControlledField to describe the subject. It is mapped as: - schema it corresponds to
+ * qualifier.classid of the dumped subject - value it corresponds to the subject value - provenance of type
+ * eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
+ * is not null. In this case: - provenance corresponds to dataInfo.provenanceaction.classname - trust corresponds to
+ * dataInfo.trust
+ */
+public class Subject implements Serializable {
+ private ControlledField subject;
+ private Provenance provenance;
+
+ public ControlledField getSubject() {
+ return subject;
+ }
+
+ public void setSubject(ControlledField subject) {
+ this.subject = subject;
+ }
+
+ public Provenance getProvenance() {
+ return provenance;
+ }
+
+ public void setProvenance(Provenance provenance) {
+ this.provenance = provenance;
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java
new file mode 100644
index 000000000..8c748e103
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf.community;
+
+import java.util.List;
+
+import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.dump.oaf.Result;
+
+/**
+ * extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
+ * List to store the list of projects related to the result. The
+ * information is added after the result is mapped to the external model - context of type
+ * List to store information about the RC RI related to the result.
+ * For each context in the result represented in the internal model one context in the external model is produced -
+ * collectedfrom of type List to store information about the sources from which
+ * the record has been collected. For each collectedfrom in the result represented in the internal model one
+ * collectedfrom in the external model is produced
+ */
+public class CommunityResult extends Result {
+
+ private List projects;
+
+ private List context;
+
+ protected List collectedfrom;
+
+ public List getCollectedfrom() {
+ return collectedfrom;
+ }
+
+ public void setCollectedfrom(List collectedfrom) {
+ this.collectedfrom = collectedfrom;
+ }
+
+ public List getProjects() {
+ return projects;
+ }
+
+ public void setProjects(List projects) {
+ this.projects = projects;
+ }
+
+ public List getContext() {
+ return context;
+ }
+
+ public void setContext(List context) {
+ this.context = context;
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Context.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Context.java
new file mode 100644
index 000000000..3ad692b30
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Context.java
@@ -0,0 +1,40 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf.community;
+
+import java.util.List;
+import java.util.Objects;
+
+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
+
+/**
+ * Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
+ * OpenAIRE. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a parameter provenance of type
+ * List to store the provenances of the association between the result and
+ * the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
+ * to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
+ * will be used as value for code - label it corresponds to the label associated to the id. The information is taken
+ * from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the context element of the
+ * result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
+ * instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
+ * dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
+ */
+public class Context extends Qualifier {
+ private List provenance;
+
+ public List getProvenance() {
+ return provenance;
+ }
+
+ public void setProvenance(List provenance) {
+ this.provenance = provenance;
+ }
+
+ @Override
+ public int hashCode() {
+ // String.concat returns a new string, so a StringBuilder is needed to actually accumulate the provenance values
+ StringBuilder sb = new StringBuilder();
+ this.provenance.forEach(p -> sb.append(p.toString()));
+ return Objects.hash(getCode(), getLabel(), sb.toString());
+ }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Funder.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Funder.java
new file mode 100644
index 000000000..b795fd100
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Funder.java
@@ -0,0 +1,52 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf.community;
+
+import java.io.Serializable;
+
+/**
+ * To store information about the funder funding the project related to the result. It has the following parameters: -
+ * shortName of type String to store the funder short name (e.g. AKA). - name of type String to store the funder name
+ * (e.g. Academy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to
+ * store the jurisdiction of the funder
+ */
+public class Funder implements Serializable {
+ private String shortName;
+
+ private String name;
+
+ private String fundingStream;
+
+ private String jurisdiction;
+
+ public String getJurisdiction() {
+ return jurisdiction;
+ }
+
+ public void setJurisdiction(String jurisdiction) {
+ this.jurisdiction = jurisdiction;
+ }
+
+ public String getShortName() {
+ return shortName;
+ }
+
+ public void setShortName(String shortName) {
+ this.shortName = shortName;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getFundingStream() {
+ return fundingStream;
+ }
+
+ public void setFundingStream(String fundingStream) {
+ this.fundingStream = fundingStream;
+ }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Project.java
new file mode 100644
index 000000000..7e23a1311
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/community/Project.java
@@ -0,0 +1,88 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf.community;
+
+import java.io.Serializable;
+
+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+
+/**
+ * To store information about the project related to the result. This information is not directly mapped from the result
+ * represented in the internal model because it is not there. The mapped result will be enriched with project
+ * information derived by relation between results and projects. Project class has the following parameters: - id of
+ * type String to store the OpenAIRE id for the Project - code of type String to store the grant agreement - acronym of
+ * type String to store the acronym for the project - title of type String to store the title of the project - funder of
+ * type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information about the funder funding the project -
+ * provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store information about the provenance of the
+ * association between the result and the project
+ */
+public class Project implements Serializable {
+
+ private String id;// OpenAIRE id
+ private String code;
+
+ private String acronym;
+
+ private String title;
+
+ private Funder funder;
+
+ private Provenance provenance;
+
+ public Provenance getProvenance() {
+ return provenance;
+ }
+
+ public void setProvenance(Provenance provenance) {
+ this.provenance = provenance;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public String getCode() {
+ return code;
+ }
+
+ public void setCode(String code) {
+ this.code = code;
+ }
+
+ public String getAcronym() {
+ return acronym;
+ }
+
+ public void setAcronym(String acronym) {
+ this.acronym = acronym;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public void setTitle(String title) {
+ this.title = title;
+ }
+
+ public Funder getFunder() {
+ return funder;
+ }
+
+ public void setFunder(Funder funders) {
+ this.funder = funders;
+ }
+
+ public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
+ Project project = new Project();
+ project.setAcronym(acronym);
+ project.setCode(code);
+ project.setFunder(funder);
+ project.setId(id);
+ project.setTitle(title);
+ return project;
+ }
+}
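As a quick usage note (not part of the patch), here is a hedged sketch of how a Project entry could be built with the factory methods above and attached to a CommunityResult. Identifiers, the funder values, and the class name ProjectExample are invented for illustration.

    import java.util.Arrays;

    import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
    import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
    import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
    import eu.dnetlib.dhp.schema.dump.oaf.community.Project;

    public class ProjectExample {
        public static void main(String[] args) {
            // hypothetical funder
            Funder funder = new Funder();
            funder.setShortName("EC");
            funder.setName("European Commission");
            funder.setFundingStream("H2020");
            funder.setJurisdiction("EU");

            // hypothetical project id and grant agreement code
            Project project = Project
                .newInstance(
                    "40|corda__h2020::0000000000000000000000000000000a",
                    "123456",
                    "EXMPL",
                    "An example project",
                    funder);
            project.setProvenance(Provenance.newInstance("Harvested", "0.9"));

            // the project list is attached to the community-oriented view of the result
            CommunityResult result = new CommunityResult();
            result.setProjects(Arrays.asList(project));
        }
    }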
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java
new file mode 100644
index 000000000..35cc60c1c
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java
@@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf.graph;
+
+import java.io.Serializable;
+
+public class Constants implements Serializable {
+ // collectedFrom goes with isProvidedBy -> taken from ModelSupport
+
+ public static final String HOSTED_BY = "isHostedBy";
+ public static final String HOSTS = "hosts";
+
+ // community result uses isrelatedto
+
+ public static final String RESULT_ENTITY = "result";
+ public static final String DATASOURCE_ENTITY = "datasource";
+ public static final String CONTEXT_ENTITY = "context";
+
+ public static final String CONTEXT_ID = "60";
+ public static final String CONTEXT_NS_PREFIX = "context____";
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Datasource.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Datasource.java
new file mode 100644
index 000000000..6b2b7b1ab
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/graph/Datasource.java
@@ -0,0 +1,316 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf.graph;
+
+import java.io.Serializable;
+import java.util.List;
+
+import eu.dnetlib.dhp.schema.dump.oaf.Container;
+import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
+import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
+
+/**
+ * To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
+ * id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
+ * represented in the internal model - originalId of type List to store the list of original ids associated to
+ * the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
+ * null values are filtered out - pid of type List to store the
+ * persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
+ * in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
+ * the internal model - value corresponds to pid.value of the datasource represented in the internal model -
+ * datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
+ * pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
+ * corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
+ * datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
+ * store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
+ * It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
+ * of type String to store the official name of the datasource. It corresponds to officialname.value of the datasource
+ * represented in the internal model - englishname of type String to store the English name of the datasource. It
+ * corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
+ * store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
+ * the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
+ * logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the date
+ * of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
+ * datasource represented in the internal model - description of type String to store the description for the
+ * datasource. It corresponds to description.value of the datasource represented in the internal model
+ */
+public class Datasource implements Serializable {
+
+ private String id; // string
+
+ private List originalId; // list string
+
+ private List pid; // list
+
+ private ControlledField datasourcetype; // value
+
+ private String openairecompatibility; // value
+
+ private String officialname; // string
+
+ private String englishname; // string
+
+ private String websiteurl; // string
+
+ private String logourl; // string
+
+ private String dateofvalidation; // string
+
+ private String description; // description
+
+ private List subjects; // List
+
+ // opendoar specific fields (od*)
+
+ private List languages; // odlanguages List
+
+ private List contenttypes; // odcontent types List
+
+ // re3data fields
+ private String releasestartdate; // string
+
+ private String releaseenddate; // string
+
+ private String missionstatementurl; // string
+
+ // {open, restricted or closed}
+ private String accessrights; // databaseaccesstype string
+
+ // {open, restricted or closed}
+ private String uploadrights; // datauploadtype string
+
+ // {feeRequired, registration, other}
+ private String databaseaccessrestriction; // string
+
+ // {feeRequired, registration, other}
+ private String datauploadrestriction; // string
+
+ private Boolean versioning; // boolean
+
+ private String citationguidelineurl; // string
+
+ // {yes, no, unknown}
+
+ private String pidsystems; // string
+
+ private String certificates; // string
+
+ private List
diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml
index 5ddcda3fa..a0a334e3c 100644
--- a/dhp-workflows/dhp-graph-mapper/pom.xml
+++ b/dhp-workflows/dhp-graph-mapper/pom.xml
@@ -42,6 +42,12 @@
+
+
+ org.apache.commons
+ commons-compress
+
+
commons-io
commons-io
@@ -59,6 +65,12 @@
org.apache.spark
spark-hive_2.11
test
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
@@ -92,14 +104,21 @@
org.postgresql
postgresql
+
+
+ org.apache.httpcomponents
+ httpmime
+
+
+ com.github.victools
+ jsonschema-generator
+ test
+
org.json4s
json4s-jackson_2.11
- 3.5.3
-
-
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
index 7091d9740..ae1b37906 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.java
@@ -90,6 +90,7 @@ public class CleanGraphSparkJob {
final CleaningRuleMap mapping = CleaningRuleMap.create(vocs);
readTableFromPath(spark, inputPath, clazz)
+ .map((MapFunction) value -> fixVocabularyNames(value), Encoders.bean(clazz))
.map((MapFunction) value -> OafCleaner.apply(value, mapping), Encoders.bean(clazz))
.map((MapFunction) value -> fixDefaults(value), Encoders.bean(clazz))
.write()
@@ -98,6 +99,65 @@ public class CleanGraphSparkJob {
.json(outputPath);
}
+ protected static <T extends Oaf> T fixVocabularyNames(T value) {
+ if (value instanceof Datasource) {
+ // nothing to clean here
+ } else if (value instanceof Project) {
+ // nothing to clean here
+ } else if (value instanceof Organization) {
+ Organization o = (Organization) value;
+ if (Objects.nonNull(o.getCountry())) {
+ fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE);
+ }
+ } else if (value instanceof Relation) {
+ // nothing to clean here
+ } else if (value instanceof Result) {
+
+ Result r = (Result) value;
+
+ fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES);
+ fixVocabName(r.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE);
+ fixVocabName(r.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES);
+
+ if (Objects.nonNull(r.getSubject())) {
+ r.getSubject().forEach(s -> fixVocabName(s.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES));
+ }
+ if (Objects.nonNull(r.getInstance())) {
+ for (Instance i : r.getInstance()) {
+ fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES);
+ fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS);
+ }
+ }
+ if (Objects.nonNull(r.getAuthor())) {
+ r.getAuthor().forEach(a -> {
+ if (Objects.nonNull(a.getPid())) {
+ a.getPid().forEach(p -> {
+ fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
+ });
+ }
+ });
+ }
+ if (value instanceof Publication) {
+
+ } else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
+
+ } else if (value instanceof OtherResearchProduct) {
+
+ } else if (value instanceof Software) {
+
+ }
+ }
+
+ return value;
+ }
+
+ private static void fixVocabName(Qualifier q, String vocabularyName) {
+ if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
+ q.setSchemeid(vocabularyName);
+ q.setSchemename(vocabularyName);
+ }
+ }
+
protected static T fixDefaults(T value) {
if (value instanceof Datasource) {
// nothing to clean here
@@ -113,6 +173,9 @@ public class CleanGraphSparkJob {
} else if (value instanceof Result) {
Result r = (Result) value;
+ if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
+ r.setPublisher(null);
+ }
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
r
.setLanguage(
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java
new file mode 100644
index 000000000..a466cf074
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java
@@ -0,0 +1,43 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.util.Map;
+
+import com.google.common.collect.Maps;
+
+public class Constants {
+
+ public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
+ public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();
+
+ public static final String INFERRED = "Inferred by OpenAIRE";
+
+ public static final String HARVESTED = "Harvested";
+ public static final String DEFAULT_TRUST = "0.9";
+ public static final String USER_CLAIM = "Linked by user";
+
+ public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
+
+ public static String ZENODO_COMMUNITY_PREFIX = "https://zenodo.org/communities/";
+
+ public static String RESEARCH_COMMUNITY = "Research Community";
+
+ public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
+
+ public static String ORCID = "orcid";
+
+ static {
+ accessRightsCoarMap.put("OPEN", "c_abf2");
+ accessRightsCoarMap.put("RESTRICTED", "c_16ec");
+ accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
+ accessRightsCoarMap.put("CLOSED", "c_14cb");
+ accessRightsCoarMap.put("EMBARGO", "c_f1cf");
+ }
+
+ static {
+ coarCodeLabelMap.put("c_abf2", "OPEN");
+ coarCodeLabelMap.put("c_16ec", "RESTRICTED");
+ coarCodeLabelMap.put("c_14cb", "CLOSED");
+ coarCodeLabelMap.put("c_f1cf", "EMBARGO");
+ }
+}
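A small sketch (not part of the patch) of how these lookup maps are meant to be used. It mirrors the translation from internal access-right class ids to COAR codes performed later in ResultMapper; the class name AccessRightExample and the input literal are assumptions, while AccessRight.newInstance with three arguments is the factory used elsewhere in this patch.

    import eu.dnetlib.dhp.oa.graph.dump.Constants;
    import eu.dnetlib.dhp.schema.dump.oaf.AccessRight;

    public class AccessRightExample {
        public static void main(String[] args) {
            String internalClassid = "OPEN"; // classid as found in the internal model

            if (Constants.accessRightsCoarMap.containsKey(internalClassid)) {
                // translate the internal classid into the COAR code and label
                String code = Constants.accessRightsCoarMap.get(internalClassid); // "c_abf2"
                AccessRight accessRight = AccessRight
                    .newInstance(code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA);
                System.out.println(code + " -> " + Constants.coarCodeLabelMap.get(code));
            }
        }
    }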
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java
new file mode 100644
index 000000000..c97d2d72a
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/DumpProducts.java
@@ -0,0 +1,106 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+
+import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
+import eu.dnetlib.dhp.schema.oaf.*;
+
+/**
+ * It fires the execution of the actual dump for result entities. If the dump is for RC/RI products, it checks that
+ * each result belongs to at least one RC/RI before asking for its mapping.
+ */
+public class DumpProducts implements Serializable {
+
+ public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
+ Class<? extends OafEntity> inputClazz,
+ Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result> outputClazz,
+ boolean graph) {
+
+ SparkConf conf = new SparkConf();
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, graph);
+ });
+ }
+
+ public static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(
+ SparkSession spark,
+ String inputPath,
+ String outputPath,
+ String communityMapPath,
+ Class<I> inputClazz,
+ Class<O> outputClazz,
+ boolean graph) {
+
+ CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
+
+ Utils
+ .readPath(spark, inputPath, inputClazz)
+ .map(value -> execMap(value, communityMap, graph), Encoders.bean(outputClazz))
+ .filter(Objects::nonNull)
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath);
+
+ }
+
+ private static <I extends OafEntity, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O execMap(I value,
+ CommunityMap communityMap,
+ boolean graph) {
+
+ Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
+ if (odInfo.isPresent()) {
+ if (odInfo.get().getDeletedbyinference()) {
+ return null;
+ }
+ } else {
+ return null;
+ }
+
+ if (!graph) {
+ Set communities = communityMap.keySet();
+
+ Optional<List<Context>> inputContext = Optional
+ .ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
+ if (!inputContext.isPresent()) {
+ return null;
+ }
+ List toDumpFor = inputContext.get().stream().map(c -> {
+ if (communities.contains(c.getId())) {
+ return c.getId();
+ }
+ if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
+ return c.getId().substring(0, 3);
+ }
+ return null;
+ }).filter(Objects::nonNull).collect(Collectors.toList());
+ if (toDumpFor.size() == 0) {
+ return null;
+ }
+ }
+ return (O) ResultMapper.map(value, communityMap, graph);
+
+ }
+}
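To make the entry point concrete, a hedged sketch of a driver invoking the dump for publications of the research communities follows (not part of the patch). The paths and the class name DumpPublicationsDriver are placeholders, and the spark-submit/Oozie wiring that normally supplies these parameters is omitted; the call signature is the run method defined above.

    import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
    import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
    import eu.dnetlib.dhp.schema.oaf.Publication;

    public class DumpPublicationsDriver {
        public static void main(String[] args) {
            DumpProducts dump = new DumpProducts();

            // placeholder HDFS paths; the real values come from the workflow parameters
            dump
                .run(
                    Boolean.FALSE,              // isSparkSessionManaged
                    "/tmp/graph/publication",   // inputPath (internal model)
                    "/tmp/dump/publication",    // outputPath (dumped model)
                    "/tmp/dump/communityMap",   // communityMapPath
                    Publication.class,
                    CommunityResult.class,
                    false);                     // graph = false -> community dump
        }
    }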
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java
new file mode 100644
index 000000000..199960104
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java
@@ -0,0 +1,114 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.*;
+
+import org.apache.commons.compress.archivers.ar.ArArchiveEntry;
+import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+
+public class MakeTar implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(MakeTar.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ MakeTar.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ final String outputPath = parser.get("hdfsPath");
+ log.info("hdfsPath: {}", outputPath);
+
+ final String hdfsNameNode = parser.get("nameNode");
+ log.info("nameNode: {}", hdfsNameNode);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("input path : {}", inputPath);
+
+ Configuration conf = new Configuration();
+ conf.set("fs.defaultFS", hdfsNameNode);
+
+ FileSystem fileSystem = FileSystem.get(conf);
+
+ makeTArArchive(fileSystem, inputPath, outputPath);
+
+ }
+
+ public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath) throws IOException {
+
+ RemoteIterator<LocatedFileStatus> dir_iterator = fileSystem.listLocatedStatus(new Path(inputPath));
+
+ while (dir_iterator.hasNext()) {
+ LocatedFileStatus fileStatus = dir_iterator.next();
+
+ Path p = fileStatus.getPath();
+ String p_string = p.toString();
+ String entity = p_string.substring(p_string.lastIndexOf("/") + 1);
+
+ write(fileSystem, p_string, outputPath + "/" + entity + ".tar", entity);
+ }
+
+ }
+
+ private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
+ throws IOException {
+
+ Path hdfsWritePath = new Path(outputPath);
+ FSDataOutputStream fsDataOutputStream = null;
+ if (fileSystem.exists(hdfsWritePath)) {
+ fileSystem.delete(hdfsWritePath, true);
+
+ }
+ fsDataOutputStream = fileSystem.create(hdfsWritePath);
+
+ TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
+
+ RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
+ .listFiles(
+ new Path(inputPath), true);
+
+ while (fileStatusListIterator.hasNext()) {
+ LocatedFileStatus fileStatus = fileStatusListIterator.next();
+
+ Path p = fileStatus.getPath();
+ String p_string = p.toString();
+ if (!p_string.endsWith("_SUCCESS")) {
+ String name = p_string.substring(p_string.lastIndexOf("/") + 1);
+ TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name + ".json.gz");
+ entry.setSize(fileStatus.getLen());
+ ar.putArchiveEntry(entry);
+
+ InputStream is = fileSystem.open(fileStatus.getPath());
+
+ BufferedInputStream bis = new BufferedInputStream(is);
+
+ int count;
+ byte data[] = new byte[1024];
+ while ((count = bis.read(data, 0, data.length)) != -1) {
+ ar.write(data, 0, count);
+ }
+ bis.close();
+ ar.closeArchiveEntry();
+
+ }
+
+ }
+
+ ar.close();
+ }
+
+}
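A hedged usage sketch (not part of the patch): packaging a dumped directory into per-entity tar files on HDFS via makeTArArchive. The name node URL, paths, and the class name MakeTarExample are placeholders.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    import eu.dnetlib.dhp.oa.graph.dump.MakeTar;

    public class MakeTarExample {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://namenode.example.org:8020"); // placeholder name node

            FileSystem fileSystem = FileSystem.get(conf);

            // each first-level directory under the source path becomes <entity>.tar in the target path
            MakeTar.makeTArArchive(fileSystem, "/tmp/dump/publication", "/tmp/dump/tar");
        }
    }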
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java
new file mode 100644
index 000000000..d118accba
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java
@@ -0,0 +1,58 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.StringReader;
+import java.util.List;
+
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.Element;
+import org.dom4j.io.SAXReader;
+
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+public class QueryInformationSystem {
+
+ private ISLookUpService isLookUp;
+
+ private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
+ " and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
+ +
+ " return " +
+ "<community> " +
+ "{$x//CONFIGURATION/context/@id}" +
+ "{$x//CONFIGURATION/context/@label}" +
+ "</community>";
+
+ public CommunityMap getCommunityMap()
+ throws ISLookUpException, DocumentException {
+ return getMap(isLookUp.quickSearchProfile(XQUERY));
+
+ }
+
+ public ISLookUpService getIsLookUp() {
+ return isLookUp;
+ }
+
+ public void setIsLookUp(ISLookUpService isLookUpService) {
+ this.isLookUp = isLookUpService;
+ }
+
+ private CommunityMap getMap(List<String> communityMap) throws DocumentException {
+ final CommunityMap map = new CommunityMap();
+
+ for (String xml : communityMap) {
+ final Document doc;
+ doc = new SAXReader().read(new StringReader(xml));
+ Element root = doc.getRootElement();
+ map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
+ }
+
+ return map;
+ }
+
+}
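A hedged sketch (not part of the patch) of how the community map could be obtained from the information system. The service URL is a placeholder, and ISLookupClientFactory is assumed to be available from the dhp-common utilities; it is not shown in this patch.

    import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
    import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
    import eu.dnetlib.dhp.utils.ISLookupClientFactory;
    import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

    public class CommunityMapExample {
        public static void main(String[] args) throws Exception {
            String isLookUpUrl = "http://services.openaire.eu/is/services/isLookUp"; // placeholder URL

            // assumption: the lookup client factory from dhp-common resolves the service endpoint
            ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl);

            QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
            queryInformationSystem.setIsLookUp(isLookUpService);

            // community id -> label pairs extracted from the context profiles
            CommunityMap communityMap = queryInformationSystem.getCommunityMap();
            System.out.println(communityMap);
        }
    }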
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java
new file mode 100644
index 000000000..41142d285
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java
@@ -0,0 +1,522 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.dump.oaf.*;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+import eu.dnetlib.dhp.schema.dump.oaf.community.Context;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.Journal;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class ResultMapper implements Serializable {
+
+ public static <I extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
+ I in, Map<String, String> communityMap, boolean graph) {
+
+ Result out;
+ if (graph) {
+ out = new Result();
+ } else {
+ out = new CommunityResult();
+ }
+
+ eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
+ Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
+ if (ort.isPresent()) {
+ switch (ort.get().getClassid()) {
+ case "publication":
+ Optional<Journal> journal = Optional
+ .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
+ if (journal.isPresent()) {
+ Journal j = journal.get();
+ Container c = new Container();
+ c.setConferencedate(j.getConferencedate());
+ c.setConferenceplace(j.getConferenceplace());
+ c.setEdition(j.getEdition());
+ c.setEp(j.getEp());
+ c.setIss(j.getIss());
+ c.setIssnLinking(j.getIssnLinking());
+ c.setIssnOnline(j.getIssnOnline());
+ c.setIssnPrinted(j.getIssnPrinted());
+ c.setName(j.getName());
+ c.setSp(j.getSp());
+ c.setVol(j.getVol());
+ out.setContainer(c);
+ out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
+ }
+ break;
+ case "dataset":
+ eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
+ Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
+ Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
+
+ out
+ .setGeolocation(
+ Optional
+ .ofNullable(id.getGeolocation())
+ .map(
+ igl -> igl
+ .stream()
+ .filter(Objects::nonNull)
+ .map(gli -> {
+ GeoLocation gl = new GeoLocation();
+ gl.setBox(gli.getBox());
+ gl.setPlace(gli.getPlace());
+ gl.setPoint(gli.getPoint());
+ return gl;
+ })
+ .collect(Collectors.toList()))
+ .orElse(null));
+
+ out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
+ break;
+ case "software":
+
+ eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
+ Optional
+ .ofNullable(is.getCodeRepositoryUrl())
+ .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
+ Optional
+ .ofNullable(is.getDocumentationUrl())
+ .ifPresent(
+ value -> out
+ .setDocumentationUrl(
+ value
+ .stream()
+ .map(v -> v.getValue())
+ .collect(Collectors.toList())));
+
+ Optional
+ .ofNullable(is.getProgrammingLanguage())
+ .ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));
+
+ out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
+ break;
+ case "other":
+
+ eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
+ out
+ .setContactgroup(
+ Optional
+ .ofNullable(ir.getContactgroup())
+ .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
+ .orElse(null));
+
+ out
+ .setContactperson(
+ Optional
+ .ofNullable(ir.getContactperson())
+ .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
+ .orElse(null));
+ out
+ .setTool(
+ Optional
+ .ofNullable(ir.getTool())
+ .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
+ .orElse(null));
+
+ out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
+
+ break;
+ }
+
+ Optional
+ .ofNullable(input.getAuthor())
+ .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
+
+ // I do not map Access Right UNKNOWN or OTHER
+
+ Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
+ if (oar.isPresent()) {
+ if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
+ String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
+ out
+ .setBestaccessright(
+ AccessRight
+ .newInstance(
+ code,
+ Constants.coarCodeLabelMap.get(code),
+ Constants.COAR_ACCESS_RIGHT_SCHEMA));
+ }
+ }
+
+ final List contributorList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getContributor())
+ .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
+ out.setContributor(contributorList);
+
+ // List countryList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getCountry())
+ .ifPresent(
+ value -> out
+ .setCountry(
+ value
+ .stream()
+ .map(
+ c -> {
+ if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
+ return null;
+ }
+ Country country = new Country();
+ country.setCode(c.getClassid());
+ country.setLabel(c.getClassname());
+ Optional
+ .ofNullable(c.getDataInfo())
+ .ifPresent(
+ provenance -> country
+ .setProvenance(
+ Provenance
+ .newInstance(
+ provenance
+ .getProvenanceaction()
+ .getClassname(),
+ c.getDataInfo().getTrust())));
+ return country;
+ })
+ .filter(Objects::nonNull)
+ .collect(Collectors.toList())));
+
+ // out.setCountry(countryList);
+
+ final List coverageList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getCoverage())
+ .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
+ out.setCoverage(coverageList);
+
+ out.setDateofcollection(input.getDateofcollection());
+
+ final List descriptionList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getDescription())
+ .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
+ out.setDescription(descriptionList);
+ Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
+ if (oStr.isPresent()) {
+ out.setEmbargoenddate(oStr.get().getValue());
+ }
+
+ final List formatList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getFormat())
+ .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
+ out.setFormat(formatList);
+ out.setId(input.getId());
+ out.setOriginalId(input.getOriginalId());
+
+ final List instanceList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getInstance())
+ .ifPresent(
+ inst -> inst
+ .stream()
+ .forEach(i -> instanceList.add(getInstance(i, graph))));
+ out
+ .setInstance(instanceList);
+
+ Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
+ if (oL.isPresent()) {
+ eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
+ out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
+ }
+ Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
+ if (oLong.isPresent()) {
+ out.setLastupdatetimestamp(oLong.get());
+ }
+ Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
+ if (otitle.isPresent()) {
+ List<StructuredProperty> iTitle = otitle
+ .get()
+ .stream()
+ .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
+ .collect(Collectors.toList());
+ if (iTitle.size() > 0) {
+ out.setMaintitle(iTitle.get(0).getValue());
+ }
+
+ iTitle = otitle
+ .get()
+ .stream()
+ .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
+ .collect(Collectors.toList());
+ if (iTitle.size() > 0) {
+ out.setSubtitle(iTitle.get(0).getValue());
+ }
+
+ }
+
+ List pids = new ArrayList<>();
+ Optional
+ .ofNullable(input.getPid())
+ .ifPresent(
+ value -> value
+ .stream()
+ .forEach(
+ p -> pids
+ .add(
+ ControlledField
+ .newInstance(p.getQualifier().getClassid(), p.getValue()))));
+ out.setPid(pids);
+ oStr = Optional.ofNullable(input.getDateofacceptance());
+ if (oStr.isPresent()) {
+ out.setPublicationdate(oStr.get().getValue());
+ }
+ oStr = Optional.ofNullable(input.getPublisher());
+ if (oStr.isPresent()) {
+ out.setPublisher(oStr.get().getValue());
+ }
+
+ List sourceList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getSource())
+ .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
+ // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
+ List subjectList = new ArrayList<>();
+ Optional
+ .ofNullable(input.getSubject())
+ .ifPresent(
+ value -> value
+ .forEach(s -> subjectList.add(getSubject(s))));
+
+ out.setSubjects(subjectList);
+
+ out.setType(input.getResulttype().getClassid());
+ }
+
+ if (!graph) {
+ ((CommunityResult) out)
+ .setCollectedfrom(
+ input
+ .getCollectedfrom()
+ .stream()
+ .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
+ .collect(Collectors.toList()));
+
+ Set communities = communityMap.keySet();
+ List contextList = Optional
+ .ofNullable(
+ input
+ .getContext())
+ .map(
+ value -> value
+ .stream()
+ .map(c -> {
+ String community_id = c.getId();
+ if (community_id.indexOf("::") > 0) {
+ community_id = community_id.substring(0, community_id.indexOf("::"));
+ }
+ if (communities.contains(community_id)) {
+ Context context = new Context();
+ context.setCode(community_id);
+ context.setLabel(communityMap.get(community_id));
+ Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
+ if (dataInfo.isPresent()) {
+ List provenance = new ArrayList<>();
+ provenance
+ .addAll(
+ dataInfo
+ .get()
+ .stream()
+ .map(
+ di -> Optional
+ .ofNullable(di.getProvenanceaction())
+ .map(
+ provenanceaction -> Provenance
+ .newInstance(
+ provenanceaction.getClassname(), di.getTrust()))
+ .orElse(null))
+ .filter(Objects::nonNull)
+ .collect(Collectors.toSet()));
+
+ context.setProvenance(getUniqueProvenance(provenance));
+ }
+ return context;
+ }
+ return null;
+ })
+ .filter(Objects::nonNull)
+ .collect(Collectors.toList()))
+ .orElse(new ArrayList<>());
+
+ if (contextList.size() > 0) {
+ Set hashValue = new HashSet<>();
+ List remainigContext = new ArrayList<>();
+ contextList.forEach(c -> {
+ if (!hashValue.contains(c.hashCode())) {
+ remainigContext.add(c);
+ hashValue.add(c.hashCode());
+ }
+ });
+ ((CommunityResult) out).setContext(remainigContext);
+ }
+ }
+ return out;
+
+ }
+
+ private static Instance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i, boolean graph) {
+
+ Instance instance = new Instance();
+
+ if (!graph) {
+ instance
+ .setCollectedfrom(
+ KeyValue
+ .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
+ instance
+ .setHostedby(
+ KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
+ }
+
+ Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> opAr = Optional
+ .ofNullable(i.getAccessright());
+ if (opAr.isPresent()) {
+ if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
+ String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
+ instance
+ .setAccessright(
+ AccessRight
+ .newInstance(
+ code,
+ Constants.coarCodeLabelMap.get(code),
+ Constants.COAR_ACCESS_RIGHT_SCHEMA));
+ }
+ }
+
+ Optional
+ .ofNullable(i.getLicense())
+ .ifPresent(value -> instance.setLicense(value.getValue()));
+ Optional
+ .ofNullable(i.getDateofacceptance())
+ .ifPresent(value -> instance.setPublicationdate(value.getValue()));
+ Optional
+ .ofNullable(i.getRefereed())
+ .ifPresent(value -> instance.setRefereed(value.getClassname()));
+ // .ifPresent(value -> instance.setRefereed(value.getValue()));
+ Optional
+ .ofNullable(i.getInstancetype())
+ .ifPresent(value -> instance.setType(value.getClassname()));
+ Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
+
+ return instance;
+ }
+
+ private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {
+ Provenance iProv = new Provenance();
+ // iProv.setProvenance(Constants.INFERRED);
+
+ Provenance hProv = new Provenance();
+ // hProv.setProvenance(Constants.HARVESTED);
+ Provenance lProv = new Provenance();
+
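+ // for each provenance type (harvested, inferred, user claim) keep only the entry with the highest trust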
+ for (Provenance p : provenance) {
+ switch (p.getProvenance()) {
+ case Constants.HARVESTED:
+ hProv = getHighestTrust(hProv, p);
+ break;
+ case Constants.INFERRED:
+ iProv = getHighestTrust(iProv, p);
+ // To be removed as soon as the new beta run has been done:
+ // this fixes the issue of the trust not being set during bulktagging
+ if (StringUtils.isEmpty(iProv.getTrust())) {
+ iProv.setTrust(Constants.DEFAULT_TRUST);
+ }
+ break;
+ case Constants.USER_CLAIM:
+ lProv = getHighestTrust(lProv, p);
+ break;
+ }
+
+ }
+
+ return Arrays
+ .asList(iProv, hProv, lProv)
+ .stream()
+ .filter(p -> !StringUtils.isEmpty(p.getProvenance()))
+ .collect(Collectors.toList());
+
+ }
+
+ private static Provenance getHighestTrust(Provenance hProv, Provenance p) {
+ if (StringUtils.isNoneEmpty(hProv.getTrust(), p.getTrust()))
+ return hProv.getTrust().compareTo(p.getTrust()) > 0 ? hProv : p;
+
+ return (StringUtils.isEmpty(p.getTrust()) && !StringUtils.isEmpty(hProv.getTrust())) ? hProv : p;
+
+ }
+
+ private static Subject getSubject(StructuredProperty s) {
+ Subject subject = new Subject();
+ subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
+ Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
+ if (di.isPresent()) {
+ Provenance p = new Provenance();
+ p.setProvenance(di.get().getProvenanceaction().getClassname());
+ p.setTrust(di.get().getTrust());
+ subject.setProvenance(p);
+ }
+
+ return subject;
+ }
+
+ private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
+ Author a = new Author();
+ a.setFullname(oa.getFullname());
+ a.setName(oa.getName());
+ a.setSurname(oa.getSurname());
+ a.setRank(oa.getRank());
+
+ Optional<List<StructuredProperty>> oPids = Optional
+ .ofNullable(oa.getPid());
+ if (oPids.isPresent()) {
+ Pid pid = getOrcid(oPids.get());
+ if (pid != null) {
+ a.setPid(pid);
+ }
+ }
+
+ return a;
+ }
+
+ private static Pid getOrcid(List<StructuredProperty> p) {
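+ // return the first pid of type ORCID found among the author's pids, with its provenance when available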
+ for (StructuredProperty pid : p) {
+ if (pid.getQualifier().getClassid().equals(Constants.ORCID)) {
+ Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
+ if (di.isPresent()) {
+ return Pid
+ .newInstance(
+ ControlledField
+ .newInstance(
+ pid.getQualifier().getClassid(),
+ pid.getValue()),
+ Provenance
+ .newInstance(
+ di.get().getProvenanceaction().getClassname(),
+ di.get().getTrust()));
+ } else {
+ return Pid
+ .newInstance(
+ ControlledField
+ .newInstance(
+ pid.getQualifier().getClassid(),
+ pid.getValue())
+
+ );
+ }
+
+ }
+ }
+ return null;
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java
new file mode 100644
index 000000000..6ac626518
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java
@@ -0,0 +1,84 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.dom4j.DocumentException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+
+/**
+ * This class connects to the Information Service at the isLookUpUrl received as parameter. It saves the information
+ * about the contexts that will guide the dump of the results. The information is saved as a HashMap: the key is the id
+ * of a community - research infrastructure/initiative - and the value is the label of the research community - research
+ * infrastructure/initiative.
+ */
+
+public class SaveCommunityMap implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
+ private final QueryInformationSystem queryInformationSystem;
+
+ private final Configuration conf;
+ private final BufferedWriter writer;
+
+ public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
+ conf = new Configuration();
+ conf.set("fs.defaultFS", hdfsNameNode);
+ FileSystem fileSystem = FileSystem.get(conf);
+ Path hdfsWritePath = new Path(hdfsPath);
+ FSDataOutputStream fsDataOutputStream = null;
+ if (fileSystem.exists(hdfsWritePath)) {
+ fileSystem.delete(hdfsWritePath);
+ }
+ fsDataOutputStream = fileSystem.create(hdfsWritePath);
+
+ queryInformationSystem = new QueryInformationSystem();
+ queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
+
+ writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
+
+ }
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SaveCommunityMap.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ final String nameNode = parser.get("nameNode");
+ log.info("nameNode: {}", nameNode);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ final String isLookUpUrl = parser.get("isLookUpUrl");
+ log.info("isLookUpUrl: {}", isLookUpUrl);
+
+ final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
+
+ scm.saveCommunityMap();
+
+ }
+
+ private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException {
+ writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap()));
+ writer.close();
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java
new file mode 100644
index 000000000..23784cd66
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java
@@ -0,0 +1,88 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.*;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
+import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+
+public class SendToZenodoHDFS implements Serializable {
+
+ private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class);
+
+ public static void main(final String[] args) throws Exception, MissingConceptDoiException {
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+ IOUtils
+ .toString(
+ SendToZenodoHDFS.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json")));
+
+ parser.parseArgument(args);
+
+ final String hdfsPath = parser.get("hdfsPath");
+ final String hdfsNameNode = parser.get("nameNode");
+ final String access_token = parser.get("accessToken");
+ final String connection_url = parser.get("connectionUrl");
+ final String metadata = parser.get("metadata");
+ final Boolean newDeposition = Boolean.valueOf(parser.get("newDeposition"));
+ final String concept_rec_id = Optional
+ .ofNullable(parser.get("conceptRecordId"))
+ .orElse(null);
+ final String communityMapPath = parser.get("communityMapPath");
+
+ Configuration conf = new Configuration();
+ conf.set("fs.defaultFS", hdfsNameNode);
+
+ FileSystem fileSystem = FileSystem.get(conf);
+
+ CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
+
+ RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
+ .listFiles(
+ new Path(hdfsPath), true);
+ ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connection_url, access_token);
+ if (newDeposition) {
+ zenodoApiClient.newDeposition();
+ } else {
+ if (concept_rec_id == null) {
+ throw new MissingConceptDoiException("No concept record id has been provided");
+ }
+ zenodoApiClient.newVersion(concept_rec_id);
+ }
+
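+ // iterate over every file under hdfsPath and upload each one (except the _SUCCESS markers) to the Zenodo deposition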
+ while (fileStatusListIterator.hasNext()) {
+ LocatedFileStatus fileStatus = fileStatusListIterator.next();
+
+ Path p = fileStatus.getPath();
+ String p_string = p.toString();
+ if (!p_string.endsWith("_SUCCESS")) {
+ // String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
+ String name = p_string.substring(p_string.lastIndexOf("/") + 1);
+ log.info("Sending information for community: " + name);
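+ // when the file name matches a community id, replace it with the community label taken from the community map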
+ if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
+ name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
+ }
+
+ FSDataInputStream inputStream = fileSystem.open(p);
+ zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
+
+ }
+
+ }
+
+ zenodoApiClient.sendMretadata(metadata);
+ zenodoApiClient.publish();
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java
new file mode 100644
index 000000000..c112c5c72
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java
@@ -0,0 +1,73 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.oa.graph.dump.graph.Constants;
+import eu.dnetlib.dhp.utils.DHPUtils;
+import eu.dnetlib.dhp.utils.ISLookupClientFactory;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+public class Utils {
+ public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ public static void removeOutputDir(SparkSession spark, String path) {
+ HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
+ }
+
+ public static <R> Dataset<R> readPath(
+ SparkSession spark, String inputPath, Class<R> clazz) {
+ return spark
+ .read()
+ .textFile(inputPath)
+ .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
+ }
+
+ public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
+ return ISLookupClientFactory.getLookUpService(isLookUpUrl);
+ }
+
+ public static String getContextId(String id) {
+
+ return String
+ .format(
+ "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
+ DHPUtils.md5(id));
+ }
+
+ public static CommunityMap getCommunityMap(SparkSession spark, String communityMapPath) {
+
+ return new Gson().fromJson(spark.read().textFile(communityMapPath).collectAsList().get(0), CommunityMap.class);
+
+ }
+
+ public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException {
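+ // read the whole serialized community map from HDFS and parse it as a JSON object mapping community id to label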
+ BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath))));
+ StringBuffer sb = new StringBuffer();
+ try {
+ String line;
+ while ((line = br.readLine()) != null) {
+ sb.append(line);
+ }
+ } finally {
+ br.close();
+
+ }
+
+ return new Gson().fromJson(sb.toString(), CommunityMap.class);
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunityMap.java
new file mode 100644
index 000000000..d45906337
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunityMap.java
@@ -0,0 +1,8 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.community;
+
+import java.io.Serializable;
+import java.util.HashMap;
+
+public class CommunityMap extends HashMap<String, String> implements Serializable {
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java
new file mode 100644
index 000000000..6be1befce
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java
@@ -0,0 +1,83 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.community;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+
+/**
+ * This class splits the dumped results according to the research community - research initiative/infrastructure they
+ * are related to. The information about the community is found in the element "context.id" of the result. Since the
+ * contexts found in a result are not necessarily associated to communities, a community map is provided to guide the
+ * splitting process. Note the repartition(1) just before writing the results related to a community: this is a choice
+ * due to uploading constraints (just one file for each community). As soon as a better solution is in place the
+ * repartition should be removed.
+ */
+public class CommunitySplit implements Serializable {
+
+ public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath) {
+ SparkConf conf = new SparkConf();
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ execSplit(spark, inputPath, outputPath, Utils.getCommunityMap(spark, communityMapPath).keySet());
+ });
+ }
+
+ private static void execSplit(SparkSession spark, String inputPath, String outputPath,
+ Set<String> communities) {
+
+ Dataset<CommunityResult> result = Utils
+ .readPath(spark, inputPath + "/publication", CommunityResult.class)
+ .union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
+ .union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
+ .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
+
+ communities
+ .stream()
+ .forEach(c -> printResult(c, result, outputPath));
+
+ }
+
+ private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
+ Dataset<CommunityResult> community_products = result
+ .filter(r -> containsCommunity(r, c));
+
+ try {
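+ // first() throws when the filtered dataset is empty: in that case nothing is written for this community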
+ community_products.first();
+ community_products
+ .write()
+ .option("compression", "gzip")
+ .mode(SaveMode.Overwrite)
+ .json(outputPath + "/" + c);
+ } catch (Exception e) {
+
+ }
+
+ }
+
+ private static boolean containsCommunity(CommunityResult r, String c) {
+ if (Optional.ofNullable(r.getContext()).isPresent()) {
+ return r
+ .getContext()
+ .stream()
+ .filter(con -> con.getCode().equals(c))
+ .collect(Collectors.toList())
+ .size() > 0;
+ }
+ return false;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/ResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/ResultProject.java
new file mode 100644
index 000000000..300af62f3
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/ResultProject.java
@@ -0,0 +1,28 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.community;
+
+import java.io.Serializable;
+import java.util.List;
+
+import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
+
+public class ResultProject implements Serializable {
+ private String resultId;
+ private List<Project> projectsList;
+
+ public String getResultId() {
+ return resultId;
+ }
+
+ public void setResultId(String resultId) {
+ this.resultId = resultId;
+ }
+
+ public List<Project> getProjectsList() {
+ return projectsList;
+ }
+
+ public void setProjectsList(List<Project> projectsList) {
+ this.projectsList = projectsList;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java
new file mode 100644
index 000000000..c4b89936f
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java
@@ -0,0 +1,62 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.community;
+
+import java.io.Serializable;
+import java.util.*;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Spark action to trigger the dump of results associated to a research community - research initiative/infrastructure.
+ * The actual dump is performed via the class DumpProducts, which is also used for the dump of the entire graph
+ */
+public class SparkDumpCommunityProducts implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkDumpCommunityProducts.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ final String resultClassName = parser.get("resultTableName");
+ log.info("resultTableName: {}", resultClassName);
+
+ String communityMapPath = parser.get("communityMapPath");
+
+ Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
+
+ DumpProducts dump = new DumpProducts();
+
+ dump
+ .run(
+ isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
+ false);
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java
new file mode 100644
index 000000000..6e0e059f3
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java
@@ -0,0 +1,185 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.community;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.io.StringReader;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.MapGroupsFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.Node;
+import org.dom4j.io.SAXReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+import eu.dnetlib.dhp.schema.dump.oaf.community.Funder;
+import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
+import eu.dnetlib.dhp.schema.oaf.DataInfo;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;
+
+/**
+ * Preparation of the Project information to be added to the dumped results. For each result associated to at least one
+ * Project, an instance of the ResultProject class is serialized. ResultProject contains the resultId and the list of
+ * Projects (as in eu.dnetlib.dhp.schema.dump.oaf.community.Project) it is associated to
+ */
+public class SparkPrepareResultProject implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(SparkPrepareResultProject.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkPrepareResultProject.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ SparkConf conf = new SparkConf();
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ prepareResultProjectList(spark, inputPath, outputPath);
+ });
+ }
+
+ private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) {
+ Dataset<Relation> relation = Utils
+ .readPath(spark, inputPath + "/relation", Relation.class)
+ .filter("dataInfo.deletedbyinference = false and relClass = 'produces'");
+ Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
+ .readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);
+
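+ // join projects with the 'produces' relations, group by the result id and collect the distinct projects for each result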
+ projects
+ .joinWith(relation, projects.col("id").equalTo(relation.col("source")))
+ .groupByKey(
+ (MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, String>) value -> value
+ ._2()
+ .getTarget(),
+ Encoders.STRING())
+ .mapGroups(
+ (MapGroupsFunction<String, Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, ResultProject>) (s,
+ it) -> {
+ Set<String> projectSet = new HashSet<>();
+ Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
+ ResultProject rp = new ResultProject();
+ rp.setResultId(first._2().getTarget());
+ eu.dnetlib.dhp.schema.oaf.Project p = first._1();
+ projectSet.add(p.getId());
+ Project ps = getProject(p);
+
+ List<Project> projList = new ArrayList<>();
+ projList.add(ps);
+ rp.setProjectsList(projList);
+ it.forEachRemaining(c -> {
+ eu.dnetlib.dhp.schema.oaf.Project op = c._1();
+ if (!projectSet.contains(op.getId())) {
+ projList
+ .add(getProject(op));
+
+ projectSet.add(op.getId());
+
+ }
+
+ });
+ return rp;
+ }, Encoders.bean(ResultProject.class))
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath);
+ }
+
+ private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op) {
+ Project p = Project
+ .newInstance(
+ op.getId(),
+ op.getCode().getValue(),
+ Optional
+ .ofNullable(op.getAcronym())
+ .map(a -> a.getValue())
+ .orElse(null),
+ Optional
+ .ofNullable(op.getTitle())
+ .map(v -> v.getValue())
+ .orElse(null),
+ Optional
+ .ofNullable(op.getFundingtree())
+ .map(
+ value -> value
+ .stream()
+ .map(ft -> getFunder(ft.getValue()))
+ .collect(Collectors.toList())
+ .get(0))
+ .orElse(null));
+
+ Optional<DataInfo> di = Optional.ofNullable(op.getDataInfo());
+ Provenance provenance = new Provenance();
+ if (di.isPresent()) {
+ provenance.setProvenance(di.get().getProvenanceaction().getClassname());
+ provenance.setTrust(di.get().getTrust());
+ p.setProvenance(provenance);
+ }
+
+ return p;
+
+ }
+
+ private static Funder getFunder(String fundingtree) {
+ // ["nsf_________::NSFNSFNational Science
+ // FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision
+ // of Computing and Communication FoundationsDivision of Computing and Communication
+ // Foundationsnsf_________::NSF::CISE/OADDirectorate for
+ // Computer & Information Science & EngineeringDirectorate for Computer &
+ // Information Science &
+ // Engineeringnsf:fundingStream"]
+ Funder f = new Funder();
+ final Document doc;
+ try {
+ doc = new SAXReader().read(new StringReader(fundingtree));
+ f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
+ f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
+ f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
+ for (Object o : doc.selectNodes("//funding_level_0")) {
+ List node = ((Node) o).selectNodes("./name");
+ f.setFundingStream(((Node) node.get(0)).getText());
+
+ }
+
+ return f;
+ } catch (DocumentException e) {
+ e.printStackTrace();
+ }
+ return f;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkSplitForCommunity.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkSplitForCommunity.java
new file mode 100644
index 000000000..b62bf18e7
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkSplitForCommunity.java
@@ -0,0 +1,50 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.community;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * Spark job to trigger the split of results associated to a research community - research initiative/infrastructure.
+ * The actual split is performed by the class CommunitySplit
+ */
+public class SparkSplitForCommunity implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(SparkSplitForCommunity.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkSplitForCommunity.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ final String communityMapPath = parser.get("communityMapPath");
+
+ CommunitySplit split = new CommunitySplit();
+ split.run(isSparkSessionManaged, inputPath, outputPath, communityMapPath);
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java
new file mode 100644
index 000000000..1276d8495
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java
@@ -0,0 +1,90 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.community;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+
+public class SparkUpdateProjectInfo implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(SparkUpdateProjectInfo.class);
+ public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkUpdateProjectInfo.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ final String preparedInfoPath = parser.get("preparedInfoPath");
+ log.info("preparedInfoPath: {}", preparedInfoPath);
+
+ SparkConf conf = new SparkConf();
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ extend(spark, inputPath, outputPath, preparedInfoPath);// , inputClazz);
+ });
+ }
+
+ private static void extend(
+ SparkSession spark,
+ String inputPath,
+ String outputPath,
+ String preparedInfoPath) {
+ Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
+ Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
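+ // left join the dumped results with the prepared result -> projects association and enrich the matching results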
+ result
+ .joinWith(
+ resultProject, result.col("id").equalTo(resultProject.col("resultId")),
+ "left")
+ .map(value -> {
+ CommunityResult r = value._1();
+ Optional.ofNullable(value._2()).ifPresent(rp -> {
+ r.setProjects(rp.getProjectsList());
+ });
+ return r;
+ }, Encoders.bean(CommunityResult.class))
+ .write()
+ .option("compression", "gzip")
+ .mode(SaveMode.Append)
+ .json(outputPath);
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Constants.java
new file mode 100644
index 000000000..4c1e1c08c
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Constants.java
@@ -0,0 +1,26 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.Serializable;
+
+public class Constants implements Serializable {
+
+ public static final String IS_HOSTED_BY = "isHostedBy";
+ public static final String HOSTS = "hosts";
+
+ public static final String IS_FUNDED_BY = "isFundedBy";
+ public static final String FUNDS = "funds";
+
+ public static final String FUNDINGS = "fundings";
+
+ public static final String RESULT_ENTITY = "result";
+ public static final String DATASOURCE_ENTITY = "datasource";
+ public static final String CONTEXT_ENTITY = "context";
+ public static final String ORGANIZATION_ENTITY = "organization";
+ public static final String PROJECT_ENTITY = "project";
+
+ public static final String CONTEXT_ID = "00";
+ public static final String CONTEXT_NS_PREFIX = "context_____";
+
+ // public static final String FUNDER_DS = "entityregistry::projects";
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/ContextInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/ContextInfo.java
new file mode 100644
index 000000000..7befaaf6f
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/ContextInfo.java
@@ -0,0 +1,84 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Deserialization of the information in the context needed to create Context Entities, and relations between context
+ * entities and datasources and projects
+ */
+public class ContextInfo implements Serializable {
+ private String id;
+ private String description;
+ private String type;
+ private String zenodocommunity;
+ private String name;
+ private List<String> projectList;
+ private List<String> datasourceList;
+ private List<String> subject;
+
+ public List<String> getSubject() {
+ return subject;
+ }
+
+ public void setSubject(List<String> subject) {
+ this.subject = subject;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+
+ public void setDescription(String description) {
+ this.description = description;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public void setType(String type) {
+ this.type = type;
+ }
+
+ public String getZenodocommunity() {
+ return zenodocommunity;
+ }
+
+ public void setZenodocommunity(String zenodocommunity) {
+ this.zenodocommunity = zenodocommunity;
+ }
+
+ public List<String> getProjectList() {
+ return projectList;
+ }
+
+ public void setProjectList(List<String> projectList) {
+ this.projectList = projectList;
+ }
+
+ public List<String> getDatasourceList() {
+ return datasourceList;
+ }
+
+ public void setDatasourceList(List<String> datasourceList) {
+ this.datasourceList = datasourceList;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateContextEntities.java
new file mode 100644
index 000000000..0f28438af
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateContextEntities.java
@@ -0,0 +1,105 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
+
+/**
+ * Writes the Context entities on HDFS. It queries the Information System at the lookup url provided as parameter and
+ * collects the general information for contexts of type community or ri. The general information is the id of the
+ * context, its label, the subjects associated to the context, its zenodo community, description and type. This
+ * information is used to create a new Context Entity
+ */
+public class CreateContextEntities implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
+ private final Configuration conf;
+ private final BufferedWriter writer;
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ CreateContextEntities.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ final String hdfsPath = parser.get("hdfsPath");
+ log.info("hdfsPath: {}", hdfsPath);
+
+ final String hdfsNameNode = parser.get("nameNode");
+ log.info("nameNode: {}", hdfsNameNode);
+
+ final String isLookUpUrl = parser.get("isLookUpUrl");
+ log.info("isLookUpUrl: {}", isLookUpUrl);
+
+ final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
+
+ log.info("Processing contexts...");
+ cce.execute(Process::getEntity, isLookUpUrl);
+
+ cce.close();
+
+ }
+
+ private void close() throws IOException {
+ writer.close();
+ }
+
+ public CreateContextEntities(String hdfsPath, String hdfsNameNode) throws IOException {
+ this.conf = new Configuration();
+ this.conf.set("fs.defaultFS", hdfsNameNode);
+ FileSystem fileSystem = FileSystem.get(this.conf);
+ Path hdfsWritePath = new Path(hdfsPath);
+ FSDataOutputStream fsDataOutputStream = null;
+ if (fileSystem.exists(hdfsWritePath)) {
+ fsDataOutputStream = fileSystem.append(hdfsWritePath);
+ } else {
+ fsDataOutputStream = fileSystem.create(hdfsWritePath);
+ }
+
+ this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
+
+ }
+
+ public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
+ throws Exception {
+
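+ // query the Information System for the community/RI context profiles and write one dump entity for each of them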
+ QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
+ queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
+
+ final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
+
+ queryInformationSystem.getContextInformation(consumer);
+ }
+
+ protected <R extends ResearchInitiative> void writeEntity(final R r) {
+ try {
+ writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
+ // log.info("writing context : {}", new Gson().toJson(r));
+ writer.newLine();
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateContextRelation.java
new file mode 100644
index 000000000..129077932
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateContextRelation.java
@@ -0,0 +1,124 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Optional;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
+import eu.dnetlib.dhp.schema.oaf.Datasource;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+
+/**
+ * Writes the set of new Relations between the contexts and the datasources. At the moment the relations between the
+ * contexts and the projects are not created because of the low coverage, in the profiles, of OpenAIRE ids related to
+ * projects
+ */
+public class CreateContextRelation implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
+ private final Configuration conf;
+ private final BufferedWriter writer;
+ private final QueryInformationSystem queryInformationSystem;
+
+ private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
+ private static final String CONTEX_RELATION_PROJECT = "projects";
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ CreateContextRelation.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String hdfsPath = parser.get("hdfsPath");
+ log.info("hdfsPath: {}", hdfsPath);
+
+ final String hdfsNameNode = parser.get("nameNode");
+ log.info("nameNode: {}", hdfsNameNode);
+
+ final String isLookUpUrl = parser.get("isLookUpUrl");
+ log.info("isLookUpUrl: {}", isLookUpUrl);
+
+ final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
+
+ log.info("Creating relation for datasource...");
+ cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));
+
+ log.info("Creating relations for projects... ");
+// cce
+// .execute(
+// Process::getRelation, CONTEX_RELATION_PROJECT,
+// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
+
+ cce.close();
+
+ }
+
+ private void close() throws IOException {
+ writer.close();
+ }
+
+ public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
+ throws IOException, ISLookUpException {
+ this.conf = new Configuration();
+ this.conf.set("fs.defaultFS", hdfsNameNode);
+
+ queryInformationSystem = new QueryInformationSystem();
+ queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
+ queryInformationSystem.execContextRelationQuery();
+
+ FileSystem fileSystem = FileSystem.get(this.conf);
+ Path hdfsWritePath = new Path(hdfsPath);
+ FSDataOutputStream fsDataOutputStream = null;
+ if (fileSystem.exists(hdfsWritePath)) {
+ fsDataOutputStream = fileSystem.append(hdfsWritePath);
+ } else {
+ fsDataOutputStream = fileSystem.create(hdfsWritePath);
+ }
+
+ this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
+
+ }
+
+ public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {
+
+ final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c));
+
+ queryInformationSystem.getContextRelation(consumer, category, prefix);
+ }
+
+ protected void writeEntity(final Relation r) {
+ try {
+ writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
+ writer.newLine();
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpGraphEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpGraphEntities.java
new file mode 100644
index 000000000..86421cff5
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpGraphEntities.java
@@ -0,0 +1,498 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.io.StringReader;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.Node;
+import org.dom4j.io.SAXReader;
+
+import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.dump.oaf.*;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.Journal;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+
+/**
+ * Dumps the entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
+ * Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below
+ */
+public class DumpGraphEntities implements Serializable {
+
+ public void run(Boolean isSparkSessionManaged,
+ String inputPath,
+ String outputPath,
+ Class<? extends OafEntity> inputClazz,
+ String communityMapPath) {
+
+ SparkConf conf = new SparkConf();
+
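+ // dispatch on the entity type encoded in the id prefix: 50 = result, 40 = project, 20 = organization, 10 = datasource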
+ switch (ModelSupport.idPrefixMap.get(inputClazz)) {
+ case "50":
+ DumpProducts d = new DumpProducts();
+ d
+ .run(
+ isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, Result.class,
+ true);
+ break;
+ case "40":
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ projectMap(spark, inputPath, outputPath, inputClazz);
+
+ });
+ break;
+ case "20":
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ organizationMap(spark, inputPath, outputPath, inputClazz);
+
+ });
+ break;
+ case "10":
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ datasourceMap(spark, inputPath, outputPath, inputClazz);
+
+ });
+ break;
+ }
+
+ }
+
+ private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
+ Class<E> inputClazz) {
+ Utils
+ .readPath(spark, inputPath, inputClazz)
+ .map(d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d), Encoders.bean(Datasource.class))
+ .filter(Objects::nonNull)
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath);
+ }
+
+ private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
+ Class<E> inputClazz) {
+ Utils
+ .readPath(spark, inputPath, inputClazz)
+ .map(p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p), Encoders.bean(Project.class))
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath);
+ }
+
+ private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
+ Datasource datasource = new Datasource();
+
+ datasource.setId(d.getId());
+
+ Optional
+ .ofNullable(d.getOriginalId())
+ .ifPresent(
+ oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
+
+ Optional
+ .ofNullable(d.getPid())
+ .ifPresent(
+ pids -> pids
+ .stream()
+ .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
+ .collect(Collectors.toList()));
+
+ Optional
+ .ofNullable(d.getDatasourcetype())
+ .ifPresent(
+ dsType -> datasource
+ .setDatasourcetype(ControlledField.newInstance(dsType.getClassid(), dsType.getClassname())));
+
+ Optional
+ .ofNullable(d.getOpenairecompatibility())
+ .ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
+
+ Optional
+ .ofNullable(d.getOfficialname())
+ .ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
+
+ Optional
+ .ofNullable(d.getEnglishname())
+ .ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
+
+ Optional
+ .ofNullable(d.getWebsiteurl())
+ .ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
+
+ Optional
+ .ofNullable(d.getLogourl())
+ .ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
+
+ Optional
+ .ofNullable(d.getDateofvalidation())
+ .ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
+
+ Optional
+ .ofNullable(d.getDescription())
+ .ifPresent(dex -> datasource.setDescription(dex.getValue()));
+
+ Optional
+ .ofNullable(d.getSubjects())
+ .ifPresent(
+ sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
+
+ Optional
+ .ofNullable(d.getOdpolicies())
+ .ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
+
+ Optional
+ .ofNullable(d.getOdlanguages())
+ .ifPresent(
+ langs -> datasource
+ .setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
+
+ Optional
+ .ofNullable(d.getOdcontenttypes())
+ .ifPresent(
+ ctypes -> datasource
+ .setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
+
+ Optional
+ .ofNullable(d.getReleasestartdate())
+ .ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
+
+ Optional
+ .ofNullable(d.getReleaseenddate())
+ .ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
+
+ Optional
+ .ofNullable(d.getMissionstatementurl())
+ .ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
+
+ Optional
+ .ofNullable(d.getDatabaseaccesstype())
+ .ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
+
+ Optional
+ .ofNullable(d.getDatauploadtype())
+ .ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
+
+ Optional
+ .ofNullable(d.getDatabaseaccessrestriction())
+ .ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
+
+ Optional
+ .ofNullable(d.getDatauploadrestriction())
+ .ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
+
+ Optional
+ .ofNullable(d.getVersioning())
+ .ifPresent(v -> datasource.setVersioning(v.getValue()));
+
+ Optional
+ .ofNullable(d.getCitationguidelineurl())
+ .ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
+
+ Optional
+ .ofNullable(d.getPidsystems())
+ .ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
+
+ Optional
+ .ofNullable(d.getCertificates())
+ .ifPresent(c -> datasource.setCertificates(c.getValue()));
+
+ Optional
+ .ofNullable(d.getPolicies())
+ .ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
+
+ Optional
+ .ofNullable(d.getJournal())
+ .ifPresent(j -> datasource.setJournal(getContainer(j)));
+
+ return datasource;
+
+ }
+
+ private static Container getContainer(Journal j) {
+ Container c = new Container();
+
+ Optional
+ .ofNullable(j.getName())
+ .ifPresent(n -> c.setName(n));
+
+ Optional
+ .ofNullable(j.getIssnPrinted())
+ .ifPresent(issnp -> c.setIssnPrinted(issnp));
+
+ Optional
+ .ofNullable(j.getIssnOnline())
+ .ifPresent(issno -> c.setIssnOnline(issno));
+
+ Optional
+ .ofNullable(j.getIssnLinking())
+ .ifPresent(isnl -> c.setIssnLinking(isnl));
+
+ Optional
+ .ofNullable(j.getEp())
+ .ifPresent(ep -> c.setEp(ep));
+
+ Optional
+ .ofNullable(j.getIss())
+ .ifPresent(iss -> c.setIss(iss));
+
+ Optional
+ .ofNullable(j.getSp())
+ .ifPresent(sp -> c.setSp(sp));
+
+ Optional
+ .ofNullable(j.getVol())
+ .ifPresent(vol -> c.setVol(vol));
+
+ Optional
+ .ofNullable(j.getEdition())
+ .ifPresent(edition -> c.setEdition(edition));
+
+ Optional
+ .ofNullable(j.getConferencedate())
+ .ifPresent(cdate -> c.setConferencedate(cdate));
+
+ Optional
+ .ofNullable(j.getConferenceplace())
+ .ifPresent(cplace -> c.setConferenceplace(cplace));
+
+ return c;
+ }
+
+ private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
+ Project project = new Project();
+
+ Optional
+ .ofNullable(p.getId())
+ .ifPresent(id -> project.setId(id));
+
+ Optional
+ .ofNullable(p.getWebsiteurl())
+ .ifPresent(w -> project.setWebsiteurl(w.getValue()));
+
+ Optional
+ .ofNullable(p.getCode())
+ .ifPresent(code -> project.setCode(code.getValue()));
+
+ Optional
+ .ofNullable(p.getAcronym())
+ .ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));
+
+ Optional
+ .ofNullable(p.getTitle())
+ .ifPresent(title -> project.setTitle(title.getValue()));
+
+ Optional
+ .ofNullable(p.getStartdate())
+ .ifPresent(sdate -> project.setStartdate(sdate.getValue()));
+
+ Optional
+ .ofNullable(p.getEnddate())
+ .ifPresent(edate -> project.setEnddate(edate.getValue()));
+
+ Optional
+ .ofNullable(p.getCallidentifier())
+ .ifPresent(cide -> project.setCallidentifier(cide.getValue()));
+
+ Optional
+ .ofNullable(p.getKeywords())
+ .ifPresent(key -> project.setKeywords(key.getValue()));
+
+ Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
+ Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
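+ // the open access mandate for publications is considered true when either oamandatepublications or ecsc39 is "true"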
+ boolean mandate = false;
+ if (omandate.isPresent()) {
+ if (omandate.get().getValue().equals("true")) {
+ mandate = true;
+ }
+ }
+ if (oecsc39.isPresent()) {
+ if (oecsc39.get().getValue().equals("true")) {
+ mandate = true;
+ }
+ }
+
+ project.setOpenaccessmandateforpublications(mandate);
+ project.setOpenaccessmandatefordataset(false);
+
+ Optional
+ .ofNullable(p.getEcarticle29_3())
+ .ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));
+
+ project
+ .setSubject(
+ Optional
+ .ofNullable(p.getSubjects())
+ .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
+ .orElse(new ArrayList<>()));
+
+ Optional
+ .ofNullable(p.getSummary())
+ .ifPresent(summary -> project.setSummary(summary.getValue()));
+
+ Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
+ Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
+ Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());
+
+ if (ocurrency.isPresent()) {
+ if (ofundedamount.isPresent()) {
+ if (ototalcost.isPresent()) {
+ project
+ .setGranted(
+ Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
+ } else {
+ project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
+ }
+ }
+ }
+
+ project
+ .setProgramme(
+ Optional
+ .ofNullable(p.getProgramme())
+ .map(
+ programme -> programme
+ .stream()
+ .map(pg -> Programme.newInstance(pg.getCode(), pg.getDescription()))
+ .collect(Collectors.toList()))
+ .orElse(new ArrayList<>()));
+
+ Optional<List<Field<String>>> ofundTree = Optional
+ .ofNullable(p.getFundingtree());
+ List<Funder> funList = new ArrayList<>();
+ if (ofundTree.isPresent()) {
+ for (Field<String> fundingtree : ofundTree.get()) {
+ funList.add(getFunder(fundingtree.getValue()));
+ }
+ }
+ project.setFunding(funList);
+
+ return project;
+ }
+
+ public static Funder getFunder(String fundingtree) throws DocumentException {
+ Funder f = new Funder();
+ final Document doc;
+
+ doc = new SAXReader().read(new StringReader(fundingtree));
+ f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
+ f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
+ f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
+ // f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());
+
+ String id = "";
+ String description = "";
+ // List fundings = new ArrayList<>();
+ int level = 0;
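+ // walk the funding_level_N nodes from level 0 upwards, keeping the id of the deepest level and concatenating the descriptions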
+ List nodes = doc.selectNodes("//funding_level_" + level);
+ while (nodes.size() > 0) {
+ for (org.dom4j.Node n : nodes) {
+
+ List node = n.selectNodes("./id");
+ id = ((org.dom4j.Node) node.get(0)).getText();
+ id = id.substring(id.indexOf("::") + 2);
+
+ node = n.selectNodes("./description");
+ description += ((Node) node.get(0)).getText() + " - ";
+
+ }
+ level += 1;
+ nodes = doc.selectNodes("//funding_level_" + level);
+ }
+
+ if (!id.equals("")) {
+ Fundings fundings = new Fundings();
+ fundings.setId(id);
+ fundings.setDescription(description.substring(0, description.length() - 3).trim());
+ f.setFunding_stream(fundings);
+ }
+
+ return f;
+
+ }
+
+ private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
+ Class<E> inputClazz) {
+ Utils
+ .readPath(spark, inputPath, inputClazz)
+ .map(o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class))
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath);
+ }
+
+ private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) {
+ Organization organization = new Organization();
+
+ Optional
+ .ofNullable(org.getLegalshortname())
+ .ifPresent(value -> organization.setLegalshortname(value.getValue()));
+
+ Optional
+ .ofNullable(org.getLegalname())
+ .ifPresent(value -> organization.setLegalname(value.getValue()));
+
+ Optional
+ .ofNullable(org.getWebsiteurl())
+ .ifPresent(value -> organization.setWebsiteurl(value.getValue()));
+
+ Optional
+ .ofNullable(org.getAlternativeNames())
+ .ifPresent(
+ value -> organization
+ .setAlternativenames(
+ value
+ .stream()
+ .map(v -> v.getValue())
+ .collect(Collectors.toList())));
+
+ Optional
+ .ofNullable(org.getCountry())
+ .ifPresent(
+ value -> organization.setCountry(Qualifier.newInstance(value.getClassid(), value.getClassname())));
+
+ Optional
+ .ofNullable(org.getId())
+ .ifPresent(value -> organization.setId(value));
+
+ Optional
+ .ofNullable(org.getPid())
+ .ifPresent(
+ value -> organization
+ .setPid(
+ value
+ .stream()
+ .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
+ .collect(Collectors.toList())));
+
+ return organization;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Extractor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Extractor.java
new file mode 100644
index 000000000..3daaed47f
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Extractor.java
@@ -0,0 +1,197 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.*;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity.
+ * The new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context
+ * related to communities and research initiatives/infrastructures.
+ *
+ * For collectedfrom elements it creates: datasource -> provides -> result and result -> isProvidedBy -> datasource
+ * For hostedby elements it creates: datasource -> hosts -> result and result -> isHostedBy -> datasource
+ * For context elements it creates: context <-> isRelatedTo <-> result
+ */
+public class Extractor implements Serializable {
+
+ public void run(Boolean isSparkSessionManaged,
+ String inputPath,
+ String outputPath,
+ Class<? extends Result> inputClazz,
+ String communityMapPath) {
+
+ SparkConf conf = new SparkConf();
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ extractRelationResult(
+ spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
+ });
+ }
+
+ private <R extends Result> void extractRelationResult(SparkSession spark,
+ String inputPath,
+ String outputPath,
+ Class<R> inputClazz,
+ CommunityMap communityMap) {
+
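+ // hash codes of the relations already added, used to avoid emitting the same relation twice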
+ Set<Integer> hashCodes = new HashSet<>();
+
+ Utils
+ .readPath(spark, inputPath, inputClazz)
+ .flatMap((FlatMapFunction<R, Relation>) value -> {
+ List<Relation> relationList = new ArrayList<>();
+ Optional
+ .ofNullable(value.getInstance())
+ .ifPresent(inst -> inst.forEach(instance -> {
+ Optional
+ .ofNullable(instance.getCollectedfrom())
+ .ifPresent(
+ cf -> getRelatioPair(
+ value, relationList, cf,
+ ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
+ Optional
+ .ofNullable(instance.getHostedby())
+ .ifPresent(
+ hb -> getRelatioPair(
+ value, relationList, hb,
+ Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
+ }));
+ Set<String> communities = communityMap.keySet();
+ Optional
+ .ofNullable(value.getContext())
+ .ifPresent(contexts -> contexts.forEach(context -> {
+ String id = context.getId();
+ if (id.contains(":")) {
+ id = id.substring(0, id.indexOf(":"));
+ }
+ if (communities.contains(id)) {
+ String contextId = Utils.getContextId(id);
+ Provenance provenance = Optional
+ .ofNullable(context.getDataInfo())
+ .map(
+ dinfo -> Optional
+ .ofNullable(dinfo.get(0).getProvenanceaction())
+ .map(
+ paction -> Provenance
+ .newInstance(
+ paction.getClassid(),
+ dinfo.get(0).getTrust()))
+ .orElse(null))
+ .orElse(null);
+ Relation r = getRelation(
+ value.getId(), contextId,
+ Constants.RESULT_ENTITY,
+ Constants.CONTEXT_ENTITY,
+ ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, provenance);
+ if (!hashCodes.contains(r.hashCode())) {
+ relationList
+ .add(r);
+ hashCodes.add(r.hashCode());
+ }
+ r = getRelation(
+ contextId, value.getId(),
+ Constants.CONTEXT_ENTITY,
+ Constants.RESULT_ENTITY,
+ ModelConstants.RELATIONSHIP,
+ ModelConstants.IS_RELATED_TO, provenance);
+ if (!hashCodes.contains(r.hashCode())) {
+ relationList
+ .add(
+ r);
+ hashCodes.add(r.hashCode());
+ }
+
+ }
+
+ }));
+
+ return relationList.iterator();
+ }, Encoders.bean(Relation.class))
+ .write()
+ .option("compression", "gzip")
+ .mode(SaveMode.Overwrite)
+ .json(outputPath);
+
+ }
+
+ private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
+ String result_dtasource, String datasource_result,
+ Set<Integer> hashCodes) {
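+ // provenance is taken from the dataInfo of the collectedfrom/hostedby element; when missing it falls back to harvested with the default trust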
+ Provenance provenance = Optional
+ .ofNullable(cf.getDataInfo())
+ .map(
+ dinfo -> Optional
+ .ofNullable(dinfo.getProvenanceaction())
+ .map(
+ paction -> Provenance
+ .newInstance(
+ paction.getClassid(),
+ dinfo.getTrust()))
+ .orElse(
+ Provenance
+ .newInstance(
+ eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
+ eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)))
+ .orElse(
+ Provenance
+ .newInstance(
+ eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
+ eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
+ Relation r = getRelation(
+ value.getId(),
+ cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
+ result_dtasource, ModelConstants.PROVISION,
+ provenance);
+ if (!hashCodes.contains(r.hashCode())) {
+ relationList
+ .add(r);
+ hashCodes.add(r.hashCode());
+ }
+
+ r = getRelation(
+ cf.getKey(), value.getId(),
+ Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
+ datasource_result, ModelConstants.PROVISION,
+ provenance);
+
+ if (!hashCodes.contains(r.hashCode())) {
+ relationList
+ .add(r);
+ hashCodes.add(r.hashCode());
+ }
+
+ }
+
+ private static Relation getRelation(String source, String target, String sourceType, String targetType,
+ String relName, String relType, Provenance provenance) {
+ Relation r = new Relation();
+ r.setSource(Node.newInstance(source, sourceType));
+ r.setTarget(Node.newInstance(target, targetType));
+ r.setReltype(RelType.newInstance(relName, relType));
+ r.setProvenance(provenance);
+ return r;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/MergedRels.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/MergedRels.java
new file mode 100644
index 000000000..5f59750ea
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/MergedRels.java
@@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.Serializable;
+
+public class MergedRels implements Serializable {
+ private String organizationId;
+ private String representativeId;
+
+ public String getOrganizationId() {
+ return organizationId;
+ }
+
+ public void setOrganizationId(String organizationId) {
+ this.organizationId = organizationId;
+ }
+
+ public String getRepresentativeId() {
+ return representativeId;
+ }
+
+ public void setRepresentativeId(String representativeId) {
+ this.representativeId = representativeId;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/OrganizationMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/OrganizationMap.java
new file mode 100644
index 000000000..11db7c25e
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/OrganizationMap.java
@@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+public class OrganizationMap extends HashMap<String, List<String>> {
+
+ public OrganizationMap() {
+ super();
+ }
+
+ public List<String> get(String key) {
+
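+ // never return null: a missing key maps to an empty list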
+ if (super.get(key) == null) {
+ return new ArrayList<>();
+ }
+ return super.get(key);
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Process.java
new file mode 100644
index 000000000..7b7dafdf3
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/Process.java
@@ -0,0 +1,98 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.oa.graph.dump.Constants;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
+
+/**
+ * Processes the ContextInfo to produce either a new Context entity or a set of Relations between the
+ * generic context entity and the datasources/projects related to the context.
+ *
+ */
+public class Process implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(Process.class);
+
+ public static <R extends ResearchInitiative> R getEntity(ContextInfo ci) {
+ try {
+ ResearchInitiative ri;
+ if (ci.getType().equals("community")) {
+ ri = new ResearchCommunity();
+ ((ResearchCommunity) ri).setSubject(ci.getSubject());
+ ri.setType(Constants.RESEARCH_COMMUNITY);
+ } else {
+ ri = new ResearchInitiative();
+ ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
+ }
+ ri.setId(Utils.getContextId(ci.getId()));
+ ri.setOriginalId(ci.getId());
+
+ ri.setDescription(ci.getDescription());
+ ri.setName(ci.getName());
+ ri.setZenodo_community(Constants.ZENODO_COMMUNITY_PREFIX + ci.getZenodocommunity());
+ return (R) ri;
+
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public static List<Relation> getRelation(ContextInfo ci) {
+ try {
+
+ List<Relation> relationList = new ArrayList<>();
+ ci
+ .getDatasourceList()
+ .forEach(ds -> {
+
+ String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2));
+
+ String contextId = Utils.getContextId(ci.getId());
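+ // add the relation in both directions: context -> datasource/project and datasource/project -> context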
+ relationList
+ .add(
+ Relation
+ .newInstance(
+ Node
+ .newInstance(
+ contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
+ Node.newInstance(ds, nodeType),
+ RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
+ Provenance
+ .newInstance(
+ Constants.USER_CLAIM,
+ Constants.DEFAULT_TRUST)));
+
+ relationList
+ .add(
+ Relation
+ .newInstance(
+ Node.newInstance(ds, nodeType),
+ Node
+ .newInstance(
+ contextId, eu.dnetlib.dhp.schema.dump.oaf.graph.Constants.CONTEXT_ENTITY),
+ RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
+ Provenance
+ .newInstance(
+ Constants.USER_CLAIM,
+ Constants.DEFAULT_TRUST)));
+
+ });
+
+ return relationList;
+
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/QueryInformationSystem.java
new file mode 100644
index 000000000..e74d8a44c
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/QueryInformationSystem.java
@@ -0,0 +1,132 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.StringReader;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.*;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.Element;
+import org.dom4j.Node;
+import org.dom4j.io.SAXReader;
+import org.jetbrains.annotations.NotNull;
+
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+public class QueryInformationSystem {
+
+ private ISLookUpService isLookUp;
+ private List<String> contextRelationResult;
+
+ private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
+ " and $x//context/param[./@name = 'status']/text() = 'all' " +
+ " return " +
+ "$x//context";
+
+ private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+ +
+ "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
+ "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+ +
+ "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
+
+ public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
+
+ isLookUp
+ .quickSearchProfile(XQUERY_ENTITY)
+ .forEach(c -> {
+ ContextInfo cinfo = new ContextInfo();
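+ // each profile record is a '@@'-separated tuple: id, name, description, subject, zenodoCommunity, type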
+ String[] cSplit = c.split("@@");
+ cinfo.setId(cSplit[0]);
+ cinfo.setName(cSplit[1]);
+ cinfo.setDescription(cSplit[2]);
+ if (!cSplit[3].trim().equals("")) {
+ cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
+ }
+ cinfo.setZenodocommunity(cSplit[4]);
+ cinfo.setType(cSplit[5]);
+ consumer.accept(cinfo);
+ });
+
+ }
+
+ public List<String> getContextRelationResult() {
+ return contextRelationResult;
+ }
+
+ public void setContextRelationResult(List<String> contextRelationResult) {
+ this.contextRelationResult = contextRelationResult;
+ }
+
+ public ISLookUpService getIsLookUp() {
+ return isLookUp;
+ }
+
+ public void setIsLookUp(ISLookUpService isLookUpService) {
+ this.isLookUp = isLookUpService;
+ }
+
+ public void execContextRelationQuery() throws ISLookUpException {
+ contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
+
+ }
+
+ public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
+
+ contextRelationResult.forEach(xml -> {
+ ContextInfo cinfo = new ContextInfo();
+ final Document doc;
+
+ try {
+
+ doc = new SAXReader().read(new StringReader(xml));
+ Element root = doc.getRootElement();
+ cinfo.setId(root.attributeValue("id"));
+
+ Iterator it = root.elementIterator();
+ while (it.hasNext()) {
+ Element el = (Element) it.next();
+ if (el.getName().equals("category")) {
+ String categoryId = el.attributeValue("id");
+ categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
+ if (categoryId.equals(category)) {
+ cinfo.setDatasourceList(getCategoryList(el, prefix));
+ }
+ }
+
+ }
+ consumer.accept(cinfo);
+ } catch (DocumentException e) {
+ e.printStackTrace();
+ }
+
+ });
+
+ }
+
+ @NotNull
+ private List<String> getCategoryList(Element el, String prefix) {
+ List<String> datasourceList = new ArrayList<>();
+ for (Object node : el.selectNodes(".//param")) {
+ Node n = (Node) node;
+ if (n.valueOf("./@name").equals("openaireId")) {
+ datasourceList.add(prefix + "|" + n.getText());
+ }
+ }
+
+ return datasourceList;
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkCollectAndSave.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkCollectAndSave.java
new file mode 100644
index 000000000..cb150210a
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkCollectAndSave.java
@@ -0,0 +1,89 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.dump.oaf.Result;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
+
+/**
+ * Reads all the entities of the same type (Relation / Results) and saves them in the same folder
+ *
+ */
+public class SparkCollectAndSave implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(SparkCollectAndSave.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkCollectAndSave.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump_whole/input_collect_and_save.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ SparkConf conf = new SparkConf();
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath + "/result");
+ run(spark, inputPath, outputPath);
+
+ });
+
+ }
+
+ private static void run(SparkSession spark, String inputPath, String outputPath) {
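+ // results of all types (publication, dataset, otherresearchproduct, software) are collected under a single /result folder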
+ Utils
+ .readPath(spark, inputPath + "/result/publication", Result.class)
+ .union(Utils.readPath(spark, inputPath + "/result/dataset", Result.class))
+ .union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", Result.class))
+ .union(Utils.readPath(spark, inputPath + "/result/software", Result.class))
+ .write()
+ .option("compression", "gzip")
+ .mode(SaveMode.Overwrite)
+ .json(outputPath + "/result");
+
+ Utils
+ .readPath(spark, inputPath + "/relation/publication", Relation.class)
+ .union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
+ .union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
+ .union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
+ .union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
+ .union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
+ .union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath + "/relation");
+
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpEntitiesJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpEntitiesJob.java
new file mode 100644
index 000000000..441cfa32d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpEntitiesJob.java
@@ -0,0 +1,54 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+
+/**
+ * Spark Job that fires the dump for the entities
+ */
+public class SparkDumpEntitiesJob implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkDumpEntitiesJob.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ final String resultClassName = parser.get("resultTableName");
+ log.info("resultTableName: {}", resultClassName);
+
+ final String communityMapPath = parser.get("communityMapPath");
+
+ Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
+
+ DumpGraphEntities dg = new DumpGraphEntities();
+ dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpRelationJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpRelationJob.java
new file mode 100644
index 000000000..59aad1f30
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpRelationJob.java
@@ -0,0 +1,111 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+
+/**
+ * Dumps eu.dnetlib.dhp.schema.oaf.Relation into eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
+ */
+public class SparkDumpRelationJob implements Serializable {
+
+ private static final Logger log = LoggerFactory.getLogger(SparkDumpRelationJob.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkDumpRelationJob.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ SparkConf conf = new SparkConf();
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ dumpRelation(spark, inputPath, outputPath);
+
+ });
+
+ }
+
+ private static void dumpRelation(SparkSession spark, String inputPath, String outputPath) {
+ Utils
+ .readPath(spark, inputPath, Relation.class)
+ .map(relation -> {
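+ // the node type of source and target is derived from the two-character id prefix via ModelSupport.idPrefixEntity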
+ eu.dnetlib.dhp.schema.dump.oaf.graph.Relation rel = new eu.dnetlib.dhp.schema.dump.oaf.graph.Relation();
+ rel
+ .setSource(
+ Node
+ .newInstance(
+ relation.getSource(),
+ ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));
+
+ rel
+ .setTarget(
+ Node
+ .newInstance(
+ relation.getTarget(),
+ ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));
+
+ rel
+ .setReltype(
+ RelType
+ .newInstance(
+ relation.getRelClass(),
+ relation.getSubRelType()));
+
+ Optional
+ .ofNullable(relation.getDataInfo())
+ .ifPresent(
+ datainfo -> rel
+ .setProvenance(
+ Provenance
+ .newInstance(datainfo.getProvenanceaction().getClassname(), datainfo.getTrust())));
+
+ return rel;
+
+ }, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
+ .write()
+ .option("compression", "gzip")
+ .mode(SaveMode.Overwrite)
+ .json(outputPath);
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkExtractRelationFromEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkExtractRelationFromEntities.java
new file mode 100644
index 000000000..f910dbee4
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkExtractRelationFromEntities.java
@@ -0,0 +1,57 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.Serializable;
+import java.util.*;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+/**
+ * Spark job that fires the extraction of relations from entities
+ */
+public class SparkExtractRelationFromEntities implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(SparkExtractRelationFromEntities.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkExtractRelationFromEntities.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ final String resultClassName = parser.get("resultTableName");
+ log.info("resultTableName: {}", resultClassName);
+
+ final String communityMapPath = parser.get("communityMapPath");
+
+ Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
+
+ Extractor extractor = new Extractor();
+ extractor.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkOrganizationRelation.java
new file mode 100644
index 000000000..f17e7c894
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/graph/SparkOrganizationRelation.java
@@ -0,0 +1,160 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.function.Consumer;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.*;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+
+/**
+ * Creates new Relations between Context Entities and Organizations whose products are associated with the context.
+ * It produces relations such as: organization <-> isRelatedTo <-> context
+ */
+public class SparkOrganizationRelation implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class);
+
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ SparkOrganizationRelation.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("inputPath: {}", inputPath);
+
+ final String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ final OrganizationMap organizationMap = new Gson()
+ .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
+ log.info("organization map : {}", new Gson().toJson(organizationMap));
+
+ SparkConf conf = new SparkConf();
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ Utils.removeOutputDir(spark, outputPath);
+ extractRelation(spark, inputPath, organizationMap, outputPath);
+
+ });
+
+ }
+
+ private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
+ String outputPath) {
+ Dataset<Relation> relationDataset = Utils.readPath(spark, inputPath, Relation.class);
+
+ relationDataset.createOrReplaceTempView("relation");
+
+ List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList = new ArrayList<>();
+
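+ // 'merges' relations have the representative organization as source (id prefix 20) and the deduplicated organization as target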
+ Dataset<MergedRels> mergedRelsDataset = spark
+ .sql(
+ "SELECT target organizationId, source representativeId " +
+ "FROM relation " +
+ "WHERE datainfo.deletedbyinference = false " +
+ "AND relclass = 'merges' " +
+ "AND substr(source, 1, 2) = '20'")
+ .as(Encoders.bean(MergedRels.class));
+
+ mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
+ if (organizationMap.containsKey(mergedRels.getOrganizationId())) {
+ return mergedRels;
+ }
+ return null;
+ }, Encoders.bean(MergedRels.class))
+ .filter(Objects::nonNull)
+ .collectAsList()
+ .forEach(getMergedRelsConsumer(organizationMap, relList));
+
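+ // organizations left in the map were not merged by dedup: relate them directly to their communities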
+ organizationMap
+ .keySet()
+ .forEach(
+ oId -> organizationMap
+ .get(oId)
+ .forEach(community -> addRelations(relList, community, oId)));
+
+ spark
+ .createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class))
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath);
+
+ }
+
+ @NotNull
+ private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
+ List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList) {
+ return mergedRels -> {
+ String oId = mergedRels.getOrganizationId();
+ organizationMap
+ .get(oId)
+ .forEach(community -> addRelations(relList, community, mergedRels.getRepresentativeId()));
+ organizationMap.remove(oId);
+ };
+ }
+
+ private static void addRelations(List<eu.dnetlib.dhp.schema.dump.oaf.graph.Relation> relList, String community,
+ String organization) {
+
+ String id = Utils.getContextId(community);
+ log.info("create relation for organization: {}", organization);
+ relList
+ .add(
+ eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
+ .newInstance(
+ Node.newInstance(id, Constants.CONTEXT_ENTITY),
+ Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
+ RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
+ Provenance
+ .newInstance(
+ eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
+ eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
+
+ relList
+ .add(
+ eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
+ .newInstance(
+ Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
+ Node.newInstance(id, Constants.CONTEXT_ENTITY),
+ RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
+ Provenance
+ .newInstance(
+ eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
+ eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
index 5159fa9bb..5b6ae72f1 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@@ -24,7 +24,9 @@ public abstract class AbstractMdRecordToOafMapper {
private final boolean invisible;
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
+ protected static final String DATACITE_SCHEMA_KERNEL_4_SLASH = "http://datacite.org/schema/kernel-4/";
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
+ protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/";
protected static final Qualifier ORCID_PID_TYPE = qualifier(
"ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES);
protected static final Qualifier MAG_PID_TYPE = qualifier(
@@ -55,7 +57,11 @@ public abstract class AbstractMdRecordToOafMapper {
DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext);
final Document doc = DocumentHelper
- .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3));
+ .parseText(
+ xml
+ .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)
+ .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3)
+ .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3));
final KeyValue collectedFrom = getProvenanceDatasource(
doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name");
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
index da2ba4723..87c935d83 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java
@@ -44,6 +44,7 @@ import java.util.Date;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
+import java.util.function.Predicate;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@@ -53,6 +54,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
+import eu.dnetlib.dhp.oa.graph.raw.common.VerifyNsPrefixPredicate;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
@@ -113,6 +115,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String hdfsPath = parser.get("hdfsPath");
log.info("hdfsPath: {}", hdfsPath);
+ final String nsPrefixBlacklist = parser.get("nsPrefixBlacklist");
+ log.info("nsPrefixBlacklist: {}", nsPrefixBlacklist);
+
+ final Predicate<Oaf> verifyNamespacePrefix = new VerifyNsPrefixPredicate(nsPrefixBlacklist);
+
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
log.info("processClaims: {}", processClaims);
@@ -123,23 +130,25 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
smdbe.execute("queryClaims.sql", smdbe::processClaims);
} else {
log.info("Processing datasources...");
- smdbe.execute("queryDatasources.sql", smdbe::processDatasource);
+ smdbe.execute("queryDatasources.sql", smdbe::processDatasource, verifyNamespacePrefix);
log.info("Processing projects...");
if (dbSchema.equalsIgnoreCase("beta")) {
- smdbe.execute("queryProjects.sql", smdbe::processProject);
+ smdbe.execute("queryProjects.sql", smdbe::processProject, verifyNamespacePrefix);
} else {
- smdbe.execute("queryProjects_production.sql", smdbe::processProject);
+ smdbe.execute("queryProjects_production.sql", smdbe::processProject, verifyNamespacePrefix);
}
log.info("Processing orgs...");
- smdbe.execute("queryOrganizations.sql", smdbe::processOrganization);
+ smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
log.info("Processing relationsNoRemoval ds <-> orgs ...");
- smdbe.execute("queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization);
+ smdbe
+ .execute(
+ "queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization, verifyNamespacePrefix);
log.info("Processing projects <-> orgs ...");
- smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization);
+ smdbe.execute("queryProjectOrganization.sql", smdbe::processProjectOrganization, verifyNamespacePrefix);
}
log.info("All done.");
}
@@ -163,10 +172,20 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
}
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
+ throws Exception {
+ execute(sqlFile, producer, oaf -> true);
+ }
+
+ public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer,
+ final Predicate<Oaf> predicate)
throws Exception {
final String sql = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/sql/" + sqlFile));
- final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> emitOaf(oaf));
+ final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(oaf -> {
+ if (predicate.test(oaf)) {
+ emitOaf(oaf);
+ }
+ });
dbClient.processResults(sql, consumer);
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
index 62f8123bb..6fe7bb971 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@@ -16,6 +16,8 @@ import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Node;
+import com.google.common.collect.Lists;
+
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Author;
@@ -366,7 +368,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override
protected List<StructuredProperty> prepareResultPids(final Document doc, final DataInfo info) {
- final List<StructuredProperty> res = new ArrayList<>();
+ final Set<StructuredProperty> res = new HashSet<>();
res
.addAll(
prepareListStructPropsWithValidQualifier(
@@ -382,7 +384,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
doc,
"//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']",
"@alternateIdentifierType", DNET_PID_TYPES, info));
- return res;
+ return Lists.newArrayList(res);
}
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicate.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicate.java
new file mode 100644
index 000000000..1e99d298d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VerifyNsPrefixPredicate.java
@@ -0,0 +1,62 @@
+
+package eu.dnetlib.dhp.oa.graph.raw.common;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.base.Splitter;
+
+import eu.dnetlib.dhp.schema.oaf.Datasource;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+
+/**
+ * This predicate should be used to skip oaf objects using a blacklist of nsprefixes.
+ *
+ * @author michele
+ */
+public class VerifyNsPrefixPredicate implements Predicate<Oaf> {
+
+ final Set<String> invalids = new HashSet<>();
+
+ public VerifyNsPrefixPredicate(final String blacklist) {
+ if (StringUtils.isNotBlank(blacklist)) {
+ Splitter
+ .on(",")
+ .trimResults()
+ .omitEmptyStrings()
+ .split(blacklist)
+ .forEach(invalids::add);
+ }
+ }
+
+ @Override
+ public boolean test(final Oaf oaf) {
+ if (oaf instanceof Datasource) {
+ return testValue(((Datasource) oaf).getNamespaceprefix().getValue());
+ } else if (oaf instanceof OafEntity) {
+ return testValue(((OafEntity) oaf).getId());
+ } else if (oaf instanceof Relation) {
+ return testValue(((Relation) oaf).getSource()) && testValue(((Relation) oaf).getTarget());
+ } else {
+ return true;
+ }
+ }
+
+ protected boolean testValue(final String s) {
+ if (StringUtils.isNotBlank(s)) {
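+ // ids look like "10|nsprefix::...": the optional two-digit prefix is skipped before matching the blacklisted nsprefix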
+ for (final String invalid : invalids) {
+ if (Pattern.matches("^(\\d\\d\\|)?" + invalid + ".*$", s)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/EBIAggregator.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/EBIAggregator.scala
index 41fcd2636..d1bf39475 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/EBIAggregator.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/EBIAggregator.scala
@@ -1,5 +1,6 @@
package eu.dnetlib.dhp.sx.ebi
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIUnknown}
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator
@@ -35,6 +36,88 @@ object EBIAggregator {
}
+
+ def getDLIUnknownAggregator(): Aggregator[(String, DLIUnknown), DLIUnknown, DLIUnknown] = new Aggregator[(String, DLIUnknown), DLIUnknown, DLIUnknown]{
+
+ override def zero: DLIUnknown = new DLIUnknown()
+
+ override def reduce(b: DLIUnknown, a: (String, DLIUnknown)): DLIUnknown = {
+ b.mergeFrom(a._2)
+ if (b.getId == null)
+ b.setId(a._2.getId)
+ b
+ }
+
+ override def merge(wx: DLIUnknown, wy: DLIUnknown): DLIUnknown = {
+ wx.mergeFrom(wy)
+ if(wx.getId == null && wy.getId.nonEmpty)
+ wx.setId(wy.getId)
+ wx
+ }
+ override def finish(reduction: DLIUnknown): DLIUnknown = reduction
+
+ override def bufferEncoder: Encoder[DLIUnknown] =
+ Encoders.kryo(classOf[DLIUnknown])
+
+ override def outputEncoder: Encoder[DLIUnknown] =
+ Encoders.kryo(classOf[DLIUnknown])
+ }
+
+ def getDLIDatasetAggregator(): Aggregator[(String, DLIDataset), DLIDataset, DLIDataset] = new Aggregator[(String, DLIDataset), DLIDataset, DLIDataset]{
+
+ override def zero: DLIDataset = new DLIDataset()
+
+ override def reduce(b: DLIDataset, a: (String, DLIDataset)): DLIDataset = {
+ b.mergeFrom(a._2)
+ if (b.getId == null)
+ b.setId(a._2.getId)
+ b
+ }
+
+ override def merge(wx: DLIDataset, wy: DLIDataset): DLIDataset = {
+ wx.mergeFrom(wy)
+ if(wx.getId == null && wy.getId.nonEmpty)
+ wx.setId(wy.getId)
+ wx
+ }
+ override def finish(reduction: DLIDataset): DLIDataset = reduction
+
+ override def bufferEncoder: Encoder[DLIDataset] =
+ Encoders.kryo(classOf[DLIDataset])
+
+ override def outputEncoder: Encoder[DLIDataset] =
+ Encoders.kryo(classOf[DLIDataset])
+ }
+
+
+ def getDLIPublicationAggregator(): Aggregator[(String, DLIPublication), DLIPublication, DLIPublication] = new Aggregator[(String, DLIPublication), DLIPublication, DLIPublication]{
+
+ override def zero: DLIPublication = new DLIPublication()
+
+ override def reduce(b: DLIPublication, a: (String, DLIPublication)): DLIPublication = {
+ b.mergeFrom(a._2)
+ if (b.getId == null)
+ b.setId(a._2.getId)
+ b
+ }
+
+
+ override def merge(wx: DLIPublication, wy: DLIPublication): DLIPublication = {
+ wx.mergeFrom(wy)
+ if(wx.getId == null && wy.getId.nonEmpty)
+ wx.setId(wy.getId)
+ wx
+ }
+ override def finish(reduction: DLIPublication): DLIPublication = reduction
+
+ override def bufferEncoder: Encoder[DLIPublication] =
+ Encoders.kryo(classOf[DLIPublication])
+
+ override def outputEncoder: Encoder[DLIPublication] =
+ Encoders.kryo(classOf[DLIPublication])
+ }
+
+
def getPublicationAggregator(): Aggregator[(String, Publication), Publication, Publication] = new Aggregator[(String, Publication), Publication, Publication]{
override def zero: Publication = new Publication()
@@ -86,4 +169,7 @@ object EBIAggregator {
+
+
+
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkAddLinkUpdates.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkAddLinkUpdates.scala
index 897bbd540..d5cdb8a7c 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkAddLinkUpdates.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkAddLinkUpdates.scala
@@ -1,8 +1,9 @@
package eu.dnetlib.dhp.sx.ebi
import eu.dnetlib.dhp.application.ArgumentApplicationParser
-import eu.dnetlib.dhp.schema.oaf.{Instance, KeyValue, Oaf}
+import eu.dnetlib.dhp.schema.oaf.{Author, Instance, Journal, KeyValue, Oaf, Publication, Relation, Dataset => OafDataset}
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils.createQualifier
-import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIRelation, OafUtils, ProvenaceInfo}
+import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, OafUtils, ProvenaceInfo}
+import eu.dnetlib.dhp.sx.ebi.model.{PMArticle, PMAuthor, PMJournal}
import eu.dnetlib.dhp.utils.DHPUtils
import eu.dnetlib.scholexplorer.relation.RelationMapper
import org.apache.commons.io.IOUtils
@@ -12,6 +13,7 @@ import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse
+import org.apache.spark.sql.functions._
import scala.collection.JavaConverters._
@@ -28,6 +30,64 @@ case class EBILinks(relation:String, pubdate:String, tpid:String, tpidType:Strin
}
+
+ def journalToOAF(pj:PMJournal): Journal = {
+ val j = new Journal
+ j.setIssnPrinted(pj.getIssn)
+ j.setVol(pj.getVolume)
+ j.setName(pj.getTitle)
+ j.setIss(pj.getIssue)
+ j.setDataInfo(OafUtils.generateDataInfo())
+ j
+ }
+
+
+ def pubmedTOPublication(input:PMArticle):DLIPublication = {
+
+
+ val dnetPublicationId = s"50|${DHPUtils.md5(s"${input.getPmid}::pmid")}"
+
+ val p = new DLIPublication
+ p.setId(dnetPublicationId)
+ p.setDataInfo(OafUtils.generateDataInfo())
+ p.setPid(List(OafUtils.createSP(input.getPmid.toLowerCase.trim, "pmid", "dnet:pid_types")).asJava)
+ p.setCompletionStatus("complete")
+ val pi = new ProvenaceInfo
+ pi.setId("dli_________::europe_pmc__")
+ pi.setName( "Europe PMC")
+ pi.setCompletionStatus("complete")
+ pi.setCollectionMode("collected")
+ p.setDlicollectedfrom(List(pi).asJava)
+ p.setCollectedfrom(List(generatePubmedDLICollectedFrom()).asJava)
+
+ if (input.getAuthors != null && input.getAuthors.size() >0) {
+ var aths: List[Author] = List()
+ input.getAuthors.asScala.filter(a=> a!= null).foreach(a => {
+ val c = new Author
+ c.setFullname(a.getFullName)
+ c.setName(a.getForeName)
+ c.setSurname(a.getLastName)
+ aths = aths ::: List(c)
+ })
+ if (aths.nonEmpty)
+ p.setAuthor(aths.asJava)
+ }
+
+
+ if (input.getJournal != null)
+ p.setJournal(journalToOAF(input.getJournal))
+ p.setTitle(List(OafUtils.createSP(input.getTitle, "main title", "dnet:dataCite_title")).asJava)
+ p.setDateofacceptance(OafUtils.asField(input.getDate))
+ val i = new Instance
+ i.setCollectedfrom(generatePubmedDLICollectedFrom())
+ i.setDateofacceptance(p.getDateofacceptance)
+ i.setUrl(List(s"https://pubmed.ncbi.nlm.nih.gov/${input.getPmid}").asJava)
+ i.setInstancetype(createQualifier("0001", "Article", "dnet:publication_resource", "dnet:publication_resource"))
+ p.setInstance(List(i).asJava)
+ p
+ }
+
+
def ebiLinksToOaf(input:(String, String)):List[Oaf] = {
val pmid :String = input._1
val input_json :String = input._2
@@ -55,8 +115,8 @@ case class EBILinks(relation:String, pubdate:String, tpid:String, tpidType:Strin
val dnetPublicationId = s"50|${DHPUtils.md5(s"$pmid::pmid")}"
targets.flatMap(l => {
- val relation = new DLIRelation
- val inverseRelation = new DLIRelation
+ val relation = new Relation
+ val inverseRelation = new Relation
val targetDnetId = s"50|${DHPUtils.md5(s"${l.tpid.toLowerCase.trim}::${l.tpidType.toLowerCase.trim}")}"
val relInfo = relationMapper.get(l.relation.toLowerCase)
val relationSemantic = relInfo.getOriginal
@@ -116,8 +176,16 @@ case class EBILinks(relation:String, pubdate:String, tpid:String, tpidType:Strin
val workingPath = parser.get("workingPath")
implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
- implicit val relEncoder: Encoder[DLIRelation] = Encoders.kryo(classOf[DLIRelation])
+ implicit val oafpubEncoder: Encoder[Publication] = Encoders.kryo[Publication]
+ implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation])
implicit val datEncoder: Encoder[DLIDataset] = Encoders.kryo(classOf[DLIDataset])
+ implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo(classOf[DLIPublication])
+ implicit val atEncoder: Encoder[Author] = Encoders.kryo(classOf[Author])
+ implicit val strEncoder:Encoder[String] = Encoders.STRING
+ implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle])
+ implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal])
+ implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor])
+
val ds:Dataset[(String,String)] = spark.read.load(s"$workingPath/baseline_links_updates").as[(String,String)](Encoders.tuple(Encoders.STRING, Encoders.STRING))
@@ -129,10 +197,50 @@ case class EBILinks(relation:String, pubdate:String, tpid:String, tpidType:Strin
val oDataset:Dataset[Oaf] = spark.read.load(s"$workingPath/baseline_links_updates_oaf").as[Oaf]
- oDataset.filter(p =>p.isInstanceOf[DLIRelation]).map(p => p.asInstanceOf[DLIRelation]).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_relation")
+ oDataset.filter(p =>p.isInstanceOf[Relation]).map(p => p.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_relation")
oDataset.filter(p =>p.isInstanceOf[DLIDataset]).map(p => p.asInstanceOf[DLIDataset]).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_dataset")
+ val idPublicationSolved:Dataset[String] = spark.read.load(s"$workingPath/baseline_links_updates").where(col("links").isNotNull).select("pmid").as[String]
+ val baseline:Dataset[(String, PMArticle)]= spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle].map(p=> (p.getPmid, p))(Encoders.tuple(strEncoder,PMEncoder))
+ idPublicationSolved.joinWith(baseline, idPublicationSolved("pmid").equalTo(baseline("_1"))).map(k => pubmedTOPublication(k._2._2)).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_publication")
+
+
+ val pmaDatasets = spark.read.load("/user/sandro.labruzzo/scholix/EBI/ebi_garr/baseline_dataset").as[PMArticle]
+
+ pmaDatasets.map(p => pubmedTOPublication(p)).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_publication_all")
+
+ val pubs: Dataset[(String,Publication)] = spark.read.load("/user/sandro.labruzzo/scholix/EBI/publication").as[Publication].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING,oafpubEncoder))
+ val pubdate:Dataset[(String,DLIPublication)] = spark.read.load(s"$workingPath/baseline_publication_all").as[DLIPublication].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING,pubEncoder))
+
+
+
+ pubs.joinWith(pubdate, pubs("_1").equalTo(pubdate("_1"))).map(k => k._2._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_publication_ebi")
+
+
+
+ val dt : Dataset[DLIDataset] = spark.read.load(s"$workingPath/dataset").as[DLIDataset]
+ val update : Dataset[DLIDataset] = spark.read.load(s"$workingPath/ebi_garr/baseline_links_updates_dataset").as[DLIDataset]
+
+
+ dt.union(update).map(d => (d.getId,d))(Encoders.tuple(Encoders.STRING, datEncoder))
+ .groupByKey(_._1)(Encoders.STRING)
+ .agg(EBIAggregator.getDLIDatasetAggregator().toColumn)
+ .map(p => p._2)
+ .write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset_ebi")
+
+
+ val rel: Dataset[Relation] = spark.read.load(s"$workingPath/relation").as[Relation]
+ val relupdate : Dataset[Relation] = spark.read.load(s"$workingPath/ebi_garr/baseline_links_updates_relation").as[Relation]
+
+
+ rel.union(relupdate)
+ .map(d => (s"${d.getSource}::${d.getRelType}::${d.getTarget}", d))(Encoders.tuple(Encoders.STRING, relEncoder))
+ .groupByKey(_._1)(Encoders.STRING)
+ .agg(EBIAggregator.getRelationAggregator().toColumn)
+ .map(p => p._2)
+ .write.mode(SaveMode.Overwrite)
+ .save(s"$workingPath/baseline_relation_ebi")
}
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkCreateEBIDataFrame.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkCreateEBIDataFrame.scala
index 60857f0fc..9fc970446 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkCreateEBIDataFrame.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/SparkCreateEBIDataFrame.scala
@@ -2,6 +2,7 @@ package eu.dnetlib.dhp.sx.ebi
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
+import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
import eu.dnetlib.dhp.sx.graph.parser.{DatasetScholexplorerParser, PublicationScholexplorerParser}
import eu.dnetlib.scholexplorer.relation.RelationMapper
import org.apache.commons.io.IOUtils
@@ -10,6 +11,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
import org.slf4j.{Logger, LoggerFactory}
+
import scala.collection.JavaConverters._
object SparkCreateEBIDataFrame {
@@ -34,43 +36,43 @@ object SparkCreateEBIDataFrame {
val relationMapper = RelationMapper.load
implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
- implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset])
- implicit val pubEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication])
+ implicit val datasetEncoder: Encoder[DLIDataset] = Encoders.kryo(classOf[DLIDataset])
+ implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo(classOf[DLIPublication])
implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation])
- logger.info("Extract Publication and relation from publication_xml")
- val oafPubsRDD:RDD[Oaf] = sc.textFile(s"$workingPath/publication_xml").map(s =>
- {
- new ObjectMapper().readValue(s, classOf[String])
- }).flatMap(s => {
- val d = new PublicationScholexplorerParser
- d.parseObject(s, relationMapper).asScala.iterator})
+// logger.info("Extract Publication and relation from publication_xml")
+// val oafPubsRDD:RDD[Oaf] = sc.textFile(s"$workingPath/publication_xml").map(s =>
+// {
+// new ObjectMapper().readValue(s, classOf[String])
+// }).flatMap(s => {
+// val d = new PublicationScholexplorerParser
+// d.parseObject(s, relationMapper).asScala.iterator})
+//
+// val mapper = new ObjectMapper()
+// mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
+// spark.createDataset(oafPubsRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/oaf")
+//
+// logger.info("Extract Publication and relation from dataset_xml")
+// val oafDatsRDD:RDD[Oaf] = sc.textFile(s"$workingPath/dataset_xml").map(s =>
+// {
+// new ObjectMapper().readValue(s, classOf[String])
+// }).flatMap(s => {
+// val d = new DatasetScholexplorerParser
+// d.parseObject(s, relationMapper).asScala.iterator})
- val mapper = new ObjectMapper()
- mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
- spark.createDataset(oafPubsRDD).write.mode(SaveMode.Overwrite).save(s"$workingPath/oaf")
-
- logger.info("Extract Publication and relation from dataset_xml")
- val oafDatsRDD:RDD[Oaf] = sc.textFile(s"$workingPath/dataset_xml").map(s =>
- {
- new ObjectMapper().readValue(s, classOf[String])
- }).flatMap(s => {
- val d = new DatasetScholexplorerParser
- d.parseObject(s, relationMapper).asScala.iterator})
-
- spark.createDataset(oafDatsRDD).write.mode(SaveMode.Append).save(s"$workingPath/oaf")
- val dataset: Dataset[OafDataset] = spark.read.load(s"$workingPath/oaf").as[Oaf].filter(o => o.isInstanceOf[OafDataset]).map(d => d.asInstanceOf[OafDataset])
- val publication: Dataset[Publication] = spark.read.load(s"$workingPath/oaf").as[Oaf].filter(o => o.isInstanceOf[Publication]).map(d => d.asInstanceOf[Publication])
+// spark.createDataset(oafDatsRDD).write.mode(SaveMode.Append).save(s"$workingPath/oaf")
+ val dataset: Dataset[DLIDataset] = spark.read.load(s"$workingPath/oaf").as[Oaf].filter(o => o.isInstanceOf[DLIDataset]).map(d => d.asInstanceOf[DLIDataset])
+ val publication: Dataset[DLIPublication] = spark.read.load(s"$workingPath/oaf").as[Oaf].filter(o => o.isInstanceOf[DLIPublication]).map(d => d.asInstanceOf[DLIPublication])
val relations: Dataset[Relation] = spark.read.load(s"$workingPath/oaf").as[Oaf].filter(o => o.isInstanceOf[Relation]).map(d => d.asInstanceOf[Relation])
publication.map(d => (d.getId, d))(Encoders.tuple(Encoders.STRING, pubEncoder))
.groupByKey(_._1)(Encoders.STRING)
- .agg(EBIAggregator.getPublicationAggregator().toColumn)
+ .agg(EBIAggregator.getDLIPublicationAggregator().toColumn)
.map(p => p._2)
.write.mode(SaveMode.Overwrite).save(s"$workingPath/publication")
dataset.map(d => (d.getId, d))(Encoders.tuple(Encoders.STRING, datasetEncoder))
.groupByKey(_._1)(Encoders.STRING)
- .agg(EBIAggregator.getDatasetAggregator().toColumn)
+ .agg(EBIAggregator.getDLIDatasetAggregator().toColumn)
.map(p => p._2)
.write.mode(SaveMode.Overwrite).save(s"$workingPath/dataset")
@@ -80,8 +82,5 @@ object SparkCreateEBIDataFrame {
.map(p => p._2)
.write.mode(SaveMode.Overwrite).save(s"$workingPath/relation")
-
-
- relations.map(r => (r.getSource, r.getTarget))(Encoders.tuple(Encoders.STRING,Encoders.STRING))
}
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/model/PMAuthor.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/model/PMAuthor.java
index 4a2198542..e27c9adaa 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/model/PMAuthor.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/ebi/model/PMAuthor.java
@@ -25,7 +25,8 @@ public class PMAuthor implements Serializable {
}
public String getFullName() {
- return String.format("%s, %s", this.foreName, this.lastName);
+ return String
+ .format("%s, %s", this.foreName != null ? this.foreName : "", this.lastName != null ? this.lastName : "");
}
}
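
Editor's note: the change above makes getFullName() safe when either name part is missing; String.format would otherwise print the literal "null" for a null argument. A minimal sketch of the same null-safe formatting, written in Scala for quick experimentation (object and method names are illustrative, not part of the patch):

object FullNameSketch {
  // mirrors the null-safe behaviour of PMAuthor.getFullName(): missing parts become empty strings
  def fullName(foreName: String, lastName: String): String =
    s"${Option(foreName).getOrElse("")}, ${Option(lastName).getOrElse("")}"

  def main(args: Array[String]): Unit = {
    println(fullName("Marie", "Curie")) // "Marie, Curie"
    println(fullName(null, "Curie"))    // ", Curie" instead of "null, Curie"
  }
}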
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkSXGeneratePidSimlarity.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkSXGeneratePidSimlarity.java
index f3d7fd40f..7003b179d 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkSXGeneratePidSimlarity.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkSXGeneratePidSimlarity.java
@@ -10,7 +10,7 @@ import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
-import eu.dnetlib.dhp.schema.scholexplorer.DLIRelation;
+import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
@@ -55,18 +55,18 @@ public class SparkSXGeneratePidSimlarity {
.equalsIgnoreCase(StringUtils.substringAfter(t._2(), "::")))
.distinct();
- JavaRDD simRel = datasetSimRel
+ JavaRDD simRel = datasetSimRel
.union(publicationSimRel)
.map(
s -> {
- final DLIRelation r = new DLIRelation();
+ final Relation r = new Relation();
r.setSource(s._1());
r.setTarget(s._2());
r.setRelType("similar");
return r;
});
spark
- .createDataset(simRel.rdd(), Encoders.bean(DLIRelation.class))
+ .createDataset(simRel.rdd(), Encoders.bean(Relation.class))
.distinct()
.write()
.mode(SaveMode.Overwrite)
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkScholexplorerCreateRawGraphJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkScholexplorerCreateRawGraphJob.java
index 385ac4d1a..05fb826db 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkScholexplorerCreateRawGraphJob.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkScholexplorerCreateRawGraphJob.java
@@ -31,7 +31,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication;
-import eu.dnetlib.dhp.schema.scholexplorer.DLIRelation;
import eu.dnetlib.dhp.schema.scholexplorer.DLIUnknown;
import eu.dnetlib.dhp.utils.DHPUtils;
import net.minidev.json.JSONArray;
@@ -156,9 +155,9 @@ public class SparkScholexplorerCreateRawGraphJob {
SparkSXGeneratePidSimlarity
.generateDataFrame(
spark, sc, inputPath.replace("/relation", ""), targetPath.replace("/relation", ""));
- RDD rdd = union
+ RDD rdd = union
.mapToPair(
- (PairFunction) f -> {
+ (PairFunction) f -> {
final String source = getJPathString(SOURCEJSONPATH, f);
final String target = getJPathString(TARGETJSONPATH, f);
final String reltype = getJPathString(RELJSONPATH, f);
@@ -175,7 +174,7 @@ public class SparkScholexplorerCreateRawGraphJob {
source.toLowerCase(),
reltype.toLowerCase(),
target.toLowerCase())),
- mapper.readValue(f, DLIRelation.class));
+ mapper.readValue(f, Relation.class));
})
.reduceByKey(
(a, b) -> {
@@ -186,7 +185,7 @@ public class SparkScholexplorerCreateRawGraphJob {
.rdd();
spark
- .createDataset(rdd, Encoders.bean(DLIRelation.class))
+ .createDataset(rdd, Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Overwrite)
.save(targetPath);
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkSplitOafTODLIEntities.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkSplitOafTODLIEntities.scala
new file mode 100644
index 000000000..d0df28b2d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkSplitOafTODLIEntities.scala
@@ -0,0 +1,107 @@
+package eu.dnetlib.dhp.sx.graph
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation}
+import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIUnknown}
+import eu.dnetlib.dhp.sx.ebi.EBIAggregator
+import eu.dnetlib.dhp.sx.ebi.model.{PMArticle, PMAuthor, PMJournal}
+import org.apache.commons.io.IOUtils
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.LoggerFactory
+
+object SparkSplitOafTODLIEntities {
+
+
+ def getKeyRelation(rel: Relation): String = {
+ s"${rel.getSource}::${rel.getRelType}::${rel.getTarget}"
+ }
+
+ def main(args: Array[String]): Unit = {
+ val parser = new ArgumentApplicationParser(IOUtils.toString(SparkSplitOafTODLIEntities.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json")))
+ val logger = LoggerFactory.getLogger(SparkSplitOafTODLIEntities.getClass)
+ parser.parseArgument(args)
+
+ val workingPath: String = parser.get("workingPath")
+ logger.info(s"Working dir path = $workingPath")
+
+ implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
+ implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo[DLIPublication]
+ implicit val datEncoder: Encoder[DLIDataset] = Encoders.kryo[DLIDataset]
+ implicit val unkEncoder: Encoder[DLIUnknown] = Encoders.kryo[DLIUnknown]
+ implicit val relEncoder: Encoder[Relation] = Encoders.kryo[Relation]
+
+
+
+ val spark:SparkSession = SparkSession
+ .builder()
+ .appName(SparkSplitOafTODLIEntities.getClass.getSimpleName)
+ .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+ .master(parser.get("master"))
+ .getOrCreate()
+
+
+
+
+ val OAFDataset:Dataset[Oaf] = spark.read.load(s"$workingPath/input/OAFDataset").as[Oaf]
+
+ val ebi_dataset:Dataset[DLIDataset] = spark.read.load(s"$workingPath/ebi/baseline_dataset_ebi").as[DLIDataset]
+ val ebi_publication:Dataset[DLIPublication] = spark.read.load(s"$workingPath/ebi/baseline_publication_ebi").as[DLIPublication]
+ val ebi_relation:Dataset[Relation] = spark.read.load(s"$workingPath/ebi/baseline_relation_ebi").as[Relation]
+
+
+
+ OAFDataset
+ .filter(s => s != null && s.isInstanceOf[DLIPublication])
+ .map(s => s.asInstanceOf[DLIPublication])
+ .union(ebi_publication)
+ .map(d => (d.getId, d))(Encoders.tuple(Encoders.STRING, pubEncoder))
+ .groupByKey(_._1)(Encoders.STRING)
+ .agg(EBIAggregator.getDLIPublicationAggregator().toColumn)
+ .map(p => p._2)
+ .repartition(1000)
+ .write.mode(SaveMode.Overwrite).save(s"$workingPath/graph/publication")
+
+ OAFDataset
+ .filter(s => s != null && s.isInstanceOf[DLIDataset])
+ .map(s => s.asInstanceOf[DLIDataset])
+ .union(ebi_dataset)
+ .map(d => (d.getId, d))(Encoders.tuple(Encoders.STRING, datEncoder))
+ .groupByKey(_._1)(Encoders.STRING)
+ .agg(EBIAggregator.getDLIDatasetAggregator().toColumn)
+ .map(p => p._2)
+ .repartition(1000)
+ .write.mode(SaveMode.Overwrite).save(s"$workingPath/graph/dataset")
+
+
+ OAFDataset
+ .filter(s => s != null && s.isInstanceOf[DLIUnknown])
+ .map(s => s.asInstanceOf[DLIUnknown])
+ .map(d => (d.getId, d))(Encoders.tuple(Encoders.STRING, unkEncoder))
+ .groupByKey(_._1)(Encoders.STRING)
+ .agg(EBIAggregator.getDLIUnknownAggregator().toColumn)
+ .map(p => p._2)
+ .repartition(1000)
+ .write.mode(SaveMode.Overwrite).save(s"$workingPath/graph/unknown")
+
+
+ OAFDataset
+ .filter(s => s != null && s.isInstanceOf[Relation])
+ .map(s => s.asInstanceOf[Relation])
+ .union(ebi_relation)
+ .map(d => (getKeyRelation(d), d))(Encoders.tuple(Encoders.STRING, relEncoder))
+ .groupByKey(_._1)(Encoders.STRING)
+ .agg(EBIAggregator.getRelationAggregator().toColumn)
+ .map(p => p._2)
+ .repartition(1000)
+ .write.mode(SaveMode.Overwrite).save(s"$workingPath/graph/relation")
+
+
+
+
+
+
+ }
+
+}
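
Editor's note: SparkSplitOafTODLIEntities applies the same deduplication pattern four times: map every entity to a (key, entity) pair, groupByKey, and let an EBIAggregator merge the duplicates. The following self-contained sketch reproduces that pattern with a plain org.apache.spark.sql.expressions.Aggregator; the Entity case class, the merge policy and all names are illustrative assumptions, since EBIAggregator itself is not shown in this patch:

import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}

// Hypothetical record type standing in for DLIPublication, DLIDataset, ...
case class Entity(id: String, payload: String)

// Keeps a single record per key; this toy merge policy simply prefers the longer payload,
// which is roughly the role an aggregator plays when merging duplicate DLI entities.
class KeepRichest extends Aggregator[(String, Entity), Entity, Entity] {
  override def zero: Entity = Entity("", "")
  override def reduce(b: Entity, a: (String, Entity)): Entity = merge(b, a._2)
  override def merge(b1: Entity, b2: Entity): Entity =
    if (b1.payload.length >= b2.payload.length && b1.id.nonEmpty) b1 else b2
  override def finish(reduction: Entity): Entity = reduction
  override def bufferEncoder: Encoder[Entity] = Encoders.kryo[Entity]
  override def outputEncoder: Encoder[Entity] = Encoders.kryo[Entity]
}

object DedupSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("DedupSketch").master("local[*]").getOrCreate()
    implicit val entityEncoder: Encoder[Entity] = Encoders.kryo[Entity]

    val input: Dataset[Entity] = spark.createDataset(Seq(
      Entity("id1", "short"), Entity("id1", "a longer payload"), Entity("id2", "x")))

    // same shape as the job above: key, group, aggregate, keep the merged record
    val deduped = input
      .map(e => (e.id, e))(Encoders.tuple(Encoders.STRING, entityEncoder))
      .groupByKey(_._1)(Encoders.STRING)
      .agg(new KeepRichest().toColumn)
      .map(_._2)

    deduped.collect().foreach(println) // one merged Entity per id (order may vary)
    spark.stop()
  }
}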
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkXMLToOAFDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkXMLToOAFDataset.scala
new file mode 100644
index 000000000..c63ad4370
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkXMLToOAFDataset.scala
@@ -0,0 +1,73 @@
+package eu.dnetlib.dhp.sx.graph
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation}
+import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
+import eu.dnetlib.dhp.sx.graph.parser.{DatasetScholexplorerParser, PublicationScholexplorerParser}
+import eu.dnetlib.scholexplorer.relation.RelationMapper
+import org.apache.commons.io.IOUtils
+import org.apache.hadoop.io.{IntWritable, Text}
+import org.apache.spark.SparkConf
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.LoggerFactory
+
+import scala.collection.JavaConverters._
+
+
+/**
+ * This new version of the job reads a SequenceFile containing the XML records stored in the aggregator and generates a Dataset of heterogeneous
+ * OAF entities: Dataset, Relation, Publication and Unknown.
+ */
+
+object SparkXMLToOAFDataset {
+
+
+ def main(args: Array[String]): Unit = {
+ val logger = LoggerFactory.getLogger(SparkXMLToOAFDataset.getClass)
+ val conf = new SparkConf()
+ val parser = new ArgumentApplicationParser(IOUtils.toString(SparkXMLToOAFDataset.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/argumentparser/input_graph_scholix_parameters.json")))
+ parser.parseArgument(args)
+ val spark =
+ SparkSession
+ .builder()
+ .config(conf)
+ .appName(SparkXMLToOAFDataset.getClass.getSimpleName)
+ .master(parser.get("master")).getOrCreate()
+
+ val sc = spark.sparkContext
+
+ implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf]
+ implicit val datasetEncoder:Encoder[DLIDataset] = Encoders.kryo[DLIDataset]
+ implicit val publicationEncoder:Encoder[DLIPublication] = Encoders.kryo[DLIPublication]
+ implicit val relationEncoder:Encoder[Relation] = Encoders.kryo[Relation]
+
+ val relationMapper = RelationMapper.load
+
+ val inputPath: String = parser.get("sourcePath")
+ val entity: String = parser.get("entity")
+ val targetPath = parser.get("targetPath")
+
+ logger.info(s"Input path is $inputPath")
+ logger.info(s"Entity path is $entity")
+ logger.info(s"Target Path is $targetPath")
+
+ val scholixRdd:RDD[Oaf] = sc.sequenceFile(inputPath, classOf[IntWritable], classOf[Text])
+ .map(s => s._2.toString)
+ .flatMap(s => {
+ entity match {
+ case "publication" =>
+ val p = new PublicationScholexplorerParser
+ val l = p.parseObject(s, relationMapper)
+ if (l != null) l.asScala else List()
+ case "dataset" =>
+ val d = new DatasetScholexplorerParser
+ val l = d.parseObject(s, relationMapper)
+ if (l != null) l.asScala else List()
+ }
+ }).filter(s => s != null)
+ spark.createDataset(scholixRdd).write.mode(SaveMode.Append).save(targetPath)
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java
index 75f28c129..f56760c82 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/AbstractScholexplorerParser.java
@@ -14,7 +14,6 @@ import org.apache.commons.logging.LogFactory;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.scholexplorer.DLIRelation;
import eu.dnetlib.dhp.schema.scholexplorer.DLIUnknown;
import eu.dnetlib.dhp.schema.scholexplorer.ProvenaceInfo;
import eu.dnetlib.dhp.utils.DHPUtils;
@@ -175,8 +174,8 @@ public abstract class AbstractScholexplorerParser {
.stream()
.flatMap(
n -> {
- final List rels = new ArrayList<>();
- DLIRelation r = new DLIRelation();
+ final List rels = new ArrayList<>();
+ Relation r = new Relation();
r.setSource(parsedObject.getId());
final String relatedPid = n.getTextValue();
final String relatedPidType = n.getAttributes().get("relatedIdentifierType");
@@ -184,7 +183,6 @@ public abstract class AbstractScholexplorerParser {
String relationSemantic = n.getAttributes().get("relationType");
String inverseRelation;
final String targetId = generateId(relatedPid, relatedPidType, relatedType);
- r.setDateOfCollection(dateOfCollection);
if (relationMapper.containsKey(relationSemantic.toLowerCase())) {
RelInfo relInfo = relationMapper.get(relationSemantic.toLowerCase());
relationSemantic = relInfo.getOriginal();
@@ -199,14 +197,13 @@ public abstract class AbstractScholexplorerParser {
r.setCollectedfrom(parsedObject.getCollectedfrom());
r.setDataInfo(di);
rels.add(r);
- r = new DLIRelation();
+ r = new Relation();
r.setDataInfo(di);
r.setSource(targetId);
r.setTarget(parsedObject.getId());
r.setRelType(inverseRelation);
r.setRelClass("datacite");
r.setCollectedfrom(parsedObject.getCollectedfrom());
- r.setDateOfCollection(dateOfCollection);
rels.add(r);
if ("unknown".equalsIgnoreCase(relatedType))
result
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java
index 60371fa53..11d9905cc 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/parser/DatasetScholexplorerParser.java
@@ -317,6 +317,15 @@ public class DatasetScholexplorerParser extends AbstractScholexplorerParser {
.collect(Collectors.toList()));
}
+ // Workaround: avoid an empty collectedfrom by defaulting to Europe PMC when dlicollectedfrom is missing
+ if (parsedObject.getDlicollectedfrom() == null) {
+
+ final KeyValue cf = new KeyValue();
+ cf.setKey("dli_________::europe_pmc__");
+ cf.setValue("Europe PMC");
+ parsedObject.setCollectedfrom(Collections.singletonList(cf));
+ }
+
if (StringUtils.isNotBlank(resolvedURL)) {
Instance i = new Instance();
i.setCollectedfrom(parsedObject.getCollectedfrom().get(0));
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json
new file mode 100644
index 000000000..6e42bfa64
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json
@@ -0,0 +1,25 @@
+
+[
+
+ {
+ "paramName":"is",
+ "paramLongName":"isLookUpUrl",
+ "paramDescription": "URL of the isLookUp Service",
+ "paramRequired": true
+ },
+ {
+ "paramName":"nn",
+ "paramLongName":"nameNode",
+ "paramDescription": "the name node",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json
new file mode 100644
index 000000000..83967e282
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json
@@ -0,0 +1,24 @@
+[
+
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "hdp",
+ "paramLongName": "hdfsPath",
+ "paramDescription": "the path used to store the output archive",
+ "paramRequired": true
+ },
+ {
+ "paramName":"nn",
+ "paramLongName":"nameNode",
+ "paramDescription": "the name node",
+ "paramRequired": true
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json
new file mode 100644
index 000000000..b1f4c026a
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json
@@ -0,0 +1,36 @@
+[
+
+ {
+ "paramName":"cmp",
+ "paramLongName":"communityMapPath",
+ "paramDescription": "the path to the serialization of the community map",
+ "paramRequired": true
+ },
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName":"tn",
+ "paramLongName":"resultTableName",
+ "paramDescription": "the name of the result table we are currently working on",
+ "paramRequired": true
+ }
+]
+
+
+
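
Editor's note: parameter files like the one above follow the ArgumentApplicationParser convention used by the Spark jobs in this patch: each entry declares a short name, a long name, a description and whether the argument is mandatory, and the job resolves values by paramLongName (the workflow passes them as --sourcePath, --outputPath, and so on). A minimal sketch of how such a file is typically consumed; the resource path is the file above, while the object name is illustrative:

import eu.dnetlib.dhp.application.ArgumentApplicationParser
import org.apache.commons.io.IOUtils

object DumpParamsSketch {
  def main(args: Array[String]): Unit = {
    // load the JSON specification from the classpath, then parse the actual CLI arguments
    val parser = new ArgumentApplicationParser(
      IOUtils.toString(
        DumpParamsSketch.getClass
          .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json")))
    parser.parseArgument(args)

    // values are retrieved by paramLongName, e.g. --sourcePath ... --outputPath ... on the command line
    val sourcePath = parser.get("sourcePath")
    val outputPath = parser.get("outputPath")
    println(s"reading from $sourcePath, writing to $outputPath")
  }
}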
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/config-default.xml
new file mode 100644
index 000000000..e5ec3d0ae
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/config-default.xml
@@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>hiveDbName</name>
+        <value>openaire</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml
new file mode 100644
index 000000000..7321fd076
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml
@@ -0,0 +1,431 @@
+
+
+
+
+ sourcePath
+ the source path
+
+
+ isLookUpUrl
+ the isLookup service endpoint
+
+
+ outputPath
+ the output path
+
+
+ accessToken
+ the access token used for the deposition in Zenodo
+
+
+ connectionUrl
+ the connection url for Zenodo
+
+
+ metadata
+ the metadata associated to the deposition
+
+
+ newDeposition
+ true if it is a brand new deposition, false for a new version of an existing deposition
+
+
+ conceptRecordId
+ for a new version, the id of the concept record of the old deposition
+
+
+ hiveDbName
+ the target hive database name
+
+
+ hiveJdbcUrl
+ hive server jdbc url
+
+
+ hiveMetastoreUris
+ hive server metastore URIs
+
+
+ sparkDriverMemory
+ memory for driver process
+
+
+ sparkExecutorMemory
+ memory for individual executor
+
+
+ sparkExecutorCores
+ number of cores used by single executor
+
+
+ oozieActionShareLibForSpark2
+ oozie action sharelib for spark 2.*
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+ spark 2.* extra listeners classname
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+ spark 2.* sql query execution listeners classname
+
+
+ spark2YarnHistoryServerAddress
+ spark 2.* yarn history server address
+
+
+ spark2EventLogDir
+ spark 2.* event log dir location
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+ oozie.action.sharelib.for.spark
+ ${oozieActionShareLibForSpark2}
+
+
+
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap
+ --outputPath${workingDir}/communityMap
+ --nameNode${nameNode}
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table publication for community related products
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/publication
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/publication
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table dataset for community related products
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/dataset
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/dataset
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table ORP for community related products
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/otherresearchproduct
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/otherresearchproduct
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table software for community related products
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/software
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/software
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Prepare association result subset of project info
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}
+ --outputPath${workingDir}/preparedInfo
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Extend dumped publications with information about project
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${workingDir}/publication
+ --outputPath${workingDir}/ext/publication
+ --preparedInfoPath${workingDir}/preparedInfo
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Extend dumped dataset with information about project
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${workingDir}/dataset
+ --outputPath${workingDir}/ext/dataset
+ --preparedInfoPath${workingDir}/preparedInfo
+
+
+
+
+
+
+ yarn
+ cluster
+ Extend dumped ORP with information about project
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${workingDir}/otherresearchproduct
+ --outputPath${workingDir}/ext/orp
+ --preparedInfoPath${workingDir}/preparedInfo
+
+
+
+
+
+
+ yarn
+ cluster
+ Extend dumped software with information about project
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${workingDir}/software
+ --outputPath${workingDir}/ext/software
+ --preparedInfoPath${workingDir}/preparedInfo
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Split dumped result for community
+ eu.dnetlib.dhp.oa.graph.dump.community.SparkSplitForCommunity
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${workingDir}/ext
+ --outputPath${workingDir}/split
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.MakeTar
+ --hdfsPath${outputPath}
+ --nameNode${nameNode}
+ --sourcePath${workingDir}/split
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS
+ --hdfsPath${outputPath}
+ --nameNode${nameNode}
+ --accessToken${accessToken}
+ --connectionUrl${connectionUrl}
+ --metadata${metadata}
+ --communityMapPath${workingDir}/communityMap
+ --conceptRecordId${conceptRecordId}
+ --newDeposition${newDeposition}
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json
new file mode 100644
index 000000000..f2dc02ba9
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json
@@ -0,0 +1,29 @@
+[
+
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName": "pip",
+ "paramLongName": "preparedInfoPath",
+ "paramDescription": "the path of the association result projectlist",
+ "paramRequired": true
+ }
+]
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json
new file mode 100644
index 000000000..82714d973
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prep_parameters.json
@@ -0,0 +1,20 @@
+[
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json
new file mode 100644
index 000000000..cb092110e
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/result_schema.json
@@ -0,0 +1,542 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "definitions": {
+ "AccessRight": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
+ },
+ "label": {
+ "type": "string",
+ "description": "Label for the access mode"
+ },
+ "scheme": {
+ "type": "string",
+ "description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
+ }
+ }
+ },
+ "ControlledField": {
+ "type": "object",
+ "properties": {
+ "scheme": {
+ "type": "string",
+ "description": "The scheme for the resource"
+ },
+ "value": {
+ "type": "string",
+ "description": "the value in the scheme"
+ }
+ }
+ },
+ "KeyValue": {
+ "type": "object",
+ "properties": {
+ "key": {
+ "type": "string",
+ "description": "Description of key"
+ },
+ "value": {
+ "type": "string",
+ "description": "Description of value"
+ }
+ }
+ },
+ "Provenance": {
+ "type": "object",
+ "properties": {
+ "provenance": {
+ "type": "string",
+ "description": "The provenance of the information"
+ },
+ "trust": {
+ "type": "string",
+ "description": "The trust associated to the information"
+ }
+ }
+ }
+ },
+ "type": "object",
+ "properties": {
+ "author": {
+ "description": "List of authors of the research results",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "affiliation": {
+ "description": "Affiliations of the author",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "One of the affiliation of the author"
+ }
+ },
+ "fullname": {
+ "type": "string",
+ "description": "Fullname of the author"
+ },
+ "name": {
+ "type": "string",
+ "description": "First name of the author"
+ },
+ "pid": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "allOf": [
+ {"$ref": "#/definitions/ControlledField"},
+ {"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
+ ]
+ },
+ "provenance": {
+ "allOf": [
+ {"$ref": "#/definitions/Provenance"},
+ {"description": "The provenance of the author's pid"}
+ ]
+ }
+ },
+ "description": "Persistent identifier of the author (e.g. ORCID)"
+ },
+ "rank": {
+ "type": "integer",
+ "description": "Order in which the author appears in the authors list"
+ },
+ "surname": {
+ "type": "string",
+ "description": "Surname of the author"
+ }
+ },
+ "description": "One of the author of the research result"
+ }
+ },
+ "bestaccessright": {
+ "allOf": [
+ {"$ref": "#/definitions/AccessRight"},
+ {"description": "The openest access right associated to the manifestations of this research results"}
+ ]
+ },
+ "codeRepositoryUrl": {
+ "type": "string",
+ "description": "Only for results with type 'software': the URL to the repository with the source code"
+ },
+ "collectedfrom": {
+ "description": "Information about the sources from which the record has been collected",
+ "type": "array",
+ "items": {
+ "allOf": [
+ {"$ref": "#/definitions/KeyValue"},
+ {"description": "Key is the OpenAIRE identifier of the data source, value is its name"}
+ ]
+ }
+ },
+ "contactgroup": {
+ "description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "contactperson": {
+ "description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "container": {
+ "type": "object",
+ "properties": {
+ "conferencedate": {
+ "type": "string",
+ "description": "Date of the conference"
+ },
+ "conferenceplace": {
+ "type": "string",
+ "description": "Place of the conference"
+ },
+ "edition": {
+ "type": "string",
+ "description": "Edition of the journal or conference proceeding"
+ },
+ "ep": {
+ "type": "string",
+ "description": "End page"
+ },
+ "iss": {
+ "type": "string",
+ "description": "Journal issue"
+ },
+ "issnLinking": {
+ "type": "string",
+ "description": "Journal linking iisn"
+ },
+ "issnOnline": {
+ "type": "string",
+ "description": "Journal online issn"
+ },
+ "issnPrinted": {
+ "type": "string",
+ "description": "Journal printed issn"
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the journal or conference"
+ },
+ "sp": {
+ "type": "string",
+ "description": "Start page"
+ },
+ "vol": {
+ "type": "string",
+ "description": "Volume"
+ }
+ },
+ "description": "Container has information about the conference or journal where the result has been presented or published"
+ },
+ "context": {
+ "description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "Code identifying the RI/RC"
+ },
+ "label": {
+ "type": "string",
+ "description": "Label of the RI/RC"
+ },
+ "provenance": {
+ "description": "Why this result is associated to the RI/RC.",
+ "type": "array",
+ "items": {
+ "allOf": [
+ {"$ref": "#/definitions/Provenance"}
+
+ ]
+ }
+ }
+ }
+ }
+ },
+ "contributor": {
+ "description": "Contributors of this result",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "country": {
+ "description": "Country associated to this result",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "ISO 3166-1 alpha-2 country code"
+ },
+ "label": {
+ "type": "string",
+ "description": "English label of the country"
+ },
+ "provenance": {
+ "allOf": [
+ {"$ref": "#/definitions/Provenance"},
+ {"description": "Why this result is associated to the country."}
+ ]
+ }
+ }
+ }
+ },
+ "coverage": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "dateofcollection": {
+ "type": "string",
+ "description": "When OpenAIRE collected the record the last time"
+ },
+ "description": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "documentationUrl": {
+ "description": "Only for results with type 'software': URL to the software documentation",
+ "type": "array",
+ "items": {
+ "type": "string"
+
+ }
+ },
+ "embargoenddate": {
+ "type": "string",
+ "description": "Date when the embargo ends and this result turns Open Access"
+ },
+ "externalReference": {
+ "description": "Links to external resources like entries from thematic databases (e.g. Protein Data Bank)",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "provenance": {
+ "allOf": [
+ {"$ref": "#/definitions/Provenance"},
+ {"description": "Why this result is linked to the external resource"}
+ ]
+ },
+ "typology": {
+ "type": "string"
+ },
+ "value": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "format": {
+
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "geolocation": {
+ "description": "Geolocation information",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "box": {
+ "type": "string"
+ },
+ "place": {
+ "type": "string"
+ },
+ "point": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "id": {
+ "type": "string",
+ "description": "OpenAIRE identifier"
+ },
+ "instance": {
+ "description": "Manifestations (i.e. different versions) of the result. For example: the pre-print and the published versions are two manifestations of the same research result",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "accessright": {
+ "allOf": [
+ {"$ref": "#/definitions/AccessRight"},
+ {"description": "Access right of this instance"}
+ ]
+ },
+ "collectedfrom": {
+ "allOf": [
+ {"$ref": "#/definitions/KeyValue"},
+ {"description": "Information about the source from which the instance has been collected. Key is the OpenAIRE identifier of the data source, value is its name"}
+ ]
+ },
+ "hostedby": {
+ "allOf": [
+ {"$ref": "#/definitions/KeyValue"},
+ {"description": "Information about the source from which the instance can be viewed or downloaded. Key is the OpenAIRE identifier of the data source, value is its name"}
+ ]
+ },
+ "license": {
+ "type": "string",
+ "description": "License applied to the instance"
+ },
+ "publicationdate": {
+ "type": "string",
+ "description": "Publication date of the instance"
+ },
+ "refereed": {
+ "type": "string",
+ "description": "Was the instance subject to peer-review? Possible values are 'Unknown', 'nonPeerReviewed', 'peerReviewed' (see also https://api.openaire.eu/vocabularies/dnet:review_levels)"
+ },
+ "type": {
+ "type": "string",
+ "description": "Type of the instance. Possible values are listed at https://api.openaire.eu/vocabularies/dnet:publication_resource"
+ },
+ "url": {
+ "description":"Location where the instance is accessible",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
+ "language": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "alpha-3/ISO 639-2 code of the language"
+ },
+ "label": {
+ "type": "string",
+ "description": "English label"
+ }
+ }
+ },
+ "lastupdatetimestamp": {
+ "type": "integer",
+ "description": "Timestamp of last update of the record in OpenAIRE"
+ },
+ "maintitle": {
+ "type": "string",
+ "description": "Title"
+ },
+ "originalId": {
+ "description": "Identifiers of the record at the original sources",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "pid": {
+ "description": "Persistent identifiers of the result",
+ "type": "array",
+ "items": {
+ "allOf": [
+ {"$ref": "#/definitions/ControlledField"},
+ {"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result "}
+ ]
+ }
+ },
+ "programmingLanguage": {
+ "type": "string",
+ "description": "Only for results with type 'software': the programming language"
+ },
+ "projects": {
+ "description": "List of projects (i.e. grants) that (co-)funded the production ofn the research results",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "acronym": {
+ "type": "string",
+ "description": "Project acronym"
+ },
+ "code": {
+ "type": "string",
+ "description": "Grant code"
+ },
+ "funder": {
+ "type": "object",
+ "properties": {
+ "fundingStream": {
+ "type": "string",
+ "description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)"
+ },
+ "jurisdiction": {
+ "type": "string",
+ "description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
+ },
+ "name": {
+ "type": "string",
+ "description": "Name of the funder"
+ },
+ "shortName": {
+ "type": "string",
+ "description": "Short name or acronym of the funder"
+ }
+ },
+ "description": "Information about the funder funding the project"
+ },
+ "id": {
+ "type": "string",
+ "description": "OpenAIRE identifier of the project"
+ },
+ "provenance": {
+ "allOf": [
+ {"$ref": "#/definitions/Provenance"},
+ {"description": "Why this project is associated to the result"}
+ ]
+ },
+ "title": {
+ "type": "string",
+ "description": "Title of the project"
+ }
+ }
+ }
+ },
+ "publicationdate": {
+ "type": "string",
+ "description": "Date of publication"
+ },
+ "publisher": {
+ "type": "string",
+ "description": "Publisher"
+ },
+ "size": {
+ "type": "string",
+ "description": "Only for results with type 'dataset': the declared size of the dataset"
+ },
+ "source": {
+ "description": "See definition of Dublin Core field dc:source",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "subjects": {
+ "description": "Keywords associated to the result",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "provenance": {
+ "allOf": [
+ {"$ref": "#/definitions/Provenance"},
+ {"description": "Why this subject is associated to the result"}
+ ]
+ },
+ "subject": {
+ "allOf": [
+ {"$ref": "#/definitions/ControlledField"},
+ {"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary). "}
+ ]
+ }
+ }
+ }
+ },
+ "subtitle": {
+ "type": "string",
+ "description": "Sub-title of the result"
+ },
+ "tool": {
+ "description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "type": {
+ "type": "string",
+ "description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
+ },
+ "version": {
+ "type": "string",
+ "description": "Version of the result"
+ }
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json
new file mode 100644
index 000000000..29812188a
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json
@@ -0,0 +1,32 @@
+
+[
+
+ {
+ "paramName":"cmp",
+ "paramLongName":"communityMapPath",
+ "paramDescription": "the path to the serialization of the community map",
+ "paramRequired": false
+ },
+
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json
new file mode 100644
index 000000000..4c3ec06e1
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/upload_zenodo.json
@@ -0,0 +1,51 @@
+
+[
+ {
+ "paramName":"nd",
+ "paramLongName":"newDeposition",
+ "paramDescription": "if it is a new deposition (true) or a new versione (false)",
+ "paramRequired": true
+ },
+ {
+ "paramName":"cri",
+ "paramLongName":"conceptRecordId",
+ "paramDescription": "The id of the concept record for a new version",
+ "paramRequired": false
+ },
+ {
+ "paramName":"cmp",
+ "paramLongName":"communityMapPath",
+ "paramDescription": "the path to the serialization of the community map",
+ "paramRequired": false
+ },
+{
+"paramName":"hdfsp",
+"paramLongName":"hdfsPath",
+"paramDescription": "the path of the folder tofind files to send to Zenodo",
+"paramRequired": true
+},
+{
+"paramName": "nn",
+"paramLongName": "nameNode",
+"paramDescription": "the name node",
+"paramRequired": true
+},
+{
+"paramName": "at",
+"paramLongName": "accessToken",
+"paramDescription": "the access token for the deposition",
+"paramRequired": false
+},
+{
+"paramName":"cu",
+"paramLongName":"connectionUrl",
+"paramDescription": "the url to connect to deposit",
+"paramRequired": false
+},
+{
+"paramName":"m",
+"paramLongName":"metadata",
+"paramDescription": "metadata associated to the deposition",
+"paramRequired": false
+}
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_collect_and_save.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_collect_and_save.json
new file mode 100644
index 000000000..e1130c4f6
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_collect_and_save.json
@@ -0,0 +1,24 @@
+[
+
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json
new file mode 100644
index 000000000..87de13d63
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_entity_parameter.json
@@ -0,0 +1,24 @@
+[
+
+ {
+ "paramName":"is",
+ "paramLongName":"isLookUpUrl",
+ "paramDescription": "URL of the isLookUp Service",
+ "paramRequired": false
+ },
+ {
+ "paramName": "hdfs",
+ "paramLongName": "hdfsPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "nn",
+ "paramLongName": "nameNode",
+ "paramDescription": "the name node",
+ "paramRequired": true
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json
new file mode 100644
index 000000000..3a4632af9
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_organization_parameters.json
@@ -0,0 +1,30 @@
+[
+
+ {
+ "paramName":"ocm",
+ "paramLongName":"organizationCommunityMap",
+ "paramDescription": "the organization community map association",
+ "paramRequired": false
+ },
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json
new file mode 100644
index 000000000..bc3e0cd51
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_parameters.json
@@ -0,0 +1,35 @@
+[
+ {
+ "paramName":"cmp",
+ "paramLongName":"communityMapPath",
+ "paramDescription": "the path to the serialization of the community map",
+ "paramRequired": true
+ },
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName":"tn",
+ "paramLongName":"resultTableName",
+ "paramDescription": "the name of the result table we are currently working on",
+ "paramRequired": true
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json
new file mode 100644
index 000000000..2bfcac3bc
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/input_relationdump_parameters.json
@@ -0,0 +1,26 @@
+
+
+[
+
+ {
+ "paramName":"s",
+ "paramLongName":"sourcePath",
+ "paramDescription": "the path of the sequencial file to read",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store temporary output files",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/config-default.xml
new file mode 100644
index 000000000..e5ec3d0ae
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/config-default.xml
@@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>hiveDbName</name>
+        <value>openaire</value>
+    </property>
+    <property>
+        <name>oozie.launcher.mapreduce.user.classpath.first</name>
+        <value>true</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/workflow.xml
new file mode 100644
index 000000000..a1b984f9c
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/oozie_app/workflow.xml
@@ -0,0 +1,588 @@
+
+
+
+
+ sourcePath
+ the source path
+
+
+ isLookUpUrl
+ the isLookup service endpoint
+
+
+ outputPath
+ the output path
+
+
+ accessToken
+ the access token used for the deposition in Zenodo
+
+
+ connectionUrl
+ the connection url for Zenodo
+
+
+ metadata
+ the metadata associated to the deposition
+
+
+ newDeposition
+ true if it is a brand new deposition, false for a new version of an existing deposition
+
+
+ conceptRecordId
+ for a new version, the id of the concept record of the old deposition
+
+
+ organizationCommunityMap
+ the organization community map
+
+
+
+ hiveDbName
+ the target hive database name
+
+
+ hiveJdbcUrl
+ hive server jdbc url
+
+
+ hiveMetastoreUris
+ hive server metastore URIs
+
+
+ sparkDriverMemory
+ memory for driver process
+
+
+ sparkExecutorMemory
+ memory for individual executor
+
+
+ sparkExecutorCores
+ number of cores used by single executor
+
+
+ oozieActionShareLibForSpark2
+ oozie action sharelib for spark 2.*
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+ spark 2.* extra listeners classname
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+ spark 2.* sql query execution listeners classname
+
+
+ spark2YarnHistoryServerAddress
+ spark 2.* yarn history server address
+
+
+ spark2EventLogDir
+ spark 2.* event log dir location
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+ oozie.action.sharelib.for.spark
+ ${oozieActionShareLibForSpark2}
+
+
+
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap
+ --outputPath${workingDir}/communityMap
+ --nameNode${nameNode}
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table publication
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/publication
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/result/publication
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table dataset
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/dataset
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/result/dataset
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table ORP
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/otherresearchproduct
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/result/otherresearchproduct
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table software
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/software
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/result/software
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table organization
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/organization
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization
+ --outputPath${workingDir}/collect/organization
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table project
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/project
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Project
+ --outputPath${workingDir}/collect/project
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table datasource
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpEntitiesJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/datasource
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Datasource
+ --outputPath${workingDir}/collect/datasource
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table relation
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkDumpRelationJob
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/relation
+ --outputPath${workingDir}/relation/relation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextEntities
+ --hdfsPath${workingDir}/collect/context
+ --nameNode${nameNode}
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.graph.CreateContextRelation
+ --hdfsPath${workingDir}/relation/context
+ --nameNode${nameNode}
+ --isLookUpUrl${isLookUpUrl}
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table relation
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkOrganizationRelation
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/relation
+ --outputPath${workingDir}/relation/contextOrg
+ --organizationCommunityMap${organizationCommunityMap}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Extract Relations from publication
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/publication
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
+ --outputPath${workingDir}/relation/publication
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table dataset
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/dataset
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
+ --outputPath${workingDir}/relation/dataset
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table ORP
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/otherresearchproduct
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
+ --outputPath${workingDir}/relation/orp
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Dump table software
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkExtractRelationFromEntities
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${sourcePath}/software
+ --resultTableNameeu.dnetlib.dhp.schema.oaf.Software
+ --outputPath${workingDir}/relation/software
+ --communityMapPath${workingDir}/communityMap
+
+
+
+
+
+
+
+
+
+
+
+ yarn
+ cluster
+ Collect Results and Relations and put them in the right path
+ eu.dnetlib.dhp.oa.graph.dump.graph.SparkCollectAndSave
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --sourcePath${workingDir}
+ --outputPath${workingDir}/collect
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.MakeTar
+ --hdfsPath${outputPath}
+ --nameNode${nameNode}
+ --sourcePath${workingDir}/collect
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS
+ --hdfsPath${outputPath}
+ --nameNode${nameNode}
+ --accessToken${accessToken}
+ --connectionUrl${connectionUrl}
+ --metadata${metadata}
+ --communityMapPath${workingDir}/communityMap
+ --conceptRecordId${conceptRecordId}
+ --newDeposition${newDeposition}
+
+
+
+
+
+
+
+
\ No newline at end of file
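
The send_zenodo step at the end of this workflow wires the accessToken, connectionUrl, metadata, newDeposition and conceptRecordId parameters into SendToZenodoHDFS, which in turn drives the ZenodoAPIClient added to dhp-common in this change set. The snippet below is only a sketch of the branching those parameters imply: the two-argument constructor and the method names newDeposition, newVersion, uploadIS, sendMetadata and publish are assumptions made for illustration, not the verified API of the client.

    // Sketch only; connectionUrl, accessToken, newDeposition, conceptRecordId, metadata and
    // hdfsPath are the parsed workflow parameters, fileSystem is an org.apache.hadoop.fs.FileSystem
    // obtained from the nameNode. All ZenodoAPIClient method names below are assumed.
    ZenodoAPIClient zenodoApiClient = new ZenodoAPIClient(connectionUrl, accessToken);

    if (Boolean.parseBoolean(newDeposition)) {
        // brand new record on Zenodo
        zenodoApiClient.newDeposition();
    } else {
        // new version of the record identified by the concept record id
        zenodoApiClient.newVersion(conceptRecordId);
    }

    // every archive produced by the make_archive step is streamed into the deposition bucket
    for (FileStatus fileStatus : fileSystem.listStatus(new Path(hdfsPath))) {
        try (FSDataInputStream inputStream = fileSystem.open(fileStatus.getPath())) {
            zenodoApiClient.uploadIS(inputStream, fileStatus.getPath().getName(), fileStatus.getLen());
        }
    }

    zenodoApiClient.sendMetadata(metadata);
    zenodoApiClient.publish();
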
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/context_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/context_schema.json
new file mode 100644
index 000000000..ba6609a50
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/context_schema.json
@@ -0,0 +1,38 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "string",
+ "description": "Description of description"
+ },
+ "id": {
+ "type": "string",
+ "description": "Description of id"
+ },
+ "name": {
+ "type": "string",
+ "description": "Description of name"
+ },
+ "originalId": {
+ "type": "string",
+ "description": "Description of originalId"
+ },
+ "subject": {
+ "description": "Description of subject",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of subject"
+ }
+ },
+ "type": {
+ "type": "string",
+ "description": "Description of type"
+ },
+ "zenodo_community": {
+ "type": "string",
+ "description": "Description of zenodo_community"
+ }
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/datasource_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/datasource_schema.json
new file mode 100644
index 000000000..f492620ee
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/datasource_schema.json
@@ -0,0 +1,210 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "definitions": {
+ "ControlledField": {
+ "type": "object",
+ "properties": {
+ "scheme": {
+ "type": "string",
+ "description": "Description of scheme"
+ },
+ "value": {
+ "type": "string",
+ "description": "Description of value"
+ }
+ }
+ }
+ },
+ "type": "object",
+ "properties": {
+ "accessrights": {
+ "type": "string",
+ "description": "Description of accessrights"
+ },
+ "certificates": {
+ "type": "string",
+ "description": "Description of certificates"
+ },
+ "citationguidelineurl": {
+ "type": "string",
+ "description": "Description of citationguidelineurl"
+ },
+ "contenttypes": {
+ "description": "Description of contenttypes",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of contenttypes"
+ }
+ },
+ "databaseaccessrestriction": {
+ "type": "string",
+ "description": "Description of databaseaccessrestriction"
+ },
+ "datasourcetype": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/ControlledField"
+ },
+ {
+ "description": "Description of datasourcetype"
+ }
+ ]
+ },
+ "datauploadrestriction": {
+ "type": "string",
+ "description": "Description of datauploadrestriction"
+ },
+ "dateofvalidation": {
+ "type": "string",
+ "description": "Description of dateofvalidation"
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of description"
+ },
+ "englishname": {
+ "type": "string",
+ "description": "Description of englishname"
+ },
+ "id": {
+ "type": "string",
+ "description": "Description of id"
+ },
+ "journal": {
+ "type": "object",
+ "properties": {
+ "conferencedate": {
+ "type": "string",
+ "description": "Description of conferencedate"
+ },
+ "conferenceplace": {
+ "type": "string",
+ "description": "Description of conferenceplace"
+ },
+ "edition": {
+ "type": "string",
+ "description": "Description of edition"
+ },
+ "ep": {
+ "type": "string",
+ "description": "Description of ep"
+ },
+ "iss": {
+ "type": "string",
+ "description": "Description of iss"
+ },
+ "issnLinking": {
+ "type": "string",
+ "description": "Description of issnLinking"
+ },
+ "issnOnline": {
+ "type": "string",
+ "description": "Description of issnOnline"
+ },
+ "issnPrinted": {
+ "type": "string",
+ "description": "Description of issnPrinted"
+ },
+ "name": {
+ "type": "string",
+ "description": "Description of name"
+ },
+ "sp": {
+ "type": "string",
+ "description": "Description of sp"
+ },
+ "vol": {
+ "type": "string",
+ "description": "Description of vol"
+ }
+ },
+ "description": "Description of journal"
+ },
+ "languages": {
+ "description": "Description of languages",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of languages"
+ }
+ },
+ "logourl": {
+ "type": "string",
+ "description": "Description of logourl"
+ },
+ "missionstatementurl": {
+ "type": "string",
+ "description": "Description of missionstatementurl"
+ },
+ "officialname": {
+ "type": "string",
+ "description": "Description of officialname"
+ },
+ "openairecompatibility": {
+ "type": "string",
+ "description": "Description of openairecompatibility"
+ },
+ "originalId": {
+ "description": "Description of originalId",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of originalId"
+ }
+ },
+ "pid": {
+ "description": "Description of pid",
+ "type": "array",
+ "items": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/ControlledField"
+ },
+ {
+ "description": "Description of pid"
+ }
+ ]
+ }
+ },
+ "pidsystems": {
+ "type": "string",
+ "description": "Description of pidsystems"
+ },
+ "policies": {
+ "description": "Description of policies",
+ "type": "array",
+ "items": {
+ "description": "Description of policies"
+ }
+ },
+ "releaseenddate": {
+ "type": "string",
+ "description": "Description of releaseenddate"
+ },
+ "releasestartdate": {
+ "type": "string",
+ "description": "Description of releasestartdate"
+ },
+ "subjects": {
+ "description": "Description of subjects",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of subjects"
+ }
+ },
+ "uploadrights": {
+ "type": "string",
+ "description": "Description of uploadrights"
+ },
+ "versioning": {
+ "type": "boolean",
+ "description": "Description of versioning"
+ },
+ "websiteurl": {
+ "type": "string",
+ "description": "Description of websiteurl"
+ }
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/organization_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/organization_schema.json
new file mode 100644
index 000000000..3477c8370
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/organization_schema.json
@@ -0,0 +1,62 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "properties": {
+ "alternativenames": {
+ "description": "Description of alternativenames",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of alternativenames"
+ }
+ },
+ "country": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "Description of code"
+ },
+ "label": {
+ "type": "string",
+ "description": "Description of label"
+ }
+ },
+ "description": "Description of country"
+ },
+ "id": {
+ "type": "string",
+ "description": "Description of id"
+ },
+ "legalname": {
+ "type": "string",
+ "description": "Description of legalname"
+ },
+ "legalshortname": {
+ "type": "string",
+ "description": "Description of legalshortname"
+ },
+ "pid": {
+ "description": "Description of pid",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "scheme": {
+ "type": "string",
+ "description": "Description of scheme"
+ },
+ "value": {
+ "type": "string",
+ "description": "Description of value"
+ }
+ },
+ "description": "Description of pid"
+ }
+ },
+ "websiteurl": {
+ "type": "string",
+ "description": "Description of websiteurl"
+ }
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/project_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/project_schema.json
new file mode 100644
index 000000000..9aba19f17
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/project_schema.json
@@ -0,0 +1,134 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "type": "object",
+ "properties": {
+ "acronym": {
+ "type": "string",
+ "description": "Description of acronym"
+ },
+ "callidentifier": {
+ "type": "string",
+ "description": "Description of callidentifier"
+ },
+ "code": {
+ "type": "string",
+ "description": "Description of code"
+ },
+ "enddate": {
+ "type": "string",
+ "description": "Description of enddate"
+ },
+ "funding": {
+ "description": "Description of funding",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "funding_stream": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "string",
+ "description": "Description of description"
+ },
+ "id": {
+ "type": "string",
+ "description": "Description of id"
+ }
+ },
+ "description": "Description of funding_stream"
+ },
+ "jurisdiction": {
+ "type": "string",
+ "description": "Description of jurisdiction"
+ },
+ "name": {
+ "type": "string",
+ "description": "Description of name"
+ },
+ "shortName": {
+ "type": "string",
+ "description": "Description of shortName"
+ }
+ },
+ "description": "Description of funding"
+ }
+ },
+ "granted": {
+ "type": "object",
+ "properties": {
+ "currency": {
+ "type": "string",
+ "description": "Description of currency"
+ },
+ "fundedamount": {
+ "type": "number",
+ "description": "Description of fundedamount"
+ },
+ "totalcost": {
+ "type": "number",
+ "description": "Description of totalcost"
+ }
+ },
+ "description": "Description of granted"
+ },
+ "id": {
+ "type": "string",
+ "description": "Description of id"
+ },
+ "keywords": {
+ "type": "string",
+ "description": "Description of keywords"
+ },
+ "openaccessmandatefordataset": {
+ "type": "boolean",
+ "description": "Description of openaccessmandatefordataset"
+ },
+ "openaccessmandateforpublications": {
+ "type": "boolean",
+ "description": "Description of openaccessmandateforpublications"
+ },
+ "programme": {
+ "description": "Description of programme",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "Description of code"
+ },
+ "description": {
+ "type": "string",
+ "description": "Description of description"
+ }
+ },
+ "description": "Description of programme"
+ }
+ },
+ "startdate": {
+ "type": "string",
+ "description": "Description of startdate"
+ },
+ "subject": {
+ "description": "Description of subject",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of subject"
+ }
+ },
+ "summary": {
+ "type": "string",
+ "description": "Description of summary"
+ },
+ "title": {
+ "type": "string",
+ "description": "Description of title"
+ },
+ "websiteurl": {
+ "type": "string",
+ "description": "Description of websiteurl"
+ }
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/relation_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/relation_schema.json
new file mode 100644
index 000000000..95a80d5cf
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/relation_schema.json
@@ -0,0 +1,69 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "definitions": {
+ "Node": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Description of id"
+ },
+ "type": {
+ "type": "string",
+ "description": "Description of type"
+ }
+ }
+ }
+ },
+ "type": "object",
+ "properties": {
+ "provenance": {
+ "type": "object",
+ "properties": {
+ "provenance": {
+ "type": "string",
+ "description": "Description of provenance"
+ },
+ "trust": {
+ "type": "string",
+ "description": "Description of trust"
+ }
+ },
+ "description": "Description of provenance"
+ },
+ "reltype": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Description of name"
+ },
+ "type": {
+ "type": "string",
+ "description": "Description of type"
+ }
+ },
+ "description": "Description of reltype"
+ },
+ "source": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/Node"
+ },
+ {
+ "description": "Description of source"
+ }
+ ]
+ },
+ "target": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/Node"
+ },
+ {
+ "description": "Description of target"
+ }
+ ]
+ }
+ }
+}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/result_schema.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/result_schema.json
new file mode 100644
index 000000000..59708639b
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump_whole/schema/result_schema.json
@@ -0,0 +1,520 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "definitions": {
+ "AccessRight": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "Description of code"
+ },
+ "label": {
+ "type": "string",
+ "description": "Description of label"
+ },
+ "scheme": {
+ "type": "string",
+ "description": "Description of scheme"
+ }
+ }
+ },
+ "ControlledField": {
+ "type": "object",
+ "properties": {
+ "scheme": {
+ "type": "string",
+ "description": "Description of scheme"
+ },
+ "value": {
+ "type": "string",
+ "description": "Description of value"
+ }
+ }
+ },
+ "KeyValue": {
+ "type": "object",
+ "properties": {
+ "key": {
+ "type": "string",
+ "description": "Description of key"
+ },
+ "value": {
+ "type": "string",
+ "description": "Description of value"
+ }
+ }
+ },
+ "Provenance": {
+ "type": "object",
+ "properties": {
+ "provenance": {
+ "type": "string",
+ "description": "Description of provenance"
+ },
+ "trust": {
+ "type": "string",
+ "description": "Description of trust"
+ }
+ }
+ }
+ },
+ "type": "object",
+ "properties": {
+ "author": {
+ "description": "Description of author",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "affiliation": {
+ "description": "Description of affiliation",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of affiliation"
+ }
+ },
+ "fullname": {
+ "type": "string",
+ "description": "Description of fullname"
+ },
+ "name": {
+ "type": "string",
+ "description": "Description of name"
+ },
+ "pid": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/ControlledField"
+ },
+ {
+ "description": "Description of id"
+ }
+ ]
+ },
+ "provenance": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/Provenance"
+ },
+ {
+ "description": "Description of provenance"
+ }
+ ]
+ }
+ },
+ "description": "Description of pid"
+ },
+ "rank": {
+ "type": "integer",
+ "description": "Description of rank"
+ },
+ "surname": {
+ "type": "string",
+ "description": "Description of surname"
+ }
+ },
+ "description": "Description of author"
+ }
+ },
+ "bestaccessright": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/AccessRight"
+ },
+ {
+ "description": "Description of bestaccessright"
+ }
+ ]
+ },
+ "codeRepositoryUrl": {
+ "type": "string",
+ "description": "Description of codeRepositoryUrl"
+ },
+ "contactgroup": {
+ "description": "Description of contactgroup",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of contactgroup"
+ }
+ },
+ "contactperson": {
+ "description": "Description of contactperson",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of contactperson"
+ }
+ },
+ "container": {
+ "type": "object",
+ "properties": {
+ "conferencedate": {
+ "type": "string",
+ "description": "Description of conferencedate"
+ },
+ "conferenceplace": {
+ "type": "string",
+ "description": "Description of conferenceplace"
+ },
+ "edition": {
+ "type": "string",
+ "description": "Description of edition"
+ },
+ "ep": {
+ "type": "string",
+ "description": "Description of ep"
+ },
+ "iss": {
+ "type": "string",
+ "description": "Description of iss"
+ },
+ "issnLinking": {
+ "type": "string",
+ "description": "Description of issnLinking"
+ },
+ "issnOnline": {
+ "type": "string",
+ "description": "Description of issnOnline"
+ },
+ "issnPrinted": {
+ "type": "string",
+ "description": "Description of issnPrinted"
+ },
+ "name": {
+ "type": "string",
+ "description": "Description of name"
+ },
+ "sp": {
+ "type": "string",
+ "description": "Description of sp"
+ },
+ "vol": {
+ "type": "string",
+ "description": "Description of vol"
+ }
+ },
+ "description": "Description of container"
+ },
+ "contributor": {
+ "description": "Description of contributor",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of contributor"
+ }
+ },
+ "country": {
+ "description": "Description of country",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "Description of code"
+ },
+ "label": {
+ "type": "string",
+ "description": "Description of label"
+ },
+ "provenance": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/Provenance"
+ },
+ {
+ "description": "Description of provenance"
+ }
+ ]
+ }
+ },
+ "description": "Description of country"
+ }
+ },
+ "coverage": {
+ "description": "Description of coverage",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of coverage"
+ }
+ },
+ "dateofcollection": {
+ "type": "string",
+ "description": "Description of dateofcollection"
+ },
+ "description": {
+ "description": "Description of description",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of description"
+ }
+ },
+ "documentationUrl": {
+ "description": "Description of documentationUrl",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of documentationUrl"
+ }
+ },
+ "embargoenddate": {
+ "type": "string",
+ "description": "Description of embargoenddate"
+ },
+ "externalReference": {
+ "description": "Description of externalReference",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Description of name"
+ },
+ "provenance": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/Provenance"
+ },
+ {
+ "description": "Description of provenance"
+ }
+ ]
+ },
+ "typology": {
+ "type": "string",
+ "description": "Description of typology"
+ },
+ "value": {
+ "type": "string",
+ "description": "Description of value"
+ }
+ },
+ "description": "Description of externalReference"
+ }
+ },
+ "format": {
+ "description": "Description of format",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of format"
+ }
+ },
+ "geolocation": {
+ "description": "Description of geolocation",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "box": {
+ "type": "string",
+ "description": "Description of box"
+ },
+ "place": {
+ "type": "string",
+ "description": "Description of place"
+ },
+ "point": {
+ "type": "string",
+ "description": "Description of point"
+ }
+ },
+ "description": "Description of geolocation"
+ }
+ },
+ "id": {
+ "type": "string",
+ "description": "Description of id"
+ },
+ "instance": {
+ "description": "Description of instance",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "accessright": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/AccessRight"
+ },
+ {
+ "description": "Description of accessright"
+ }
+ ]
+ },
+ "collectedfrom": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/KeyValue"
+ },
+ {
+ "description": "Description of collectedfrom"
+ }
+ ]
+ },
+ "hostedby": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/KeyValue"
+ },
+ {
+ "description": "Description of hostedby"
+ }
+ ]
+ },
+ "license": {
+ "type": "string",
+ "description": "Description of license"
+ },
+ "publicationdate": {
+ "type": "string",
+ "description": "Description of publicationdate"
+ },
+ "refereed": {
+ "type": "string",
+ "description": "Description of refereed"
+ },
+ "type": {
+ "type": "string",
+ "description": "Description of type"
+ },
+ "url": {
+ "description": "Description of url",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of url"
+ }
+ }
+ },
+ "description": "Description of instance"
+ }
+ },
+ "language": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "Description of code"
+ },
+ "label": {
+ "type": "string",
+ "description": "Description of label"
+ }
+ },
+ "description": "Description of language"
+ },
+ "lastupdatetimestamp": {
+ "type": "integer",
+ "description": "Description of lastupdatetimestamp"
+ },
+ "maintitle": {
+ "type": "string",
+ "description": "Description of maintitle"
+ },
+ "originalId": {
+ "description": "Description of originalId",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of originalId"
+ }
+ },
+ "pid": {
+ "description": "Description of pid",
+ "type": "array",
+ "items": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/ControlledField"
+ },
+ {
+ "description": "Description of pid"
+ }
+ ]
+ }
+ },
+ "programmingLanguage": {
+ "type": "string",
+ "description": "Description of programmingLanguage"
+ },
+ "publicationdate": {
+ "type": "string",
+ "description": "Description of publicationdate"
+ },
+ "publisher": {
+ "type": "string",
+ "description": "Description of publisher"
+ },
+ "size": {
+ "type": "string",
+ "description": "Description of size"
+ },
+ "source": {
+ "description": "Description of source",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of source"
+ }
+ },
+ "subjects": {
+ "description": "Description of subjects",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "provenance": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/Provenance"
+ },
+ {
+ "description": "Description of provenance"
+ }
+ ]
+ },
+ "subject": {
+ "allOf": [
+ {
+ "$ref": "#/definitions/ControlledField"
+ },
+ {
+ "description": "Description of subject"
+ }
+ ]
+ }
+ },
+ "description": "Description of subjects"
+ }
+ },
+ "subtitle": {
+ "type": "string",
+ "description": "Description of subtitle"
+ },
+ "tool": {
+ "description": "Description of tool",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "description": "Description of tool"
+ }
+ },
+ "type": {
+ "type": "string",
+ "description": "Description of type"
+ },
+ "version": {
+ "type": "string",
+ "description": "Description of version"
+ }
+ }
+}
\ No newline at end of file
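
The schema files above (context, datasource, organization, project, relation, result) document the JSON shape of the dumped records as draft-07 JSON Schema; nothing in the workflow enforces them at runtime. To spot-check a dumped line against result_schema.json one could use any draft-07 validator; the sketch below assumes the com.networknt:json-schema-validator library, which is not a dependency introduced by this change.

    import java.util.Set;

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.networknt.schema.JsonSchema;
    import com.networknt.schema.JsonSchemaFactory;
    import com.networknt.schema.SpecVersion;
    import com.networknt.schema.ValidationMessage;

    public class ResultSchemaCheckSketch {

        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();

            // result_schema.json is shipped with the dump_whole/schema resources added above
            JsonSchema schema = JsonSchemaFactory
                .getInstance(SpecVersion.VersionFlag.V7)
                .getSchema(
                    ResultSchemaCheckSketch.class
                        .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump_whole/schema/result_schema.json"));

            // args[0]: one JSON line taken from the dumped result output
            JsonNode record = mapper.readTree(args[0]);

            Set<ValidationMessage> errors = schema.validate(record);
            errors.forEach(e -> System.out.println(e.getMessage()));
        }
    }
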
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json
index 6dfef32db..b23ac6546 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json
@@ -40,5 +40,11 @@
"paramLongName": "dbschema",
"paramDescription": "the database schema according to the D-Net infrastructure (beta or production)",
"paramRequired": true
+ },
+ {
+ "paramName": "nsbl",
+ "paramLongName": "nsPrefixBlacklist",
+ "paramDescription": "a blacklist of nsprefixes (comma separeted)",
+ "paramRequired": false
}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml
index d8b61b5ea..d8146d9a2 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml
@@ -43,7 +43,11 @@
isLookupUrl
the address of the lookUp service
-
+
+ nsPrefixBlacklist
+
+ a blacklist of nsprefixes (comma separated)
+
sparkDriverMemory
memory for driver process
@@ -131,6 +135,7 @@
--isLookupUrl${isLookupUrl}
--actionclaims
--dbschema${dbSchema}
+ --nsPrefixBlacklist${nsPrefixBlacklist}
@@ -182,6 +187,7 @@
--postgresPassword${postgresPassword}
--isLookupUrl${isLookupUrl}
--dbschema${dbSchema}
+ --nsPrefixBlacklist${nsPrefixBlacklist}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_claims/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_claims/oozie_app/workflow.xml
index 66eaeeb26..4c319d037 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_claims/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_claims/oozie_app/workflow.xml
@@ -38,7 +38,11 @@
isLookupUrl
the address of the lookUp service
-
+
+ nsPrefixBlacklist
+
+ a blacklist of nsprefixes (comma separated)
+
sparkDriverMemory
memory for driver process
@@ -113,6 +117,7 @@
--isLookupUrl${isLookupUrl}
--actionclaims
--dbschema${dbSchema}
+ --nsPrefixBlacklist${nsPrefixBlacklist}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml
index eea8d0a5a..29d4269ef 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml
@@ -25,7 +25,11 @@
isLookupUrl
the address of the lookUp service
-
+
+ nsPrefixBlacklist
+
+ a blacklist of nsprefixes (comma separated)
+
sparkDriverMemory
memory for driver process
@@ -99,6 +103,7 @@
--postgresPassword${postgresPassword}
--isLookupUrl${isLookupUrl}
--dbschema${dbSchema}
+ --nsPrefixBlacklist${nsPrefixBlacklist}
@@ -117,6 +122,7 @@
--isLookupUrl${isLookupUrl}
--dbschema${dbSchema}
--actionclaims
+ --nsPrefixBlacklist${nsPrefixBlacklist}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml
index 868418152..9b68cfb05 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml
@@ -28,6 +28,11 @@
isLookupUrl
the address of the lookUp service
+
+ nsPrefixBlacklist
+
+ a blacklist of nsprefixes (comma separated)
+
sparkDriverMemory
memory for driver process
@@ -67,6 +72,7 @@
-pguser${postgresUser}
-pgpasswd${postgresPassword}
-islookup${isLookupUrl}
+ --nsPrefixBlacklist${nsPrefixBlacklist}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/config-default.xml
index cac3cc2bb..17cd6c9a3 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/config-default.xml
@@ -1,7 +1,7 @@
-
+
-
- jobTracker
- yarn
-
-
- nameNode
- hdfs://hadoop-rm1.garr-pa1.d4science.org:8020
-
-
- hive_metastore_uris
- thrift://hadoop-edge3.garr-pa1.d4science.org:9083
-
-
- spark2YarnHistoryServerAddress
- http://hadoop-rm2.garr-pa1.d4science.org:19888
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/workflow.xml
index a5035c56c..7e6336242 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/ebi/oozie_app/workflow.xml
@@ -18,7 +18,7 @@
-
+
@@ -48,6 +48,28 @@
+
+
+ yarn-cluster
+ cluster
+ Create EBI DataSet
+
+ eu.dnetlib.dhp.sx.ebi.SparkCreateEBIDataFrame
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.sql.shuffle.partitions=1000
+ ${sparkExtraOPT}
+
+ --workingPath${workingPath}
+ --masteryarn
+
+
+
+
+
yarn-cluster
@@ -71,27 +93,7 @@
-
-
- yarn-cluster
- cluster
- Create EBI DataSet
- eu.dnetlib.dhp.sx.ebi.SparkCreateEBIDataFrame
- dhp-graph-mapper-${projectVersion}.jar
-
- --executor-memory=${sparkExecutorMemory}
- --executor-cores=${sparkExecutorCores}
- --driver-memory=${sparkDriverMemory}
- --conf spark.sql.shuffle.partitions=1000
- ${sparkExtraOPT}
-
- --workingPath${workingPath}
- --masteryarn
-
-
-
-
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json
index 1c02109d0..febcfc898 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/argumentparser/input_extract_entities_parameters.json
@@ -1,7 +1,4 @@
[
- {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
- {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true},
- {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the result data", "paramRequired": true},
- {"paramName":"td", "paramLongName":"targetDir", "paramDescription": "the name of the result data", "paramRequired": true},
- {"paramName":"e", "paramLongName":"entities", "paramDescription": "the entity type to be filtered", "paramRequired": true}
+ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
+ {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the work dir path", "paramRequired": true}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml
index d74d68663..c94394b1e 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml
@@ -101,12 +101,17 @@
yarn-cluster
cluster
Import ${entity} and related entities
- eu.dnetlib.dhp.sx.graph.SparkScholexplorerGraphImporter
+ eu.dnetlib.dhp.sx.graph.SparkXMLToOAFDataset
dhp-graph-mapper-${projectVersion}.jar
- --executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT}
- -mt yarn-cluster
+
+ --executor-memory ${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ ${sparkExtraOPT}
+
+ -mt yarn
--sourcePath${targetXMLPath}
- --targetPath${targetEntityPath}
+ --targetPath${workingPath}/input/OAFDataset
--entity${entity}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml
index 46e2dc3f9..fabe7510b 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml
@@ -1,16 +1,8 @@
- sourcePath
- the source path
-
-
- targetPath
- the source path
-
-
- targetDir
- the name of the path
+ workingPath
+ the working path
sparkDriverMemory
@@ -20,32 +12,13 @@
sparkExecutorMemory
memory for individual executor
-
- entities
- the entities to be extracted
-
-
+
Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -53,19 +26,18 @@
${nameNode}
yarn-cluster
cluster
- Extract ${entities}
- eu.dnetlib.dhp.sx.graph.SparkExtractEntitiesJob
+ Extract DLI Entities
+ eu.dnetlib.dhp.sx.graph.SparkSplitOafTODLIEntities
dhp-graph-mapper-${projectVersion}.jar
--executor-memory ${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
+ --conf spark.sql.shuffle.partitions=3840
${sparkExtraOPT}
-mt yarn-cluster
- --sourcePath${sourcePath}
- --targetPath${targetPath}
- --targetDir${targetDir}
- --entities${entities}
+ --workingPath${workingPath}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java
index 559a30b1e..e1ef847c3 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java
@@ -7,6 +7,8 @@ import static org.mockito.Mockito.lenient;
import java.io.IOException;
import java.util.List;
import java.util.Set;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
@@ -19,9 +21,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
-import eu.dnetlib.dhp.schema.oaf.Publication;
-import eu.dnetlib.dhp.schema.oaf.Qualifier;
-import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@@ -62,10 +62,12 @@ public class CleaningFunctionTest {
assertTrue(p_in instanceof Result);
assertTrue(p_in instanceof Publication);
- Publication p_out = OafCleaner.apply(p_in, mapping);
+ Publication p_out = OafCleaner.apply(CleanGraphSparkJob.fixVocabularyNames(p_in), mapping);
assertNotNull(p_out);
+ assertNotNull(p_out.getPublisher());
+ assertNull(p_out.getPublisher().getValue());
assertEquals("und", p_out.getLanguage().getClassid());
assertEquals("Undetermined", p_out.getLanguage().getClassname());
@@ -88,6 +90,16 @@ public class CleaningFunctionTest {
Publication p_defaults = CleanGraphSparkJob.fixDefaults(p_out);
assertEquals("CLOSED", p_defaults.getBestaccessright().getClassid());
+ assertNull(p_out.getPublisher());
+
+ getAuthorPids(p_defaults).forEach(pid -> {
+ System.out
+ .println(
+ String
+ .format(
+ "%s [%s - %s]", pid.getValue(), pid.getQualifier().getClassid(),
+ pid.getQualifier().getClassname()));
+ });
// TODO add more assertions to verify the cleaned values
System.out.println(MAPPER.writeValueAsString(p_out));
@@ -97,7 +109,7 @@ public class CleaningFunctionTest {
*/
}
- private Stream<Qualifier> getAuthorPidTypes(Publication pub) {
+ private Stream<Qualifier> getAuthorPidTypes(Result pub) {
return pub
.getAuthor()
.stream()
@@ -106,6 +118,14 @@ public class CleaningFunctionTest {
.map(s -> s.getQualifier());
}
+ private Stream<StructuredProperty> getAuthorPids(Result pub) {
+ return pub
+ .getAuthor()
+ .stream()
+ .map(a -> a.getPid())
+ .flatMap(p -> p.stream());
+ }
+
private List vocs() throws IOException {
return IOUtils
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
new file mode 100644
index 000000000..d261320d4
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/DumpJobTest.java
@@ -0,0 +1,405 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.schema.dump.oaf.Result;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+import eu.dnetlib.dhp.schema.oaf.Dataset;
+import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.Software;
+
+@Disabled
+public class DumpJobTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);
+
+ private static CommunityMap map = new CommunityMap();
+
+ static {
+ map.put("egi", "EGI Federation");
+ map.put("fet-fp7", "FET FP7");
+ map.put("fet-h2020", "FET H2020");
+ map.put("clarin", "CLARIN");
+ map.put("fam", "Fisheries and Aquaculture Management");
+ map.put("ni", "Neuroinformatics");
+ map.put("mes", "European Marine Scinece");
+ map.put("instruct", "Instruct-Eric");
+ map.put("rda", "Research Data Alliance");
+ map.put("elixir-gr", "ELIXIR GR");
+ map.put("aginfra", "Agricultural and Food Sciences");
+ map.put("dariah", "DARIAH EU");
+ map.put("risis", "RISI");
+ map.put("ee", "SDSN - Greece");
+ map.put("oa-pg", "EC Post-Grant Open Access Pilot");
+ map.put("beopen", "Transport Research");
+ map.put("euromarine", "Euromarine");
+ map.put("ifremer", "Ifremer");
+ map.put("dh-ch", "Digital Humanities and Cultural Heritage");
+ map.put("science-innovation-policy", "Science and Innovation Policy Studies");
+ map.put("covid-19", "COVID-19");
+ map.put("enrmaps", "Energy Research");
+ map.put("epos", "EPOS");
+
+ }
+
+ List<String> communityMap = Arrays
+ .asList(
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "");
+
+ private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
+ " return " +
+ " " +
+ "{$x//CONFIGURATION/context/@id}" +
+ "{$x//CONFIGURATION/context/@label}" +
+ "";
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(DumpJobTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(DumpJobTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void testMap() {
+ System.out.println(new Gson().toJson(map));
+ }
+
+ @Test
+ public void testDataset() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ DumpProducts dump = new DumpProducts();
+ dump
+ .run(
+ // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
+ false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
+ CommunityResult.class, false);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(90, verificationDataset.count());
+
+ Assertions
+ .assertTrue(
+ verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset
+ .filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'")
+ .count());
+
+ Assertions
+ .assertTrue(
+ verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset
+ .filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'")
+ .count());
+
+ Assertions
+ .assertTrue(
+ verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset
+ .filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'")
+ .count());
+
+ Assertions
+ .assertTrue(
+ verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset
+ .filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'")
+ .count());
+
+ Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
+
+ Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
+
+//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
+
+ }
+
+ @Test
+ public void testDataset2All() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ DumpProducts dump = new DumpProducts();
+ dump
+ .run(
+ // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
+ false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
+ Result.class, true);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.Result> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.Result.class));
+
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.Result> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class));
+
+ Assertions.assertEquals(5, verificationDataset.count());
+
+ verificationDataset.show(false);
+ }
+
+ @Test
+ public void testDataset2Communities() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ DumpProducts dump = new DumpProducts();
+ dump
+ .run(
+ // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
+ false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
+ CommunityResult.class, false);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(0, verificationDataset.count());
+
+ verificationDataset.show(false);
+ }
+
+ @Test
+ public void testPublication() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ DumpProducts dump = new DumpProducts();
+ dump
+ .run(
+ // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
+ false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
+ CommunityResult.class, false);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(74, verificationDataset.count());
+ verificationDataset.show(false);
+
+ Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count());
+
+//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
+
+ }
+
+ @Test
+ public void testSoftware() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ DumpProducts dump = new DumpProducts();
+ dump
+ .run(
+ // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
+ false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
+ CommunityResult.class, false);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(6, verificationDataset.count());
+
+ Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
+ verificationDataset.show(false);
+
+//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
+
+ }
+
+ @Test
+ public void testORP() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ DumpProducts dump = new DumpProducts();
+ dump
+ .run(
+ // false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
+ false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
+ CommunityResult.class, false);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(3, verificationDataset.count());
+
+ Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
+ verificationDataset.show(false);
+
+//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
+
+ }
+
+ @Test
+ public void testRecord() {
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ DumpProducts dump = new DumpProducts();
+ dump
+ .run(
+ // false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
+ false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
+ CommunityResult.class, false);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(2, verificationDataset.count());
+ verificationDataset.show(false);
+
+ Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count());
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java
new file mode 100644
index 000000000..803ae0416
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java
@@ -0,0 +1,28 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.github.victools.jsonschema.generator.*;
+
+import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
+
+@Disabled
+public class GenerateJsonSchema {
+
+ @Test
+ public void generateSchema() {
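+ // Derive a draft-07 JSON Schema for the dump Relation model via the victools generator; field descriptions are synthesized from the declared field names.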
+ SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7,
+ OptionPreset.PLAIN_JSON)
+ .with(Option.SCHEMA_VERSION_INDICATOR)
+ .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
+ configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
+ SchemaGeneratorConfig config = configBuilder.build();
+ SchemaGenerator generator = new SchemaGenerator(config);
+ JsonNode jsonSchema = generator.generateSchema(Relation.class);
+
+ System.out.println(jsonSchema.toString());
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java
new file mode 100644
index 000000000..0de4c8338
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java
@@ -0,0 +1,60 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.IOException;
+import java.nio.file.Files;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.*;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+@Disabled
+public class MakeTarTest {
+ private static String workingDir;
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(MakeTarTest.class.getSimpleName())
+ .toString();
+ }
+
+ @Test
+ public void testTar() throws IOException {
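+ // Stage a few community dump parts on the local file system, then archive each community folder into /tmp/out via MakeTar.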
+ LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
+ .getPath()),
+ new Path(workingDir + "/zenodo/ni/part-00000"));
+
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/egi")
+ .getPath()),
+ new Path(workingDir + "/zenodo/ni/part-00001"));
+
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
+ .getPath()),
+ new Path(workingDir + "/zenodo/dh-ch/part-00000"));
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/science-innovation-policy")
+ .getPath()),
+ new Path(workingDir + "/zenodo/ni/part-00002"));
+
+ String inputPath = workingDir + "/zenodo/";
+
+ MakeTar.makeTArArchive(fs, inputPath, "/tmp/out");
+
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java
new file mode 100644
index 000000000..0e8908418
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java
@@ -0,0 +1,239 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
+import eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject;
+
+public class PrepareResultProjectJobTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory
+ .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class);
+
+ private static HashMap<String, String> map = new HashMap<>();
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void testNoMatch() throws Exception {
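+ // Input without result-project relations: the prepared info is expected to be empty.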
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match")
+ .getPath();
+
+ SparkPrepareResultProject.main(new String[] {
+ "-isSparkSessionManaged", Boolean.FALSE.toString(),
+ "-outputPath", workingDir.toString() + "/preparedInfo",
+ "-sourcePath", sourcePath
+ });
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<ResultProject> tmp = sc
+ .textFile(workingDir.toString() + "/preparedInfo")
+ .map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));
+
+ org.apache.spark.sql.Dataset<ResultProject> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));
+
+ Assertions.assertEquals(0, verificationDataset.count());
+
+ }
+
+ @Test
+ public void testMatchOne() throws Exception {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one")
+ .getPath();
+
+ SparkPrepareResultProject.main(new String[] {
+ "-isSparkSessionManaged", Boolean.FALSE.toString(),
+ "-outputPath", workingDir.toString() + "/preparedInfo",
+ "-sourcePath", sourcePath
+ });
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<ResultProject> tmp = sc
+ .textFile(workingDir.toString() + "/preparedInfo")
+ .map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));
+
+ org.apache.spark.sql.Dataset<ResultProject> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));
+
+ Assertions.assertTrue(verificationDataset.count() == 1);
+
+ Assertions
+ .assertEquals(
+ 1,
+ verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
+
+ verificationDataset.createOrReplaceTempView("table");
+
+ Dataset<Row> check = spark
+ .sql(
+ "Select projList.provenance.provenance " +
+ "from table " +
+ "lateral view explode (projectsList) pl as projList");
+
+ Assertions.assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());
+
+ verificationDataset.show(false);
+
+ }
+
+ @Test
+ public void testMatch() throws Exception {
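+ // Two results linked to projects through three relations in total: verify the exploded projectsList per result, per project and per provenance.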
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match")
+ .getPath();
+
+ SparkPrepareResultProject.main(new String[] {
+ "-isSparkSessionManaged", Boolean.FALSE.toString(),
+ "-outputPath", workingDir.toString() + "/preparedInfo",
+ "-sourcePath", sourcePath
+ });
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<ResultProject> tmp = sc
+ .textFile(workingDir.toString() + "/preparedInfo")
+ .map(item -> OBJECT_MAPPER.readValue(item, ResultProject.class));
+
+ org.apache.spark.sql.Dataset<ResultProject> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class));
+
+ Assertions.assertTrue(verificationDataset.count() == 2);
+
+ Assertions
+ .assertEquals(
+ 1,
+ verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
+ Assertions
+ .assertEquals(
+ 1,
+ verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
+
+ verificationDataset.createOrReplaceTempView("dataset");
+
+ String query = "select resultId, MyT.id project , MyT.title title, MyT.acronym acronym , MyT.provenance.provenance provenance "
+ + "from dataset "
+ + "lateral view explode(projectsList) p as MyT ";
+
+ org.apache.spark.sql.Dataset<Row> resultExplodedProvenance = spark.sql(query);
+ Assertions.assertEquals(3, resultExplodedProvenance.count());
+ Assertions
+ .assertEquals(
+ 2,
+ resultExplodedProvenance
+ .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 2,
+ resultExplodedProvenance
+ .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter(
+ "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter(
+ "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter(
+ "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count());
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java
new file mode 100644
index 000000000..c6666342a
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java
@@ -0,0 +1,116 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import static org.mockito.Mockito.lenient;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.dom4j.DocumentException;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+@ExtendWith(MockitoExtension.class)
+public class QueryInformationSystemTest {
+
+ private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
+ " and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
+ +
+ " return " +
+ " " +
+ "{$x//CONFIGURATION/context/@id}" +
+ "{$x//CONFIGURATION/context/@label}" +
+ "";
+
+ List<String> communityMap = Arrays
+ .asList(
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "");
+
+ @Mock
+ private ISLookUpService isLookUpService;
+
+ private QueryInformationSystem queryInformationSystem;
+
+ private Map<String, String> map;
+
+ @BeforeEach
+ public void setUp() throws ISLookUpException, DocumentException {
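+ // Stub the IS lookup so the community-map XQuery returns the 23 profiles listed above.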
+ lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap);
+ queryInformationSystem = new QueryInformationSystem();
+ queryInformationSystem.setIsLookUp(isLookUpService);
+ map = queryInformationSystem.getCommunityMap();
+ }
+
+ @Test
+ public void testSize() throws ISLookUpException {
+
+ Assertions.assertEquals(23, map.size());
+ }
+
+ @Test
+ public void testContent() {
+ Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation"));
+
+ Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7"));
+ Assertions.assertTrue(map.containsKey("fet-h2020") && map.get("fet-h2020").equals("FET H2020"));
+ Assertions.assertTrue(map.containsKey("clarin") && map.get("clarin").equals("CLARIN"));
+ Assertions.assertTrue(map.containsKey("rda") && map.get("rda").equals("Research Data Alliance"));
+ Assertions.assertTrue(map.containsKey("ee") && map.get("ee").equals("SDSN - Greece"));
+ Assertions
+ .assertTrue(
+ map.containsKey("dh-ch") && map.get("dh-ch").equals("Digital Humanities and Cultural Heritage"));
+ Assertions.assertTrue(map.containsKey("fam") && map.get("fam").equals("Fisheries and Aquaculture Management"));
+ Assertions.assertTrue(map.containsKey("ni") && map.get("ni").equals("Neuroinformatics"));
+ Assertions.assertTrue(map.containsKey("mes") && map.get("mes").equals("European Marine Science"));
+ Assertions.assertTrue(map.containsKey("instruct") && map.get("instruct").equals("Instruct-ERIC"));
+ Assertions.assertTrue(map.containsKey("elixir-gr") && map.get("elixir-gr").equals("ELIXIR GR"));
+ Assertions
+ .assertTrue(map.containsKey("aginfra") && map.get("aginfra").equals("Agricultural and Food Sciences"));
+ Assertions.assertTrue(map.containsKey("dariah") && map.get("dariah").equals("DARIAH EU"));
+ Assertions.assertTrue(map.containsKey("risis") && map.get("risis").equals("RISIS"));
+ Assertions.assertTrue(map.containsKey("epos") && map.get("epos").equals("EPOS"));
+ Assertions.assertTrue(map.containsKey("beopen") && map.get("beopen").equals("Transport Research"));
+ Assertions.assertTrue(map.containsKey("euromarine") && map.get("euromarine").equals("EuroMarine"));
+ Assertions.assertTrue(map.containsKey("ifremer") && map.get("ifremer").equals("Ifremer"));
+ Assertions.assertTrue(map.containsKey("oa-pg") && map.get("oa-pg").equals("EC Post-Grant Open Access Pilot"));
+ Assertions
+ .assertTrue(
+ map.containsKey("science-innovation-policy")
+ && map.get("science-innovation-policy").equals("Science and Innovation Policy Studies"));
+ Assertions.assertTrue(map.containsKey("covid-19") && map.get("covid-19").equals("COVID-19"));
+ Assertions.assertTrue(map.containsKey("enermaps") && map.get("enermaps").equals("Energy Research"));
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java
new file mode 100644
index 000000000..42ad5634a
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java
@@ -0,0 +1,143 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+
+public class SplitForCommunityTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory.getLogger(SplitForCommunityTest.class);
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files.createTempDirectory(SplitForCommunityTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(SplitForCommunityTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(SplitForCommunityTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void test1() {
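+ // Split the community dump into one folder per community and verify the record counts for dh-ch, egi, ni and science-innovation-policy.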
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
+ CommunitySplit split = new CommunitySplit();
+
+ split.run(false, sourcePath, workingDir.toString() + "/split", communityMapPath);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/split/dh-ch")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(19, verificationDataset.count());
+
+ Assertions
+ .assertEquals(
+ 1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
+
+ tmp = sc
+ .textFile(workingDir.toString() + "/split/egi")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(1, verificationDataset.count());
+
+ Assertions
+ .assertEquals(
+ 1, verificationDataset.filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
+
+ tmp = sc
+ .textFile(workingDir.toString() + "/split/ni")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(5, verificationDataset.count());
+
+ Assertions
+ .assertEquals(
+ 1, verificationDataset.filter("id = '50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795'").count());
+
+ tmp = sc
+ .textFile(workingDir.toString() + "/split/science-innovation-policy")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ Assertions.assertEquals(4, verificationDataset.count());
+
+ Assertions
+ .assertEquals(
+ 1, verificationDataset.filter("id = '50|dedup_wf_001::0347b1cd516fc59e41ba92e0d74e4e9f'").count());
+ Assertions
+ .assertEquals(
+ 1, verificationDataset.filter("id = '50|dedup_wf_001::1432beb6171baa5da8a85a7f99545d69'").count());
+ Assertions
+ .assertEquals(
+ 1, verificationDataset.filter("id = '50|dedup_wf_001::1c8bd19e633976e314b88ce5c3f92d69'").count());
+ Assertions
+ .assertEquals(
+ 1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
+
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java
new file mode 100644
index 000000000..bd191c847
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java
@@ -0,0 +1,138 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo;
+import eu.dnetlib.dhp.schema.dump.oaf.Result;
+import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
+
+public class UpdateProjectInfoTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.oa.graph.dump.UpdateProjectInfoTest.class);
+
+ private static HashMap<String, String> map = new HashMap<>();
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(eu.dnetlib.dhp.oa.graph.dump.UpdateProjectInfoTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(eu.dnetlib.dhp.oa.graph.dump.UpdateProjectInfoTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(eu.dnetlib.dhp.oa.graph.dump.UpdateProjectInfoTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void test1() throws Exception {
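+ // Join the dumped software records with the prepared result/project info and verify the exploded project list (codes and funder details).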
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo")
+ .getPath();
+
+ SparkUpdateProjectInfo.main(new String[] {
+ "-isSparkSessionManaged", Boolean.FALSE.toString(),
+ "-preparedInfoPath", sourcePath + "/preparedInfo",
+ "-outputPath", workingDir.toString() + "/result",
+ "-sourcePath", sourcePath + "/software.json"
+ });
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<CommunityResult> tmp = sc
+ .textFile(workingDir.toString() + "/result")
+ .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
+
+ org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
+
+ verificationDataset.show(false);
+
+ Assertions.assertEquals(6, verificationDataset.count());
+ verificationDataset.createOrReplaceTempView("dataset");
+
+ String query = "select id, MyT.code code, MyT.title title, MyT.funder.name funderName, MyT.funder.shortName funderShortName, "
+ +
+ "MyT.funder.jurisdiction funderJurisdiction, MyT.funder.fundingStream fundingStream "
+ + "from dataset " +
+ "lateral view explode(projects) p as MyT ";
+
+ org.apache.spark.sql.Dataset<Row> resultExplodedProvenance = spark.sql(query);
+
+ Assertions.assertEquals(3, resultExplodedProvenance.count());
+ resultExplodedProvenance.show(false);
+
+ Assertions
+ .assertEquals(
+ 2,
+ resultExplodedProvenance.filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' and code = '123455'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb' and code = '119027'")
+ .count());
+
+ Assertions
+ .assertEquals(
+ 1,
+ resultExplodedProvenance
+ .filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80' and code = '123455'")
+ .count());
+
+ resultExplodedProvenance.show(false);
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java
new file mode 100644
index 000000000..05dc423cb
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java
@@ -0,0 +1,153 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.*;
+import java.nio.file.Files;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.*;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
+import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+
+@Disabled
+public class ZenodoUploadTest {
+
+ private static String workingDir;
+
+ private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
+ private final String ACCESS_TOKEN = "";
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(ZenodoUploadTest.class.getSimpleName())
+ .toString();
+ }
+
+ @Test
+ public void testNewDeposition() throws IOException {
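+ // End-to-end run against the Zenodo sandbox: create a new deposition, upload one file per community, attach the metadata and publish. ACCESS_TOKEN must be filled in before enabling the test.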
+ CommunityMap communityMap = new CommunityMap();
+ communityMap.put("ni", "Neuroinformatics");
+ communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");
+ LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
+ .getPath()),
+ new Path(workingDir + "/zenodo/ni/ni"));
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
+ .getPath()),
+ new Path(workingDir + "/zenodo/dh-ch/dh-ch"));
+
+ ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+ ACCESS_TOKEN);
+ client.newDeposition();
+
+ // the second boolean parameter here sets the recursion to true
+ RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
+ .listFiles(
+ new Path(workingDir + "/zenodo"), true);
+ while (fileStatusListIterator.hasNext()) {
+ LocatedFileStatus fileStatus = fileStatusListIterator.next();
+
+ String p_string = fileStatus.getPath().toString();
+
+ int index = p_string.lastIndexOf("/");
+ String community = p_string.substring(0, index);
+ community = community.substring(community.lastIndexOf("/") + 1);
+ String community_name = communityMap.get(community).replace(" ", "_");
+ // fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
+ System.out.println(community);
+
+ // File f = new File("/tmp/" + community_name);
+ FSDataInputStream inputStream = fs.open(fileStatus.getPath());
+ System.out.println(client.uploadIS(inputStream, community_name, fileStatus.getLen()));
+
+ }
+
+ String metadata = "{\"metadata\":{\"access_right\":\"open\",\"communities\":[{\"identifier\":\"openaire-research-graph\"}],\"creators\":[{\"affiliation\":\"CNR - ISTI\",\"name\":\"Manghi, Paolo\",\"orcid\":\"0000-0001-7291-3210\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Atzori, Claudio\",\"orcid\":\"0000-0001-9613-6639\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Bardi, Alessia\",\"orcid\":\"0000-0002-1112-1292\"},{\"affiliation\":\"ISTI - CNR\",\"name\":\"Baglioni, Miriam\",\"orcid\":\"0000-0002-2273-9004\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Shirrwagen, Jochen\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Dimitropoulos, Harry\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"La Bruzzo, Sandro\",\"orcid\":\"0000-0003-2855-1245\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Foufoulas, Ioannis\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Löhden, Aenne\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Bäcker, Amelie\",\"orcid\":\"0000-0001-6015-2063\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Mannocci, Andrea\",\"orcid\":\"0000-0002-5193-7851\"},{\"affiliation\":\"University of Warsaw\",\"name\":\"Horst, Marek\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Czerniak, Andreas\",\"orcid\":\"0000-0003-3883-4169\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kiatropoulou, Katerina\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Kokogiannaki, Argiro\",\"orcid\":\"0000-0002-3880-0244\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"De Bonis, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Artini, Michele\"},{\"affiliation\":\"CNR - ISTI\",\"name\":\"Ottonello, Enrico\"},{\"affiliation\":\"Athena Research and Innovation Centre\",\"name\":\"Lempesis, Antonis\"},{\"affiliation\":\"CERN\",\"name\":\"Ioannidis, Alexandros\"},{\"affiliation\":\"University of Bielefeld\",\"name\":\"Summan, Friedrich\"}],\"description\":\"\\u003cp\\u003eThis dataset contains dumps of the OpenAIRE Research Graph containing metadata records relevant for the research communities and initiatives collaborating with OpenAIRE\\u003c/p\\u003e. \\u003cp\\u003eEach dataset is a zip containing a file with one json per line. Each json is compliant to the schema available at XXXX\\u003c/p\\u003e Note that the file that is offered is not a typical json file: each line contains a separate, self-contained json object. For more information please see http://jsonlines.org\",\"grants\":[{\"id\":\"777541\"},{\"id\":\"824091\"},{\"id\":\"824323\"}],\"keywords\":[\"Open Science\",\"Scholarly Communication\",\"Information Science\"],\"language\":\"eng\",\"license\":\"CC-BY-4.0\",\"title\":\"OpenAIRE Research Graph: Dumps for research communities and initiatives.\",\"upload_type\":\"dataset\",\"version\":\"1.0\"}}";
+
+ System.out.println(client.sendMretadata(metadata));
+
+ System.out.println(client.publish());
+
+ }
+
+ @Test
+ public void testNewVersion() throws IOException, MissingConceptDoiException {
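+ // Create a new version of the existing sandbox deposition 656628, re-upload the community files and publish.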
+
+ ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
+ ACCESS_TOKEN);
+
+ client.newVersion("656628");
+
+ CommunityMap communityMap = new CommunityMap();
+ communityMap.put("ni", "Neuroinformatics");
+ communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");
+ LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
+ .getPath()),
+ new Path(workingDir + "/zenodo/ni/ni"));
+ fs
+ .copyFromLocalFile(
+ false, new Path(getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
+ .getPath()),
+ new Path(workingDir + "/zenodo/dh-ch/dh-ch"));
+
+ RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
+ .listFiles(
+ new Path(workingDir + "/zenodo"), true);
+ while (fileStatusListIterator.hasNext()) {
+ LocatedFileStatus fileStatus = fileStatusListIterator.next();
+
+ String p_string = fileStatus.getPath().toString();
+
+ int index = p_string.lastIndexOf("/");
+ String community = p_string.substring(0, index);
+ community = community.substring(community.lastIndexOf("/") + 1);
+ String community_name = communityMap.get(community).replace(" ", "_");
+ // fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
+ System.out.println(community);
+
+ // File f = new File("/tmp/" + community_name);
+ FSDataInputStream inputStream = fs.open(fileStatus.getPath());
+ System.out.println(client.uploadIS(inputStream, community_name, fileStatus.getLen()));
+
+ }
+
+ System.out.println(client.publish());
+
+ }
+
+ @Test
+ public void readCommunityMap() throws IOException {
+ LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+ System.out
+ .println(
+ new Gson()
+ .toJson(
+ Utils
+ .readCommunityMap(
+ fs, getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath())));
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateEntityTest.java
new file mode 100644
index 000000000..181dc8f1e
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateEntityTest.java
@@ -0,0 +1,127 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import static org.mockito.Mockito.lenient;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Consumer;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchCommunity;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
+import eu.dnetlib.dhp.utils.DHPUtils;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+@ExtendWith(MockitoExtension.class)
+public class CreateEntityTest {
+
+ private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+ +
+ "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
+ "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+ +
+ "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
+
+ List<String> communityMap = Arrays
+ .asList(
+ "clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri",
+ "ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. "
+ +
+ "SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - "
+ +
+ "Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community",
+ "dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community",
+ "fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community",
+ "ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community",
+ "mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community",
+ "instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community",
+ "elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri",
+ "aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community",
+ "dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri",
+ "epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri",
+ "covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community");
+
+ @Mock
+ private ISLookUpService isLookUpService;
+
+ private QueryInformationSystem queryInformationSystem;
+
+ @BeforeEach
+ public void setUp() throws ISLookUpException {
+ lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap);
+ queryInformationSystem = new QueryInformationSystem();
+ queryInformationSystem.setIsLookUp(isLookUpService);
+ }
+
+ @Test
+ public void test1() throws ISLookUpException, IOException {
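+ // Turn each mocked context profile into a ResearchInitiative/ResearchCommunity entity and spot-check the 'mes' and 'clarin' mappings.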
+ List<ContextInfo> cInfoList = new ArrayList<>();
+ final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
+ queryInformationSystem.getContextInformation(consumer);
+
+ List<ResearchInitiative> riList = new ArrayList<>();
+ cInfoList.forEach(cInfo -> riList.add(Process.getEntity(cInfo)));
+
+ Assertions.assertEquals(12, riList.size());
+
+ riList.stream().forEach(c -> {
+ switch (c.getOriginalId()) {
+ case "mes":
+ Assertions
+ .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY));
+ Assertions.assertTrue(((ResearchCommunity) c).getSubject().size() == 5);
+ Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("marine"));
+ Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("ocean"));
+ Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("fish"));
+ Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("aqua"));
+ Assertions.assertTrue(((ResearchCommunity) c).getSubject().contains("sea"));
+ Assertions
+ .assertTrue(
+ c
+ .getId()
+ .equals(
+ String
+ .format(
+ "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
+ DHPUtils.md5(c.getOriginalId()))));
+ Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes"));
+ Assertions.assertTrue("mes".equals(c.getOriginalId()));
+ break;
+ case "clarin":
+ Assertions
+ .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_INFRASTRUCTURE));
+ Assertions
+ .assertTrue(
+ c
+ .getId()
+ .equals(
+ String
+ .format(
+ "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
+ DHPUtils.md5(c.getOriginalId()))));
+ Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin"));
+ Assertions.assertTrue("clarin".equals(c.getOriginalId()));
+ break;
+ }
+ // TODO add check for all the others Entities
+
+ });
+
+ riList.forEach(c -> System.out.println(new Gson().toJson(c)));
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateRelationTest.java
new file mode 100644
index 000000000..bb2e402b2
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/CreateRelationTest.java
@@ -0,0 +1,569 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.util.*;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
+import eu.dnetlib.dhp.schema.oaf.Datasource;
+import eu.dnetlib.dhp.utils.DHPUtils;
+
+public class CreateRelationTest {
+
+ List<String> communityContext = Arrays
+ .asList(
+ "\n" +
+ " all\n" +
+ " CLARIN\n" +
+ " https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg\n"
+ +
+ " Common Language Resources and Technology Infrastructure\n" +
+ " maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it\n"
+ +
+ " \n" +
+ " (Part of) the work reported here was made possible by using the CLARIN infrastructure.\n"
+ +
+ " The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.\n"
+ +
+ " The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n"
+ +
+ " (E.g. No 676529 for project CLARIN-PLUS.)\n" +
+ " oac_clarin\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n" +
+ " CLARIN-PLUS\n" +
+ " \n" +
+ " \n" +
+ " 676529\n" +
+ " http://www.clarin.eu\n" +
+ " EC\n" +
+ " H2020-INFRADEV-1-2015-1\n" +
+ " CLARIN+\n" +
+ " \n" +
+ " \n" +
+ " Common Language Resources and Technology Infrastructure\n"
+ +
+ " CLARIN\n" +
+ " 212230\n" +
+ " EC\n" +
+ " corda_______::ef782b2d85676aa3e5a907427feb18c4\n" +
+ " \n" +
+ " \n" +
+ " " +
+ "\n" +
+ " \n" +
+ " LINDAT/CLARIN repository\n" +
+ " LINDAT/CLARIN repository\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ "",
+ "\n" +
+ " all\n" +
+ " This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.\n"
+ +
+ " http://sanmamante.org/DH_CH_logo.png\n" +
+ " Digital Humanities and Cultural Heritage\n" +
+ " ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk\n"
+ +
+ " modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels\n"
+ +
+ " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n"
+ +
+ " oac_dh-ch\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n"
+ +
+ " Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\n"
+ +
+ " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n"
+ +
+ " \n" +
+ " 654119\n" +
+ " http://www.parthenos-project.eu\n" +
+ " EC\n" +
+ " PARTHENOS\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " re3data_____::9ebe127e5f3a0bf401875690f3bb6b81\n" +
+ " The UK's largest collection of digital research data in the social sciences and humanities\n"
+ +
+ " UK Data Archive\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::c6cd4b532e12868c1d760a8d7cda6815\n" +
+ " Journal of Data Mining and Digital Humanities\n" +
+ " Journal of Data Mining and Digital Humanities\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b\n" +
+ " Frontiers in Digital Humanities\n" +
+ " Frontiers in Digital Humanities\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::6eb31d13b12bc06bbac06aef63cf33c9\n" +
+ " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n"
+ +
+ " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n"
+ +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::0da84e9dfdc8419576169e027baa8028\n" +
+ " Conservation Science in Cultural Heritage\n" +
+ " Conservation Science in Cultural Heritage\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " re3data_____::84e123776089ce3c7a33db98d9cd15a8\n" +
+ " Electronic Archiving System\n" +
+ " EASY\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " openaire____::c5502a43e76feab55dd00cf50f519125\n" +
+ " DANS-KB Harvester\n" +
+ " Gemeenschappelijke Harvester DANS-KB\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " re3data_____::a48f09c562b247a9919acfe195549b47\n" +
+ " ads\n" +
+ " Archaeology Data Service\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " opendoar____::97275a23ca44226c9964043c8462be96\n" +
+ " KNAW Repository\n" +
+ " KNAW Repository\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::2899208a99aa7d142646e0a80bfeef05\n" +
+ " Internet Archaeology\n" +
+ " Internet Archaeology\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ "\n",
+ "\n" +
+ " all\n" +
+ " The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.\n"
+ +
+ " https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png\n"
+ +
+ " Neuroinformatics\n" +
+ " sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it\n"
+ +
+ " brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities\n"
+ +
+ " \n" +
+ " oac_ni\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n" +
+ " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" +
+ " Formerly:OpenFMRI\n" +
+ " OpenNeuro\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " doajarticles::c7d3de67dc77af72f6747157441252ec\n" +
+ " Research Ideas and Outcomes\n" +
+ " Research Ideas and Outcomes\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " re3data_____::8515794670370f49c1d176c399c714f5\n" +
+ " Neuroimaging Informatics Tools and Resources Clearinghouse\n"
+ +
+ " NITRC\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " doajarticles::d640648c84b10d425f96f11c3de468f3\n" +
+ " Frontiers in Neuroinformatics\n" +
+ " Frontiers in Neuroinformatics\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\n" +
+ " NeuroImage: Clinical\n" +
+ " NeuroImage: Clinical\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " rest________::fb1a3d4523c95e63496e3bc7ba36244b\n" +
+ " NeuroVault\n" +
+ " NeuroVault\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ "\n",
+ "\n" +
+ " all\n" +
+ " Instruct-ERIC is the European Research Infrastructure for Structural Biology\n"
+ +
+ " https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png\n"
+ +
+ " Instruct-ERIC\n" +
+ " claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it\n"
+ +
+ " \n" +
+ " The authors acknowledge the support and the use of resources of Instruct-ERIC.\n"
+ +
+ " The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project\n"
+ +
+ " oac_instruct\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n"
+ +
+ " Authentication and Authorisation For Research and Collaboration\n"
+ +
+ " \n" +
+ " 730941\n" +
+ " \n" +
+ " H2020-EINFRA-2016-1\n" +
+ " AARC2\n" +
+ " EC\n" +
+ " \n" +
+ " \n"
+ +
+ " Building data bridges between biological and medical infrastructures in Europe\n"
+ +
+ " \n" +
+ " 284209\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2011-1\n" +
+ " EC\n" +
+ " BioMedBridges\n" +
+ " \n" +
+ " \n"
+ +
+ " Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\n"
+ +
+ " \n" +
+ " 283570\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2011-1\n" +
+ " EC\n" +
+ " BioStruct-X\n" +
+ " \n" +
+ " \n"
+ +
+ " Coordinated Research Infrastructures Building Enduring Life-science services\n"
+ +
+ " \n" +
+ " 654248\n" +
+ " \n" +
+ " H2020-INFRADEV-1-2014-1\n" +
+ " EC\n" +
+ " CORBEL\n" +
+ " \n" +
+ " \n"
+ +
+ " Infrastructure for NMR, EM and X-rays for translational research\n"
+ +
+ " \n" +
+ " 653706\n" +
+ " \n" +
+ " H2020-INFRAIA-2014-2015\n" +
+ " EC\n" +
+ " iNEXT\n" +
+ " \n" +
+ " \n"
+ +
+ " Integrated Structural Biology Infrastructure\n" +
+ " \n" +
+ " 211252\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2007-1\n" +
+ " EC\n" +
+ " INSTRUCT\n" +
+ " \n" +
+ " \n"
+ +
+ " Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\n"
+ +
+ " \n" +
+ " 731005\n" +
+ " \n" +
+ " H2020-INFRADEV-2016-1\n" +
+ " EC\n" +
+ " INSTRUCT-ULTRA\n" +
+ " \n" +
+ " \n"
+ +
+ " Opening Synchrotron Light for Experimental Science and Applications in the Middle East\n"
+ +
+ " \n" +
+ " 730943\n" +
+ " \n" +
+ " H2020-INFRASUPP-2016-1\n" +
+ " EC\n" +
+ " OPEN SESAME\n" +
+ " \n" +
+ " \n"
+ +
+ " Infrastructure for Protein Production Platforms\n"
+ +
+ " \n" +
+ " 227764\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2008-1\n" +
+ " EC\n" +
+ " PCUBE\n" +
+ " \n" +
+ " \n"
+ +
+ " European Vaccine Research and Development Infrastructure\n"
+ +
+ " \n" +
+ " 730964\n" +
+ " \n" +
+ " H2020-INFRAIA-2016-1\n" +
+ " EC\n" +
+ " TRAMSVAC2\n" +
+ " \n" +
+ " \n"
+ +
+ " World-wide E-infrastructure for structural biology\n"
+ +
+ " \n" +
+ " 675858\n" +
+ " \n" +
+ " H2020-EINFRA-2015-1\n" +
+ " EC\n" +
+ " West-Life\n" +
+ " \n" +
+ " \n" +
+ " Expanding research infrastructure visibility to strengthen strategic partnerships\n"
+ +
+ " RI-VIS\n" +
+ " 824063\n" +
+ " EC\n" +
+ " corda__h2020::af93b591b76991d8437993a8f6fc6538\n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n"
+ +
+ " \n" +
+ " instruct\n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " west-life\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ " FRISBI\n" +
+ " aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==\n"
+ +
+ " aHR0cDovL2ZyaXNiaS5ldS8=\n" +
+ " \n" +
+ " \n" +
+ " RI-VIS\n" +
+ " aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=\n"
+ +
+ " aHR0cHM6Ly9yaS12aXMuZXU=\n" +
+ " \n" +
+ " \n" +
+ " CIISB\n" +
+ " aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=\n" +
+ " aHR0cHM6Ly93d3cuY2lpc2Iub3Jn\n" +
+ " \n" +
+ " \n" +
+ "\n",
+ "\n" +
+ " all\n" +
+ " ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.\n"
+ +
+ " https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png\n"
+ +
+ " The Greek National Node of the ESFRI European RI ELIXIR\n" +
+ " vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it\n"
+ +
+ " \n" +
+ " \n" +
+ " oaa_elixir-gr\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" +
+ " bio.tools\n" +
+ " bio.tools\n" +
+ " false\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ " \n" +
+ " ATHENA RC\n" +
+ " aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=\n"
+ +
+ " aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=\n" +
+ " \n" +
+ " \n"
+ +
+ "");
+
+ private QueryInformationSystem queryInformationSystem;
+
+ private Map map;
+
+ @BeforeEach
+ public void setUp() {
+
+ queryInformationSystem = new QueryInformationSystem();
+ queryInformationSystem.setContextRelationResult(communityContext);
+ }
+
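+ // Builds datasource relations for each community context in the fixture above and checks the overall
+ // relation count plus the source/target identifiers produced for the dh-ch community.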
+ @Test
+ public void test1() {
+ List<ContextInfo> cInfoList = new ArrayList<>();
+ final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
+
+ queryInformationSystem
+ .getContextRelation(consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class));
+
+ cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));
+
+ List<Relation> rList = new ArrayList<>();
+
+ cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));
+
+ Assertions.assertEquals(34, rList.size());
+
+ Assertions
+ .assertTrue(
+ rList
+ .stream()
+ .map(r -> r.getSource().getId())
+ .collect(Collectors.toSet())
+ .contains(
+ String
+ .format(
+ "%s|%s::%s", Constants.CONTEXT_ID,
+ Constants.CONTEXT_NS_PREFIX,
+ DHPUtils.md5("dh-ch"))));
+
+ Assertions
+ .assertEquals(
+ 10,
+ rList
+ .stream()
+ .filter(
+ r -> r
+ .getSource()
+ .getId()
+ .equals(
+ String
+ .format(
+ "%s|%s::%s", Constants.CONTEXT_ID,
+ Constants.CONTEXT_NS_PREFIX,
+ DHPUtils.md5("dh-ch"))))
+ .collect(Collectors.toList())
+ .size());
+
+ Assertions
+ .assertEquals(
+ 10,
+ rList
+ .stream()
+ .filter(
+ r -> r
+ .getTarget()
+ .getId()
+ .equals(
+ String
+ .format(
+ "%s|%s::%s", Constants.CONTEXT_ID,
+ Constants.CONTEXT_NS_PREFIX,
+ DHPUtils.md5("dh-ch"))))
+ .collect(Collectors.toList())
+ .size());
+
+ Set<String> tmp = rList
+ .stream()
+ .filter(
+ r -> r
+ .getSource()
+ .getId()
+ .equals(
+ String
+ .format(
+ "%s|%s::%s", Constants.CONTEXT_ID,
+ Constants.CONTEXT_NS_PREFIX,
+ DHPUtils.md5("dh-ch"))))
+ .map(r -> r.getTarget().getId())
+ .collect(Collectors.toSet());
+
+ Assertions
+ .assertTrue(
+ tmp.contains("10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81") &&
+ tmp.contains("10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815") &&
+ tmp.contains("10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") &&
+ tmp.contains("10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") &&
+ tmp.contains("10|doajarticles::0da84e9dfdc8419576169e027baa8028") &&
+ tmp.contains("10|re3data_____::84e123776089ce3c7a33db98d9cd15a8") &&
+ tmp.contains("10|openaire____::c5502a43e76feab55dd00cf50f519125") &&
+ tmp.contains("10|re3data_____::a48f09c562b247a9919acfe195549b47") &&
+ tmp.contains("10|opendoar____::97275a23ca44226c9964043c8462be96") &&
+ tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05"));
+
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectDatasourceTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectDatasourceTest.java
new file mode 100644
index 000000000..d855f279d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectDatasourceTest.java
@@ -0,0 +1,146 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.oaf.Datasource;
+import eu.dnetlib.dhp.schema.oaf.Organization;
+import eu.dnetlib.dhp.schema.oaf.Project;
+
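+/**
+ * Verifies that {@link DumpGraphEntities} dumps the Organization, Project and Datasource entities found in the
+ * test resources into their dump-model counterparts, checking the expected record counts.
+ */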
+public class DumpOrganizationProjectDatasourceTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory
+ .getLogger(DumpOrganizationProjectDatasourceTest.class);
+
+ private static HashMap map = new HashMap<>();
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(DumpOrganizationProjectDatasourceTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(DumpOrganizationProjectDatasourceTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(DumpOrganizationProjectDatasourceTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void dumpOrganizationTest() throws Exception {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/graph/organization")
+ .getPath();
+
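+ // dump the graph Organization entities read from sourcePath into the working dir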
+ DumpGraphEntities dg = new DumpGraphEntities();
+
+ dg.run(false, sourcePath, workingDir.toString() + "/dump", Organization.class, null);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization> tmp = sc
+ .textFile(workingDir.toString() + "/dump")
+ .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.Organization.class));
+
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Organization.class));
+
+ Assertions.assertEquals(34, verificationDataset.count());
+
+ verificationDataset.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
+
+ }
+
+ @Test
+ public void dumpProjectTest() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/graph/project")
+ .getPath();
+
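+ // dump the graph Project entities read from sourcePath into the working dir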
+ DumpGraphEntities dg = new DumpGraphEntities();
+
+ dg.run(false, sourcePath, workingDir.toString() + "/dump", Project.class, null);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.graph.Project> tmp = sc
+ .textFile(workingDir.toString() + "/dump")
+ .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.Project.class));
+
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.graph.Project> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Project.class));
+
+ Assertions.assertEquals(12, verificationDataset.count());
+
+ verificationDataset.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
+
+ }
+
+ @Test
+ public void dumpDatasourceTest() {
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/graph/datasource")
+ .getPath();
+
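+ // dump the graph Datasource entities read from sourcePath into the working dir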
+ DumpGraphEntities dg = new DumpGraphEntities();
+
+ dg.run(false, sourcePath, workingDir.toString() + "/dump", Datasource.class, null);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource> tmp = sc
+ .textFile(workingDir.toString() + "/dump")
+ .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource.class));
+
+ org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Datasource.class));
+
+ Assertions.assertEquals(5, verificationDataset.count());
+
+ verificationDataset.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpRelationTest.java
new file mode 100644
index 000000000..611b49fcb
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/DumpRelationTest.java
@@ -0,0 +1,130 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
+
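+/**
+ * Verifies that {@link SparkDumpRelationJob} maps the OAF relations in the test resources to the dump
+ * {@link Relation} model, checking counts per relation name, source/target type and provenance.
+ */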
+public class DumpRelationTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory
+ .getLogger(DumpRelationTest.class);
+
+ private static HashMap map = new HashMap<>();
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(DumpRelationTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(DumpRelationTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(DumpRelationTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void test1() throws Exception {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation")
+ .getPath();
+
+ SparkDumpRelationJob.main(new String[] {
+ "-isSparkSessionManaged", Boolean.FALSE.toString(),
+ "-outputPath", workingDir.toString() + "/relation",
+ "-sourcePath", sourcePath
+ });
+
+// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<Relation> tmp = sc
+ .textFile(workingDir.toString() + "/relation")
+ .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+
+ org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+
+ verificationDataset.createOrReplaceTempView("table");
+
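+ // flatten the dumped relations into a tabular view so the expectations below can be expressed as SQL filters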
+ Dataset<Row> check = spark
+ .sql(
+ "SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance "
+ +
+ "from table ");
+
+ Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
+ Assertions
+ .assertEquals(
+ 22, check
+ .filter(
+ "name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " +
+ "provenance = 'Harvested'")
+ .count());
+
+ Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count());
+ Assertions
+ .assertEquals(
+ 7, check
+ .filter(
+ "name = 'isParticipant' and stype = 'organization' and ttype = 'project' " +
+ "and provenance = 'Harvested'")
+ .count());
+
+ Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
+ Assertions
+ .assertEquals(
+ 1, check
+ .filter(
+ "name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " +
+ "and provenance = 'Inferred by OpenAIRE'")
+ .count());
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/ExtractRelationFromEntityTest.java
new file mode 100644
index 000000000..820a899ce
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/ExtractRelationFromEntityTest.java
@@ -0,0 +1,109 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
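+/**
+ * Verifies that {@link Extractor} derives dump {@link Relation}s from a single publication record,
+ * checking the number of relations found for the expected source identifiers.
+ */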
+public class ExtractRelationFromEntityTest {
+
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory
+ .getLogger(ExtractRelationFromEntityTest.class);
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(ExtractRelationFromEntityTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
+ SparkConf conf = new SparkConf();
+ conf.setAppName(ExtractRelationFromEntityTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(ExtractRelationFromEntityTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void test1() {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
+ .getPath();
+
+ final String communityMapPath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
+ .getPath();
+
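+ // run the extractor over a single publication record, using the given community map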
+ Extractor ex = new Extractor();
+ ex
+ .run(
+ false, sourcePath, workingDir.toString() + "/relation",
+ eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath);
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<Relation> tmp = sc
+ .textFile(workingDir.toString() + "/relation")
+ .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+
+ org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+
+ Assertions
+ .assertEquals(
+ 9,
+ verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daeab3685c3'").count());
+
+ Assertions
+ .assertEquals(
+ 9,
+ verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daxab3685c3'").count());
+
+ }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/FunderParsingTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/FunderParsingTest.java
new file mode 100644
index 000000000..0374a1568
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/FunderParsingTest.java
@@ -0,0 +1,69 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import org.dom4j.DocumentException;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;
+
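+/**
+ * Verifies that {@link DumpGraphEntities#getFunder(String)} parses funding-tree fragments with two and
+ * three nesting levels into the dump {@link Funder} model (short name, name, jurisdiction, funding stream).
+ */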
+public class FunderParsingTest {
+
+ @Test
+ public void testFunderTwoLevels() throws DocumentException {
+
+ String funding_Stream = "nsf_________::NSFNSFNational Science "
+ +
+ "FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision "
+ +
+ "of Computing and Communication FoundationsDivision of Computing and Communication " +
+ "Foundationsnsf_________::NSF::CISE/OADDirectorate for "
+ +
+ "Computer & Information Science & EngineeringDirectorate for Computer & " +
+ "Information Science & Engineeringnsf:fundingStream";
+
+ Funder f = DumpGraphEntities.getFunder(funding_Stream);
+
+ Assertions.assertEquals("NSF", f.getShortName());
+ Assertions.assertEquals("National Science Foundation", f.getName());
+ Assertions.assertEquals("US", f.getJurisdiction());
+
+ Assertions.assertEquals("NSF::CISE/OAD::CISE/CCF", f.getFunding_stream().getId());
+ Assertions
+ .assertEquals(
+ "Directorate for Computer & Information Science & Engineering - Division of Computing and Communication Foundations",
+ f.getFunding_stream().getDescription());
+
+ }
+
+ @Test
+ public void testFunderThreeLevels() throws DocumentException {
+ String funding_stream = "ec__________::EC" +
+ "EC" +
+ "European Commission" +
+ "EU" +
+ "" +
+ "ec__________::EC::H2020::ERC::ERC-COG" +
+ "Consolidator Grant" +
+ "ERC-COGec:h2020toas" +
+ "ec__________::EC::H2020::ERC" +
+ "European Research Council" +
+ "ERCec:h2020fundings" +
+ "ec__________::EC::H2020H2020" +
+ "Horizon 2020 Framework Programme" +
+ "ec:h2020fundings";
+
+ Funder f = DumpGraphEntities.getFunder(funding_stream);
+
+ Assertions.assertEquals("EC", f.getShortName());
+ Assertions.assertEquals("European Commission", f.getName());
+ Assertions.assertEquals("EU", f.getJurisdiction());
+
+ Assertions.assertEquals("EC::H2020::ERC::ERC-COG", f.getFunding_stream().getId());
+ Assertions
+ .assertEquals(
+ "Horizon 2020 Framework Programme - European Research Council - Consolidator Grant",
+ f.getFunding_stream().getDescription());
+
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/QueryInformationSystemTest.java
new file mode 100644
index 000000000..074bed198
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/QueryInformationSystemTest.java
@@ -0,0 +1,807 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import static org.mockito.Mockito.lenient;
+
+import java.util.*;
+import java.util.function.Consumer;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
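+/**
+ * Exercises {@link QueryInformationSystem} against a mocked {@link ISLookUpService} that returns the
+ * community profiles defined below, checking both the parsed context entities and the context relations.
+ */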
+@ExtendWith(MockitoExtension.class)
+public class QueryInformationSystemTest {
+
+ private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
+ " and $x//context/param[./@name = 'status']/text() = 'all' " +
+ " return " +
+ "$x//context";
+
+ private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+ +
+ "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+ +
+ "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
+ "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+ +
+ "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
+
+ List<String> communityMap = Arrays
+ .asList(
+ "clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri",
+ "ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. "
+ +
+ "SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - "
+ +
+ "Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community",
+ "dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community",
+ "fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community",
+ "ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community",
+ "mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community",
+ "instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community",
+ "elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri",
+ "aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community",
+ "dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri",
+ "epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri",
+ "covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community");
+
+ List<String> communityContext = Arrays
+ .asList(
+ "\n" +
+ " all\n" +
+ " CLARIN\n" +
+ " https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg\n"
+ +
+ " Common Language Resources and Technology Infrastructure\n" +
+ " maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it\n"
+ +
+ " \n" +
+ " (Part of) the work reported here was made possible by using the CLARIN infrastructure.\n"
+ +
+ " The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.\n"
+ +
+ " The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n"
+ +
+ " (E.g. No 676529 for project CLARIN-PLUS.)\n" +
+ " oac_clarin\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n" +
+ " CLARIN-PLUS\n" +
+ " \n" +
+ " \n" +
+ " 676529\n" +
+ " http://www.clarin.eu\n" +
+ " EC\n" +
+ " H2020-INFRADEV-1-2015-1\n" +
+ " CLARIN+\n" +
+ " \n" +
+ " \n" +
+ " Common Language Resources and Technology Infrastructure\n"
+ +
+ " CLARIN\n" +
+ " 212230\n" +
+ " EC\n" +
+ " corda_______::ef782b2d85676aa3e5a907427feb18c4\n" +
+ " \n" +
+ " \n" +
+ " " +
+ "\n" +
+ " \n" +
+ " LINDAT/CLARIN repository\n" +
+ " LINDAT/CLARIN repository\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ "",
+ "\n" +
+ " all\n" +
+ " This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.\n"
+ +
+ " http://sanmamante.org/DH_CH_logo.png\n" +
+ " Digital Humanities and Cultural Heritage\n" +
+ " ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk\n"
+ +
+ " modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels\n"
+ +
+ " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n"
+ +
+ " oac_dh-ch\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n"
+ +
+ " Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\n"
+ +
+ " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n"
+ +
+ " \n" +
+ " 654119\n" +
+ " http://www.parthenos-project.eu\n" +
+ " EC\n" +
+ " PARTHENOS\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " re3data_____::9ebe127e5f3a0bf401875690f3bb6b81\n" +
+ " The UK's largest collection of digital research data in the social sciences and humanities\n"
+ +
+ " UK Data Archive\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::c6cd4b532e12868c1d760a8d7cda6815\n" +
+ " Journal of Data Mining and Digital Humanities\n" +
+ " Journal of Data Mining and Digital Humanities\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b\n" +
+ " Frontiers in Digital Humanities\n" +
+ " Frontiers in Digital Humanities\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::6eb31d13b12bc06bbac06aef63cf33c9\n" +
+ " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n"
+ +
+ " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n"
+ +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::0da84e9dfdc8419576169e027baa8028\n" +
+ " Conservation Science in Cultural Heritage\n" +
+ " Conservation Science in Cultural Heritage\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " re3data_____::84e123776089ce3c7a33db98d9cd15a8\n" +
+ " Electronic Archiving System\n" +
+ " EASY\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " openaire____::c5502a43e76feab55dd00cf50f519125\n" +
+ " DANS-KB Harvester\n" +
+ " Gemeenschappelijke Harvester DANS-KB\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " re3data_____::a48f09c562b247a9919acfe195549b47\n" +
+ " ads\n" +
+ " Archaeology Data Service\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " opendoar____::97275a23ca44226c9964043c8462be96\n" +
+ " KNAW Repository\n" +
+ " KNAW Repository\n" +
+ " true\n" +
+ " \n" +
+ " \n"
+ +
+ " doajarticles::2899208a99aa7d142646e0a80bfeef05\n" +
+ " Internet Archaeology\n" +
+ " Internet Archaeology\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ "\n",
+ "\n" +
+ " all\n" +
+ " The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.\n"
+ +
+ " https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png\n"
+ +
+ " Neuroinformatics\n" +
+ " sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it\n"
+ +
+ " brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities\n"
+ +
+ " \n" +
+ " oac_ni\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n" +
+ " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" +
+ " Formerly:OpenFMRI\n" +
+ " OpenNeuro\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " doajarticles::c7d3de67dc77af72f6747157441252ec\n" +
+ " Research Ideas and Outcomes\n" +
+ " Research Ideas and Outcomes\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " re3data_____::8515794670370f49c1d176c399c714f5\n" +
+ " Neuroimaging Informatics Tools and Resources Clearinghouse\n"
+ +
+ " NITRC\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " doajarticles::d640648c84b10d425f96f11c3de468f3\n" +
+ " Frontiers in Neuroinformatics\n" +
+ " Frontiers in Neuroinformatics\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\n" +
+ " NeuroImage: Clinical\n" +
+ " NeuroImage: Clinical\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ " rest________::fb1a3d4523c95e63496e3bc7ba36244b\n" +
+ " NeuroVault\n" +
+ " NeuroVault\n" +
+ " true\n" +
+ " \n" +
+ " \n" +
+ "\n",
+ "\n" +
+ " all\n" +
+ " Instruct-ERIC is the European Research Infrastructure for Structural Biology\n"
+ +
+ " https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png\n"
+ +
+ " Instruct-ERIC\n" +
+ " claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it\n"
+ +
+ " \n" +
+ " The authors acknowledge the support and the use of resources of Instruct-ERIC.\n"
+ +
+ " The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project\n"
+ +
+ " oac_instruct\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n"
+ +
+ " Authentication and Authorisation For Research and Collaboration\n"
+ +
+ " \n" +
+ " 730941\n" +
+ " \n" +
+ " H2020-EINFRA-2016-1\n" +
+ " AARC2\n" +
+ " EC\n" +
+ " \n" +
+ " \n"
+ +
+ " Building data bridges between biological and medical infrastructures in Europe\n"
+ +
+ " \n" +
+ " 284209\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2011-1\n" +
+ " EC\n" +
+ " BioMedBridges\n" +
+ " \n" +
+ " \n"
+ +
+ " Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\n"
+ +
+ " \n" +
+ " 283570\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2011-1\n" +
+ " EC\n" +
+ " BioStruct-X\n" +
+ " \n" +
+ " \n"
+ +
+ " Coordinated Research Infrastructures Building Enduring Life-science services\n"
+ +
+ " \n" +
+ " 654248\n" +
+ " \n" +
+ " H2020-INFRADEV-1-2014-1\n" +
+ " EC\n" +
+ " CORBEL\n" +
+ " \n" +
+ " \n"
+ +
+ " Infrastructure for NMR, EM and X-rays for translational research\n"
+ +
+ " \n" +
+ " 653706\n" +
+ " \n" +
+ " H2020-INFRAIA-2014-2015\n" +
+ " EC\n" +
+ " iNEXT\n" +
+ " \n" +
+ " \n"
+ +
+ " Integrated Structural Biology Infrastructure\n" +
+ " \n" +
+ " 211252\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2007-1\n" +
+ " EC\n" +
+ " INSTRUCT\n" +
+ " \n" +
+ " \n"
+ +
+ " Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\n"
+ +
+ " \n" +
+ " 731005\n" +
+ " \n" +
+ " H2020-INFRADEV-2016-1\n" +
+ " EC\n" +
+ " INSTRUCT-ULTRA\n" +
+ " \n" +
+ " \n"
+ +
+ " Opening Synchrotron Light for Experimental Science and Applications in the Middle East\n"
+ +
+ " \n" +
+ " 730943\n" +
+ " \n" +
+ " H2020-INFRASUPP-2016-1\n" +
+ " EC\n" +
+ " OPEN SESAME\n" +
+ " \n" +
+ " \n"
+ +
+ " Infrastructure for Protein Production Platforms\n"
+ +
+ " \n" +
+ " 227764\n" +
+ " \n" +
+ " FP7-INFRASTRUCTURES-2008-1\n" +
+ " EC\n" +
+ " PCUBE\n" +
+ " \n" +
+ " \n"
+ +
+ " European Vaccine Research and Development Infrastructure\n"
+ +
+ " \n" +
+ " 730964\n" +
+ " \n" +
+ " H2020-INFRAIA-2016-1\n" +
+ " EC\n" +
+ " TRAMSVAC2\n" +
+ " \n" +
+ " \n"
+ +
+ " World-wide E-infrastructure for structural biology\n"
+ +
+ " \n" +
+ " 675858\n" +
+ " \n" +
+ " H2020-EINFRA-2015-1\n" +
+ " EC\n" +
+ " West-Life\n" +
+ " \n" +
+ " \n" +
+ " Expanding research infrastructure visibility to strengthen strategic partnerships\n"
+ +
+ " RI-VIS\n" +
+ " 824063\n" +
+ " EC\n" +
+ " corda__h2020::af93b591b76991d8437993a8f6fc6538\n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n"
+ +
+ " \n" +
+ " instruct\n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " west-life\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ " FRISBI\n" +
+ " aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==\n"
+ +
+ " aHR0cDovL2ZyaXNiaS5ldS8=\n" +
+ " \n" +
+ " \n" +
+ " RI-VIS\n" +
+ " aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=\n"
+ +
+ " aHR0cHM6Ly9yaS12aXMuZXU=\n" +
+ " \n" +
+ " \n" +
+ " CIISB\n" +
+ " aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=\n" +
+ " aHR0cHM6Ly93d3cuY2lpc2Iub3Jn\n" +
+ " \n" +
+ " \n" +
+ "\n",
+ "\n" +
+ " all\n" +
+ " ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.\n"
+ +
+ " https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png\n"
+ +
+ " The Greek National Node of the ESFRI European RI ELIXIR\n" +
+ " vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it\n"
+ +
+ " \n" +
+ " \n" +
+ " oaa_elixir-gr\n" +
+ " 2018-03-01T12:00:00\n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" +
+ " bio.tools\n" +
+ " bio.tools\n" +
+ " false\n" +
+ " \n" +
+ " \n" +
+ " \n" +
+ " \n"
+ +
+ " \n" +
+ " \n" +
+ " ATHENA RC\n" +
+ " aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=\n"
+ +
+ " aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=\n" +
+ " \n" +
+ " \n"
+ +
+ "");
+
+ @Mock
+ private ISLookUpService isLookUpService;
+
+ private QueryInformationSystem queryInformationSystem;
+
+ private Map map;
+
+ @BeforeEach
+ public void setUp() throws ISLookUpException {
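+ // stub the information-system lookup so the two XQueries resolve to the in-memory fixtures above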
+ lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap);
+ lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityContext);
+ queryInformationSystem = new QueryInformationSystem();
+ queryInformationSystem.setIsLookUp(isLookUpService);
+ }
+
+ @Test
+ public void testSizeEntity() throws ISLookUpException {
+
+ List<ContextInfo> cInfoList = new ArrayList<>();
+ final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
+ queryInformationSystem.getContextInformation(consumer);
+
+ Assertions.assertEquals(12, cInfoList.size());
+ }
+
+ @Test
+ public void testSizeRelation() throws ISLookUpException {
+
+ List<ContextInfo> cInfoList = new ArrayList<>();
+ final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
+ queryInformationSystem.execContextRelationQuery();
+ queryInformationSystem.getContextRelation(consumer, "contentproviders", "10|");
+
+ Assertions.assertEquals(5, cInfoList.size());
+ }
+
+ @Test
+ public void testContentRelation() throws ISLookUpException {
+
+ List<ContextInfo> cInfoList = new ArrayList<>();
+ final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
+ queryInformationSystem.execContextRelationQuery();
+ queryInformationSystem.getContextRelation(consumer, "contentproviders", "10");
+
+ cInfoList.forEach(contextInfo -> {
+ switch (contextInfo.getId()) {
+ case "elixir-gr":
+ Assertions.assertEquals(1, contextInfo.getDatasourceList().size());
+ Assertions
+ .assertEquals(
+ "10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5",
+ contextInfo.getDatasourceList().get(0));
+ break;
+ case "instruct":
+ Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
+ break;
+ case "ni":
+ Assertions.assertEquals(6, contextInfo.getDatasourceList().size());
+ Assertions
+ .assertTrue(
+ contextInfo
+ .getDatasourceList()
+ .contains("10|rest________::fb1a3d4523c95e63496e3bc7ba36244b"));
+ break;
+ case "dh-ch":
+ Assertions.assertEquals(10, contextInfo.getDatasourceList().size());
+ break;
+ case "clarin":
+ Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
+ break;
+ }
+ });
+ }
+
+ @Test
+ public void testContentEntity() throws ISLookUpException {
+
+ List<ContextInfo> cInfoList = new ArrayList<>();
+ final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
+ queryInformationSystem.getContextInformation(consumer);
+
+ cInfoList.forEach(context -> {
+ switch (context.getId()) {
+ case "clarin":// clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin",
+ Assertions
+ .assertEquals("Common Language Resources and Technology Infrastructure", context.getName());
+ Assertions.assertEquals("CLARIN", context.getDescription());
+ Assertions
+ .assertTrue(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals("oac_clarin", context.getZenodocommunity());
+ Assertions.assertEquals("ri", context.getType());
+ break;
+ case "ee":
+ Assertions.assertEquals("Sustainable Development Solutions Network - Greece", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertFalse(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals(17, context.getSubject().size());
+ Assertions.assertEquals("oac_sdsn-greece", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+ break;
+ case "dh-ch":
+ Assertions.assertEquals("Digital Humanities and Cultural Heritage", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertFalse(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals(67, context.getSubject().size());
+ Assertions.assertEquals("oac_dh-ch", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+ break;
+ case "fam":
+ Assertions.assertEquals("Fisheries and Aquaculture Management", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith("Conservation of marine resources for sustainable development"));
+ Assertions
+ .assertFalse(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals(19, context.getSubject().size());
+ Assertions.assertEquals("fisheries", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+ break;
+ case "ni":
+ Assertions.assertEquals("Neuroinformatics", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith("The neuroinformatics dashboard gathers research outputs from the"));
+ Assertions
+ .assertFalse(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals(18, context.getSubject().size());
+ Assertions.assertEquals("oac_ni", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+ Assertions.assertTrue(context.getSubject().contains("brain"));
+ break;
+ case "mes":
+ Assertions.assertEquals("European Marine Science", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith(
+ "This community was initially defined to include a very broad range of topics"));
+ Assertions
+ .assertFalse(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals(5, context.getSubject().size());
+ Assertions.assertEquals("oac_mes", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+ Assertions.assertTrue(context.getSubject().contains("sea"));
+ Assertions.assertTrue(context.getSubject().contains("fish"));
+ Assertions.assertTrue(context.getSubject().contains("ocean"));
+ Assertions.assertTrue(context.getSubject().contains("aqua"));
+ Assertions.assertTrue(context.getSubject().contains("marine"));
+ break;
+ case "instruct":
+ Assertions.assertEquals("Instruct-ERIC", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .equals(
+ "Instruct-ERIC is the European Research Infrastructure for Structural Biology"));
+ Assertions
+ .assertTrue(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals("oac_instruct", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+
+ break;
+ case "elixir-gr":
+ Assertions
+ .assertEquals("The Greek National Node of the ESFRI European RI ELIXIR", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith(
+ "ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open"));
+ Assertions
+ .assertTrue(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals("oaa_elixir-gr", context.getZenodocommunity());
+ Assertions.assertEquals("ri", context.getType());
+
+ break;
+ case "aginfra":
+ Assertions.assertEquals("Agricultural and Food Sciences", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith(
+ "The scope of this community is to provide access to publications, research data, projects and software"));
+ Assertions
+ .assertFalse(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals(18, context.getSubject().size());
+ Assertions.assertEquals("oac_aginfra", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+ Assertions.assertTrue(context.getSubject().contains("food distribution"));
+ break;
+ case "dariah":
+ Assertions.assertEquals("DARIAH EU", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith(
+ "The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support "));
+ Assertions
+ .assertTrue(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+
+ Assertions.assertEquals("dariah", context.getZenodocommunity());
+ Assertions.assertEquals("ri", context.getType());
+
+ break;
+ case "epos":
+ Assertions.assertEquals("European Plate Observing System", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith(
+ "EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of "));
+ Assertions
+ .assertTrue(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+
+ Assertions.assertEquals("", context.getZenodocommunity());
+ Assertions.assertEquals("ri", context.getType());
+
+ break;
+ case "covid-19":
+ Assertions.assertEquals("Corona Virus Disease", context.getName());
+ Assertions.assertTrue(context.getDescription().length() > 0);
+ Assertions
+ .assertTrue(
+ context
+ .getDescription()
+ .startsWith(
+ "This portal provides access to publications, research data, projects and "));
+ Assertions
+ .assertFalse(
+ Optional
+ .ofNullable(context.getSubject())
+ .map(value -> false)
+ .orElse(true));
+ Assertions.assertEquals(25, context.getSubject().size());
+ Assertions.assertEquals("covid-19", context.getZenodocommunity());
+ Assertions.assertEquals("community", context.getType());
+ Assertions.assertTrue(context.getSubject().contains("coronavirus disease 2019"));
+ break;
+
+ }
+ });
+
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/RelationFromOrganizationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/RelationFromOrganizationTest.java
new file mode 100644
index 000000000..f4816bb79
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/graph/RelationFromOrganizationTest.java
@@ -0,0 +1,120 @@
+
+package eu.dnetlib.dhp.oa.graph.dump.graph;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.graph.dump.Utils;
+import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
+import eu.dnetlib.dhp.utils.DHPUtils;
+
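+/**
+ * Tests the SparkOrganizationRelation job: it is fed a sample dump of relations together with the
+ * organization-community map defined below, and the test verifies the number of relations produced
+ * in the output path.
+ */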
+public class RelationFromOrganizationTest {
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ private static SparkSession spark;
+
+ private static Path workingDir;
+
+ private static final Logger log = LoggerFactory
+ .getLogger(RelationFromOrganizationTest.class);
+
+ private static HashMap<String, String> map = new HashMap<>();
+
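+ // JSON map from organization identifiers to the community/RI identifiers they are associated with;
+ // it is handed to the job unchanged through the -organizationCommunityMap argument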
+ String organizationCommunityMap = "{\"20|grid________::afaa39865943381c51f76c08725ffa75\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8\":[\"mes\",\"euromarine\"], \"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb\":[\"mes\",\"euromarine\"],\"20|rcuk________::e054eea0a47665af8c3656b5785ccf76\":[\"mes\",\"euromarine\"],\"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151\":[\"mes\",\"euromarine\"],\"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27\":[\"mes\",\"euromarine\"],\"20|snsf________::8fa091f8f25a846779acb4ea97b50aef\":[\"mes\",\"euromarine\"],\"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|corda_______::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78\":[\"mes\",\"euromarine\"],\"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70\":[\"mes\",\"euromarine\"],\"20|rcuk________::e16010089551a1a9182a94604fc0ea59\":[\"mes\",\"euromarine\"],\"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7\":[\"mes\",\"euromarine\"],\"20|snsf________::74730ef1439d7f7636a8be58a6b471b8\":[\"mes\",\"euromarine\"],\"20|nsf_________::ad72e19043a5a467e35f9b444d11563e\":[\"mes\",\"euromarine\"],\"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3\":[\"mes\",\"euromarine\"],\"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea\":[\"mes\",\"euromarine\"],\"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317\":[\"mes\",\"euromarine\"], \"20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f\":[\"mes\",\"euromarine\"], \"20|corda__h2020::65531bd11be9935948c7f2f4db1c1832\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946\":[\"mes\",\"euromarine\"], \"20|snsf________::3eb43582ac27601459a8d8b3e195724b\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6\":[\"mes\",\"euromarine\"], \"20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0\":[\"mes\",\"euromarine\"], \"20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0\":[\"beopen\"], "
+ +
+ "\"20|grid________::a867f78acdc5041b34acfe4f9a349157\":[\"beopen\"], \"20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff\":[\"beopen\"], \"20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad\":[\"beopen\"], \"20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda_______::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda__h2020::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::15911e01e9744d57205825d77c218737\":[\"beopen\"], \"20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab\":[\"beopen\"], \"20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3\":[\"beopen\"], \"20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3\":[\"beopen\"], \"20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9\":[\"beopen\"], \"20|corda_______::3ff558e30c2e434d688539548300b050\":[\"beopen\"], \"20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39\":[\"beopen\"], \"20|corda__h2020::5187217e2e806a6df3579c46f82401bc\":[\"beopen\"], \"20|grid________::5fa7e2709bcd945e26bfa18689adeec1\":[\"beopen\"], \"20|corda_______::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], \"20|corda__h2020::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], \"20|rcuk________::23a79ebdfa59790864e4a485881568c1\":[\"beopen\"], \"20|corda__h2020::b76cf8fe49590a966953c37e18608af9\":[\"beopen\"], \"20|grid________::d2f0204126ee709244a488a4cd3b91c2\":[\"beopen\"], \"20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6\":[\"beopen\"], \"20|grid________::802401579481dc32062bdee69f5e6a34\":[\"beopen\"], \"20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d\":[\"beopen\"]}";
+
+ @BeforeAll
+ public static void beforeAll() throws IOException {
+ workingDir = Files
+ .createTempDirectory(RelationFromOrganizationTest.class.getSimpleName());
+ log.info("using work dir {}", workingDir);
+
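+ // local standalone Spark configuration: UI disabled, warehouse directories rooted in the temporary working dir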
+ SparkConf conf = new SparkConf();
+ conf.setAppName(RelationFromOrganizationTest.class.getSimpleName());
+
+ conf.setMaster("local[*]");
+ conf.set("spark.driver.host", "localhost");
+ conf.set("hive.metastore.local", "true");
+ conf.set("spark.ui.enabled", "false");
+ conf.set("spark.sql.warehouse.dir", workingDir.toString());
+ conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
+
+ spark = SparkSession
+ .builder()
+ .appName(RelationFromOrganizationTest.class.getSimpleName())
+ .config(conf)
+ .getOrCreate();
+ }
+
+ @AfterAll
+ public static void afterAll() throws IOException {
+ FileUtils.deleteDirectory(workingDir.toFile());
+ spark.stop();
+ }
+
+ @Test
+ public void test1() throws Exception {
+
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/dump/graph/relation")
+ .getPath();
+
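+ // invoke the job main: the Spark session is not managed by the job itself and the output
+ // is written under <workingDir>/relation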
+ SparkOrganizationRelation.main(new String[] {
+ "-isSparkSessionManaged", Boolean.FALSE.toString(),
+ "-outputPath", workingDir.toString() + "/relation",
+ "-sourcePath", sourcePath,
+ "-organizationCommunityMap", organizationCommunityMap
+ });
+
+ final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ JavaRDD<Relation> tmp = sc
+ .textFile(workingDir.toString() + "/relation")
+ .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
+
+ org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
+ .createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+
+ verificationDataset.createOrReplaceTempView("table");
+
+ Assertions.assertEquals(170, verificationDataset.count());
+
+ Dataset