[SKG-IF] -

Miriam Baglioni 2024-02-07 15:33:12 +01:00
commit c3be9a7b14
71 changed files with 3451 additions and 3964 deletions

5
.gitignore vendored
View File

@ -26,3 +26,8 @@ spark-warehouse
/**/*.log
/**/.factorypath
/**/.scalafmt.conf
/**/job.properties
/job.properties
/*/job.properties
/*/*/job.properties
/*/*/*/job.properties

49
api/pom.xml Normal file
View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-graph-dump</artifactId>
<version>1.2.5-SNAPSHOT</version>
</parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
</dependency>
<dependency>
<groupId>jaxen</groupId>
<artifactId>jaxen</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,75 @@
package eu.dnetlib.dhp.communityapi;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
public class QueryCommunityAPI {
private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";
private static String get(String geturl) throws IOException {
URL url = new URL(geturl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setDoOutput(true);
conn.setRequestMethod("GET");
int responseCode = conn.getResponseCode();
String body = getBody(conn);
conn.disconnect();
if (responseCode != HttpURLConnection.HTTP_OK)
throw new IOException("Unexpected code " + responseCode + body);
return body;
}
public static String communities() throws IOException {
return get(PRODUCTION_BASE_URL + "community/communities");
}
public static String community(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id);
}
public static String communityDatasource(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
}
public static String communityPropagationOrganization(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
}
public static String communityProjects(String id, String page, String size) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
}
private static String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
StringBuilder response = new StringBuilder();
String responseLine = null;
while ((responseLine = br.readLine()) != null) {
response.append(responseLine.trim());
}
body = response.toString();
}
return body;
}
}
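
For illustration, a minimal sketch (not part of this commit) of how these endpoints are meant to be consumed: the JSON returned by communities() is deserialized with Jackson into the CommunitySummary/CommunityModel classes introduced below.

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.communityapi.QueryCommunityAPI;
import eu.dnetlib.dhp.communityapi.model.CommunityModel;
import eu.dnetlib.dhp.communityapi.model.CommunitySummary;

public class QueryCommunityAPIExample {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        // fetch the registered communities and map the JSON array onto the model classes
        CommunitySummary communities = mapper
            .readValue(QueryCommunityAPI.communities(), CommunitySummary.class);
        for (CommunityModel c : communities) {
            // id and name are the two fields later stored in the CommunityMap
            System.out.println(c.getId() + " -> " + c.getName());
        }
    }
}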

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.communityapi.model;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@JsonAutoDetect
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityContentprovider {
private String openaireId;
private String enabled;
public String getEnabled() {
return enabled;
}
public void setEnabled(String enabled) {
this.enabled = enabled;
}
public String getOpenaireId() {
return openaireId;
}
public void setOpenaireId(final String openaireId) {
this.openaireId = openaireId;
}
}

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.communityapi.model;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class CommunityEntityMap extends HashMap<String, List<String>> {
public CommunityEntityMap() {
super();
}
public List<String> get(String key) {
if (super.get(key) == null) {
return new ArrayList<>();
}
return super.get(key);
}
}
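
A short sketch (identifiers are hypothetical) of why get is overridden: absent keys yield an empty list instead of null, so read-only callers need no null check. Note that the returned list is not stored back into the map, so writers still have to put() before adding, exactly as getCommunityOrganization does further below.

import java.util.ArrayList;

import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;

public class CommunityEntityMapExample {
    public static void main(String[] args) {
        CommunityEntityMap map = new CommunityEntityMap();

        // absent key: returns a fresh empty list, never null
        System.out.println(map.get("20|openorgs____::0000").isEmpty()); // true

        // the empty list is not inserted into the map, so a put() is still needed before add()
        map.put("20|openorgs____::0000", new ArrayList<>());
        map.get("20|openorgs____::0000").add("dh-ch");
        System.out.println(map.get("20|openorgs____::0000")); // [dh-ch]
    }
}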

View File

@ -0,0 +1,82 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityModel implements Serializable {
private String id;
private String name;
private String description;
private String status;
private String type;
private List<String> subjects;
private String zenodoCommunity;
public List<String> getSubjects() {
return subjects;
}
public void setSubjects(List<String> subjects) {
this.subjects = subjects;
}
public String getZenodoCommunity() {
return zenodoCommunity;
}
public void setZenodoCommunity(String zenodoCommunity) {
this.zenodoCommunity = zenodoCommunity;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getStatus() {
return status;
}
public void setStatus(String status) {
this.status = status;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}

View File

@ -0,0 +1,15 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
public CommunitySummary() {
super();
}
}

View File

@ -0,0 +1,51 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class ContentModel implements Serializable {
private List<ProjectModel> content;
private Integer totalPages;
private Boolean last;
private Integer number;
public List<ProjectModel> getContent() {
return content;
}
public void setContent(List<ProjectModel> content) {
this.content = content;
}
public Integer getTotalPages() {
return totalPages;
}
public void setTotalPages(Integer totalPages) {
this.totalPages = totalPages;
}
public Boolean getLast() {
return last;
}
public void setLast(Boolean last) {
this.last = last;
}
public Integer getNumber() {
return number;
}
public void setNumber(Integer number) {
this.number = number;
}
}

View File

@ -0,0 +1,11 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.ArrayList;
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
public DatasourceList() {
super();
}
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
public class OrganizationList extends ArrayList<String> implements Serializable {
public OrganizationList() {
super();
}
}

View File

@ -0,0 +1,44 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class ProjectModel implements Serializable {
private String openaireId;
private String funder;
private String gratId;
public String getFunder() {
return funder;
}
public void setFunder(String funder) {
this.funder = funder;
}
public String getGratId() {
return gratId;
}
public void setGratId(String gratId) {
this.gratId = gratId;
}
public String getOpenaireId() {
return openaireId;
}
public void setOpenaireId(String openaireId) {
this.openaireId = openaireId;
}
}

View File

@ -0,0 +1,15 @@
package eu.dnetlib.dhp.oa.model;
/**
* @author miriam.baglioni
* @Date 19/12/23
*/
/**
* The OpenAccess color meant to be used on the result level
*/
public enum OpenAccessColor {
gold, hybrid, bronze
}

View File

@ -5,6 +5,7 @@ import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
@ -74,6 +75,53 @@ public class Result implements Serializable {
private List<Author> author;
// resulttype allows subclassing results into publications | datasets | software
@JsonProperty("isGreen")
@JsonSchema(description = "True if the result is green Open Access")
private Boolean isGreen;
@JsonSchema(description = "The Open Access Color of the publication")
private OpenAccessColor openAccessColor;
@JsonProperty("isInDiamondJournal")
@JsonSchema(description = "True if the result is published in a Diamond Journal")
private Boolean isInDiamondJournal;
@JsonSchema(description = "True if the result is outcome of a project")
private Boolean publiclyFunded;
public Boolean getGreen() {
return isGreen;
}
public void setGreen(Boolean green) {
isGreen = green;
}
public OpenAccessColor getOpenAccessColor() {
return openAccessColor;
}
public void setOpenAccessColor(OpenAccessColor openAccessColor) {
this.openAccessColor = openAccessColor;
}
public Boolean getInDiamondJournal() {
return isInDiamondJournal;
}
public void setInDiamondJournal(Boolean inDiamondJournal) {
isInDiamondJournal = inDiamondJournal;
}
public Boolean getPubliclyFunded() {
return publiclyFunded;
}
public void setPubliclyFunded(Boolean publiclyFunded) {
this.publiclyFunded = publiclyFunded;
}
@JsonSchema(
description = "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)")
private String type; // resulttype
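
The four new attributes expose the OpenAIRE Open Access indicators at the result level. A minimal serialization sketch (values are invented; the exact JSON layout depends on the Jackson configuration used by the dump):

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.model.OpenAccessColor;
import eu.dnetlib.dhp.oa.model.Result;

public class OpenAccessFieldsExample {
    public static void main(String[] args) throws Exception {
        Result r = new Result();
        r.setGreen(Boolean.TRUE);                     // deposited in a repository
        r.setOpenAccessColor(OpenAccessColor.hybrid); // OA article in a subscription journal
        r.setInDiamondJournal(Boolean.FALSE);
        r.setPubliclyFunded(Boolean.TRUE);

        ObjectMapper mapper = new ObjectMapper()
            .setSerializationInclusion(JsonInclude.Include.NON_NULL);
        // the output is expected to carry the new attributes,
        // e.g. "openAccessColor":"hybrid" and "publiclyFunded":true
        System.out.println(mapper.writeValueAsString(r));
    }
}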

File diff suppressed because it is too large

View File

@ -9,6 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.ExecCreateSchemas;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.*;
@ -24,7 +25,7 @@ class GenerateJsonSchema {
configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
JsonNode jsonSchema = generator.generateSchema(CommunityResult.class);
System.out.println(jsonSchema.toString());
}
@ -41,7 +42,7 @@ class GenerateJsonSchema {
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(Project.class);
JsonNode jsonSchema = generator.generateSchema(Result.class);
System.out.println(jsonSchema.toString());
}

View File

@ -1,5 +0,0 @@
id name acronym description
04a00617ca659adc944977ac700ea14b Digital Humanities and Cultural Heritage dh-ch This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.
3ee95893613de7450247d7fef747136f DARIAH EU dariah The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents.
5fde864866ea5ded4cc873b3170b63c3 Transport Research beopen Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research. The TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)
aa0e56dd2e9d2a0be749f5debdd2b3d8 Energy Research enermaps <p>EnerMaps Open Data Management Tool aims to&nbsp; <strong>improve data management</strong>&nbsp; and&nbsp; <strong>accessibility</strong>&nbsp; in the field of&nbsp; <strong>energy research</strong>&nbsp; for the&nbsp; <strong>renewable energy industry</strong>.</p> <p>EnerMaps&rsquo; tool accelerates and facilitates the energy transition offering a qualitative and user-friendly digital platform to the energy professionals.</p> <p>The project is based on the&nbsp; <strong>FAIR data principle</strong>&nbsp; which requires data to be&nbsp; <strong>F</strong>indable,&nbsp; <strong>A</strong>ccessible,&nbsp; <strong>I</strong>nteroperable and&nbsp; <strong>R</strong>eusable.</p> <p><strong>EnerMaps project</strong>&nbsp; coordinates and enriches existing energy databases to promote&nbsp; <strong>trans-disciplinary research</strong>&nbsp; and to develop partnerships between researchers and the energy professionals.</p> <p>The EnerMaps&nbsp;project has received funding from the European Union&rsquo;s Horizon 2020 research and innovation program under &nbsp; <a href="https://cordis.europa.eu/project/id/884161?WT.mc_id=RSS-Feed&amp;WT.rss_f=project&amp;WT.rss_a=227144&amp;WT.rss_ev=a" rel="noopener noreferrer" target="_blank">grant agreement N&deg;884161</a>.&nbsp;</p> <p>&nbsp;</p> <p>Website:<a href="https://enermaps.eu/">&nbsp; https://enermaps.eu/&nbsp;</a></p>

View File

@ -53,6 +53,11 @@
<artifactId>dump-schema</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
@ -62,6 +67,12 @@
<artifactId>classgraph</artifactId>
<version>4.8.71</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>

View File

@ -10,6 +10,7 @@ import java.util.Optional;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.slf4j.Logger;
@ -20,6 +21,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class MakeTarArchive implements Serializable {
private static final Logger log = LoggerFactory.getLogger(MakeTarArchive.class);
private static int index = 1;
private static String prevname = "";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@ -154,13 +157,21 @@ public class MakeTarArchive implements Serializable {
String pString = p.toString();
if (!pString.endsWith("_SUCCESS")) {
String name = pString.substring(pString.lastIndexOf("/") + 1);
if (name.startsWith("part-") & name.length() > 10) {
String tmp = name.substring(0, 10);
if (name.contains(".")) {
tmp += name.substring(name.indexOf("."));
}
name = tmp;
}
// if (name.startsWith("part-") & name.length() > 10) {
// String tmp = name.substring(0, 10);
// if (prevname.equalsIgnoreCase(tmp)) {
// tmp = tmp + "_" + index;
// index += 1;
// } else {
// prevname = tmp;
// index = 1;
// }
// if (name.contains(".")) {
// tmp += name.substring(name.indexOf("."));
// }
// name = tmp;
//
// }
if (rename) {
if (name.endsWith(".txt.gz"))
name = name.replace(".txt.gz", ".json.gz");

View File

@ -8,8 +8,9 @@ import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType;
// import org.apache.http.impl.client.HttpClients;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
@ -18,7 +19,7 @@ import eu.dnetlib.dhp.common.zenodoapi.model.ZenodoModelList;
import okhttp3.*;
public class ZenodoAPIClient implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ZenodoAPIClient.class);
String urlString;
String bucket;
@ -29,6 +30,8 @@ public class ZenodoAPIClient implements Serializable {
private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip");
private static final MediaType MEDIA_TYPE_TAR = MediaType.parse("application/octet-stream");
public String getUrlString() {
return urlString;
}
@ -61,40 +64,40 @@ public class ZenodoAPIClient implements Serializable {
* @return response code
* @throws IOException
*/
// public int newDeposition() throws IOException {
// String json = "{}";
//
// URL url = new URL(urlString);
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
// conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// conn.setRequestMethod("POST");
// conn.setDoOutput(true);
// try (OutputStream os = conn.getOutputStream()) {
// byte[] input = json.getBytes("utf-8");
// os.write(input, 0, input.length);
// }
//
// String body = getBody(conn);
//
// int responseCode = conn.getResponseCode();
// conn.disconnect();
//
// if (!checkOKStatus(responseCode))
// throw new IOException("Unexpected code " + responseCode + body);
//
// ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
// this.bucket = newSubmission.getLinks().getBucket();
// this.deposition_id = newSubmission.getId();
//
// return responseCode;
// }
public int newDeposition2() throws IOException {
String json = "{}";
URL url = new URL(urlString);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setRequestMethod("POST");
conn.setDoOutput(true);
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
}
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
this.bucket = newSubmission.getLinks().getBucket();
this.deposition_id = newSubmission.getId();
return responseCode;
}
public int newDeposition() throws IOException {
String json = "{}";
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
RequestBody body = RequestBody.create(json.getBytes());
Request request = new Request.Builder()
.url(urlString)
@ -122,50 +125,6 @@ public class ZenodoAPIClient implements Serializable {
}
// public int uploadIS2(InputStream is, String fileName) throws IOException {
//
// final String crlf = "\r\n";
// final String twoHyphens = "--";
// final String boundary = "*****";
//
// HttpPut put = new HttpPut(bucket + "/" + fileName);
//
// put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip");
// put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
//
// put.setEntity(new InputStreamEntity(is));
//
// int statusCode;
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(put);
// statusCode = response.getStatusLine().getStatusCode();
//
// }
//
// if (!checkOKStatus(statusCode)) {
// throw new IOException("Unexpected code " + statusCode);
// }
//
// return statusCode;
// }
// public int publish() throws IOException {
// String json = "{}";
// HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish");
// post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json");
// post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// post.setEntity(new StringEntity(json));
// int statusCode;
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(post);
// statusCode = response.getStatusLine().getStatusCode();
// }
// if (!checkOKStatus(statusCode)) {
// throw new IOException("Unexpected code " + statusCode);
// }
// return statusCode;
// }
/**
* Upload files in Zenodo.
*
@ -217,22 +176,33 @@ public class ZenodoAPIClient implements Serializable {
}
public int uploadIS3(InputStream is, String file_name, long len) throws IOException {
// String urlString = "http://checkip.amazonaws.com/";
// URL url = new URL(urlString);
// try (BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()))) {
// log.info(br.readLine());
// }
OkHttpClient httpClient = new OkHttpClient.Builder()
.writeTimeout(600, TimeUnit.SECONDS)
.readTimeout(600, TimeUnit.SECONDS)
.connectTimeout(600, TimeUnit.SECONDS)
.build();
System.out.println(bucket + "/" + file_name);
Request request = new Request.Builder()
.url(bucket + "/" + file_name)
.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
.addHeader(HttpHeaders.CONTENT_TYPE, "application/json") // add request headers
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
.put(InputStreamRequestBody.create(MEDIA_TYPE_TAR, is, len))
.build();
log.info("URL: " + request.url().toString());
// log.info("Headers: " + request.headers().toString());
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
throw new IOException("Unexpected code " + response + response.body().string());
log.info("Unexpected code " + response + response.body().string());
System.out.println("Unexpected code " + response + response.body().string());
return response.code();
}
}
@ -287,39 +257,40 @@ public class ZenodoAPIClient implements Serializable {
* @throws IOException
* @throws MissingConceptDoiException
*/
// public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
// setDepositionId(concept_rec_id, 1);
// String json = "{}";
//
// URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
// HttpURLConnection conn = (HttpURLConnection) url.openConnection();
//
// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// conn.setDoOutput(true);
// conn.setRequestMethod("POST");
//
// try (OutputStream os = conn.getOutputStream()) {
// byte[] input = json.getBytes("utf-8");
// os.write(input, 0, input.length);
//
// }
//
// String body = getBody(conn);
//
// int responseCode = conn.getResponseCode();
//
// conn.disconnect();
// if (!checkOKStatus(responseCode))
// throw new IOException("Unexpected code " + responseCode + body);
//
// ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
// String latest_draft = zenodoModel.getLinks().getLatest_draft();
// deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
// bucket = getBucket(latest_draft);
//
// return responseCode;
//
// }
public int newVersion2(String concept_rec_id) throws Exception, MissingConceptDoiException {
setDepositionId(concept_rec_id, 1);
String json = "{}";
URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
conn.setDoOutput(true);
conn.setRequestMethod("POST");
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
}
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
bucket = getBucket(latest_draft);
return responseCode;
}
public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
setDepositionId(concept_rec_id, 1);
String json = "{}";
@ -336,8 +307,10 @@ public class ZenodoAPIClient implements Serializable {
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
if (!response.isSuccessful()) {
// log.info("response headers " + response.headers().toString());
throw new IOException("Unexpected code " + response + response.body().string());
}
ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
String latest_draft = zenodoModel.getLinks().getLatest_draft();
@ -360,7 +333,11 @@ public class ZenodoAPIClient implements Serializable {
* @throws MissingConceptDoiException
*/
public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
// String urlamazon = "http://checkip.amazonaws.com/";
// URL url = new URL(urlamazon);
// try (BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()))) {
// log.info(br.readLine());
// }
this.deposition_id = deposition_id;
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
@ -370,11 +347,15 @@ public class ZenodoAPIClient implements Serializable {
.addHeader("Authorization", "Bearer " + access_token)
.build();
log.info("URL: " + request.url().toString());
// log.info("Headers: " + request.headers().toString());
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
if (!response.isSuccessful()) {
log.info("Unexpected code " + response + response.body().string());
throw new IOException("Unexpected code " + response + response.body().string());
}
ZenodoModel zenodoModel = new Gson()
.fromJson(response.body().string(), ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket();
@ -385,6 +366,11 @@ public class ZenodoAPIClient implements Serializable {
}
private void setDepositionId(String concept_rec_id, Integer page) throws Exception, MissingConceptDoiException {
// String urlString = "http://checkip.amazonaws.com/";
// URL url = new URL(urlString);
// try (BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()))) {
// log.info(br.readLine());
// }
ZenodoModelList zenodoModelList = new Gson()
.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
@ -402,27 +388,13 @@ public class ZenodoAPIClient implements Serializable {
}
// private String getPrevDepositions(String page) throws Exception {
//
// HttpGet get = new HttpGet(urlString);
// URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
//
// get.setURI(uri);
//
// get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
// get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
// try (CloseableHttpClient client = HttpClients.createDefault()) {
// CloseableHttpResponse response = client.execute(get);
// final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
// return body;
// }
// }
private String getPrevDepositions(String page) throws IOException {
OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
HttpUrl.Builder urlBuilder = HttpUrl
.parse(urlString)// + "?access_token=" + access_token + "&page=" + page)
.newBuilder();
urlBuilder.addQueryParameter("page", page);
String url = urlBuilder.build().toString();
@ -433,10 +405,15 @@ public class ZenodoAPIClient implements Serializable {
.get()
.build();
log.info("URL: " + request.url().toString());
log.info("Headers: " + request.headers().toString());
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful())
if (!response.isSuccessful()) {
log.info("response headers: " + response.headers());
throw new IOException("Unexpected code " + response + response.body().string());
}
return response.body().string();
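
A sketch of the deposition flow as it is driven by SendToZenodoHDFS further below. The two-argument constructor, the metadata payload shape and the token handling are assumptions not confirmed by this diff; newDeposition, uploadIS3, sendMretadata and publish are the calls actually used by the dump workflow.

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;

public class ZenodoUploadExample {
    public static void main(String[] args) throws Exception {
        // constructor arguments (deposition URL, access token) are assumed
        ZenodoAPIClient client = new ZenodoAPIClient(
            "https://zenodo.org/api/deposit/depositions",
            System.getenv("ZENODO_TOKEN"));

        client.newDeposition(); // creates the deposition and resolves the upload bucket

        File dump = new File("dump/publication.tar");
        try (InputStream is = new FileInputStream(dump)) {
            // streamed PUT against the bucket, as done by uploadIS3 above
            client.uploadIS3(is, dump.getName(), dump.length());
        }

        client.sendMretadata("{\"metadata\":{\"title\":\"OpenAIRE graph dump\"}}"); // payload shape assumed
        client.publish(); // finalizes the deposition
    }
}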

View File

@ -1,110 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem {
private ISLookUpService isLookUp;
private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);
private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and ($x//context/param[./@name = 'status']/text() = 'all') "
+
" return " +
"<community> " +
"{$x//CONFIGURATION/context/@id}" +
"{$x//CONFIGURATION/context/@label}" +
"</community>";
private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and $x//CONFIGURATION/context[./@id=%s] "
+
" return " +
"<community> " +
"{$x//CONFIGURATION/context/@id}" +
"{$x//CONFIGURATION/context/@label}" +
"</community>";
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
throws ISLookUpException, DocumentException, SAXException {
if (singleCommunity)
return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));
}
public ISLookUpService getIsLookUp() {
return isLookUp;
}
public void setIsLookUp(ISLookUpService isLookUpService) {
this.isLookUp = isLookUpService;
}
private CommunityMap getMap(List<String> communityMap) throws DocumentException, SAXException {
final CommunityMap map = new CommunityMap();
for (String xml : communityMap) {
final Document doc;
final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
doc = reader.read(new StringReader(xml));
Element root = doc.getRootElement();
map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
}
return map;
}
public List<String> getCommunityCsv(String toString) throws ISLookUpException, SAXException, DocumentException {
List<String> communities = new ArrayList<>();
for (String xml : isLookUp.quickSearchProfile(toString)) {
log.info(xml);
final Document doc;
final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
doc = reader.read(new StringReader(xml));
Element root = doc.getRootElement();
StringBuilder builder = new StringBuilder();
builder.append(DHPUtils.md5(root.attribute("id").getValue()));
builder.append(Constants.SEP);
builder.append(root.attribute("label").getValue());
builder.append(Constants.SEP);
builder.append(root.attribute("id").getValue());
builder.append(Constants.SEP);
builder
.append(
((Node) (root.selectNodes("//description").get(0)))
.getText()
.replace("\n", " ")
.replace("\t", " "));
communities.add(builder.toString());
}
return communities;
}
}

View File

@ -2,13 +2,14 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.oa.graph.dump.Constants.*;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
@ -16,8 +17,8 @@ import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.model.AccessRight;
import eu.dnetlib.dhp.oa.model.Author;
import eu.dnetlib.dhp.oa.model.GeoLocation;
import eu.dnetlib.dhp.oa.model.Indicator;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.OpenAccessColor;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.Subject;
@ -30,6 +31,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
public class ResultMapper implements Serializable {
private static final String NULL = "null";
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
E in, Map<String, String> communityMap, String dumpType)
@ -54,10 +56,14 @@ public class ResultMapper implements Serializable {
mapCountry(out, input);
mapCoverage(out, input);
out.setDateofcollection(input.getDateofcollection());
out.setGreen(input.getIsGreen());
out.setInDiamondJournal(input.getIsInDiamondJournal());
out.setPubliclyFunded(input.getPubliclyFunded());
mapOpenAccessColor(out, input);
mapDescription(out, input);
mapEmbargo(out, input);
mapFormat(out, input);
out.setId(input.getId().substring(3));
out.setId(getEntityId(input.getId(), ENTITY_ID_SEPARATOR));
mapOriginalId(out, input);
mapInstance(dumpType, out, input);
mapLanguage(out, input);
@ -83,6 +89,23 @@ public class ResultMapper implements Serializable {
}
private static void mapOpenAccessColor(Result out, eu.dnetlib.dhp.schema.oaf.Result input) {
if (Optional.ofNullable(input.getOpenAccessColor()).isPresent())
switch (input.getOpenAccessColor()) {
case bronze:
out.setOpenAccessColor(OpenAccessColor.bronze);
break;
case gold:
out.setOpenAccessColor(OpenAccessColor.gold);
break;
case hybrid:
out.setOpenAccessColor(OpenAccessColor.hybrid);
break;
}
}
private static void mapContext(Map<String, String> communityMap, CommunityResult out,
eu.dnetlib.dhp.schema.oaf.Result input) {
Set<String> communities = communityMap.keySet();
@ -155,7 +178,7 @@ public class ResultMapper implements Serializable {
input
.getCollectedfrom()
.stream()
.map(cf -> CfHbKeyValue.newInstance(cf.getKey().substring(3), cf.getValue()))
.map(cf -> CfHbKeyValue.newInstance(getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), cf.getValue()))
.collect(Collectors.toList()));
}
@ -187,6 +210,7 @@ public class ResultMapper implements Serializable {
// .getProvenanceaction()
// .getClassid()
// .equalsIgnoreCase("subject:sdg"))))
.filter(s -> !s.getValue().equalsIgnoreCase(NULL))
.forEach(s -> subjectList.add(getSubject(s))));
out.setSubjects(subjectList);
@ -521,14 +545,18 @@ public class ResultMapper implements Serializable {
instance
.setCollectedfrom(
CfHbKeyValue
.newInstance(i.getCollectedfrom().getKey().substring(3), i.getCollectedfrom().getValue()));
.newInstance(
getEntityId(i.getCollectedfrom().getKey(), ENTITY_ID_SEPARATOR),
i.getCollectedfrom().getValue()));
if (Optional.ofNullable(i.getHostedby()).isPresent() &&
Optional.ofNullable(i.getHostedby().getKey()).isPresent() &&
StringUtils.isNotBlank(i.getHostedby().getKey()))
instance
.setHostedby(
CfHbKeyValue.newInstance(i.getHostedby().getKey().substring(3), i.getHostedby().getValue()));
CfHbKeyValue
.newInstance(
getEntityId(i.getHostedby().getKey(), ENTITY_ID_SEPARATOR), i.getHostedby().getValue()));
return instance;
@ -670,7 +698,9 @@ public class ResultMapper implements Serializable {
if (di.isPresent()) {
Provenance p = new Provenance();
p.setProvenance(di.get().getProvenanceaction().getClassname());
p.setTrust(di.get().getTrust());
if (!s.getQualifier().getClassid().equalsIgnoreCase("fos") &&
!s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
p.setTrust(di.get().getTrust());
subject.setProvenance(p);
}

View File

@ -13,16 +13,13 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
* This class connects with the community APIs for production. It saves the information about the
* context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community
- research infrastructure/initiative, the value is the label of the research community - research
* infrastructure/initiative.
@ -31,11 +28,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
private final transient QueryInformationSystem queryInformationSystem;
private final transient UtilCommunityAPI queryInformationSystem;
private final transient BufferedWriter writer;
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException {
final Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
@ -45,8 +42,7 @@ public class SaveCommunityMap implements Serializable {
fileSystem.delete(hdfsWritePath, true);
}
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
queryInformationSystem = new UtilCommunityAPI();
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
@ -68,9 +64,6 @@ public class SaveCommunityMap implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
final Boolean singleCommunity = Optional
.ofNullable(parser.get("singleDeposition"))
.map(Boolean::valueOf)
@ -78,14 +71,14 @@ public class SaveCommunityMap implements Serializable {
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode);
scm.saveCommunityMap(singleCommunity, community_id);
}
private void saveCommunityMap(boolean singleCommunity, String communityId)
throws ISLookUpException, IOException, DocumentException, SAXException {
throws IOException {
final String communityMapString = Utils.OBJECT_MAPPER
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
log.info("communityMap {} ", communityMapString);

View File

@ -3,21 +3,31 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.Serializable;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.http.HttpStatus;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
public class SendToZenodoHDFS implements Serializable {
private static final String NEW = "new"; // to be used for a brand new deposition in zenodo
private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
private static final String UPDATE = "update"; // to upload content to an open deposition not published
private static final Integer NUMBER_OF_RETRIES = 5;
private static final Integer DELAY = 10;
private static final Integer MULTIPLIER = 5;
private static final Logger log = LoggerFactory.getLogger(SendToZenodoHDFS.class);
public static void main(final String[] args) throws Exception, MissingConceptDoiException {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -79,22 +89,44 @@ public class SendToZenodoHDFS implements Serializable {
Path p = fileStatus.getPath();
String pString = p.toString();
boolean retry = true;
int numberOfRetries = 0;
if (!pString.endsWith("_SUCCESS")) {
String name = pString.substring(pString.lastIndexOf("/") + 1);
log.info("Upoloading: {}", name);
FSDataInputStream inputStream = fileSystem.open(p);
while (retry && numberOfRetries < NUMBER_OF_RETRIES) {
int response_code = -1;
try (FSDataInputStream inputStream = fileSystem.open(p)) {
zenodoApiClient.uploadIS(inputStream, name);
try {
response_code = zenodoApiClient
.uploadIS3(inputStream, name, fileSystem.getFileStatus(p).getLen());
} catch (Exception e) {
log.info(e.getMessage());
throw new RuntimeException("Error while uploading on Zenodo");
}
log.info("response code: {}", response_code);
if (HttpStatus.SC_OK == response_code || HttpStatus.SC_CREATED == response_code) {
retry = false;
} else {
numberOfRetries += 1;
TimeUnit.SECONDS.sleep(DELAY * (long) Math.pow(MULTIPLIER, numberOfRetries)); // exponential backoff between retries
}
}
if (numberOfRetries == NUMBER_OF_RETRIES) {
throw new RuntimeException("reached the maximun number or retries to upload on Zenodo");
}
}
}
// log.info(DateTime.now().toDateTimeISO().toString());
TimeUnit.SECONDS.sleep(DELAY);
// log.info("Delayed: {}", DateTime.now().toDateTimeISO().toString());
}
if (!metadata.equals("")) {
zenodoApiClient.sendMretadata(metadata);
}
if (Boolean.TRUE.equals(publish)) {
zenodoApiClient.publish();
}
}
}

View File

@ -0,0 +1,92 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 22/09/23
*/
public class SparkCopyGraph implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkCopyGraph.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkCopyGraph.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/copygraph_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String hivePath = parser.get("hivePath");
log.info("hivePath: {}", hivePath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark ->
execCopy(
spark,
hivePath,
outputPath));
}
private static void execCopy(SparkSession spark, String hivePath, String outputPath) {
ModelSupport.oafTypes.entrySet().parallelStream().forEach(entry -> {
String entityType = entry.getKey();
Class<?> clazz = entry.getValue();
// if (!entityType.equalsIgnoreCase("relation")) {
spark
.read()
.schema(Encoders.bean(clazz).schema())
.parquet(hivePath + "/" + entityType)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/" + entityType);
});
}
}

View File

@ -0,0 +1,203 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.communityapi.model.*;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.utils.DHPUtils;
public class UtilCommunityAPI {
private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
throws IOException {
if (singleCommunity)
return getMap(Arrays.asList(getCommunity(communityId)));
return getMap(getValidCommunities());
}
private CommunityMap getMap(List<CommunityModel> communities) {
final CommunityMap map = new CommunityMap();
communities.forEach(c -> map.put(c.getId(), c.getName()));
return map;
}
public List<String> getCommunityCsv(List<String> comms) {
return comms.stream().map(c -> {
try {
CommunityModel community = getCommunity(c);
StringBuilder builder = new StringBuilder();
builder.append(DHPUtils.md5(community.getId()));
builder.append(Constants.SEP);
builder.append(community.getName());
builder.append(Constants.SEP);
builder.append(community.getId());
builder.append(Constants.SEP);
builder
.append(
community.getDescription());
return builder.toString();
} catch (IOException e) {
throw new RuntimeException(e);
}
}).collect(Collectors.toList());
}
private List<CommunityModel> getValidCommunities() throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
.stream()
.filter(
community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public"))
&&
(community.getType().equals("ri") || community.getType().equals("community")))
.collect(Collectors.toList());
}
private CommunityModel getCommunity(String id) throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
}
public List<ContextInfo> getContextInformation() throws IOException {
return getValidCommunities()
.stream()
.map(c -> getContext(c))
.collect(Collectors.toList());
}
public ContextInfo getContext(CommunityModel c) {
ContextInfo cinfo = new ContextInfo();
cinfo.setId(c.getId());
cinfo.setDescription(c.getDescription());
CommunityModel cm = null;
try {
cm = getCommunity(c.getId());
} catch (IOException e) {
throw new RuntimeException(e);
}
cinfo.setSubject(new ArrayList<>());
cinfo.getSubject().addAll(cm.getSubjects());
cinfo.setZenodocommunity(c.getZenodoCommunity());
cinfo.setType(c.getType());
return cinfo;
}
public List<ContextInfo> getContextRelation() throws IOException {
return getValidCommunities().stream().map(c -> {
ContextInfo cinfo = new ContextInfo();
cinfo.setId(c.getId());
cinfo.setDatasourceList(getDatasourceList(c.getId()));
cinfo.setProjectList(getProjectList(c.getId()));
return cinfo;
}).collect(Collectors.toList());
}
private List<String> getDatasourceList(String id) {
List<String> datasourceList = new ArrayList<>();
try {
new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
DatasourceList.class)
.stream()
.forEach(ds -> {
if (Optional.ofNullable(ds.getOpenaireId()).isPresent()) {
datasourceList.add(ds.getOpenaireId());
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
return datasourceList;
}
private List<String> getProjectList(String id) {
int page = -1;
int size = 100;
ContentModel cm = null;
ArrayList<String> projectList = new ArrayList<>();
do {
page++;
try {
cm = new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI
.communityProjects(
id, String.valueOf(page), String.valueOf(size)),
ContentModel.class);
if (cm.getContent().size() > 0) {
cm.getContent().forEach(p -> {
if (Optional.ofNullable(p.getOpenaireId()).isPresent())
projectList.add(p.getOpenaireId());
});
}
} catch (IOException e) {
throw new RuntimeException(e);
}
} while (!cm.getLast());
return projectList;
}
/**
* it returns for each organization the list of associated communities
*/
public CommunityEntityMap getCommunityOrganization() throws IOException {
CommunityEntityMap organizationMap = new CommunityEntityMap();
getValidCommunities()
.forEach(community -> {
String id = community.getId();
try {
List<String> associatedOrgs = MAPPER
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id),
OrganizationList.class);
associatedOrgs.forEach(o -> {
if (!organizationMap
.keySet()
.contains(o))
organizationMap.put(o, new ArrayList<>());
organizationMap.get(o).add(community.getId());
});
} catch (IOException e) {
throw new RuntimeException(e);
}
});
return organizationMap;
}
}
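
As an illustration, getCommunityOrganization inverts the community-organization relation returned by the API: the resulting CommunityEntityMap is keyed by organization identifier and lists the communities configured to propagate through it. A minimal sketch (identifiers are hypothetical):

import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;

public class CommunityOrganizationExample {
    public static void main(String[] args) throws Exception {
        UtilCommunityAPI api = new UtilCommunityAPI();
        CommunityEntityMap byOrganization = api.getCommunityOrganization();
        // e.g. "20|openorgs____::abc" -> [dh-ch, beopen]   (hypothetical output)
        byOrganization.forEach((org, communities) -> System.out.println(org + " -> " + communities));
    }
}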

View File

@ -8,6 +8,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -32,12 +33,11 @@ import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Measure;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
public class Utils {
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final String ENTITY_ID_SEPARATOR = "|";
private Utils() {
}
@ -54,10 +54,6 @@ public class Utils {
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
}
public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
return ISLookupClientFactory.getLookUpService(isLookUpUrl);
}
public static String getContextId(String id) {
return String
@ -88,6 +84,10 @@ public class Utils {
return new Gson().fromJson(sb.toString(), CommunityMap.class);
}
public static String getEntityId(String id, String separator) {
return id.substring(id.indexOf(separator) + 1);
}
public static Dataset<String> getEntitiesId(SparkSession spark, String inputPath) {
Dataset<String> dumpedIds = Utils
.readPath(spark, inputPath + "/publication", GraphResult.class)
@ -162,23 +162,7 @@ public class Utils {
default:
getImpactMeasure(i).add(getScore(m.getId(), m.getUnit()));
break;
// case IMPACT_POPULARITY:
// getImpactMeasure(i).setPopularity(getScore(m.getUnit()));
// break;
// case IMPACT_POPULARITY_ALT:
// getImpactMeasure(i).setPopularity_alt(getScore(m.getUnit()));
// break;
// case IMPACT_IMPULSE:
// getImpactMeasure(i).setImpulse(getScore(m.getUnit()));
// break;
// case IMPACT_INFLUENCE:
// getImpactMeasure(i).setInfluence(getScore(m.getUnit()));
// break;
// case IMPACT_INFLUENCE_ALT:
// getImpactMeasure(i).setInfluence_alt(getScore(m.getUnit()));
// break;
// default:
// break;
}
}
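
getEntityId, together with ENTITY_ID_SEPARATOR, replaces the substring(3) calls scattered through the mappers: instead of assuming a fixed-length prefix it drops everything up to and including the first separator. A quick illustration (the identifier below is a made-up example of the usual '<type prefix>|<id>' layout):

import eu.dnetlib.dhp.oa.graph.dump.Utils;

public class EntityIdExample {
    public static void main(String[] args) {
        String raw = "50|doi_________::0123456789abcdef"; // hypothetical OpenAIRE-style identifier
        String dumped = Utils.getEntityId(raw, Utils.ENTITY_ID_SEPARATOR);
        // prints "doi_________::0123456789abcdef", the same value substring(3) returned,
        // without assuming the prefix is exactly two characters plus the separator
        System.out.println(dumped);
    }
}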

View File

@ -67,7 +67,7 @@ public class CommunitySplit implements Serializable {
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.text(outputPath + "/" + communities.get(c).replace(" ", "_"));
.text(outputPath + "/" + c.replace(" ", "_"));
});
}

View File

@ -2,6 +2,8 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
import java.io.Serializable;
import java.io.StringReader;
@ -110,7 +112,7 @@ public class SparkPrepareResultProject implements Serializable {
Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
ResultProject rp = new ResultProject();
if (substring)
rp.setResultId(s.substring(3));
rp.setResultId(getEntityId(s, ENTITY_ID_SEPARATOR));
else
rp.setResultId(s);
eu.dnetlib.dhp.schema.oaf.Project p = first._1();
@ -142,7 +144,7 @@ public class SparkPrepareResultProject implements Serializable {
private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) {
Project p = Project
.newInstance(
op.getId().substring(3),
getEntityId(op.getId(), ENTITY_ID_SEPARATOR),
op.getCode().getValue(),
Optional
.ofNullable(op.getAcronym())

View File

@ -20,9 +20,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
@ -52,13 +52,10 @@ public class CreateContextEntities implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("nameNode: {}", hdfsNameNode);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
log.info("Processing contexts...");
cce.execute(Process::getEntity, isLookUpUrl);
cce.execute(Process::getEntity);
cce.close();
@ -87,15 +84,14 @@ public class CreateContextEntities implements Serializable {
}
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
throws ISLookUpException {
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer)
throws IOException {
QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
UtilCommunityAPI queryInformationSystem = new UtilCommunityAPI();
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
queryInformationSystem.getContextInformation(consumer);
queryInformationSystem.getContextInformation().forEach(ci -> consumer.accept(ci));
}
protected <R extends ResearchInitiative> void writeEntity(final R r) {

View File

@ -12,7 +12,6 @@ import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -22,14 +21,11 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
import eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
@@ -39,10 +35,6 @@ public class CreateContextRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
private final transient Configuration conf;
private final transient BufferedWriter writer;
private final transient QueryInformationSystem queryInformationSystem;
private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
private static final String CONTEX_RELATION_PROJECT = "projects";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@@ -68,59 +60,26 @@ public class CreateContextRelation implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("hdfsNameNode: {}", hdfsNameNode);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode);
final String masterDuplicatePath = parser.get("masterDuplicate");
log.info("masterDuplicatePath: {}", masterDuplicatePath);
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
final List<MasterDuplicate> masterDuplicateList = cce.readMasterDuplicate(masterDuplicatePath);
log.info("Creating relation for datasource...");
log.info("Creating relation for datasources and projects...");
cce
.execute(
Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class),
masterDuplicateList);
log.info("Creating relations for projects... ");
cce
.execute(
Process::getRelation, CONTEX_RELATION_PROJECT,
ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
Process::getRelation);
cce.close();
}
private List<MasterDuplicate> readMasterDuplicate(String masterDuplicatePath) throws IOException {
FileSystem fileSystem = FileSystem.get(conf);
Path hdfsReadPath = new Path(masterDuplicatePath);
BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(hdfsReadPath)));
List<MasterDuplicate> mdlist = new ArrayList<>();
ObjectMapper mapper = new ObjectMapper();
String line;
while ((line = reader.readLine()) != null) {
mdlist.add(mapper.readValue(line, MasterDuplicate.class));
}
return mdlist;
}
private void close() throws IOException {
writer.close();
}
public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
throws IOException, ISLookUpException {
public CreateContextRelation(String hdfsPath, String hdfsNameNode)
throws IOException {
this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode);
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
queryInformationSystem.execContextRelationQuery();
FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
@@ -134,17 +93,12 @@ public class CreateContextRelation implements Serializable {
}
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {
execute(producer, category, prefix, null);
}
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix,
List<MasterDuplicate> masterDuplicateList) {
public void execute(final Function<ContextInfo, List<Relation>> producer) throws IOException {
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);
queryInformationSystem.getContextRelation(consumer, category, prefix, masterDuplicateList);
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
queryCommunityAPI.getContextRelation().forEach(ci -> consumer.accept(ci));
}
protected void writeEntity(final Relation r) {

View File

@@ -2,6 +2,8 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
import java.io.Serializable;
import java.util.*;
@@ -84,7 +86,7 @@ public class Extractor implements Serializable {
.orElse(null))
.orElse(null);
Relation r = getRelation(
value.getId().substring(3), contextId,
getEntityId(value.getId(), ENTITY_ID_SEPARATOR), contextId,
Constants.RESULT_ENTITY,
Constants.CONTEXT_ENTITY,
ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance);
@@ -94,7 +96,7 @@ public class Extractor implements Serializable {
hashCodes.add(r.hashCode());
}
r = getRelation(
contextId, value.getId().substring(3),
contextId, getEntityId(value.getId(), ENTITY_ID_SEPARATOR),
Constants.CONTEXT_ENTITY,
Constants.RESULT_ENTITY,
ModelConstants.IS_RELATED_TO,
@@ -163,8 +165,8 @@ public class Extractor implements Serializable {
eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
Relation r = getRelation(
value.getId().substring(3),
cf.getKey().substring(3), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
getEntityId(value.getId(), ENTITY_ID_SEPARATOR),
getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
resultDatasource, ModelConstants.PROVISION,
provenance);
if (!hashCodes.contains(r.hashCode())) {
@@ -174,7 +176,7 @@ public class Extractor implements Serializable {
}
r = getRelation(
cf.getKey().substring(3), value.getId().substring(3),
getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), getEntityId(value.getId(), ENTITY_ID_SEPARATOR),
Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
datasourceResult, ModelConstants.PROVISION,
provenance);

View File

@@ -54,38 +54,11 @@ public class Process implements Serializable {
List<Relation> relationList = new ArrayList<>();
ci
.getDatasourceList()
.forEach(ds -> {
String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2));
String datasourceId = ds;
if (ds.startsWith("10|") || ds.startsWith("40|"))
datasourceId = ds.substring(3);
.forEach(ds -> relationList.addAll(addRelations(ci, ds, ModelSupport.idPrefixEntity.get("10"))));
String contextId = Utils.getContextId(ci.getId());
relationList
.add(
Relation
.newInstance(
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY,
datasourceId, nodeType,
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
Provenance
.newInstance(
Constants.USER_CLAIM,
Constants.DEFAULT_TRUST)));
relationList
.add(
Relation
.newInstance(
datasourceId, nodeType,
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY,
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
Provenance
.newInstance(
Constants.USER_CLAIM,
Constants.DEFAULT_TRUST)));
});
ci
.getProjectList()
.forEach(p -> relationList.addAll(addRelations(ci, p, ModelSupport.idPrefixEntity.get("40"))));
return relationList;
@@ -94,4 +67,33 @@ public class Process implements Serializable {
}
}
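/**
 * Builds the symmetric pair of relations (context -> entity and entity -> context) with the
 * USER_CLAIM provenance; it is reused above for both the datasource and the project lists.
 */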
private static List<Relation> addRelations(ContextInfo ci, String ds, String nodeType) {
List<Relation> relationList = new ArrayList<>();
String contextId = Utils.getContextId(ci.getId());
relationList
.add(
Relation
.newInstance(
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY,
ds, nodeType,
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
Provenance
.newInstance(
Constants.USER_CLAIM,
Constants.DEFAULT_TRUST)));
relationList
.add(
Relation
.newInstance(
ds, nodeType,
contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY,
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
Provenance
.newInstance(
Constants.USER_CLAIM,
Constants.DEFAULT_TRUST)));
return relationList;
}
}

View File

@@ -1,246 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem {
private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);
private ISLookUpService isLookUp;
private List<String> contextRelationResult;
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and $x//context/param[./@name = 'status']/text() = 'all' " +
" return " +
"$x//context";
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
isLookUp
.quickSearchProfile(XQUERY_ENTITY)
.forEach(c -> {
ContextInfo cinfo = new ContextInfo();
String[] cSplit = c.split("@@");
cinfo.setId(cSplit[0]);
cinfo.setName(cSplit[1]);
log.info("community name : {}", cSplit[1]);
cinfo.setDescription(cSplit[2]);
if (!cSplit[3].trim().equals("")) {
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
}
cinfo.setZenodocommunity(cSplit[4]);
cinfo.setType(cSplit[5]);
consumer.accept(cinfo);
});
}
public List<ContextInfo> getContextInformation() throws ISLookUpException {
List<ContextInfo> ret = new ArrayList<>();
isLookUp
.quickSearchProfile(XQUERY_ENTITY)
.forEach(c -> {
ContextInfo cinfo = new ContextInfo();
String[] cSplit = c.split("@@");
cinfo.setId(cSplit[0]);
cinfo.setName(cSplit[1]);
cinfo.setDescription(cSplit[2]);
if (!cSplit[3].trim().equals("")) {
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
}
cinfo.setZenodocommunity(cSplit[4]);
cinfo.setType(cSplit[5]);
ret.add(cinfo);
});
return ret;
}
public List<String> getContextRelationResult() {
return contextRelationResult;
}
public void setContextRelationResult(List<String> contextRelationResult) {
this.contextRelationResult = contextRelationResult;
}
public ISLookUpService getIsLookUp() {
return isLookUp;
}
public void setIsLookUp(ISLookUpService isLookUpService) {
this.isLookUp = isLookUpService;
}
public void execContextRelationQuery() throws ISLookUpException {
contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
}
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
getContextRelation(consumer, category, prefix, null);
}
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix,
List<MasterDuplicate> masterDuplicateList) {
contextRelationResult.forEach(xml -> {
ContextInfo cinfo = new ContextInfo();
final Document doc;
try {
final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
doc = reader.read(new StringReader(xml));
Element root = doc.getRootElement();
cinfo.setId(root.attributeValue("id"));
Iterator<Element> it = root.elementIterator();
while (it.hasNext()) {
Element el = it.next();
if (el.getName().equals("category")) {
String categoryId = el.attributeValue("id");
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
if (categoryId.equals(category)) {
cinfo.setDatasourceList(getCategoryList(el, prefix, masterDuplicateList));
}
}
}
consumer.accept(cinfo);
} catch (DocumentException | SAXException e) {
e.printStackTrace();
}
});
}
@NotNull
private List<String> getCategoryList(Element el, String prefix, List<MasterDuplicate> masterDuplicateList) {
List<String> datasourceList = new ArrayList<>();
for (Object node : el.selectNodes(".//concept")) {
String oid = getOpenaireId((Node) node, prefix);
if (oid != null)
if (masterDuplicateList == null)
datasourceList.add(oid);
else
datasourceList.add(getMaster(oid, masterDuplicateList));
}
return datasourceList;
}
private String getMaster(String oid, List<MasterDuplicate> masterDuplicateList) {
for (MasterDuplicate md : masterDuplicateList) {
if (md.getDuplicate().equals(oid))
return md.getMaster();
}
return oid;
}
private String getOpenaireId(Node el, String prefix) {
for (Object node : el.selectNodes(".//param")) {
Node n = (Node) node;
if (n.valueOf("./@name").equals("openaireId")) {
String id = n.getText();
if (id.startsWith(prefix + "|"))
return id;
return prefix + "|" + id;
}
}
return makeOpenaireId(el, prefix);
}
private String makeOpenaireId(Node el, String prefix) {
if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
return null;
}
String funder = "";
String grantId = null;
String funding = null;
for (Object node : el.selectNodes(".//param")) {
Node n = (Node) node;
switch (n.valueOf("./@name")) {
case "funding":
funding = n.getText();
break;
case "funder":
funder = n.getText();
break;
case "CD_PROJECT_NUMBER":
grantId = n.getText();
break;
default:
break;
}
}
String nsp = null;
switch (funder.toLowerCase()) {
case "ec":
if (funding == null) {
return null;
}
if (funding.toLowerCase().contains("h2020")) {
nsp = "corda__h2020::";
} else if (funding.toLowerCase().contains("he")) {
nsp = "corda_____he::";
} else {
nsp = "corda_______::";
}
break;
case "tubitak":
nsp = "tubitakf____::";
break;
case "dfg":
nsp = "dfgf________::";
break;
default:
StringBuilder bld = new StringBuilder();
bld.append(funder.toLowerCase());
for (int i = funder.length(); i < 12; i++)
bld.append("_");
bld.append("::");
nsp = bld.toString();
}
return prefix + "|" + nsp + DHPUtils.md5(grantId);
}
}

View File

@@ -2,6 +2,8 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
import java.io.Serializable;
import java.io.StringReader;
@@ -216,7 +218,7 @@ public class SparkDumpEntitiesJob implements Serializable {
return null;
Datasource datasource = new Datasource();
datasource.setId(d.getId().substring(3));
datasource.setId(getEntityId(d.getId(), ENTITY_ID_SEPARATOR));
Optional
.ofNullable(d.getOriginalId())
@@ -406,7 +408,7 @@ public class SparkDumpEntitiesJob implements Serializable {
Optional
.ofNullable(p.getId())
.ifPresent(id -> project.setId(id.substring(3)));
.ifPresent(id -> project.setId(getEntityId(id, ENTITY_ID_SEPARATOR)));
Optional
.ofNullable(p.getWebsiteurl())
@@ -619,7 +621,7 @@ public class SparkDumpEntitiesJob implements Serializable {
Optional
.ofNullable(org.getId())
.ifPresent(value -> organization.setId(value.substring(3)));
.ifPresent(value -> organization.setId(getEntityId(value, ENTITY_ID_SEPARATOR)));
Optional
.ofNullable(org.getPid())

View File

@@ -2,6 +2,8 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
import java.io.Serializable;
import java.util.Collections;
@@ -85,11 +87,11 @@ public class SparkDumpRelationJob implements Serializable {
.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
relNew
.setSource(relation.getSource().substring(3));
.setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR));
relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)));
relNew
.setTarget(relation.getTarget().substring(3));
.setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR));
relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)));
relNew

View File

@@ -2,6 +2,9 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
import static eu.dnetlib.dhp.schema.common.ModelSupport.idPrefixMap;
import java.io.Serializable;
import java.util.ArrayList;
@@ -21,12 +24,16 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
@@ -58,8 +65,9 @@ public class SparkOrganizationRelation implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final OrganizationMap organizationMap = new Gson()
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
final CommunityEntityMap organizationMap = queryCommunityAPI.getCommunityOrganization();
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
log.info("organization map : {}", serializedOrganizationMap);
@@ -79,7 +87,7 @@
}
private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
private static void extractRelation(SparkSession spark, String inputPath, CommunityEntityMap organizationMap,
String outputPath, String communityMapPath) {
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
@@ -100,7 +108,7 @@
.as(Encoders.bean(MergedRels.class));
mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
if (organizationMap.containsKey(mergedRels.getOrganizationId())) {
if (organizationMap.containsKey(getEntityId(mergedRels.getOrganizationId(), ENTITY_ID_SEPARATOR))) {
return mergedRels;
}
return null;
@@ -129,15 +137,16 @@
}
@NotNull
private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
private static Consumer<MergedRels> getMergedRelsConsumer(CommunityEntityMap organizationMap,
List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, CommunityMap communityMap) {
return mergedRels -> {
String oId = mergedRels.getOrganizationId();
String oId = getEntityId(mergedRels.getOrganizationId(), ENTITY_ID_SEPARATOR);
organizationMap
.get(oId)
.forEach(community -> {
if (communityMap.containsKey(community)) {
addRelations(relList, community, mergedRels.getRepresentativeId());
addRelations(
relList, community, getEntityId(mergedRels.getRepresentativeId(), ENTITY_ID_SEPARATOR));
}
});
@@ -155,8 +164,8 @@
eu.dnetlib.dhp.oa.model.graph.Relation
.newInstance(
id, Constants.CONTEXT_ENTITY,
organization.substring(3),
ModelSupport.idPrefixEntity.get(organization.substring(0, 2)),
organization,
ModelSupport.idPrefixEntity.get(idPrefixMap.get(Organization.class)),
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
Provenance
.newInstance(
@@ -167,7 +176,7 @@
.add(
eu.dnetlib.dhp.oa.model.graph.Relation
.newInstance(
organization.substring(3), ModelSupport.idPrefixEntity.get(organization.substring(0, 2)),
organization, ModelSupport.idPrefixEntity.get(idPrefixMap.get(Organization.class)),
id, Constants.CONTEXT_ENTITY,
RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
Provenance

View File

@@ -11,27 +11,17 @@ import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
import eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
/**
* @author miriam.baglioni
@@ -44,7 +34,7 @@ public class DumpCommunities implements Serializable {
private final BufferedWriter writer;
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
+ " description \n";
private final transient QueryInformationSystem queryInformationSystem;
private final transient UtilCommunityAPI queryCommunityAPI;
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@@ -64,31 +54,19 @@ public class DumpCommunities implements Serializable {
final List<String> communities = Arrays.asList(split(parser.get("communities"), ";"));
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl"));
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode);
dc.writeCommunity(communities);
}
private void writeCommunity(List<String> communities)
throws IOException, ISLookUpException, DocumentException, SAXException {
throws IOException {
writer.write(HEADER);
writer.flush();
String a = IOUtils
.toString(
DumpCommunities.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq"));
final String xquery = String
.format(
a,
communities
.stream()
.map(t -> String.format("$x//CONFIGURATION/context[./@id= '%s']", t))
.collect(Collectors.joining(" or ")));
for (String community : queryInformationSystem
.getCommunityCsv(xquery)) {
for (String community : queryCommunityAPI
.getCommunityCsv(communities)) {
writer
.write(
community);
@@ -98,10 +76,9 @@ public class DumpCommunities implements Serializable {
writer.close();
}
public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception {
public DumpCommunities(String hdfsPath, String hdfsNameNode) throws Exception {
final Configuration conf = new Configuration();
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
queryCommunityAPI = new UtilCommunityAPI();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);

View File

@@ -19,6 +19,7 @@ import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.transform.SimpleTypeUnmarshallers;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@@ -26,6 +27,7 @@ import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Funder;
import eu.dnetlib.dhp.oa.model.community.Project;
import io.netty.util.internal.StringUtil;
/**
* Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC
@@ -95,6 +97,8 @@ public class SparkDumpFunderResults implements Serializable {
Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
if (ofunder.isPresent()) {
String fName = ofunder.get().getShortName();
if (StringUtil.isNullOrEmpty(fName))
return ofunder.get().getName();
if (fName.equalsIgnoreCase("ec")) {
fName += "_" + ofunder.get().getFundingStream();
}
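// e.g. an EC-funded result is expected to land in a folder such as EC_H2020 or EC_HE,
// depending on the funding stream (illustrative values, not taken from this commit)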

View File

@@ -0,0 +1,270 @@
package eu.dnetlib.dhp.oa.graph.dump.organizationonly;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId;
import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.Container;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.oa.model.graph.Datasource;
import eu.dnetlib.dhp.oa.model.graph.Organization;
import eu.dnetlib.dhp.oa.model.graph.Project;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;
/**
* Spark Job that fires the dump for the entities
*/
public class SparkDumpOrganizationJob implements Serializable {
private static final Logger log = LoggerFactory
.getLogger(eu.dnetlib.dhp.oa.graph.dump.organizationonly.SparkDumpOrganizationJob.class);
public static final String COMPRESSION = "compression";
public static final String GZIP = "gzip";
public static void main(String[] args) throws Exception {
Boolean isSparkSessionManaged = Boolean.TRUE;
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = "/tmp/prod_provision/graph/20_graph_blacklisted/";
log.info("inputPath: {}", inputPath);
final String outputPath = "/tmp/miriam/organizationsOnly/";
log.info("outputPath: {}", outputPath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
// Utils.removeOutputDir(spark, outputPath);
organizationMap(spark, inputPath, outputPath);
// relationMap2(spark, inputPath, outputPath);
});
}
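// Dumps only the organization-to-organization relations of the graph (relType "organizationOrganization"),
// mapping them to the public Relation model; currently invoked manually, see the commented call in main.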
private static void relationMap2(SparkSession spark, String inputPath, String outputPath) {
Utils
.readPath(spark, inputPath + "relation", Relation.class)
.filter((FilterFunction<Relation>) r -> r.getRelType().equalsIgnoreCase("organizationOrganization"))
.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
relNew
.setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR));
relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)));
relNew
.setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR));
relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)));
relNew
.setReltype(
RelType
.newInstance(
relation.getRelClass(),
relation.getSubRelType()));
Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
if (odInfo.isPresent()) {
DataInfo dInfo = odInfo.get();
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
relNew
.setProvenance(
Provenance
.newInstance(
dInfo.getProvenanceaction().getClassname(),
dInfo.getTrust()));
}
}
if (Boolean.TRUE.equals(relation.getValidated())) {
relNew.setValidated(relation.getValidated());
relNew.setValidationDate(relation.getValidationDate());
}
return relNew;
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "relation");
}
private static void relationMap(SparkSession spark, String inputPath, String outputPath) {
Dataset<eu.dnetlib.dhp.schema.oaf.Organization> organization = Utils
.readPath(spark, inputPath + "organization", eu.dnetlib.dhp.schema.oaf.Organization.class);
Dataset<Relation> rels = Utils.readPath(spark, inputPath + "relation", Relation.class);
organization
.joinWith(rels, organization.col("id").equalTo(rels.col("source")), "left")
.map(
(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Organization, Relation>, Relation>) t2 -> t2._2(),
Encoders.bean(Relation.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json("/tmp/orgSource");
rels = Utils.readPath(spark, "/tmp/orgSource", Relation.class);
organization
.joinWith(rels, organization.col("id").equalTo(rels.col("target")), "left")
.map(
(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Organization, Relation>, Relation>) t2 -> t2._2(),
Encoders.bean(Relation.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json("/tmp/orgSourceTarget");
Utils
.readPath(spark, "/tmp/orgSourceTarget", Relation.class)
.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
relNew
.setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR));
relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2)));
relNew
.setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR));
relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2)));
relNew
.setReltype(
RelType
.newInstance(
relation.getRelClass(),
relation.getSubRelType()));
Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
if (odInfo.isPresent()) {
DataInfo dInfo = odInfo.get();
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
relNew
.setProvenance(
Provenance
.newInstance(
dInfo.getProvenanceaction().getClassname(),
dInfo.getTrust()));
}
}
if (Boolean.TRUE.equals(relation.getValidated())) {
relNew.setValidated(relation.getValidated());
relNew.setValidationDate(relation.getValidationDate());
}
return relNew;
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "relation");
}
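// Maps the OAF organizations to the dump Organization model and writes them, gzip-compressed, under outputPath/organization.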
private static void organizationMap(SparkSession spark, String inputPath, String outputPath) {
Utils
.readPath(spark, inputPath + "organization", eu.dnetlib.dhp.schema.oaf.Organization.class)
.map(
(MapFunction<eu.dnetlib.dhp.schema.oaf.Organization, Organization>) o -> mapOrganization(o),
Encoders.bean(Organization.class))
.filter((FilterFunction<Organization>) o -> o != null)
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
.json(outputPath + "/organization");
}
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
eu.dnetlib.dhp.schema.oaf.Organization org) {
Organization organization = new Organization();
Optional
.ofNullable(org.getLegalshortname())
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
Optional
.ofNullable(org.getLegalname())
.ifPresent(value -> organization.setLegalname(value.getValue()));
Optional
.ofNullable(org.getWebsiteurl())
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
Optional
.ofNullable(org.getAlternativeNames())
.ifPresent(
value -> organization
.setAlternativenames(
value
.stream()
.map(v -> v.getValue())
.collect(Collectors.toList())));
Optional
.ofNullable(org.getCountry())
.ifPresent(
value -> {
if (!value.getClassid().equals(eu.dnetlib.dhp.oa.graph.dump.complete.Constants.UNKNOWN)) {
organization
.setCountry(
eu.dnetlib.dhp.oa.model.Country.newInstance(value.getClassid(), value.getClassname()));
}
});
Optional
.ofNullable(org.getId())
.ifPresent(value -> organization.setId(getEntityId(value, ENTITY_ID_SEPARATOR)));
Optional
.ofNullable(org.getPid())
.ifPresent(
value -> organization
.setPid(
value
.stream()
.map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
return organization;
}
}

View File

@@ -74,9 +74,6 @@ public class SparkDumpResult implements Serializable {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String masterDuplicatePath = parser.get("masterDuplicatePath");
log.info("masterDuplicatePath: {}", masterDuplicatePath);
Optional<String> pathString = Optional.ofNullable(parser.get("pathMap"));
HashMap<String, String> pathMap = null;
if (pathString.isPresent()) {
@@ -97,13 +94,13 @@
run(
isSparkSessionManaged, inputPath, outputPath, pathMap, selectionConstraints, inputClazz,
resultType, masterDuplicatePath);
resultType);
}
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
HashMap<String, String> pathMap, SelectionConstraints selectionConstraints,
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz, String resultType, String masterDuplicatePath) {
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz, String resultType) {
SparkConf conf = new SparkConf();
HashMap<String, String> finalPathMap = pathMap;
@@ -116,7 +113,7 @@
Utils.removeOutputDir(spark, outputPath + "/dump/" + resultType);
resultDump(
spark, inputPath, outputPath, inputClazz, finalPathMap,
finalSelectionConstraints, resultType, masterDuplicatePath);
finalSelectionConstraints, resultType);
});
}
@@ -128,17 +125,13 @@
Class<I> inputClazz,
Map<String, String> pathMap,
SelectionConstraints selectionConstraints,
String resultType,
String masterDuplicatePath) {
String resultType) {
List<MasterDuplicate> masterDuplicateList = Utils
.readPath(spark, masterDuplicatePath, MasterDuplicate.class)
.collectAsList();
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<I, I>) value -> filterResult(
value, pathMap, selectionConstraints, inputClazz, masterDuplicateList, resultType),
value, pathMap, selectionConstraints, inputClazz, resultType),
Encoders.bean(inputClazz))
.filter(Objects::nonNull)
.write()
@@ -163,7 +156,7 @@
}
private static <I extends eu.dnetlib.dhp.schema.oaf.Result> I filterResult(I value, Map<String, String> pathMap,
SelectionConstraints selectionConstraints, Class<I> inputClazz, List<MasterDuplicate> masterDuplicateList,
SelectionConstraints selectionConstraints, Class<I> inputClazz,
String resultType) {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
@@ -195,14 +188,7 @@
return null;
}
}
if (Optional.ofNullable(value.getCollectedfrom()).isPresent())
value.getCollectedfrom().forEach(cf -> update(cf, masterDuplicateList));
if (Optional.ofNullable(value.getInstance()).isPresent()) {
value.getInstance().forEach(i -> {
update(i.getCollectedfrom(), masterDuplicateList);
update(i.getHostedby(), masterDuplicateList);
});
}
return value;
}
@@ -210,13 +196,4 @@
return (classid.equals(resultType) || (classid.equals("other") && resultType.equals("otherresearchproduct")));
}
private static void update(KeyValue kv, List<MasterDuplicate> masterDuplicateList) {
for (MasterDuplicate md : masterDuplicateList) {
if (md.getDuplicate().equals(kv.getKey())) {
kv.setKey(md.getMaster());
return;
}
}
}
}

View File

@@ -1,11 +1,11 @@
package eu.dnetlib.dhp.skgif;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.skgif.model.RelationType;
import eu.dnetlib.dhp.skgif.model.Relations;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
@@ -14,113 +14,121 @@ import org.apache.spark.sql.*;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.skgif.model.RelationType;
import eu.dnetlib.dhp.skgif.model.Relations;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
import scala.Tuple2;
import java.io.Serializable;
import java.util.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
* @author miriam.baglioni
* @Date 05/09/23
*/
public class ExtendResult implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ExtendResult.class);
private static final Logger log = LoggerFactory.getLogger(ExtendResult.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/extend_result_parameters.json"));
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/extend_result_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
SparkConf conf = new SparkConf();
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extendResult(spark, inputPath, outputPath);
});
}
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extendResult(spark, inputPath, outputPath);
});
}
private static void extendResult(SparkSession spark, String inputPath, String outputPath) {
ObjectMapper mapper = new ObjectMapper();
Dataset<ResearchProduct> result = spark
.read()
.json(inputPath + "/result")
.as(Encoders.bean(ResearchProduct.class));
final StructType structureSchema = new StructType()
.fromDDL("`resultId` STRING, `target` STRING, `resultClass` STRING");
private static void extendResult(SparkSession spark, String inputPath, String outputPath) {
ObjectMapper mapper = new ObjectMapper();
Dataset<ResearchProduct> result = spark.read().json(inputPath + "/result")
.as(Encoders.bean(ResearchProduct.class));
final StructType structureSchema = new StructType()
.fromDDL("`resultId` STRING, `target` STRING, `resultClass` STRING");
Dataset<Row> relations = spark
.read()
.schema(structureSchema)
.json(inputPath + "/preparedRelations");
Dataset<Row> relations = spark
.read()
.schema(structureSchema)
.json(inputPath + "/preparedRelations");
result
.joinWith(
relations, result
.col("localIdentifier")
.equalTo(relations.col("resultId")),
"left")
.groupByKey(
(MapFunction<Tuple2<ResearchProduct, Row>, String>) t2 -> t2._1().getLocalIdentifier(),
Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Tuple2<ResearchProduct, Row>, ResearchProduct>) (key, it) -> {
Tuple2<ResearchProduct, Row> first = it.next();
ResearchProduct rp = first._1();
addRels(rp, first._2());
it.forEachRemaining(t2 -> addRels(rp, t2._2()));
return rp;
}, Encoders.bean(ResearchProduct.class))
.map((MapFunction<ResearchProduct, String>) r -> mapper.writeValueAsString(r), Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.text(outputPath);
result.joinWith(relations, result.col("localIdentifier")
.equalTo(relations.col("resultId")), "left")
.groupByKey((MapFunction<Tuple2<ResearchProduct, Row>, String>)t2 -> t2._1().getLocalIdentifier(),Encoders.STRING() )
.mapGroups((MapGroupsFunction<String, Tuple2<ResearchProduct, Row>, ResearchProduct>) (key,it) -> {
Tuple2<ResearchProduct, Row> first = it.next();
ResearchProduct rp = first._1();
addRels(rp, first._2());
it.forEachRemaining(t2 -> addRels(rp, t2._2()));
return rp;
}, Encoders.bean(ResearchProduct.class))
.map((MapFunction<ResearchProduct, String>) r -> mapper.writeValueAsString(r), Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option("compression","gzip")
.text(outputPath);
}
}
private static void addRels(ResearchProduct rp, Row row) {
String relClass = row.getAs("relClass");
Map<String, List<String>> relations = new HashMap<>();
if (relClass.equals(RelationType.OUTCOME.label)) {
if (!Optional.ofNullable(rp.getFunding()).isPresent()) {
rp.setFunding(new ArrayList<>());
}
rp.getFunding().add(row.getAs("target"));
} else if (relClass.equals(RelationType.AFFILIATION)) {
if (!Optional.ofNullable(rp.getRelevantOrganizations()).isPresent())
rp.setRelevantOrganizations(new ArrayList<>());
rp.getRelevantOrganizations().add(row.getAs("target"));
} else {
if (!relations.containsKey(relClass)) {
relations.put(relClass, new ArrayList<>());
}
relations.get(relClass).add(row.getAs("target"));
}
if (relations.size() > 0) {
rp.setRelatedProducts(new ArrayList<>());
for (String key : relations.keySet()) {
Relations rel = new Relations();
rel.setRelationType(key);
rel.setProductList(relations.get(key));
}
}
private static void addRels(ResearchProduct rp, Row row) {
String relClass = row.getAs("relClass");
Map<String, List<String>> relations = new HashMap<>();
if(relClass.equals(RelationType.OUTCOME.label)){
if(!Optional.ofNullable(rp.getFunding()).isPresent()){
rp.setFunding(new ArrayList<>());
}
rp.getFunding().add(row.getAs("target"));
}else if(relClass.equals(RelationType.AFFILIATION)){
if(!Optional.ofNullable(rp.getRelevantOrganizations()).isPresent())
rp.setRelevantOrganizations(new ArrayList<>());
rp.getRelevantOrganizations().add(row.getAs("target"));
}else{
if(!relations.containsKey(relClass)){
relations.put(relClass, new ArrayList<>());
}
relations.get(relClass).add(row.getAs("target"));
}
if(relations.size() > 0) {
rp.setRelatedProducts(new ArrayList<>());
for (String key: relations.keySet()){
Relations rel = new Relations();
rel.setRelationType(key);
rel.setProductList(relations.get(key));
// add the built relation group to the result, otherwise it is silently discarded
rp.getRelatedProducts().add(rel);
}
}
}
}
}

View File

@@ -0,0 +1,77 @@
package eu.dnetlib.dhp.skgif;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
* @author miriam.baglioni
* @Date 06/02/24
*/
public class JournalsFromDatasources implements Serializable {
private static final Logger log = LoggerFactory.getLogger(JournalsFromDatasources.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/journals_from_datasource_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String datasourcePath = parser.get("datasourcePath");
log.info("datasourcePath: {}", datasourcePath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extendResult(spark, inputPath, outputPath, datasourcePath);
});
}
// find the results having a container in the metadata
// map all the hostedby.key values of the instances associated with the result
// find a correspondence to a datasource which is a journal
// write the biblio for the result
public static void extendResult(SparkSession spark, String inputPath, String outputPath, String datasourcePath) {
Dataset<Datasource> datasource = Utils.readPath(spark, datasourcePath, Datasource.class)
.filter((FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
Dataset<ResearchProduct> results = Utils.readPath(spark, inputPath, ResearchProduct.class);
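// A possible completion of this method, sketched as an assumption (the getters follow the
// bean conventions implied by the setters used elsewhere in this module, and the actual
// biblio enrichment is still to be designed): keep the results having at least one
// manifestation hosted by one of the journal datasources selected above.
// List<String> journalIds = datasource
// 	.map((MapFunction<Datasource, String>) Datasource::getId, Encoders.STRING())
// 	.collectAsList();
// results
// 	.filter((FilterFunction<ResearchProduct>) r -> r.getManifestations() != null &&
// 		r.getManifestations().stream().anyMatch(m -> journalIds.contains(m.getHostingDatasource())))
// 	.write()
// 	.mode(SaveMode.Overwrite)
// 	.option("compression", "gzip")
// 	.json(outputPath);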
}
}

View File

@@ -1,93 +1,99 @@
package eu.dnetlib.dhp.skgif;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.Relation;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
* @author miriam.baglioni
* @Date 04/09/23
*/
public class PrepareResultRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PrepareResultRelation.class);
private static final Logger log = LoggerFactory.getLogger(PrepareResultRelation.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/result_relation_parameters.json"));
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/result_relation_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
SparkConf conf = new SparkConf();
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
prepareResultRelationList(spark, inputPath, outputPath);
});
}
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
prepareResultRelationList(spark, inputPath, outputPath);
});
}
private static void prepareResultRelationList(SparkSession spark, String inputPath, String outputPath) {
final StructType structureSchema = new StructType()
.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
Dataset<Relation> relation = spark.read().json(inputPath).as(Encoders.bean(Relation.class))
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")
.filter("relClass == 'hasAuthorInstitution' or relClass == 'outcome' or " +
"relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' " +
"relClass == IsNewVersionOf");
private static void prepareResultRelationList(SparkSession spark, String inputPath, String outputPath) {
final StructType structureSchema = new StructType()
.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
Dataset<Relation> relation = spark
.read()
.json(inputPath)
.as(Encoders.bean(Relation.class))
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")
.filter(
"relClass == 'hasAuthorInstitution' or relClass == 'outcome' or " +
"relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' " +
"relClass == IsNewVersionOf");
org.apache.spark.sql.Dataset<Row> df = spark.createDataFrame(new ArrayList<Row>(), structureSchema);
List<String> entities = Arrays
.asList(
"publication", "dataset", "otherresearchproduct", "software");
for (String e : entities)
df = df
.union(
spark
.read()
.schema(structureSchema)
.json(inputPath + "/" + e)
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true"));
org.apache.spark.sql.Dataset<Row> df = spark.createDataFrame(new ArrayList<Row>(), structureSchema);
List<String> entities = Arrays
.asList(
"publication", "dataset", "otherresearchproduct", "software");
for (String e : entities)
df = df
.union(
spark
.read()
.schema(structureSchema)
.json(inputPath + "/" + e)
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true"));
relation.joinWith(df, relation.col("source").equalTo(df.col("id")))
.select(
new Column("id").as("resultId"),
new Column("target"),
new Column("relClass"))
.write()
.mode(SaveMode.Overwrite)
.option("compression","gzip")
.json(outputPath);
}
relation
.joinWith(df, relation.col("source").equalTo(df.col("id")))
.select(
new Column("id").as("resultId"),
new Column("target"),
new Column("relClass"))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
}

View File

@@ -1,5 +1,18 @@
package eu.dnetlib.dhp.skgif;
import static eu.dnetlib.dhp.oa.graph.dump.ResultMapper.*;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.management.RuntimeErrorException;
import eu.dnetlib.dhp.oa.graph.dump.csv.AuthorResult;
import eu.dnetlib.dhp.oa.model.ResultPid;
@@ -13,173 +26,166 @@ import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
import scala.reflect.internal.Trees;
import javax.management.RuntimeErrorException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.oa.graph.dump.ResultMapper.*;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class ResultMapper implements Serializable {
public static <E extends Result> ResearchProduct map(
E input)
throws Exception{
public static <E extends Result> ResearchProduct map(
E input)
throws Exception {
ResearchProduct out = new ResearchProduct();
ResearchProduct out = new ResearchProduct();
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
if (ort.isPresent()) {
try {
out.setLocalIdentifier(input.getId());
mapPid(out, input);
mapTitle(out, input);
mapAbstract(out, input);
mapType(out, input);
mapTopic(out, input);
mapContribution(out, input);
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
if (ort.isPresent()) {
try {
out.setLocalIdentifier(input.getId());
mapPid(out, input);
mapTitle(out, input);
mapAbstract(out, input);
mapType(out, input);
mapTopic(out, input);
mapContribution(out, input);
if(!Optional.ofNullable(out.getTitles()).isPresent() ||
!Optional.ofNullable(out.getContributions()).isPresent())
return null;
//TODO map the manifestation directly from the instances
//it is not completed
mapManifestation(out, input);
if (!Optional.ofNullable(out.getTitles()).isPresent() ||
!Optional.ofNullable(out.getContributions()).isPresent())
return null;
// TODO map the manifestation directly from the instances
// it is not completed
mapManifestation(out, input);
//TODO extend the mapping to consider relations between these entities and the results
// TODO extend the mapping to consider relations between these entities and the results
// private List<String> relevantOrganizations;
// private List<String> funding;
// private List<Relations> relatedProducts;
} catch (ClassCastException cce) {
return null;
}
}
} catch (ClassCastException cce) {
return null;
}
}
return null;
return null;
}
}
private static <E extends Result> void mapManifestation(ResearchProduct out, E input) {
out.setManifestations( input.getInstance().stream().parallel()
.map(i -> {
try {
return getManifestation(i);
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
}).collect(Collectors.toList()));
}
private static <E extends Result> void mapManifestation(ResearchProduct out, E input) {
out
.setManifestations(
input
.getInstance()
.stream()
.parallel()
.map(i -> {
try {
return getManifestation(i);
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
})
.collect(Collectors.toList()));
}
private static Manifestation getManifestation(Instance i) throws MalformedURLException {
Manifestation manifestation = new Manifestation();
manifestation.setProductLocalType(i.getInstancetype().getClassname());
manifestation.setProductLocalTypeSchema(i.getInstancetype().getSchemename());
Dates dates = new Dates();
dates.setType("publishing");
dates.setValue(i.getDateofacceptance().getValue());
manifestation.setDates(Arrays.asList(dates));
switch(i.getRefereed().getClassid()){
case "0000":
manifestation.setPeerReview(PeerReview.UNAVAILABLE.label);
break;
case "0001":
manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label);
break;
case "0002":
manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label);
break;
}
manifestation.setMetadataCuration(MetadataCuration.UNAVAILABLE.label);
//TODO filter out the URLs that refer to pids. If nothing remains, decide what to do
manifestation.setUrl(new URL(i.getUrl().get(0)));
if(Optional.ofNullable(i.getPid()).isPresent()){
manifestation.setPid(i.getPid().get(0).getValue());
}
switch(i.getAccessright().getClassid()){
case"OPEN":
case"OPEN DATA":
case "OPEN SOURCE":
manifestation.setAccessRight(AccessRight.OPEN.label);
break;
case "CLOSED":
manifestation.setAccessRight(AccessRight.CLOSED.label);
break;
case "RESTRICTED":
manifestation.setAccessRight(AccessRight.RESTRICTED.label);
break;
case"EMBARGO":
case "12MONTHS":
case "6MONTHS":
manifestation.setAccessRight(AccessRight.EMBARGO.label);
break;
default:
manifestation.setAccessRight(AccessRight.UNAVAILABLE.label);
}
if(Optional.ofNullable(i.getLicense()).isPresent())
manifestation.setLicence(i.getLicense().getValue());
//TODO to fill the biblio in case it is a journal, we need to join with the datasource and verify the type
Biblio biblio = null;
manifestation.setHostingDatasource(i.getHostedby().getKey());
//TODO verify if the result is published in ojournal or conferences. In that case the venue is the identifier
//of the journal/conference. In case it is not, the venue is the datasource
if(biblio == null){
manifestation.setVenue(i.getHostedby().getKey());
}else{
manifestation.setVenue("insert the id of the venue");
}
return manifestation;
}
private static Manifestation getManifestation(Instance i) throws MalformedURLException {
Manifestation manifestation = new Manifestation();
manifestation.setProductLocalType(i.getInstancetype().getClassname());
manifestation.setProductLocalTypeSchema(i.getInstancetype().getSchemename());
Dates dates = new Dates();
dates.setType("publishing");
dates.setValue(i.getDateofacceptance().getValue());
manifestation.setDates(Arrays.asList(dates));
switch (i.getRefereed().getClassid()) {
case "0000":
manifestation.setPeerReview(PeerReview.UNAVAILABLE.label);
break;
case "0001":
manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label);
break;
case "0002":
manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label);
break;
}
manifestation.setMetadataCuration(MetadataCuration.UNAVAILABLE.label);
// TODO filter out the URLs that refer to pids. If nothing remains, decide what to do
manifestation.setUrl(new URL(i.getUrl().get(0)));
if (Optional.ofNullable(i.getPid()).isPresent()) {
manifestation.setPid(i.getPid().get(0).getValue());
}
switch (i.getAccessright().getClassid()) {
case "OPEN":
case "OPEN DATA":
case "OPEN SOURCE":
manifestation.setAccessRight(AccessRight.OPEN.label);
break;
case "CLOSED":
manifestation.setAccessRight(AccessRight.CLOSED.label);
break;
case "RESTRICTED":
manifestation.setAccessRight(AccessRight.RESTRICTED.label);
break;
case "EMBARGO":
case "12MONTHS":
case "6MONTHS":
manifestation.setAccessRight(AccessRight.EMBARGO.label);
break;
default:
manifestation.setAccessRight(AccessRight.UNAVAILABLE.label);
}
if (Optional.ofNullable(i.getLicense()).isPresent())
manifestation.setLicence(i.getLicense().getValue());
// TODO to fill the biblio in case it is a journal, we need to join with the datasource and verify the type
Biblio biblio = null;
manifestation.setHostingDatasource(i.getHostedby().getKey());
// TODO verify if the result is published in a journal or conference. In that case the venue is the identifier
// of the journal/conference. If it is not, the venue is the hosting datasource
if (biblio == null) {
manifestation.setVenue(i.getHostedby().getKey());
} else {
manifestation.setVenue("insert the id of the venue");
}
return manifestation;
}
private static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
if (!Optional.ofNullable(pid).isPresent())
return null;
if (pid.size() == 0)
return null;
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
return new Tuple2<>(p.getValue(), Boolean.TRUE);
}
}
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
return new Tuple2<>(p.getValue(), Boolean.FALSE);
}
}
return null;
}
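// Usage sketch (illustrative only, not part of the original mapping): a verified ORCID takes
// precedence over a pending one, and the Boolean flag records which of the two cases matched.
//
// StructuredProperty p = new StructuredProperty();
// Qualifier q = new Qualifier();
// q.setClassid(ModelConstants.ORCID);
// p.setQualifier(q);
// p.setValue("0000-0002-1825-0097");
// getOrcid(Arrays.asList(p)); // -> Tuple2("0000-0002-1825-0097", Boolean.TRUE)
// getOrcid(null); // -> null, no usable author identifier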
private static <E extends Result> void mapContribution(ResearchProduct out, E input) {
if (Optional.ofNullable(input.getAuthor()).isPresent()) {
int count = 0;
for (Author a : input.getAuthor()) {
count += 1;
Contribution contribution = new Contribution();
if (Optional.ofNullable(a.getPid()).isPresent()) {
Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
if (orcid != null) {
contribution.setPerson("person______::"+DHPUtils.md5(orcid._1() + orcid._2()));
} else {
if (Optional.ofNullable(a.getRank()).isPresent()) {
contribution.setPerson("person______::"+DHPUtils.md5(input.getId() + a.getRank()));
} else {
contribution.setPerson("tmp_person__::"+DHPUtils.md5(input.getId() + count));
}
}
}
}
}
// "contributions": [
// {
@ -189,94 +195,93 @@ public class ResultMapper implements Serializable {
// "roles": ["writing-original-draft", "conceptualization"]
// }
// ]
}
}
private static <E extends Result> void mapTopic(ResearchProduct out, E input) {
if (Optional.ofNullable(input.getSubject()).isPresent()) {
out.setTopics(input.getSubject().stream().parallel().map(s -> {
Topic topic = new Topic();
topic.setTopic(getIdentifier(s));
Provenance provenance = new Provenance();
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
provenance.setType(s.getDataInfo().getInferenceprovenance());
topic.setProvenance(provenance);
return topic;
}).collect(Collectors.toList()));
}
}
private static String getIdentifier(StructuredProperty s) {
return DHPUtils.md5(s.getQualifier().getClassid() + s.getValue());
}
private static <E extends Result> void mapType(ResearchProduct out, E input) throws NoAllowedTypeException {
switch (input.getResulttype().getClassid()) {
case "publication":
out.setProductType(ResearchTypes.LITERATURE.label);
break;
case "dataset":
out.setProductType(ResearchTypes.RESEARCH_DATA.label);
break;
case "software":
out.setProductType(ResearchTypes.RESEARCH_SOFTWARE.label);
break;
case "other":
out.setProductType(ResearchTypes.OTHER.label);
break;
default:
throw new ClassCastException("Result type not present or not allowed");
}
}
private static void mapPid(ResearchProduct out, Result input) {
Optional
.ofNullable(input.getPid())
.ifPresent(
value -> out
.setIdentifiers(
value
.stream()
.map(
p -> {
Identifier identifier = new Identifier();
identifier.setValue(p.getValue());
identifier.setScheme(p.getQualifier().getSchemeid());
return identifier;
})
.collect(Collectors.toList())));
}
private static void mapTitle(ResearchProduct out, Result input) throws NoTitleFoundException {
Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
if (otitle.isPresent()) {
List<StructuredProperty> iTitle = otitle
.get()
.stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
.collect(Collectors.toList());
if (!iTitle.isEmpty()) {
out.setTitles(Arrays.asList(iTitle.get(0).getValue()));
return;
}
iTitle = otitle
.get()
.stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
.collect(Collectors.toList());
if (!iTitle.isEmpty()) {
out.setTitles(Arrays.asList(iTitle.get(0).getValue()));
}
}
}
private static void mapAbstract(ResearchProduct out, Result input) {
final List<String> descriptionList = new ArrayList<>();
Optional
.ofNullable(input.getDescription())
.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
out.setAbstracts(descriptionList);
}
}

View File

@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,110 @@
<workflow-app name="copy_graph_from_hive" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>hivePath</name>
<description>the path where the hive database content to copy is stored</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="copy_graph" />
<!-- <start to="make_archive" />-->
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="copy_graph">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Copy graph</name>
<class>eu.dnetlib.dhp.oa.graph.dump.SparkCopyGraph</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--hivePath</arg><arg>${hivePath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End" />
</workflow-app>

View File

@ -0,0 +1,21 @@
[
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName":"hp",
"paramLongName":"hivePath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"issm",
"paramLongName":"isSparkSessionManaged",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": false
}
]
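For reference, a minimal sketch of a driver honouring the three parameters declared above (hivePath, outputPath, isSparkSessionManaged). The class below is illustrative only: the class name, the argument handling and the copy logic are assumptions, not the actual SparkCopyGraph implementation.
package eu.dnetlib.dhp.oa.graph.dump;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
// Hypothetical sketch: copies the graph content found under hivePath to outputPath.
public class SparkCopyGraphSketch {
public static void main(String[] args) {
// naive positional parsing, for illustration only: args[0] = hivePath, args[1] = outputPath
final String hivePath = args[0];
final String outputPath = args[1];
final SparkSession spark = SparkSession
.builder()
.appName("Copy graph")
.enableHiveSupport()
.getOrCreate();
// read whatever the dump materialised under hivePath and rewrite it under outputPath
spark
.read()
.parquet(hivePath)
.write()
.mode(SaveMode.Overwrite)
.parquet(outputPath);
spark.stop();
}
}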

View File

@ -1,4 +1,4 @@
<workflow-app name="sub-dump_subset" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="dump_country" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
@ -88,7 +88,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="find_results_for_country"/>
<error to="Kill"/>

View File

@ -81,7 +81,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities</main-class>
<arg>--outputPath</arg><arg>${outputPath}/community</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</java>
<ok to="select_result_dump_relation"/>
@ -143,7 +142,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>publication</arg>
</spark>
@ -169,7 +167,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>dataset</arg>
</spark>
@ -195,7 +192,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
</spark>
@ -221,7 +217,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>software</arg>
</spark>
@ -252,9 +247,7 @@
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>

View File

@ -1,12 +1,6 @@
[
{
"paramName":"is",
"paramLongName":"isLookUpUrl",
"paramDescription": "URL of the isLookUp Service",
"paramRequired": true
},
{
"paramName":"nn",
"paramLongName":"nameNode",

View File

@ -13,13 +13,6 @@
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": true
},
{
"paramName":"ilu",
"paramLongName":"isLookUpUrl",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"c",
"paramLongName":"communities",

View File

@ -1,11 +1,6 @@
[
{
"paramName":"is",
"paramLongName":"isLookUpUrl",
"paramDescription": "URL of the isLookUp Service",
"paramRequired": false
},
{
"paramName": "hdfs",
"paramLongName": "hdfsPath",
@ -17,12 +12,8 @@
"paramLongName": "nameNode",
"paramDescription": "the name node",
"paramRequired": true
},{
"paramName": "md",
"paramLongName": "masterDuplicate",
"paramDescription": "the master duplicate path for datasource deduplication",
"paramRequired": false
}
}
]

View File

@ -1,11 +1,6 @@
[
{
"paramName":"ocm",
"paramLongName":"organizationCommunityMap",
"paramDescription": "the organization community map association",
"paramRequired": false
},
{
"paramName":"s",
"paramLongName":"sourcePath",

View File

@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,88 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="dump_organization"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="dump_organization">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table organization and related relations </name>
<class>eu.dnetlib.dhp.oa.graph.dump.organizationonly.SparkDumpOrganizationJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${workingDir}/project</arg>
<arg>--communityMapPath</arg><arg>noneed</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -27,10 +27,6 @@
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>isLookUpUrl</name>
<description>the isLookup service endpoint</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
@ -67,11 +63,7 @@
<value>none</value>
<description>the depositionId of a deposition open that has to be added content</description>
</property>
<property>
<name>organizationCommunityMap</name>
<value>none</value>
<description>the organization community map</description>
</property>
<!---->
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
@ -167,7 +159,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--singleDeposition</arg><arg>${singleDeposition}</arg>
<arg>--communityId</arg><arg>${communityId}</arg>
</java>
@ -175,6 +166,8 @@
<error to="Kill"/>
</action>
<decision name="choose_dump">
<switch>
<case to="dump_funder">${wf:conf('dumpType') eq "funder"}</case>
@ -203,22 +196,6 @@
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>organizationCommunityMap</name>
<value>${organizationCommunityMap}</value>
</property>
<property>
<name>isLookUpUrl</name>
<value>${isLookUpUrl}</value>
</property>
<property>
<name>pathMap</name>
<value>${pathMap}</value>
</property>
<property>
<name>selectionCriteria</name>
<value>${selectionCriteria}</value>
</property>
</configuration>
</sub-workflow>
<ok to="make_archive" />
@ -244,14 +221,7 @@
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>organizationCommunityMap</name>
<value>${organizationCommunityMap}</value>
</property>
<property>
<name>isLookUpUrl</name>
<value>${isLookUpUrl}</value>
</property>
<property>
<name>resultAggregation</name>
<value>${resultAggregation}</value>
@ -327,17 +297,6 @@
<error to="Kill" />
</action>
<!-- <action name="make_archive">-->
<!-- <java>-->
<!-- <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>-->
<!-- <arg>&#45;&#45;hdfsPath</arg><arg>${outputPath}</arg>-->
<!-- <arg>&#45;&#45;nameNode</arg><arg>${nameNode}</arg>-->
<!-- <arg>&#45;&#45;sourcePath</arg><arg>${workingDir}/tar</arg>-->
<!-- </java>-->
<!-- <ok to="should_upload"/>-->
<!-- <error to="Kill"/>-->
<!-- </action>-->
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>

View File

@ -12,10 +12,7 @@
<name>resultAggregation</name>
<description>true if all the result type have to be dumped under result. false otherwise</description>
</property>
<property>
<name>organizationCommunityMap</name>
<description>the organization community map</description>
</property>
<property>
<name>hiveDbName</name>
@ -85,20 +82,8 @@
</configuration>
</global>
<start to="get_master_duplicate" />
<start to="fork_dump" />
<action name="get_master_duplicate">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/masterduplicate</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
</java>
<ok to="fork_dump"/>
<error to="Kill"/>
</action>
<fork name="fork_dump">
<path start="dump_publication"/>
@ -349,7 +334,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/community_infrastructure.json.gz</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="join_context"/>
<error to="Kill"/>
@ -360,8 +344,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--masterDuplicate</arg><arg>${workingDir}/masterduplicate</arg>
</java>
<ok to="join_context"/>
<error to="Kill"/>
@ -386,7 +368,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
<arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_context"/>

View File

@ -8,10 +8,6 @@
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>organizationCommunityMap</name>
<description>the organization community map</description>
</property>
<property>
<name>pathMap</name>
<description>the path where to find the elements involved in the constraints within the json</description>
@ -92,7 +88,7 @@
</configuration>
</global>
<start to="get_master_duplicate" />
<start to="fork_select_and_dump" />
<action name="get_master_duplicate">
<java>
@ -138,7 +134,6 @@
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>publication</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -168,7 +163,6 @@
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>dataset</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -197,7 +191,6 @@
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -226,7 +219,6 @@
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>software</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -612,7 +604,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/context/community_infrastructure.json.gz</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="select_valid_context"/>
<error to="Kill"/>
@ -648,8 +639,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/dump/relation/context</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--masterDuplicate</arg><arg>${workingDir}/masterduplicate</arg>
</java>
<ok to="join_context"/>
<error to="Kill"/>
@ -674,7 +663,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/relation/contextOrg</arg>
<arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_context"/>

View File

@ -1,8 +0,0 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and ($x//context/param[./@name = 'status']/text() = 'all')
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
</community>

View File

@ -1,11 +0,0 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and (%s)
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
<description>
{$x//CONFIGURATION/context/param[@name='description']/text()}
</description>
</community>

View File

@ -1,8 +0,0 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and $x//CONFIGURATION/context[./@id=%s]
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
</community>

View File

@ -1,117 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static org.mockito.Mockito.lenient;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.dom4j.DocumentException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.xml.sax.SAXException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and ($x//context/param[./@name = 'status']/text() = 'all') "
+
" return " +
"<community> " +
"{$x//CONFIGURATION/context/@id}" +
"{$x//CONFIGURATION/context/@label}" +
"</community>";
List<String> communityMap = Arrays
.asList(
"<community id=\"egi\" label=\"EGI Federation\"/>",
"<community id=\"fet-fp7\" label=\"FET FP7\"/>",
"<community id=\"fet-h2020\" label=\"FET H2020\"/>",
"<community id=\"clarin\" label=\"CLARIN\"/>",
"<community id=\"rda\" label=\"Research Data Alliance\"/>",
"<community id=\"ee\" label=\"SDSN - Greece\"/>",
"<community id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\"/>",
"<community id=\"fam\" label=\"Fisheries and Aquaculture Management\"/>",
"<community id=\"ni\" label=\"Neuroinformatics\"/>",
"<community id=\"mes\" label=\"European Marine Science\"/>",
"<community id=\"instruct\" label=\"Instruct-ERIC\"/>",
"<community id=\"elixir-gr\" label=\"ELIXIR GR\"/>",
"<community id=\"aginfra\" label=\"Agricultural and Food Sciences\"/>",
"<community id=\"dariah\" label=\"DARIAH EU\"/>",
"<community id=\"risis\" label=\"RISIS\"/>",
"<community id=\"epos\" label=\"EPOS\"/>",
"<community id=\"beopen\" label=\"Transport Research\"/>",
"<community id=\"euromarine\" label=\"EuroMarine\"/>",
"<community id=\"ifremer\" label=\"Ifremer\"/>",
"<community id=\"oa-pg\" label=\"EC Post-Grant Open Access Pilot\"/>",
"<community id=\"science-innovation-policy\" label=\"Science and Innovation Policy Studies\"/>",
"<community id=\"covid-19\" label=\"COVID-19\"/>",
"<community id=\"enermaps\" label=\"Energy Research\"/>");
@Mock
private ISLookUpService isLookUpService;
private QueryInformationSystem queryInformationSystem;
private Map<String, String> map;
@BeforeEach
public void setUp() throws ISLookUpException, DocumentException, SAXException {
lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap);
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(isLookUpService);
map = queryInformationSystem.getCommunityMap(false, null);
}
@Test
void testSize() throws ISLookUpException {
Assertions.assertEquals(23, map.size());
}
@Test
void testContent() {
Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation"));
Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7"));
Assertions.assertTrue(map.containsKey("fet-h2020") && map.get("fet-h2020").equals("FET H2020"));
Assertions.assertTrue(map.containsKey("clarin") && map.get("clarin").equals("CLARIN"));
Assertions.assertTrue(map.containsKey("rda") && map.get("rda").equals("Research Data Alliance"));
Assertions.assertTrue(map.containsKey("ee") && map.get("ee").equals("SDSN - Greece"));
Assertions
.assertTrue(
map.containsKey("dh-ch") && map.get("dh-ch").equals("Digital Humanities and Cultural Heritage"));
Assertions.assertTrue(map.containsKey("fam") && map.get("fam").equals("Fisheries and Aquaculture Management"));
Assertions.assertTrue(map.containsKey("ni") && map.get("ni").equals("Neuroinformatics"));
Assertions.assertTrue(map.containsKey("mes") && map.get("mes").equals("European Marine Science"));
Assertions.assertTrue(map.containsKey("instruct") && map.get("instruct").equals("Instruct-ERIC"));
Assertions.assertTrue(map.containsKey("elixir-gr") && map.get("elixir-gr").equals("ELIXIR GR"));
Assertions
.assertTrue(map.containsKey("aginfra") && map.get("aginfra").equals("Agricultural and Food Sciences"));
Assertions.assertTrue(map.containsKey("dariah") && map.get("dariah").equals("DARIAH EU"));
Assertions.assertTrue(map.containsKey("risis") && map.get("risis").equals("RISIS"));
Assertions.assertTrue(map.containsKey("epos") && map.get("epos").equals("EPOS"));
Assertions.assertTrue(map.containsKey("beopen") && map.get("beopen").equals("Transport Research"));
Assertions.assertTrue(map.containsKey("euromarine") && map.get("euromarine").equals("EuroMarine"));
Assertions.assertTrue(map.containsKey("ifremer") && map.get("ifremer").equals("Ifremer"));
Assertions.assertTrue(map.containsKey("oa-pg") && map.get("oa-pg").equals("EC Post-Grant Open Access Pilot"));
Assertions
.assertTrue(
map.containsKey("science-innovation-policy")
&& map.get("science-innovation-policy").equals("Science and Innovation Policy Studies"));
Assertions.assertTrue(map.containsKey("covid-19") && map.get("covid-19").equals("COVID-19"));
Assertions.assertTrue(map.containsKey("enermaps") && map.get("enermaps").equals("Energy Research"));
}
}

View File

@ -16,9 +16,9 @@ import org.junit.jupiter.api.Test;
import com.google.gson.Gson;
import eu.dnetlib.dhp.common.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.common.zenodoapi.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
@Disabled
public class ZenodoUploadTest {
@ -95,46 +95,46 @@ public class ZenodoUploadTest {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
client.newVersion("656628");
System.out.println(client.newVersion("3516917"));
CommunityMap communityMap = new CommunityMap();
communityMap.put("ni", "Neuroinformatics");
communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
// CommunityMap communityMap = new CommunityMap();
// communityMap.put("ni", "Neuroinformatics");
// communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage");
// LocalFileSystem fs = FileSystem.getLocal(new Configuration());
//
// fs
// .copyFromLocalFile(
// false, new Path(getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
// .getPath()),
// new Path(workingDir + "/zenodo/ni/ni"));
// fs
// .copyFromLocalFile(
// false, new Path(getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
// .getPath()),
// new Path(workingDir + "/zenodo/dh-ch/dh-ch"));
//
// RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
// .listFiles(
// new Path(workingDir + "/zenodo"), true);
// while (fileStatusListIterator.hasNext()) {
// LocatedFileStatus fileStatus = fileStatusListIterator.next();
//
// String p_string = fileStatus.getPath().toString();
//
// int index = p_string.lastIndexOf("/");
// String community = p_string.substring(0, index);
// community = community.substring(community.lastIndexOf("/") + 1);
// String community_name = communityMap.get(community).replace(" ", "_");
// // fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
// System.out.println(community);
//
// // File f = new File("/tmp/" + community_name);
// FSDataInputStream inputStream = fs.open(fileStatus.getPath());
// System.out.println(client.uploadIS(inputStream, community_name));
fs
.copyFromLocalFile(
false, new Path(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni")
.getPath()),
new Path(workingDir + "/zenodo/ni/ni"));
fs
.copyFromLocalFile(
false, new Path(getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch")
.getPath()),
new Path(workingDir + "/zenodo/dh-ch/dh-ch"));
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fs
.listFiles(
new Path(workingDir + "/zenodo"), true);
while (fileStatusListIterator.hasNext()) {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
String p_string = fileStatus.getPath().toString();
int index = p_string.lastIndexOf("/");
String community = p_string.substring(0, index);
community = community.substring(community.lastIndexOf("/") + 1);
String community_name = communityMap.get(community).replace(" ", "_");
// fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name));
System.out.println(community);
// File f = new File("/tmp/" + community_name);
FSDataInputStream inputStream = fs.open(fileStatus.getPath());
System.out.println(client.uploadIS(inputStream, community_name));
}
// }
// System.out.println(client.publish());
@ -146,7 +146,7 @@ public class ZenodoUploadTest {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
client.newVersion("1210237");
client.newVersion("4559725");
File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar");
// File file = new File(getClass()
@ -200,7 +200,7 @@ public class ZenodoUploadTest {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN);
client.uploadOpenDeposition("8144316");
client.uploadOpenDeposition("10037121");
File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar");
// File file = new File(getClass()
@ -209,7 +209,9 @@ public class ZenodoUploadTest {
InputStream is = new FileInputStream(file);
Assertions.assertEquals(200, client.uploadIS3(is, "newVersion_deposition", file.length()));
Integer response_code = client.uploadIS3(is, "newVersion_deposition", file.length());
System.out.println(response_code);
Assertions.assertEquals(201, response_code);
// Assertions.assertEquals(202, client.publish());

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.IOException;
import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Collectors;
@ -13,6 +14,8 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.communityapi.QueryCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.schema.common.ModelSupport;
@ -22,542 +25,25 @@ import eu.dnetlib.dhp.utils.DHPUtils;
class CreateRelationTest {
List<String> communityContext = Arrays
.asList(
"<context id=\"clarin\" label=\"CLARIN\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">CLARIN</param>\n" +
" <param name=\"logourl\">https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg</param>\n"
+
" <param name=\"name\">Common Language Resources and Technology Infrastructure</param>\n" +
" <param name=\"manager\">maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">(Part of) the work reported here was made possible by using the CLARIN infrastructure.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding through &lt;CLARIN national consortium member, e.g. CLARIN.SI&gt;, &lt;XYZ&gt; project, grant no. &lt;XYZ&gt;.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding (through CLARIN ERIC) from the European Unions Horizon 2020 research and innovation programme under grant agreement No &lt;0-9&gt; for project &lt;XYZ&gt;.\n"
+
" (E.g. No 676529 for project CLARIN-PLUS.)</param>\n" +
" <param name=\"zenodoCommunity\">oac_clarin</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"true\" id=\"clarin::projects\" label=\"CLARIN Projects\">\n" +
" <concept claim=\"false\" id=\"clarin::projects::1\" label=\"CLARIN-PLUS\">\n" +
" <param name=\"projectfullname\">CLARIN-PLUS</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">676529</param>\n" +
" <param name=\"url\">http://www.clarin.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2015-1</param>\n" +
" <param name=\"acronym\">CLARIN+</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"clarin::projects::2\" label=\"CLARIN\">\n" +
" <param name=\"projectfullname\">Common Language Resources and Technology Infrastructure</param>\n"
+
" <param name=\"acronym\">CLARIN</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">212230</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda_______::ef782b2d85676aa3e5a907427feb18c4</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::contentproviders\" label=\"CLARIN Content providers\">" +
"<!--<concept claim=\"true\" id=\"clarin::contentproviders::1\" label=\"Zotero\">\n" +
" <param name=\"openaireId\">opendoar____::d96409bf894217686ba124d7356686c9</param>\n"
+
" <param name=\"name\">Public Knowledge Project EPrint Archive</param>\n" +
" <param name=\"officialname\">Public Knowledge Project EPrint Archive</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept> -->\n" +
" <concept claim=\"false\" id=\"clarin::contentproviders::2\" label=\"\">\n" +
" <param name=\"name\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"officialname\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"true\" id=\"clarin::subcommunity\" label=\"CLARIN communities\">\n" +
" <concept claim=\"true\" id=\"clarin::subcommunity::1\" label=\"CLARIN-D\">\n" +
" <param name=\"fullname\">CLARIN-D</param>\n" +
" <param name=\"homepageUrl\">https://www.clarin-d.de/en/</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"example\">http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf</param>\n"
+
" <param name=\"nation\">Germany</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::zenodocommunities\" label=\"CLARIN Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"clarin::organizations\" label=\"CLARIN Organizations\"/>\n" +
"</context>",
"<context id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.</param>\n"
+
" <param name=\"logourl\">http://sanmamante.org/DH_CH_logo.png</param>\n" +
" <param name=\"name\">Digital Humanities and Cultural Heritage</param>\n" +
" <param name=\"manager\">ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk</param>\n"
+
" <param name=\"subject\">modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"zenodoCommunity\">oac_dh-ch</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"dh-ch::projects\" label=\"DH-CH Projects\">\n" +
" <concept claim=\"false\" id=\"dh-ch::projects::1\" label=\"Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\">\n"
+
" <param name=\"projectfullname\">Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654119</param>\n" +
" <param name=\"url\">http://www.parthenos-project.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PARTHENOS</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"dh-ch::contentproviders\" label=\"DH-CH Content providers\">\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::2\" label=\"The UK's largest collection of digital research data in the social sciences and humanities\">\n"
+
" <param name=\"openaireId\">re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</param>\n" +
" <param name=\"name\">The UK's largest collection of digital research data in the social sciences and humanities</param>\n"
+
" <param name=\"officialname\">UK Data Archive</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::3\" label=\"Journal of Data Mining and Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::c6cd4b532e12868c1d760a8d7cda6815</param>\n" +
" <param name=\"name\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"officialname\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::6\" label=\"Frontiers in Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</param>\n" +
" <param name=\"name\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"officialname\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::7\" label=\"Il Capitale Culturale: Studies on the Value of Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</param>\n" +
" <param name=\"name\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"officialname\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::8\" label=\"Conservation Science in Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::0da84e9dfdc8419576169e027baa8028</param>\n" +
" <param name=\"name\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"officialname\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::9\" label=\"Electronic Archiving System\">\n"
+
" <param name=\"openaireId\">re3data_____::84e123776089ce3c7a33db98d9cd15a8</param>\n" +
" <param name=\"name\">Electronic Archiving System</param>\n" +
" <param name=\"officialname\">EASY</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::10\" label=\"DANS-KB Harvester\">\n" +
" <param name=\"openaireId\">openaire____::c5502a43e76feab55dd00cf50f519125</param>\n" +
" <param name=\"name\">DANS-KB Harvester</param>\n" +
" <param name=\"officialname\">Gemeenschappelijke Harvester DANS-KB</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::11\" label=\"ads\">\n" +
" <param name=\"openaireId\">re3data_____::a48f09c562b247a9919acfe195549b47</param>\n" +
" <param name=\"name\">ads</param>\n" +
" <param name=\"officialname\">Archaeology Data Service</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::12\" label=\"\">\n" +
" <param name=\"openaireId\">opendoar____::97275a23ca44226c9964043c8462be96</param>\n" +
" <param name=\"name\">KNAW Repository</param>\n" +
" <param name=\"officialname\">KNAW Repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::13\" label=\"Internet Archaeology\">\n"
+
" <param name=\"openaireId\">doajarticles::2899208a99aa7d142646e0a80bfeef05</param>\n" +
" <param name=\"name\">Internet Archaeology</param>\n" +
" <param name=\"officialname\">Internet Archaeology</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"ni\" label=\"Neuroinformatics\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.</param>\n"
+
" <param name=\"logourl\">https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png</param>\n"
+
" <param name=\"name\">Neuroinformatics</param>\n" +
" <param name=\"manager\">sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\">brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities</param>\n"
+
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oac_ni</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"ni::projects\" label=\"NI Content providers\"/>\n" +
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
" <param name=\"name\">Formerly:OpenFMRI</param>\n" +
" <param name=\"officialname\">OpenNeuro</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::2\" label=\"RIO\">\n" +
" <param name=\"openaireId\">doajarticles::c7d3de67dc77af72f6747157441252ec</param>\n" +
" <param name=\"name\">Research Ideas and Outcomes</param>\n" +
" <param name=\"officialname\">Research Ideas and Outcomes</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::3\" label=\"NITRC\">\n" +
" <param name=\"openaireId\">re3data_____::8515794670370f49c1d176c399c714f5</param>\n" +
" <param name=\"name\">Neuroimaging Informatics Tools and Resources Clearinghouse</param>\n"
+
" <param name=\"officialname\">NITRC</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::4\" label=\"FRONTIERSNI\">\n" +
" <param name=\"openaireId\">doajarticles::d640648c84b10d425f96f11c3de468f3</param>\n" +
" <param name=\"name\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"officialname\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::5\" label=\"NeuroImage: Clinical\">\n" +
" <param name=\"openaireId\">doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</param>\n" +
" <param name=\"name\">NeuroImage: Clinical</param>\n" +
" <param name=\"officialname\">NeuroImage: Clinical</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::6\" label=\"NeuroVault\">\n" +
" <param name=\"openaireId\">rest________::fb1a3d4523c95e63496e3bc7ba36244b</param>\n" +
" <param name=\"name\">NeuroVault</param>\n" +
" <param name=\"officialname\">NeuroVault</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"instruct\" label=\"Instruct-ERIC\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">Instruct-ERIC is the European Research Infrastructure for Structural Biology</param>\n"
+
" <param name=\"logourl\">https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png</param>\n"
+
" <param name=\"name\">Instruct-ERIC</param>\n" +
" <param name=\"manager\">claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct-ERIC.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project</param>\n"
+
" <param name=\"zenodoCommunity\">oac_instruct</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"instruct::projects\" label=\"Instruct-ERIC Projects\">\n" +
" <concept claim=\"false\" id=\"instruct::projects::1\" label=\"Authentication and Authorisation For Research and Collaboration\">\n"
+
" <param name=\"projectfullname\">Authentication and Authorisation For Research and Collaboration</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730941</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-EINFRA-2016-1</param>\n" +
" <param name=\"acronym\">AARC2</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::2\" label=\"Building data bridges between biological and medical infrastructures in Europe\">\n"
+
" <param name=\"projectfullname\">Building data bridges between biological and medical infrastructures in Europe</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">284209</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioMedBridges</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::3\" label=\"Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\">\n"
+
" <param name=\"projectfullname\">Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">283570</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioStruct-X</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::4\" label=\"Coordinated Research Infrastructures Building Enduring Life-science services\">\n"
+
" <param name=\"projectfullname\">Coordinated Research Infrastructures Building Enduring Life-science services</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654248</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2014-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">CORBEL</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::5\" label=\"Infrastructure for NMR, EM and X-rays for translational research\">\n"
+
" <param name=\"projectfullname\">Infrastructure for NMR, EM and X-rays for translational research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">653706</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2014-2015</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">iNEXT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::6\" label=\"Integrated Structural Biology Infrastructure\">\n"
+
" <param name=\"projectfullname\">Integrated Structural Biology Infrastructure</param>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">211252</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2007-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::7\" label=\"Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\">\n"
+
" <param name=\"projectfullname\">Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">731005</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT-ULTRA</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::8\" label=\"Opening Synchrotron Light for Experimental Science and Applications in the Middle East\">\n"
+
" <param name=\"projectfullname\">Opening Synchrotron Light for Experimental Science and Applications in the Middle East</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730943</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRASUPP-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">OPEN SESAME</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::9\" label=\"Infrastructure for Protein Production Platforms\">\n"
+
" <param name=\"projectfullname\">Infrastructure for Protein Production Platforms</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">227764</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2008-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PCUBE</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::10\" label=\"European Vaccine Research and Development Infrastructure\">\n"
+
" <param name=\"projectfullname\">European Vaccine Research and Development Infrastructure</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730964</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">TRAMSVAC2</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::11\" label=\"World-wide E-infrastructure for structural biology\">\n"
+
" <param name=\"projectfullname\">World-wide E-infrastructure for structural biology</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">EC | H2020 | RIA</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">West-Life</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::12\" label=\"RI-VIS\">\n" +
" <param name=\"projectfullname\">Expanding research infrastructure visibility to strengthen strategic partnerships</param>\n"
+
" <param name=\"acronym\">RI-VIS</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">824063</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda__h2020::af93b591b76991d8437993a8f6fc6538</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::contentproviders\" label=\"Instruct-ERIC Content providers\"/>\n"
+
" <category claim=\"false\" id=\"instruct::zenodocommunities\" label=\"Instruct-ERIC Zenodo Communities\">\n"
+
" <concept claim=\"false\" id=\"instruct::zenodocommunities::1\" label=\"Instruct\">\n" +
" <param name=\"zenodoid\">instruct</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::zenodocommunities::2\" label=\"West-Life Virtual Research Environment for Structural Biology\">\n"
+
" <param name=\"zenodoid\">west-life</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::organizations\" label=\"Instruct-ERIC Organizations\">\n"
+
" <concept claim=\"false\" id=\"instruct::organizations::1\" label=\"FRISBI\">\n" +
" <param name=\"name\">FRISBI</param>\n" +
" <param name=\"logourl\">aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==</param>\n"
+
" <param name=\"websiteurl\">aHR0cDovL2ZyaXNiaS5ldS8=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::2\" label=\"RI-VIS\">\n" +
" <param name=\"name\">RI-VIS</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly9yaS12aXMuZXU=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::3\" label=\"CIISB\">\n" +
" <param name=\"name\">CIISB</param>\n" +
" <param name=\"logourl\">aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=</param>\n" +
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuY2lpc2Iub3Jn</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"elixir-gr\" label=\"ELIXIR GR\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.</param>\n"
+
" <param name=\"logourl\">https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png</param>\n"
+
" <param name=\"name\">The Greek National Node of the ESFRI European RI ELIXIR</param>\n" +
" <param name=\"manager\">vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\">\n" +
" <concept claim=\"false\" id=\"ni::projects::12\" label=\"\">\n" +
" <param name=\"projectfullname\">BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1U24RR025736-01</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::13\" label=\"\">\n" +
" <param name=\"projectfullname\">COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">0223843</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::14\" label=\"\">\n" +
" <param name=\"projectfullname\">The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">5R01MH082795-05</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::15\" label=\"\">\n" +
" <param name=\"projectfullname\">Fragmented early life environmental and emotional / cognitive vulnerabilities</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1P50MH096889-01A1</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::16\" label=\"\">\n" +
" <param name=\"projectfullname\">Enhancement of the 1000 Functional Connectome Project</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1R03MH096321-01A1</param>\n" +
" <param name=\"funder\">TUBITAK</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::17\" label=\"\">\n" +
" <param name=\"projectfullname\">CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1131441</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
" <param name=\"projectfullname\">Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">0121950</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
" <param name=\"projectfullname\">Transforming statistical methodology for neuroimaging meta-analysis.</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">100309</param>\n" +
" <param name=\"funder\">WT</param>\n" +
" </concept>\n" +
" </category>" +
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
+
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
" <param name=\"openaireId\">rest________::b8e502674c3c3499d5374e9b2ea6d8d5</param>\n" +
" <param name=\"name\">bio.tools</param>\n" +
" <param name=\"officialname\">bio.tools</param>\n" +
" <param name=\"enabled\">false</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"elixir-gr::zenodocommunities\" label=\"Elixir-GR Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"elixir-gr::organizations\" label=\"Elixir-GR Organizations\">\n" +
" <concept claim=\"false\" id=\"elixir-gr::organizations::1\" label=\"ATHENA RC\">\n" +
" <param name=\"name\">ATHENA RC</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=</param>\n" +
" </concept>\n" +
" </category><!-- <category claim=\"false\" id=\"elixir-gr::resultorganizations\" label=\"Elixir-GR Results through organizations\"/> -->\n"
+
"</context>");
private QueryInformationSystem queryInformationSystem;
private Map<String, String> map;
@BeforeEach
public void setUp() {
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setContextRelationResult(communityContext);
}
String contextInfo1 = "{\"id\":\"eut\",\"description\":null,\"type\":null,\"zenodocommunity\":null,\"name\":null,\"projectList\":[],\"datasourceList\":[\"opendoar____::39e4973ba3321b80f37d9b55f63ed8b8\",\"opendoar____::cda72177eba360ff16b7f836e2754370\",\"opendoar____::71f6278d140af599e06ad9bf1ba03cb0\",\"opendoar____::f5c59267dae7d123f54b741a76f28f84\",\"opendoar____::532a082cc38eaffa923d73bce41b4061\",\"opendoar____::00a03ec6533ca7f5c644d198d815329c\",\"opendoar____::33a854e247155d590883b93bca53848a\"],\"subject\":null}";
String contextInfo2 = "{\"id\":\"knowmad\",\"description\":null,\"type\":null,\"zenodocommunity\":null,\"name\":null,\"projectList\":[],\"datasourceList\":[\"issn___print::81db6614a3fa07becf706193d4754f30\",\"doajarticles::71eae002613af3569ea42b7093eefdbd\",\"doajarticles::daf53452e1199952d6c95ee7d0088d35\",\"issn___print::ff1a02f622ff006edc668e55fc865ae8\",\"doajarticles::14c007581eb12d843050ebf0493f2254\",\"issn__online::14c007581eb12d843050ebf0493f2254\",\"doajarticles::19a7308c7287b3784aacdb1979141c50\",\"doajarticles::c28985d2363f1ed20048dfd682ee14be\",\"issn__online::c28985d2363f1ed20048dfd682ee14be\",\"doajarticles::81db6614a3fa07becf706193d4754f30\",\"doajarticles::588fc0f98218e9f29a0dabe1182851c4\",\"doajarticles::5ccbe475390dfa642a356c7fd678b70a\",\"doajarticles::cf63b988539f8d28ed366220691f751b\",\"doajarticles::c03353a080ac66c37a1f6aa2de05ca63\",\"issn__online::efec4cc1b1143f69d7d0954a2e9b18b6\",\"doajarticles::25ddd7bb737fc8b027b455f6712111a0\",\"doajarticles::ca2e3cc507ff73298333ed53177e9916\",\"doajarticles::220ab67f748963b6932600585452ad83\",\"doajarticles::b718013941a35a86b17b9b57aca9d260\",\"doajarticles::934e4b68deaeee0781f18a6a6e4fd906\",\"doajarticles::43587737046be23d692860e3212d6966\",\"doajarticles::23a27fb45b9af1f2b57632b7ceb98448\",\"doajarticles::9d32313b109db4f024eb1c802ad4e353\",\"fairsharing_::b4944963b5c83d545c3d3022bcf03282\",\"doajarticles::b5a2cadc830f94ceb2f8035369cf66a1\",\"doajarticles::8d64614210c51d4bc9e1470bbe1b5607\",\"doajarticles::e258604e7e1132683570d4178e9bec8f\",\"doajarticles::260c4ba804c4c08842ad39a6e8fd2a97\",\"doajarticles::8c9f0a0c20cb062a0ab7823af8262d0e\",\"issn__online::ef2866d8e4561162a1b115c24ebd7887\",\"doajarticles::de16206a48e58e344ef049670a1a0134\",\"doajarticles::96621c5056169f56ff3131ea90aa07c9\",\"issn__online::4558a062701b28ec0b4a6b2f0fbe8d09\",\"issn___print::5571b62bd2e69e2311990fceb7fe2e3a\",\"doajarticles::c61ecda4a5a75b94a28b1c782008e64c\",\"doajarticles::fd825a036d04fcdf0ab805d87e16d1de\",\"issn___print::61d35d3f7e7f635fa9818eb5c5724833\",\"doajarticles::685e5ac5fb55c74ee8a2b266923e1f1d\",\"doajarticles::b99614718f7a6545b1eed9b68d3441de\",\"doajarticles::7d2d2e0095bf1ec755bf3e39d31a5bcb\",\"doajarticles::6124a1bc19d8f538c1bb45da8fda914b\",\"issn___print::6f2e006697b7a4f9d6e2e4d61d9e87c2\",\"issn___print::24d6c4cc60c9446680a4bf6464f232cc\",\"issn___print::0482ed6012f16fa029dd0434b644cb90\",\"issn___print::74d68388e8ad50f80b754f5036c80fcf\",\"issn__online::22b586e63e4962054a332a538e5d2b71\",\"issn___print::4bceffb60baed7f1e3f25b171b8fcf63\",\"issn__online::4074709fc410b3be61ea1769634ae8ff\",\"issn__online::d787423124beb54d12945b8fb6effe17\",\"issn__online::92d6a58173da7e479557acb8701da9dc\",\"issn___print::36813e86616b329b06101afab0f2d115\",\"issn___print::60f102cf59ddbfc2297dbfd2efe1c006\",\"issn__online::e2bedb8ffd0dd9a05af59c2651eb4200\",\"issn___print::3a5263873041ce993d856c3a08b87e66\",\"issn___print::c0118216b90b4ec13c4344c302eb1cc2\",\"issn___print::36cb3b21af46a23327d120d848ac5256\",\"issn___print::f6528b255fa5b915efddf5bdd4b12bef\",\"issn___print::50ae07b765e6c893d93701b63dc885eb\",\"issn___print::1efd4ac736f64f83bc537339bf112882\",\"issn___print::f46704a2eb1338c3524391c7fcdc5f50\",\"issn__online::6e823cb8d73afda60e12a0ce7ec65fe3\",\"doajarticles::df6746f5d17a6c27cf1033e2888b934d\",\"issn___print::8a8a700ac926378b330e57d7faf5948e\",\"issn___print::6fbc352ab2267a17490b922033c2ce8c\",\"issn___print::692e57ca190e52967683bca19dcc6200\",\"issn___print::4f657189126a3c51cf8f2d58410d09b8\",\"issn___print::2fd71afb352b4ec1e
e58b2bfa4d536da\",\"doajarticles::e91e4788bfb04277b61416fd184416b2\",\"issn__online::3097c43f11470b27e74f040cf267eedf\",\"doajarticles::942ba9d488531f3a60633b716db0b05c\",\"doajarticles::1aefeb2e98d29b17473e3f4dbcc5000e\",\"issn___print::223b857892c5de7247b3f906e1e1b96a\",\"doajarticles::b64f9c27d2c8899df904930cd6d2316a\"],\"subject\":null}";
String contextInfo3 = "{\"id\":\"ni\",\"description\":null,\"type\":null,\"zenodocommunity\":null,\"name\":null,\"projectList\":[\"nih_________::412379d1e2683a9571405e6621d3cdeb\",\"nsf_________::1e685a3d3ca1d62192bb51d42d44c6d1\",\"nsf_________::8ff441b675f2c62061f15210b05c1584\",\"nih_________::2d8bd32ed8cb2f0b0d4508df61d494dd\",\"nsf_________::e71b0a0f6a347d76816adb29b322441c\",\"nih_________::c27d60d28c9bc5cf17dd7ae7ad2d4ab4\",\"nih_________::b28919975c85cfee114e442c30c918c6\",\"wt__________::d709ac8e1ed393d036c895d239d825a4\"],\"datasourceList\":[\"re3data_____::5b9bf9171d92df854cf3c520692e9122\",\"doajarticles::c7d3de67dc77af72f6747157441252ec\",\"re3data_____::8515794670370f49c1d176c399c714f5\",\"doajarticles::d640648c84b10d425f96f11c3de468f3\",\"doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\",\"rest________::fb1a3d4523c95e63496e3bc7ba36244b\",\"opendoar____::7e7757b1e12abcb736ab9a754ffb617a\"],\"subject\":null}";
@Test
void test1() {
void test1() throws IOException {
ObjectMapper mapper = new ObjectMapper();
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem
.getContextRelation(consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class));
cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));
consumer.accept(mapper.readValue(contextInfo1, ContextInfo.class));
consumer.accept(mapper.readValue(contextInfo2, ContextInfo.class));
consumer.accept(mapper.readValue(contextInfo3, ContextInfo.class));
List<Relation> rList = new ArrayList<>();
cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));
rList.forEach(r -> {
try {
System.out.println(new ObjectMapper().writeValueAsString(r));
} catch (JsonProcessingException e) {
e.printStackTrace();
}
});
Assertions.assertEquals(34, rList.size());
Assertions.assertEquals(190, rList.size());
Assertions
.assertTrue(
@@ -570,11 +56,11 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))));
DHPUtils.md5("eut"))));
Assertions
.assertEquals(
10,
7,
rList
.stream()
.filter(
@@ -586,13 +72,13 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))))
DHPUtils.md5("eut"))))
.collect(Collectors.toList())
.size());
Assertions
.assertEquals(
10,
7,
rList
.stream()
.filter(
@@ -604,7 +90,7 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))))
DHPUtils.md5("eut"))))
.collect(Collectors.toList())
.size());
@@ -619,57 +105,22 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))))
DHPUtils.md5("eut"))))
.map(r -> r.getTarget())
.collect(Collectors.toSet());
Assertions
.assertTrue(
tmp.contains("re3data_____::9ebe127e5f3a0bf401875690f3bb6b81") &&
tmp.contains("doajarticles::c6cd4b532e12868c1d760a8d7cda6815") &&
tmp.contains("doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") &&
tmp.contains("doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") &&
tmp.contains("doajarticles::0da84e9dfdc8419576169e027baa8028") &&
tmp.contains("re3data_____::84e123776089ce3c7a33db98d9cd15a8") &&
tmp.contains("openaire____::c5502a43e76feab55dd00cf50f519125") &&
tmp.contains("re3data_____::a48f09c562b247a9919acfe195549b47") &&
tmp.contains("opendoar____::97275a23ca44226c9964043c8462be96") &&
tmp.contains("doajarticles::2899208a99aa7d142646e0a80bfeef05"));
}
@Test
public void test2() {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem
.getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class));
cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));
List<Relation> rList = new ArrayList<>();
cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));
Assertions.assertEquals(44, rList.size());
tmp.contains("opendoar____::532a082cc38eaffa923d73bce41b4061") &&
tmp.contains("opendoar____::00a03ec6533ca7f5c644d198d815329c") &&
tmp.contains("opendoar____::33a854e247155d590883b93bca53848a") &&
tmp.contains("opendoar____::71f6278d140af599e06ad9bf1ba03cb0") &&
tmp.contains("opendoar____::f5c59267dae7d123f54b741a76f28f84") &&
tmp.contains("opendoar____::cda72177eba360ff16b7f836e2754370") &&
tmp.contains("opendoar____::39e4973ba3321b80f37d9b55f63ed8b8"));
Assertions
.assertFalse(
rList
.stream()
.map(r -> r.getSource())
.collect(Collectors.toSet())
.contains(
String
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))));
Assertions
.assertEquals(
2,
.assertTrue(
rList
.stream()
.filter(
@@ -681,13 +132,15 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("clarin"))))
.collect(Collectors.toList())
.size());
DHPUtils.md5("eut"))))
.map(r -> r.getTargetType())
.collect(Collectors.toSet())
.stream()
.allMatch(t -> t.equals("datasource")));
Assertions
.assertEquals(
2,
15,
rList
.stream()
.filter(
@@ -699,144 +152,43 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("clarin"))))
DHPUtils.md5("ni"))))
.collect(Collectors.toList())
.size());
Set<String> tmp = rList
.stream()
.filter(
r -> r
.getSource()
.equals(
String
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("clarin"))))
.map(r -> r.getTarget())
.collect(Collectors.toSet());
Assertions
.assertTrue(
tmp.contains("corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") &&
tmp.contains("corda_______::ef782b2d85676aa3e5a907427feb18c4"));
rList.forEach(rel -> {
if (rel.getSourceType().equals("project")) {
String proj = rel.getSource();
Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12);
Assertions.assertFalse(proj.startsWith("40|"));
}
});
}
@Test
void test3() {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
MasterDuplicate md1 = new MasterDuplicate();
md1.setMaster("10|fake________::9ebe127e5f3a0bf401875690f3bb6b81");
md1.setDuplicate("10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81");
queryInformationSystem
.getContextRelation(
consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class), Arrays.asList(md1));
cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));
List<Relation> rList = new ArrayList<>();
cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));
rList.forEach(r -> {
try {
System.out.println(new ObjectMapper().writeValueAsString(r));
} catch (JsonProcessingException e) {
e.printStackTrace();
}
});
Assertions.assertEquals(34, rList.size());
Assertions
.assertTrue(
rList
.stream()
.map(r -> r.getSource())
.collect(Collectors.toSet())
.contains(
String
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))));
Assertions
.assertEquals(
10,
rList
7, rList
.stream()
.filter(
r -> r
.getSource()
.equals(
String
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))))
.collect(Collectors.toList())
.size());
DHPUtils.md5("ni")))
&&
r.getTargetType().equals("datasource"))
.count());
Assertions
.assertEquals(
10,
rList
8, rList
.stream()
.filter(
r -> r
.getTarget()
.getSource()
.equals(
String
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))))
.collect(Collectors.toList())
.size());
Set<String> tmp = rList
.stream()
.filter(
r -> r
.getSource()
.equals(
String
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("dh-ch"))))
.map(r -> r.getTarget())
.collect(Collectors.toSet());
Assertions
.assertTrue(
tmp.contains("fake________::9ebe127e5f3a0bf401875690f3bb6b81") &&
tmp.contains("doajarticles::c6cd4b532e12868c1d760a8d7cda6815") &&
tmp.contains("doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") &&
tmp.contains("doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") &&
tmp.contains("doajarticles::0da84e9dfdc8419576169e027baa8028") &&
tmp.contains("re3data_____::84e123776089ce3c7a33db98d9cd15a8") &&
tmp.contains("openaire____::c5502a43e76feab55dd00cf50f519125") &&
tmp.contains("re3data_____::a48f09c562b247a9919acfe195549b47") &&
tmp.contains("opendoar____::97275a23ca44226c9964043c8462be96") &&
tmp.contains("doajarticles::2899208a99aa7d142646e0a80bfeef05"));
DHPUtils.md5("ni")))
&&
r.getTargetType().equals("project"))
.count());
}
}

View File

@@ -1,810 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static org.mockito.Mockito.lenient;
import java.util.*;
import java.util.function.Consumer;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and $x//context/param[./@name = 'status']/text() = 'all' " +
" return " +
"$x//context";
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
List<String> communityMap = Arrays
.asList(
"clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri",
"ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. "
+
"SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - "
+
"Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community",
"dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community",
"fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community",
"ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community",
"mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community",
"instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community",
"elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri",
"aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community",
"dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri",
"epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri",
"covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community");
List<String> communityContext = Arrays
.asList(
"<context id=\"clarin\" label=\"CLARIN\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">CLARIN</param>\n" +
" <param name=\"logourl\">https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg</param>\n"
+
" <param name=\"name\">Common Language Resources and Technology Infrastructure</param>\n" +
" <param name=\"manager\">maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">(Part of) the work reported here was made possible by using the CLARIN infrastructure.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding through &lt;CLARIN national consortium member, e.g. CLARIN.SI&gt;, &lt;XYZ&gt; project, grant no. &lt;XYZ&gt;.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding (through CLARIN ERIC) from the European Unions Horizon 2020 research and innovation programme under grant agreement No &lt;0-9&gt; for project &lt;XYZ&gt;.\n"
+
" (E.g. No 676529 for project CLARIN-PLUS.)</param>\n" +
" <param name=\"zenodoCommunity\">oac_clarin</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"true\" id=\"clarin::projects\" label=\"CLARIN Projects\">\n" +
" <concept claim=\"false\" id=\"clarin::projects::1\" label=\"CLARIN-PLUS\">\n" +
" <param name=\"projectfullname\">CLARIN-PLUS</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">676529</param>\n" +
" <param name=\"url\">http://www.clarin.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2015-1</param>\n" +
" <param name=\"acronym\">CLARIN+</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"clarin::projects::2\" label=\"CLARIN\">\n" +
" <param name=\"projectfullname\">Common Language Resources and Technology Infrastructure</param>\n"
+
" <param name=\"acronym\">CLARIN</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">212230</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda_______::ef782b2d85676aa3e5a907427feb18c4</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::contentproviders\" label=\"CLARIN Content providers\">" +
"<!--<concept claim=\"true\" id=\"clarin::contentproviders::1\" label=\"Zotero\">\n" +
" <param name=\"openaireId\">opendoar____::d96409bf894217686ba124d7356686c9</param>\n"
+
" <param name=\"name\">Public Knowledge Project EPrint Archive</param>\n" +
" <param name=\"officialname\">Public Knowledge Project EPrint Archive</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept> -->\n" +
" <concept claim=\"false\" id=\"clarin::contentproviders::2\" label=\"\">\n" +
" <param name=\"name\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"officialname\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"true\" id=\"clarin::subcommunity\" label=\"CLARIN communities\">\n" +
" <concept claim=\"true\" id=\"clarin::subcommunity::1\" label=\"CLARIN-D\">\n" +
" <param name=\"fullname\">CLARIN-D</param>\n" +
" <param name=\"homepageUrl\">https://www.clarin-d.de/en/</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"example\">http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf</param>\n"
+
" <param name=\"nation\">Germany</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::zenodocommunities\" label=\"CLARIN Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"clarin::organizations\" label=\"CLARIN Organizations\"/>\n" +
"</context>",
"<context id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.</param>\n"
+
" <param name=\"logourl\">http://sanmamante.org/DH_CH_logo.png</param>\n" +
" <param name=\"name\">Digital Humanities and Cultural Heritage</param>\n" +
" <param name=\"manager\">ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk</param>\n"
+
" <param name=\"subject\">modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"zenodoCommunity\">oac_dh-ch</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"dh-ch::projects\" label=\"DH-CH Projects\">\n" +
" <concept claim=\"false\" id=\"dh-ch::projects::1\" label=\"Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\">\n"
+
" <param name=\"projectfullname\">Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654119</param>\n" +
" <param name=\"url\">http://www.parthenos-project.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PARTHENOS</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"dh-ch::contentproviders\" label=\"DH-CH Content providers\">\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::2\" label=\"The UK's largest collection of digital research data in the social sciences and humanities\">\n"
+
" <param name=\"openaireId\">re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</param>\n" +
" <param name=\"name\">The UK's largest collection of digital research data in the social sciences and humanities</param>\n"
+
" <param name=\"officialname\">UK Data Archive</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::3\" label=\"Journal of Data Mining and Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::c6cd4b532e12868c1d760a8d7cda6815</param>\n" +
" <param name=\"name\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"officialname\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::6\" label=\"Frontiers in Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</param>\n" +
" <param name=\"name\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"officialname\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::7\" label=\"Il Capitale Culturale: Studies on the Value of Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</param>\n" +
" <param name=\"name\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"officialname\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::8\" label=\"Conservation Science in Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::0da84e9dfdc8419576169e027baa8028</param>\n" +
" <param name=\"name\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"officialname\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::9\" label=\"Electronic Archiving System\">\n"
+
" <param name=\"openaireId\">re3data_____::84e123776089ce3c7a33db98d9cd15a8</param>\n" +
" <param name=\"name\">Electronic Archiving System</param>\n" +
" <param name=\"officialname\">EASY</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::10\" label=\"DANS-KB Harvester\">\n" +
" <param name=\"openaireId\">openaire____::c5502a43e76feab55dd00cf50f519125</param>\n" +
" <param name=\"name\">DANS-KB Harvester</param>\n" +
" <param name=\"officialname\">Gemeenschappelijke Harvester DANS-KB</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::11\" label=\"ads\">\n" +
" <param name=\"openaireId\">re3data_____::a48f09c562b247a9919acfe195549b47</param>\n" +
" <param name=\"name\">ads</param>\n" +
" <param name=\"officialname\">Archaeology Data Service</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::12\" label=\"\">\n" +
" <param name=\"openaireId\">opendoar____::97275a23ca44226c9964043c8462be96</param>\n" +
" <param name=\"name\">KNAW Repository</param>\n" +
" <param name=\"officialname\">KNAW Repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::13\" label=\"Internet Archaeology\">\n"
+
" <param name=\"openaireId\">doajarticles::2899208a99aa7d142646e0a80bfeef05</param>\n" +
" <param name=\"name\">Internet Archaeology</param>\n" +
" <param name=\"officialname\">Internet Archaeology</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"ni\" label=\"Neuroinformatics\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.</param>\n"
+
" <param name=\"logourl\">https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png</param>\n"
+
" <param name=\"name\">Neuroinformatics</param>\n" +
" <param name=\"manager\">sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\">brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities</param>\n"
+
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oac_ni</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
" <param name=\"name\">Formerly:OpenFMRI</param>\n" +
" <param name=\"officialname\">OpenNeuro</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::2\" label=\"RIO\">\n" +
" <param name=\"openaireId\">doajarticles::c7d3de67dc77af72f6747157441252ec</param>\n" +
" <param name=\"name\">Research Ideas and Outcomes</param>\n" +
" <param name=\"officialname\">Research Ideas and Outcomes</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::3\" label=\"NITRC\">\n" +
" <param name=\"openaireId\">re3data_____::8515794670370f49c1d176c399c714f5</param>\n" +
" <param name=\"name\">Neuroimaging Informatics Tools and Resources Clearinghouse</param>\n"
+
" <param name=\"officialname\">NITRC</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::4\" label=\"FRONTIERSNI\">\n" +
" <param name=\"openaireId\">doajarticles::d640648c84b10d425f96f11c3de468f3</param>\n" +
" <param name=\"name\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"officialname\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::5\" label=\"NeuroImage: Clinical\">\n" +
" <param name=\"openaireId\">doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</param>\n" +
" <param name=\"name\">NeuroImage: Clinical</param>\n" +
" <param name=\"officialname\">NeuroImage: Clinical</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::6\" label=\"NeuroVault\">\n" +
" <param name=\"openaireId\">rest________::fb1a3d4523c95e63496e3bc7ba36244b</param>\n" +
" <param name=\"name\">NeuroVault</param>\n" +
" <param name=\"officialname\">NeuroVault</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"instruct\" label=\"Instruct-ERIC\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">Instruct-ERIC is the European Research Infrastructure for Structural Biology</param>\n"
+
" <param name=\"logourl\">https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png</param>\n"
+
" <param name=\"name\">Instruct-ERIC</param>\n" +
" <param name=\"manager\">claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct-ERIC.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project</param>\n"
+
" <param name=\"zenodoCommunity\">oac_instruct</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"instruct::projects\" label=\"Instruct-ERIC Projects\">\n" +
" <concept claim=\"false\" id=\"instruct::projects::1\" label=\"Authentication and Authorisation For Research and Collaboration\">\n"
+
" <param name=\"projectfullname\">Authentication and Authorisation For Research and Collaboration</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730941</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-EINFRA-2016-1</param>\n" +
" <param name=\"acronym\">AARC2</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::2\" label=\"Building data bridges between biological and medical infrastructures in Europe\">\n"
+
" <param name=\"projectfullname\">Building data bridges between biological and medical infrastructures in Europe</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">284209</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioMedBridges</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::3\" label=\"Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\">\n"
+
" <param name=\"projectfullname\">Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">283570</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioStruct-X</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::4\" label=\"Coordinated Research Infrastructures Building Enduring Life-science services\">\n"
+
" <param name=\"projectfullname\">Coordinated Research Infrastructures Building Enduring Life-science services</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654248</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2014-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">CORBEL</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::5\" label=\"Infrastructure for NMR, EM and X-rays for translational research\">\n"
+
" <param name=\"projectfullname\">Infrastructure for NMR, EM and X-rays for translational research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">653706</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2014-2015</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">iNEXT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::6\" label=\"Integrated Structural Biology Infrastructure\">\n"
+
" <param name=\"projectfullname\">Integrated Structural Biology Infrastructure</param>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">211252</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2007-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::7\" label=\"Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\">\n"
+
" <param name=\"projectfullname\">Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">731005</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT-ULTRA</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::8\" label=\"Opening Synchrotron Light for Experimental Science and Applications in the Middle East\">\n"
+
" <param name=\"projectfullname\">Opening Synchrotron Light for Experimental Science and Applications in the Middle East</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730943</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRASUPP-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">OPEN SESAME</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::9\" label=\"Infrastructure for Protein Production Platforms\">\n"
+
" <param name=\"projectfullname\">Infrastructure for Protein Production Platforms</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">227764</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2008-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PCUBE</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::10\" label=\"European Vaccine Research and Development Infrastructure\">\n"
+
" <param name=\"projectfullname\">European Vaccine Research and Development Infrastructure</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730964</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">TRAMSVAC2</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::11\" label=\"World-wide E-infrastructure for structural biology\">\n"
+
" <param name=\"projectfullname\">World-wide E-infrastructure for structural biology</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-EINFRA-2015-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">West-Life</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::12\" label=\"RI-VIS\">\n" +
" <param name=\"projectfullname\">Expanding research infrastructure visibility to strengthen strategic partnerships</param>\n"
+
" <param name=\"acronym\">RI-VIS</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">824063</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda__h2020::af93b591b76991d8437993a8f6fc6538</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::contentproviders\" label=\"Instruct-ERIC Content providers\"/>\n"
+
" <category claim=\"false\" id=\"instruct::zenodocommunities\" label=\"Instruct-ERIC Zenodo Communities\">\n"
+
" <concept claim=\"false\" id=\"instruct::zenodocommunities::1\" label=\"Instruct\">\n" +
" <param name=\"zenodoid\">instruct</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::zenodocommunities::2\" label=\"West-Life Virtual Research Environment for Structural Biology\">\n"
+
" <param name=\"zenodoid\">west-life</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::organizations\" label=\"Instruct-ERIC Organizations\">\n"
+
" <concept claim=\"false\" id=\"instruct::organizations::1\" label=\"FRISBI\">\n" +
" <param name=\"name\">FRISBI</param>\n" +
" <param name=\"logourl\">aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==</param>\n"
+
" <param name=\"websiteurl\">aHR0cDovL2ZyaXNiaS5ldS8=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::2\" label=\"RI-VIS\">\n" +
" <param name=\"name\">RI-VIS</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly9yaS12aXMuZXU=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::3\" label=\"CIISB\">\n" +
" <param name=\"name\">CIISB</param>\n" +
" <param name=\"logourl\">aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=</param>\n" +
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuY2lpc2Iub3Jn</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"elixir-gr\" label=\"ELIXIR GR\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.</param>\n"
+
" <param name=\"logourl\">https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png</param>\n"
+
" <param name=\"name\">The Greek National Node of the ESFRI European RI ELIXIR</param>\n" +
" <param name=\"manager\">vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\"/>\n" +
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
+
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
" <param name=\"openaireId\">rest________::b8e502674c3c3499d5374e9b2ea6d8d5</param>\n" +
" <param name=\"name\">bio.tools</param>\n" +
" <param name=\"officialname\">bio.tools</param>\n" +
" <param name=\"enabled\">false</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"elixir-gr::zenodocommunities\" label=\"Elixir-GR Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"elixir-gr::organizations\" label=\"Elixir-GR Organizations\">\n" +
" <concept claim=\"false\" id=\"elixir-gr::organizations::1\" label=\"ATHENA RC\">\n" +
" <param name=\"name\">ATHENA RC</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=</param>\n" +
" </concept>\n" +
" </category><!-- <category claim=\"false\" id=\"elixir-gr::resultorganizations\" label=\"Elixir-GR Results through organizations\"/> -->\n"
+
"</context>");
@Mock
private ISLookUpService isLookUpService;
private QueryInformationSystem queryInformationSystem;
private Map<String, String> map;
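// Mock the ISLookUpService so that the entity query returns the communityMap fixture and the context query returns the communityContext profiles defined above.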
@BeforeEach
public void setUp() throws ISLookUpException {
lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap);
lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityContext);
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(isLookUpService);
}
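// The fixture defines twelve <context> profiles, so getContextInformation should emit one ContextInfo per profile.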
@Test
void testSizeEntity() throws ISLookUpException {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem.getContextInformation(consumer);
Assertions.assertEquals(12, cInfoList.size());
}
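// The relation query over the contentproviders category is expected to produce five ContextInfo entries from the same fixture.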
@Test
void testSizeRelation() throws ISLookUpException {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem.execContextRelationQuery();
queryInformationSystem
.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource"));
Assertions.assertEquals(5, cInfoList.size());
}
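// Spot-check the datasources gathered per context: elixir-gr and ni resolve providers, instruct and clarin have none, dh-ch lists ten.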
@Test
void testContentRelation() throws ISLookUpException {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem.execContextRelationQuery();
queryInformationSystem
.getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource"));
cInfoList.forEach(contextInfo -> {
switch (contextInfo.getId()) {
case "elixir-gr":
Assertions.assertEquals(1, contextInfo.getDatasourceList().size());
Assertions
.assertEquals(
"10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5",
contextInfo.getDatasourceList().get(0));
break;
case "instruct":
Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
break;
case "ni":
Assertions.assertEquals(6, contextInfo.getDatasourceList().size());
Assertions
.assertTrue(
contextInfo
.getDatasourceList()
.contains("10|rest________::fb1a3d4523c95e63496e3bc7ba36244b"));
break;
case "dh-ch":
Assertions.assertEquals(10, contextInfo.getDatasourceList().size());
break;
case "clarin":
Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
break;
}
});
}
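// Verify the name, description, subjects, Zenodo community and type extracted from each context profile.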
@Test
void testContentEntity() throws ISLookUpException {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
queryInformationSystem.getContextInformation(consumer);
cInfoList.forEach(context -> {
switch (context.getId()) {
case "clarin":// clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin",
Assertions
.assertEquals("Common Language Resources and Technology Infrastructure", context.getName());
Assertions.assertEquals("CLARIN", context.getDescription());
Assertions
.assertTrue(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals("oac_clarin", context.getZenodocommunity());
Assertions.assertEquals("ri", context.getType());
break;
case "ee":
Assertions.assertEquals("Sustainable Development Solutions Network - Greece", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertFalse(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals(17, context.getSubject().size());
Assertions.assertEquals("oac_sdsn-greece", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
break;
case "dh-ch":
Assertions.assertEquals("Digital Humanities and Cultural Heritage", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertFalse(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals(67, context.getSubject().size());
Assertions.assertEquals("oac_dh-ch", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
break;
case "fam":
Assertions.assertEquals("Fisheries and Aquaculture Management", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith("Conservation of marine resources for sustainable development"));
Assertions
.assertFalse(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals(19, context.getSubject().size());
Assertions.assertEquals("fisheries", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
break;
case "ni":
Assertions.assertEquals("Neuroinformatics", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith("The neuroinformatics dashboard gathers research outputs from the"));
Assertions
.assertFalse(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals(18, context.getSubject().size());
Assertions.assertEquals("oac_ni", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
Assertions.assertTrue(context.getSubject().contains("brain"));
break;
case "mes":
Assertions.assertEquals("European Marine Science", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith(
"This community was initially defined to include a very broad range of topics"));
Assertions
.assertFalse(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals(5, context.getSubject().size());
Assertions.assertEquals("oac_mes", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
Assertions.assertTrue(context.getSubject().contains("sea"));
Assertions.assertTrue(context.getSubject().contains("fish"));
Assertions.assertTrue(context.getSubject().contains("ocean"));
Assertions.assertTrue(context.getSubject().contains("aqua"));
Assertions.assertTrue(context.getSubject().contains("marine"));
break;
case "instruct":
Assertions.assertEquals("Instruct-ERIC", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.equals(
"Instruct-ERIC is the European Research Infrastructure for Structural Biology"));
Assertions
.assertTrue(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals("oac_instruct", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
break;
case "elixir-gr":
Assertions
.assertEquals("The Greek National Node of the ESFRI European RI ELIXIR", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith(
"ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open"));
Assertions
.assertTrue(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals("oaa_elixir-gr", context.getZenodocommunity());
Assertions.assertEquals("ri", context.getType());
break;
case "aginfra":
Assertions.assertEquals("Agricultural and Food Sciences", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith(
"The scope of this community is to provide access to publications, research data, projects and software"));
Assertions
.assertFalse(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals(18, context.getSubject().size());
Assertions.assertEquals("oac_aginfra", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
Assertions.assertTrue(context.getSubject().contains("food distribution"));
break;
case "dariah":
Assertions.assertEquals("DARIAH EU", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith(
"The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support "));
Assertions
.assertTrue(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals("dariah", context.getZenodocommunity());
Assertions.assertEquals("ri", context.getType());
break;
case "epos":
Assertions.assertEquals("European Plate Observing System", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith(
"EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of "));
Assertions
.assertTrue(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals("", context.getZenodocommunity());
Assertions.assertEquals("ri", context.getType());
break;
case "covid-19":
Assertions.assertEquals("Corona Virus Disease", context.getName());
Assertions.assertTrue(context.getDescription().length() > 0);
Assertions
.assertTrue(
context
.getDescription()
.startsWith(
"This portal provides access to publications, research data, projects and "));
Assertions
.assertFalse(
Optional
.ofNullable(context.getSubject())
.map(value -> false)
.orElse(true));
Assertions.assertEquals(25, context.getSubject().size());
Assertions.assertEquals("covid-19", context.getZenodocommunity());
Assertions.assertEquals("community", context.getType());
Assertions.assertTrue(context.getSubject().contains("coronavirus disease 2019"));
break;
}
});
}
}

View File

@ -5,6 +5,8 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;

File diff suppressed because one or more lines are too long

81
dump/wget-log Normal file
View File

@ -0,0 +1,81 @@
--2023-11-30 16:20:33-- http://10.5555/1071509.1071530
Resolving 10.5555 (10.5555)... 10.0.21.179
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 09:02:48-- (try: 2) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 11:03:58-- (try: 3) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:04:08-- (try: 4) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:37:35-- (try: 5) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:38:55-- (try: 6) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:40:16-- (try: 7) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:41:38-- (try: 8) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:43:01-- (try: 9) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:44:25-- (try:10) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 13:45:50-- (try:11) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:35:37-- (try:12) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:37:02-- (try:13) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:38:27-- (try:14) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:39:52-- (try:15) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:41:17-- (try:16) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:42:42-- (try:17) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:44:07-- (try:18) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:45:32-- (try:19) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Retrying.
--2023-12-11 14:46:57-- (try:20) http://10.5555/1071509.1071530
Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
Giving up.

View File

@ -6,6 +6,7 @@
<modules>
<module>dump-schema</module>
<module>dump</module>
<module>api</module>
</modules>
<parent>
@ -102,7 +103,7 @@
<junit-jupiter.version>5.6.1</junit-jupiter.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.guava.version>11.0.2</dhp.guava.version>
<dhp-schemas.version>[3.17.1]</dhp-schemas.version>
<dhp-schemas.version>[4.17.2]</dhp-schemas.version>
</properties>
</project>