diff --git a/.gitignore b/.gitignore
index 73d9179..14314ae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,8 @@ spark-warehouse
 /**/*.log
 /**/.factorypath
 /**/.scalafmt.conf
+/**/job.properties
+/job.properties
+/*/job.properties
+/*/*/job.properties
+/*/*/*/job.properties
\ No newline at end of file
diff --git a/api/pom.xml b/api/pom.xml
new file mode 100644
index 0000000..7a3e978
--- /dev/null
+++ b/api/pom.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp-graph-dump</artifactId>
+        <version>1.2.5-SNAPSHOT</version>
+    </parent>
+
+    <groupId>eu.dnetlib.dhp</groupId>
+    <artifactId>api</artifactId>
+    <version>1.2.5-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>8</maven.compiler.source>
+        <maven.compiler.target>8</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>dom4j</groupId>
+            <artifactId>dom4j</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>jaxen</groupId>
+            <artifactId>jaxen</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>eu.dnetlib.dhp</groupId>
+            <artifactId>dhp-common</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-annotations</artifactId>
+            <scope>compile</scope>
+        </dependency>
+
+    </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java
new file mode 100644
index 0000000..0b7cdaf
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java
@@ -0,0 +1,75 @@
+
+package eu.dnetlib.dhp.communityapi;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+
+/**
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+public class QueryCommunityAPI {
+	private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";
+
+	private static String get(String geturl) throws IOException {
+		URL url = new URL(geturl);
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+		conn.setDoOutput(true);
+		conn.setRequestMethod("GET");
+
+		int responseCode = conn.getResponseCode();
+		String body = getBody(conn);
+		conn.disconnect();
+		if (responseCode != HttpURLConnection.HTTP_OK)
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		return body;
+	}
+
+	public static String communities() throws IOException {
+
+		return get(PRODUCTION_BASE_URL + "community/communities");
+	}
+
+	public static String community(String id) throws IOException {
+
+		return get(PRODUCTION_BASE_URL + "community/" + id);
+
+	}
+
+	public static String communityDatasource(String id) throws IOException {
+
+		return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
+
+	}
+
+	public static String communityPropagationOrganization(String id) throws IOException {
+
+		return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
+	}
+
+	public static String communityProjects(String id, String page, String size) throws IOException {
+
+		return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
+	}
+
+	private static String getBody(HttpURLConnection conn) throws IOException {
+		String body = "{}";
+		try (BufferedReader br = new BufferedReader(
+			new InputStreamReader(conn.getInputStream(), "utf-8"))) {
+			StringBuilder response = new StringBuilder();
+			String responseLine = null;
+			while ((responseLine = br.readLine()) != null) {
+				response.append(responseLine.trim());
+			}
+
+			body = response.toString();
+
+		}
+		return body;
+	}
+
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java
new file mode 100644
index 0000000..52c65b1
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java
@@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+@JsonAutoDetect
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class CommunityContentprovider {
+	private String openaireId;
+
+	private String enabled;
+
+	public String getEnabled() {
+		return enabled;
+	}
+
+	public void setEnabled(String enabled) {
+		this.enabled = enabled;
+	}
+
+	public String getOpenaireId() {
+		return openaireId;
+	}
+
+	public void setOpenaireId(final String openaireId) {
+		this.openaireId = openaireId;
+	}
+
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java
new file mode 100644
index 0000000..91e17e5
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java
@@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+public class CommunityEntityMap extends HashMap<String, List<String>> {
+
+	public CommunityEntityMap() {
+		super();
+	}
+
+	public List<String> get(String key) {
+
+		if (super.get(key) == null) {
+			return new ArrayList<>();
+		}
+		return super.get(key);
+	}
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java
new file mode 100644
index 0000000..f942eea
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java
@@ -0,0 +1,82 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class CommunityModel implements Serializable {
+	private String id;
+	private String name;
+	private String description;
+
+	private String status;
+
+	private String type;
+
+	private List<String> subjects;
+
+	private String zenodoCommunity;
+
+	public List<String> getSubjects() {
+		return subjects;
+	}
+
+	public void setSubjects(List<String> subjects) {
+		this.subjects = subjects;
+	}
+
+	public String getZenodoCommunity() {
+		return zenodoCommunity;
+	}
+
+	public void setZenodoCommunity(String zenodoCommunity) {
+		this.zenodoCommunity = zenodoCommunity;
+	}
+
+	public String getType() {
+		return type;
+	}
+
+	public void setType(String type) {
+		this.type = type;
+	}
+
+	public String getStatus() {
+		return status;
+	}
+
+	public void setStatus(String status) {
+		this.status = status;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	public void setName(String name) {
+		this.name = name;
+	}
+
+	public String getDescription() {
+		return description;
+	}
+
+	public void setDescription(String description) {
+		this.description = description;
+	}
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java
new file mode 100644
index 0000000..47d4d1b
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java
@@ -0,0 +1,15 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+/**
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
+	public CommunitySummary() {
+		super();
+	}
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java
new file mode 100644
index 0000000..9d7245d
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class ContentModel implements Serializable {
+	private List<ProjectModel> content;
+	private Integer totalPages;
+	private Boolean last;
+	private Integer number;
+
+	public List<ProjectModel> getContent() {
+		return content;
+	}
+
+	public void setContent(List<ProjectModel> content) {
+		this.content = content;
+	}
+
+	public Integer getTotalPages() {
+		return totalPages;
+	}
+
+	public void setTotalPages(Integer totalPages) {
+		this.totalPages = totalPages;
+	}
+
+	public Boolean getLast() {
+		return last;
+	}
+
+	public void setLast(Boolean last) {
+		this.last = last;
+	}
+
+	public Integer getNumber() {
+		return number;
+	}
+
+	public void setNumber(Integer number) {
+		this.number = number;
+	}
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java
new file mode 100644
index 0000000..0cd98a7
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java
@@ -0,0 +1,11 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
+	public DatasourceList() {
+		super();
+	}
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java
new file mode 100644
index 0000000..33ccfd5
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java
@@ -0,0 +1,16 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+/**
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+public class OrganizationList extends ArrayList<String> implements Serializable {
+
+	public OrganizationList() {
+		super();
+	}
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java
new file mode 100644
index 0000000..72429ea
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java
@@ -0,0 +1,44 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class ProjectModel implements Serializable {
+
+	private String openaireId;
+
+	private String funder;
+
+	private String gratId;
+
+	public String getFunder() {
+		return funder;
+	}
+
+	public void setFunder(String funder) {
+		this.funder = funder;
+	}
+
+	public String getGratId() {
+		return gratId;
+	}
+
+	public void setGratId(String gratId) {
+		this.gratId = gratId;
+	}
+
+	public String getOpenaireId() {
+		return openaireId;
+	}
+
+	public void setOpenaireId(String openaireId) {
+		this.openaireId = openaireId;
+	}
+}
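Reviewer note: the snippet below is a minimal usage sketch, not part of the patch, showing how the new QueryCommunityAPI client and the model classes above compose: it walks the paged community/{id}/projects endpoint, deserializing each page into a ContentModel until last is true. It assumes jackson-databind's ObjectMapper is on the classpath (the api module declares only jackson-annotations, so databind would have to arrive transitively, e.g. via dhp-common), that ContentModel.content carries ProjectModel entries as reconstructed above, and "dh-ch" is just an example community identifier; the class name CommunityProjectsExample is hypothetical.

package eu.dnetlib.dhp.communityapi;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.communityapi.model.ContentModel;
import eu.dnetlib.dhp.communityapi.model.ProjectModel;

/** Illustrative only: collects all projects of one community by paging the API. */
public class CommunityProjectsExample {

	public static void main(String[] args) throws IOException {
		ObjectMapper mapper = new ObjectMapper();
		List<ProjectModel> projects = new ArrayList<>();

		int page = 0;
		ContentModel current;
		do {
			// page and size are passed as strings because QueryCommunityAPI splices them into the URL path
			String json = QueryCommunityAPI.communityProjects("dh-ch", String.valueOf(page), "100");
			current = mapper.readValue(json, ContentModel.class);
			if (current.getContent() != null) {
				projects.addAll(current.getContent());
			}
			page++;
		} while (current.getLast() != null && !current.getLast()); // guard the Boolean against null before unboxing

		System.out.println("Fetched " + projects.size() + " projects over " + current.getTotalPages() + " pages");
	}
}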
diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessColor.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessColor.java new file mode 100644 index 0000000..5baeb33 --- /dev/null +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/OpenAccessColor.java @@ -0,0 +1,15 @@ + +package eu.dnetlib.dhp.oa.model; + +/** + * @author miriam.baglioni + * @Date 19/12/23 + */ +/** + * The OpenAccess color meant to be used on the result level + */ +public enum OpenAccessColor { + + gold, hybrid, bronze + +} diff --git a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java index e388bf8..bc455dc 100644 --- a/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java +++ b/dump-schema/src/main/java/eu/dnetlib/dhp/oa/model/Result.java @@ -5,6 +5,7 @@ import java.io.Serializable; import java.util.List; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema; /** @@ -74,6 +75,53 @@ public class Result implements Serializable { private List author; // resulttype allows subclassing results into publications | datasets | software + + @JsonProperty("isGreen") + @JsonSchema(description = "True if the result is green Open Access") + private Boolean isGreen; + + @JsonSchema(description = "The Open Access Color of the publication") + private OpenAccessColor openAccessColor; + + @JsonProperty("isInDiamondJournal") + @JsonSchema(description = "True if the result is published in a Diamond Journal") + private Boolean isInDiamondJournal; + + @JsonSchema(description = "True if the result is outcome of a project") + private Boolean publiclyFunded; + + public Boolean getGreen() { + return isGreen; + } + + public void setGreen(Boolean green) { + isGreen = green; + } + + public OpenAccessColor getOpenAccessColor() { + return openAccessColor; + } + + public void setOpenAccessColor(OpenAccessColor openAccessColor) { + this.openAccessColor = openAccessColor; + } + + public Boolean getInDiamondJournal() { + return isInDiamondJournal; + } + + public void setInDiamondJournal(Boolean inDiamondJournal) { + isInDiamondJournal = inDiamondJournal; + } + + public Boolean getPubliclyFunded() { + return publiclyFunded; + } + + public void setPubliclyFunded(Boolean publiclyFunded) { + this.publiclyFunded = publiclyFunded; + } + @JsonSchema( description = "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)") private String type; // resulttype diff --git a/dump-schema/src/main/resources/jsonschemas/community_result_schema.json b/dump-schema/src/main/resources/jsonschemas/community_result_schema.json index a30fe6a..9b86d7b 100644 --- a/dump-schema/src/main/resources/jsonschemas/community_result_schema.json +++ b/dump-schema/src/main/resources/jsonschemas/community_result_schema.json @@ -1,621 +1,684 @@ { - "$schema" : "http://json-schema.org/draft-07/schema#", - "definitions" : { - "CfHbKeyValue" : { - "type" : "object", - "properties" : { - "key" : { - "type" : "string", - "description" : "the OpenAIRE identifier of the data source" + "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "CfHbKeyValue": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "Description of key" }, - "value" : { - "type" : "string", - "description" : "the name of the data source" + 
"value": { + "type": "string", + "description": "Description of value" } } }, - "Provenance" : { - "type" : "object", - "properties" : { - "provenance" : { - "type" : "string" + "Provenance": { + "type": "object", + "properties": { + "provenance": { + "type": "string", + "description": "Description of provenance" }, - "trust" : { - "type" : "string" + "trust": { + "type": "string", + "description": "Description of trust" } } }, - "ResultPid" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories" + "ResultPid": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" }, - "value" : { - "type" : "string", - "description" : "The value expressed in the scheme (i.e. 10.1000/182)" + "value": { + "type": "string", + "description": "Description of value" } } } }, - "type" : "object", - "properties" : { - "author" : { - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "fullname" : { - "type" : "string" + "type": "object", + "properties": { + "author": { + "description": "Description of author", + "type": "array", + "items": { + "type": "object", + "properties": { + "fullname": { + "type": "string", + "description": "Description of fullname" }, - "name" : { - "type" : "string" + "name": { + "type": "string", + "description": "Description of name" }, - "pid" : { - "type" : "object", - "properties" : { - "id" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "The author's pid scheme. OpenAIRE currently supports 'ORCID'" + "pid": { + "type": "object", + "properties": { + "id": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" }, - "value" : { - "type" : "string", - "description" : "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)" + "value": { + "type": "string", + "description": "Description of value" } - } - }, - "provenance" : { - "allOf" : [ { - "$ref" : "#/definitions/Provenance" - }, { - "description" : "The reason why the pid was associated to the author" - } ] - } - }, - "description" : "The author's persistent identifiers" - }, - "rank" : { - "type" : "integer" - }, - "surname" : { - "type" : "string" - } - } - } - }, - "bestaccessright" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label" : { - "type" : "string", - "description" : "Label for the access mode" - }, - "scheme" : { - "type" : "string", - "description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - }, - "description" : "The openest of the access rights of this result." 
- }, - "codeRepositoryUrl" : { - "type" : "string", - "description" : "Only for results with type 'software': the URL to the repository with the source code" - }, - "collectedfrom" : { - "description" : "Information about the sources from which the record has been collected", - "type" : "array", - "items" : { - "allOf" : [ { - "$ref" : "#/definitions/CfHbKeyValue" - }, { - "description" : "Information about the sources from which the record has been collected" - } ] - } - }, - "contactgroup" : { - "description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource" - } - }, - "contactperson" : { - "description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource" - } - }, - "container" : { - "type" : "object", - "properties" : { - "conferencedate" : { - "type" : "string" - }, - "conferenceplace" : { - "type" : "string" - }, - "edition" : { - "type" : "string", - "description" : "Edition of the journal or conference proceeding" - }, - "ep" : { - "type" : "string", - "description" : "End page" - }, - "iss" : { - "type" : "string", - "description" : "Journal issue number" - }, - "issnLinking" : { - "type" : "string" - }, - "issnOnline" : { - "type" : "string" - }, - "issnPrinted" : { - "type" : "string" - }, - "name" : { - "type" : "string", - "description" : "Name of the journal or conference" - }, - "sp" : { - "type" : "string", - "description" : "Start page" - }, - "vol" : { - "type" : "string", - "description" : "Volume" - } - }, - "description" : "Container has information about the conference or journal where the result has been presented or published" - }, - "context" : { - "description" : "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "Code identifying the RI/RC" - }, - "label" : { - "type" : "string", - "description" : "Label of the RI/RC" - }, - "provenance" : { - "description" : "Why this result is associated to the RI/RC.", - "type" : "array", - "items" : { - "allOf" : [ { - "$ref" : "#/definitions/Provenance" - }, { - "description" : "Why this result is associated to the RI/RC." - } ] - } - } - }, - "description" : "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu" - } - }, - "contributor" : { - "description" : "Contributors for the result", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Contributors for the result" - } - }, - "country" : { - "description" : "The list of countries associated to this result", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "ISO 3166-1 alpha-2 country code (i.e. 
IT)" - }, - "label" : { - "type" : "string", - "description" : "The label for that code (i.e. Italy)" - }, - "provenance" : { - "allOf" : [ { - "$ref" : "#/definitions/Provenance" - }, { - "description" : "Why this result is associated to the country." - } ] - } - }, - "description" : "The list of countries associated to this result" - } - }, - "coverage" : { - "type" : "array", - "items" : { - "type" : "string" - } - }, - "dateofcollection" : { - "type" : "string", - "description" : "When OpenAIRE collected the record the last time" - }, - "description" : { - "type" : "array", - "items" : { - "type" : "string" - } - }, - "documentationUrl" : { - "description" : "Only for results with type 'software': URL to the software documentation", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'software': URL to the software documentation" - } - }, - "embargoenddate" : { - "type" : "string", - "description" : "Date when the embargo ends and this result turns Open Access" - }, - "format" : { - "type" : "array", - "items" : { - "type" : "string" - } - }, - "geolocation" : { - "description" : "Geolocation information", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "box" : { - "type" : "string" - }, - "place" : { - "type" : "string" - }, - "point" : { - "type" : "string" - } - }, - "description" : "Geolocation information" - } - }, - "id" : { - "type" : "string", - "description" : "The OpenAIRE identifiers for this result" - }, - "indicators" : { - "type" : "object", - "properties" : { - "bipIndicators" : { - "description" : "The impact measures (i.e. popularity)", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "clazz" : { - "type" : "string" - }, - "indicator" : { - "type" : "string" - }, - "score" : { - "type" : "string" - } - }, - "description" : "The impact measures (i.e. popularity)" - } - }, - "usageCounts" : { - "type" : "object", - "properties" : { - "downloads" : { - "type" : "string" - }, - "views" : { - "type" : "string" - } - }, - "description" : "The usage counts (i.e. downloads)" - } - }, - "description" : "Indicators computed for this result, for example UsageCount ones" - }, - "instance" : { - "description" : "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "accessright" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label" : { - "type" : "string", - "description" : "Label for the access mode" - }, - "openAccessRoute" : { - "type" : "string", - "enum" : [ "gold", "green", "hybrid", "bronze" ] - }, - "scheme" : { - "type" : "string", - "description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - }, - "description" : "The accessRights for this materialization of the result" - }, - "alternateIdentifier" : { - "description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "The scheme of the identifier. 
It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi" }, - "value" : { - "type" : "string", - "description" : "The value expressed in the scheme" + "description": "Description of id" + }, + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] + } + }, + "description": "Description of pid" + }, + "rank": { + "type": "integer", + "description": "Description of rank" + }, + "surname": { + "type": "string", + "description": "Description of surname" + } + }, + "description": "Description of author" + } + }, + "bestaccessright": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" + }, + "label": { + "type": "string", + "description": "Description of label" + }, + "scheme": { + "type": "string", + "description": "Description of scheme" + } + }, + "description": "Description of bestaccessright" + }, + "codeRepositoryUrl": { + "type": "string", + "description": "Description of codeRepositoryUrl" + }, + "collectedfrom": { + "description": "Description of collectedfrom", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/CfHbKeyValue"}, + {"description": "Description of collectedfrom"} + ] + } + }, + "contactgroup": { + "description": "Description of contactgroup", + "type": "array", + "items": { + "type": "string", + "description": "Description of contactgroup" + } + }, + "contactperson": { + "description": "Description of contactperson", + "type": "array", + "items": { + "type": "string", + "description": "Description of contactperson" + } + }, + "container": { + "type": "object", + "properties": { + "conferencedate": { + "type": "string", + "description": "Description of conferencedate" + }, + "conferenceplace": { + "type": "string", + "description": "Description of conferenceplace" + }, + "edition": { + "type": "string", + "description": "Description of edition" + }, + "ep": { + "type": "string", + "description": "Description of ep" + }, + "iss": { + "type": "string", + "description": "Description of iss" + }, + "issnLinking": { + "type": "string", + "description": "Description of issnLinking" + }, + "issnOnline": { + "type": "string", + "description": "Description of issnOnline" + }, + "issnPrinted": { + "type": "string", + "description": "Description of issnPrinted" + }, + "name": { + "type": "string", + "description": "Description of name" + }, + "sp": { + "type": "string", + "description": "Description of sp" + }, + "vol": { + "type": "string", + "description": "Description of vol" + } + }, + "description": "Description of container" + }, + "context": { + "description": "Description of context", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" + }, + "label": { + "type": "string", + "description": "Description of label" + }, + "provenance": { + "description": "Description of provenance", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] + } + } + }, + "description": "Description of context" + } + }, + "contributor": { + "description": "Description of contributor", + "type": "array", + "items": { + "type": "string", + "description": "Description of 
contributor" + } + }, + "country": { + "description": "Description of country", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" + }, + "label": { + "type": "string", + "description": "Description of label" + }, + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] + } + }, + "description": "Description of country" + } + }, + "coverage": { + "description": "Description of coverage", + "type": "array", + "items": { + "type": "string", + "description": "Description of coverage" + } + }, + "dateofcollection": { + "type": "string", + "description": "Description of dateofcollection" + }, + "description": { + "description": "Description of description", + "type": "array", + "items": { + "type": "string", + "description": "Description of description" + } + }, + "documentationUrl": { + "description": "Description of documentationUrl", + "type": "array", + "items": { + "type": "string", + "description": "Description of documentationUrl" + } + }, + "embargoenddate": { + "type": "string", + "description": "Description of embargoenddate" + }, + "format": { + "description": "Description of format", + "type": "array", + "items": { + "type": "string", + "description": "Description of format" + } + }, + "geolocation": { + "description": "Description of geolocation", + "type": "array", + "items": { + "type": "object", + "properties": { + "box": { + "type": "string", + "description": "Description of box" + }, + "place": { + "type": "string", + "description": "Description of place" + }, + "point": { + "type": "string", + "description": "Description of point" + } + }, + "description": "Description of geolocation" + } + }, + "id": { + "type": "string", + "description": "Description of id" + }, + "indicators": { + "type": "object", + "properties": { + "bipIndicators": { + "description": "Description of bipIndicators", + "type": "array", + "items": { + "type": "object", + "properties": { + "clazz": { + "type": "string", + "description": "Description of clazz" + }, + "indicator": { + "type": "string", + "description": "Description of indicator" + }, + "score": { + "type": "string", + "description": "Description of score" + } + }, + "description": "Description of bipIndicators" + } + }, + "usageCounts": { + "type": "object", + "properties": { + "downloads": { + "type": "string", + "description": "Description of downloads" + }, + "views": { + "type": "string", + "description": "Description of views" + } + }, + "description": "Description of usageCounts" + } + }, + "description": "Description of indicators" + }, + "instance": { + "description": "Description of instance", + "type": "array", + "items": { + "type": "object", + "properties": { + "accessright": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" + }, + "label": { + "type": "string", + "description": "Description of label" + }, + "openAccessRoute": { + "type": "string", + "enum": [ + "gold", + "green", + "hybrid", + "bronze" + ], + "description": "Description of openAccessRoute" + }, + "scheme": { + "type": "string", + "description": "Description of scheme" + } + }, + "description": "Description of accessright" + }, + "alternateIdentifier": { + "description": "Description of alternateIdentifier", + "type": "array", + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" + 
}, + "value": { + "type": "string", + "description": "Description of value" } }, - "description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs" + "description": "Description of alternateIdentifier" } }, - "articleprocessingcharge" : { - "type" : "object", - "properties" : { - "amount" : { - "type" : "string" + "articleprocessingcharge": { + "type": "object", + "properties": { + "amount": { + "type": "string", + "description": "Description of amount" }, - "currency" : { - "type" : "string" + "currency": { + "type": "string", + "description": "Description of currency" } }, - "description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative." + "description": "Description of articleprocessingcharge" }, - "collectedfrom" : { - "allOf" : [ { - "$ref" : "#/definitions/CfHbKeyValue" - }, { - "description" : "Information about the source from which the record has been collected" - } ] + "collectedfrom": { + "allOf": [ + {"$ref": "#/definitions/CfHbKeyValue"}, + {"description": "Description of collectedfrom"} + ] }, - "hostedby" : { - "allOf" : [ { - "$ref" : "#/definitions/CfHbKeyValue" - }, { - "description" : "Information about the source from which the instance can be viewed or downloaded." - } ] + "hostedby": { + "allOf": [ + {"$ref": "#/definitions/CfHbKeyValue"}, + {"description": "Description of hostedby"} + ] }, - "license" : { - "type" : "string" + "license": { + "type": "string", + "description": "Description of license" }, - "pid" : { - "type" : "array", - "items" : { - "$ref" : "#/definitions/ResultPid" + "pid": { + "description": "Description of pid", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/ResultPid"}, + {"description": "Description of pid"} + ] } }, - "publicationdate" : { - "type" : "string", - "description" : "Date of the research product" + "publicationdate": { + "type": "string", + "description": "Description of publicationdate" }, - "refereed" : { - "type" : "string", - "description" : "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)" + "refereed": { + "type": "string", + "description": "Description of refereed" }, - "type" : { - "type" : "string", - "description" : "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" + "type": { + "type": "string", + "description": "Description of type" }, - "url" : { - "description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", - "type" : "array", - "items" : { - "type" : "string", - "description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. " + "url": { + "description": "Description of url", + "type": "array", + "items": { + "type": "string", + "description": "Description of url" } } }, - "description" : "Each instance is one specific materialisation or version of the result. 
For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version" + "description": "Description of instance" } }, - "language" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "alpha-3/ISO 639-2 code of the language" + "isGreen": { + "type": "boolean", + "description": "Description of isGreen" + }, + "isInDiamondJournal": { + "type": "boolean", + "description": "Description of isInDiamondJournal" + }, + "language": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" }, - "label" : { - "type" : "string", - "description" : "Language label in English" + "label": { + "type": "string", + "description": "Description of label" } + }, + "description": "Description of language" + }, + "lastupdatetimestamp": { + "type": "integer", + "description": "Description of lastupdatetimestamp" + }, + "maintitle": { + "type": "string", + "description": "Description of maintitle" + }, + "openAccessColor": { + "type": "string", + "enum": [ + "gold", + "hybrid", + "bronze" + ], + "description": "Description of openAccessColor" + }, + "originalId": { + "description": "Description of originalId", + "type": "array", + "items": { + "type": "string", + "description": "Description of originalId" } }, - "lastupdatetimestamp" : { - "type" : "integer", - "description" : "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle" : { - "type" : "string", - "description" : "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." - }, - "originalId" : { - "description" : "Identifiers of the record at the original sources", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Identifiers of the record at the original sources" + "pid": { + "description": "Description of pid", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/ResultPid"}, + {"description": "Description of pid"} + ] } }, - "pid" : { - "description" : "Persistent identifiers of the result", - "type" : "array", - "items" : { - "allOf" : [ { - "$ref" : "#/definitions/ResultPid" - }, { - "description" : "Persistent identifiers of the result" - } ] - } + "programmingLanguage": { + "type": "string", + "description": "Description of programmingLanguage" }, - "programmingLanguage" : { - "type" : "string", - "description" : "Only for results with type 'software': the programming language" - }, - "projects" : { - "description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "acronym" : { - "type" : "string", - "description" : "The acronym of the project" + "projects": { + "description": "Description of projects", + "type": "array", + "items": { + "type": "object", + "properties": { + "acronym": { + "type": "string", + "description": "Description of acronym" }, - "code" : { - "type" : "string", - "description" : "The grant agreement number" + "code": { + "type": "string", + "description": "Description of code" }, - "funder" : { - "type" : "object", - "properties" : { - "fundingStream" : { - "type" : "string", - "description" : "Stream of funding (e.g. 
for European Commission can be H2020 or FP7)" + "funder": { + "type": "object", + "properties": { + "fundingStream": { + "type": "string", + "description": "Description of fundingStream" }, - "jurisdiction" : { - "type" : "string", - "description" : "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)" + "jurisdiction": { + "type": "string", + "description": "Description of jurisdiction" }, - "name" : { - "type" : "string", - "description" : "The name of the funder (European Commission)" + "name": { + "type": "string", + "description": "Description of name" }, - "shortName" : { - "type" : "string", - "description" : "The short name of the funder (EC)" + "shortName": { + "type": "string", + "description": "Description of shortName" } }, - "description" : "Information about the funder funding the project" + "description": "Description of funder" }, - "id" : { - "type" : "string", - "description" : "The OpenAIRE id for the project" + "id": { + "type": "string", + "description": "Description of id" }, - "provenance" : { - "$ref" : "#/definitions/Provenance" + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] }, - "title" : { - "type" : "string" + "title": { + "type": "string", + "description": "Description of title" }, - "validated" : { - "type" : "object", - "properties" : { - "validatedByFunder" : { - "type" : "boolean" + "validated": { + "type": "object", + "properties": { + "validatedByFunder": { + "type": "boolean", + "description": "Description of validatedByFunder" }, - "validationDate" : { - "type" : "string" + "validationDate": { + "type": "string", + "description": "Description of validationDate" } - } + }, + "description": "Description of validated" } }, - "description" : "List of projects (i.e. grants) that (co-)funded the production ofn the research results" + "description": "Description of projects" } }, - "publicationdate" : { - "type" : "string", - "description" : "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." + "publicationdate": { + "type": "string", + "description": "Description of publicationdate" }, - "publisher" : { - "type" : "string", - "description" : "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." 
+ "publiclyFunded": { + "type": "boolean", + "description": "Description of publiclyFunded" }, - "size" : { - "type" : "string", - "description" : "Only for results with type 'dataset': the declared size of the dataset" + "publisher": { + "type": "string", + "description": "Description of publisher" }, - "source" : { - "description" : "See definition of Dublin Core field dc:source", - "type" : "array", - "items" : { - "type" : "string", - "description" : "See definition of Dublin Core field dc:source" + "size": { + "type": "string", + "description": "Description of size" + }, + "source": { + "description": "Description of source", + "type": "array", + "items": { + "type": "string", + "description": "Description of source" } }, - "subjects" : { - "description" : "Keywords associated to the result", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "provenance" : { - "allOf" : [ { - "$ref" : "#/definitions/Provenance" - }, { - "description" : "Why this subject is associated to the result" - } ] + "subjects": { + "description": "Description of subjects", + "type": "array", + "items": { + "type": "object", + "properties": { + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] }, - "subject" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)." + "subject": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" }, - "value" : { - "type" : "string", - "description" : "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)." + "value": { + "type": "string", + "description": "Description of value" } - } + }, + "description": "Description of subject" } }, - "description" : "Keywords associated to the result" + "description": "Description of subjects" } }, - "subtitle" : { - "type" : "string", - "description" : "Explanatory or alternative name by which a scientific result is known." 
+ "subtitle": { + "type": "string", + "description": "Description of subtitle" }, - "tool" : { - "description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product" + "tool": { + "description": "Description of tool", + "type": "array", + "items": { + "type": "string", + "description": "Description of tool" } }, - "type" : { - "type" : "string", - "description" : "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" + "type": { + "type": "string", + "description": "Description of type" }, - "version" : { - "type" : "string", - "description" : "Version of the result" + "version": { + "type": "string", + "description": "Description of version" } } -} +} \ No newline at end of file diff --git a/dump-schema/src/main/resources/jsonschemas/result_schema.json b/dump-schema/src/main/resources/jsonschemas/result_schema.json index 7120763..5d6cd62 100644 --- a/dump-schema/src/main/resources/jsonschemas/result_schema.json +++ b/dump-schema/src/main/resources/jsonschemas/result_schema.json @@ -1,493 +1,553 @@ { - "$schema" : "http://json-schema.org/draft-07/schema#", - "definitions" : { - "Provenance" : { - "type" : "object", - "properties" : { - "provenance" : { - "type" : "string" + "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "Provenance": { + "type": "object", + "properties": { + "provenance": { + "type": "string", + "description": "Description of provenance" }, - "trust" : { - "type" : "string" + "trust": { + "type": "string", + "description": "Description of trust" } } }, - "ResultPid" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories" + "ResultPid": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" }, - "value" : { - "type" : "string", - "description" : "The value expressed in the scheme (i.e. 10.1000/182)" + "value": { + "type": "string", + "description": "Description of value" } } } }, - "type" : "object", - "properties" : { - "author" : { - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "fullname" : { - "type" : "string" + "type": "object", + "properties": { + "author": { + "description": "Description of author", + "type": "array", + "items": { + "type": "object", + "properties": { + "fullname": { + "type": "string", + "description": "Description of fullname" }, - "name" : { - "type" : "string" + "name": { + "type": "string", + "description": "Description of name" }, - "pid" : { - "type" : "object", - "properties" : { - "id" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "The author's pid scheme. 
OpenAIRE currently supports 'ORCID'" + "pid": { + "type": "object", + "properties": { + "id": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" }, - "value" : { - "type" : "string", - "description" : "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)" + "value": { + "type": "string", + "description": "Description of value" } - } - }, - "provenance" : { - "allOf" : [ { - "$ref" : "#/definitions/Provenance" - }, { - "description" : "The reason why the pid was associated to the author" - } ] - } - }, - "description" : "The author's persistent identifiers" - }, - "rank" : { - "type" : "integer" - }, - "surname" : { - "type" : "string" - } - } - } - }, - "bestaccessright" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label" : { - "type" : "string", - "description" : "Label for the access mode" - }, - "scheme" : { - "type" : "string", - "description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - }, - "description" : "The openest of the access rights of this result." - }, - "codeRepositoryUrl" : { - "type" : "string", - "description" : "Only for results with type 'software': the URL to the repository with the source code" - }, - "contactgroup" : { - "description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource" - } - }, - "contactperson" : { - "description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource" - } - }, - "container" : { - "type" : "object", - "properties" : { - "conferencedate" : { - "type" : "string" - }, - "conferenceplace" : { - "type" : "string" - }, - "edition" : { - "type" : "string", - "description" : "Edition of the journal or conference proceeding" - }, - "ep" : { - "type" : "string", - "description" : "End page" - }, - "iss" : { - "type" : "string", - "description" : "Journal issue number" - }, - "issnLinking" : { - "type" : "string" - }, - "issnOnline" : { - "type" : "string" - }, - "issnPrinted" : { - "type" : "string" - }, - "name" : { - "type" : "string", - "description" : "Name of the journal or conference" - }, - "sp" : { - "type" : "string", - "description" : "Start page" - }, - "vol" : { - "type" : "string", - "description" : "Volume" - } - }, - "description" : "Container has information about the conference or journal where the result has been presented or published" - }, - "contributor" : { - "description" : "Contributors for the result", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Contributors for the result" - } - }, - "country" : { - "description" : "The list of countries associated to this result", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "code" : { - "type" : 
"string", - "description" : "ISO 3166-1 alpha-2 country code (i.e. IT)" - }, - "label" : { - "type" : "string", - "description" : "The label for that code (i.e. Italy)" - }, - "provenance" : { - "allOf" : [ { - "$ref" : "#/definitions/Provenance" - }, { - "description" : "Why this result is associated to the country." - } ] - } - }, - "description" : "The list of countries associated to this result" - } - }, - "coverage" : { - "type" : "array", - "items" : { - "type" : "string" - } - }, - "dateofcollection" : { - "type" : "string", - "description" : "When OpenAIRE collected the record the last time" - }, - "description" : { - "type" : "array", - "items" : { - "type" : "string" - } - }, - "documentationUrl" : { - "description" : "Only for results with type 'software': URL to the software documentation", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'software': URL to the software documentation" - } - }, - "embargoenddate" : { - "type" : "string", - "description" : "Date when the embargo ends and this result turns Open Access" - }, - "format" : { - "type" : "array", - "items" : { - "type" : "string" - } - }, - "geolocation" : { - "description" : "Geolocation information", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "box" : { - "type" : "string" - }, - "place" : { - "type" : "string" - }, - "point" : { - "type" : "string" - } - }, - "description" : "Geolocation information" - } - }, - "id" : { - "type" : "string", - "description" : "The OpenAIRE identifiers for this result" - }, - "indicators" : { - "type" : "object", - "properties" : { - "bipIndicators" : { - "description" : "The impact measures (i.e. popularity)", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "class" : { - "type" : "string" - }, - "indicator" : { - "type" : "string" - }, - "score" : { - "type" : "string" - } - }, - "description" : "The impact measures (i.e. popularity)" - } - }, - "usageCounts" : { - "type" : "object", - "properties" : { - "downloads" : { - "type" : "string" - }, - "views" : { - "type" : "string" - } - }, - "description" : "The usage counts (i.e. downloads)" - } - }, - "description" : "Indicators computed for this result, for example UsageCount ones" - }, - "instance" : { - "description" : "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "accessright" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/" - }, - "label" : { - "type" : "string", - "description" : "Label for the access mode" - }, - "openAccessRoute" : { - "type" : "string", - "enum" : [ "gold", "green", "hybrid", "bronze" ] - }, - "scheme" : { - "type" : "string", - "description" : "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/" - } - }, - "description" : "The accessRights for this materialization of the result" - }, - "alternateIdentifier" : { - "description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. 
Crossref for DOIs", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi" }, - "value" : { - "type" : "string", - "description" : "The value expressed in the scheme" - } + "description": "Description of id" }, - "description" : "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs" - } - }, - "articleprocessingcharge" : { - "type" : "object", - "properties" : { - "amount" : { - "type" : "string" - }, - "currency" : { - "type" : "string" + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] } }, - "description" : "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative." + "description": "Description of pid" }, - "license" : { - "type" : "string" + "rank": { + "type": "integer", + "description": "Description of rank" }, - "pid" : { - "type" : "array", - "items" : { - "$ref" : "#/definitions/ResultPid" - } - }, - "publicationdate" : { - "type" : "string", - "description" : "Date of the research product" - }, - "refereed" : { - "type" : "string", - "description" : "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)" - }, - "type" : { - "type" : "string", - "description" : "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)" - }, - "url" : { - "description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ", - "type" : "array", - "items" : { - "type" : "string", - "description" : "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. " - } + "surname": { + "type": "string", + "description": "Description of surname" } }, - "description" : "Each instance is one specific materialisation or version of the result. 
For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version" + "description": "Description of author" } }, - "language" : { - "type" : "object", - "properties" : { - "code" : { - "type" : "string", - "description" : "alpha-3/ISO 639-2 code of the language" + "bestaccessright": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" }, - "label" : { - "type" : "string", - "description" : "Language label in English" + "label": { + "type": "string", + "description": "Description of label" + }, + "scheme": { + "type": "string", + "description": "Description of scheme" } + }, + "description": "Description of bestaccessright" + }, + "codeRepositoryUrl": { + "type": "string", + "description": "Description of codeRepositoryUrl" + }, + "contactgroup": { + "description": "Description of contactgroup", + "type": "array", + "items": { + "type": "string", + "description": "Description of contactgroup" } }, - "lastupdatetimestamp" : { - "type" : "integer", - "description" : "Timestamp of last update of the record in OpenAIRE" - }, - "maintitle" : { - "type" : "string", - "description" : "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software." - }, - "originalId" : { - "description" : "Identifiers of the record at the original sources", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Identifiers of the record at the original sources" + "contactperson": { + "description": "Description of contactperson", + "type": "array", + "items": { + "type": "string", + "description": "Description of contactperson" } }, - "pid" : { - "description" : "Persistent identifiers of the result", - "type" : "array", - "items" : { - "allOf" : [ { - "$ref" : "#/definitions/ResultPid" - }, { - "description" : "Persistent identifiers of the result" - } ] + "container": { + "type": "object", + "properties": { + "conferencedate": { + "type": "string", + "description": "Description of conferencedate" + }, + "conferenceplace": { + "type": "string", + "description": "Description of conferenceplace" + }, + "edition": { + "type": "string", + "description": "Description of edition" + }, + "ep": { + "type": "string", + "description": "Description of ep" + }, + "iss": { + "type": "string", + "description": "Description of iss" + }, + "issnLinking": { + "type": "string", + "description": "Description of issnLinking" + }, + "issnOnline": { + "type": "string", + "description": "Description of issnOnline" + }, + "issnPrinted": { + "type": "string", + "description": "Description of issnPrinted" + }, + "name": { + "type": "string", + "description": "Description of name" + }, + "sp": { + "type": "string", + "description": "Description of sp" + }, + "vol": { + "type": "string", + "description": "Description of vol" + } + }, + "description": "Description of container" + }, + "contributor": { + "description": "Description of contributor", + "type": "array", + "items": { + "type": "string", + "description": "Description of contributor" } }, - "programmingLanguage" : { - "type" : "string", - "description" : "Only for results with type 'software': the programming language" - }, - "publicationdate" : { - "type" : "string", - "description" : "Main date of the research product: typically the publication or issued date. 
In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it’s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date." - }, - "publisher" : { - "type" : "string", - "description" : "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource." - }, - "size" : { - "type" : "string", - "description" : "Only for results with type 'dataset': the declared size of the dataset" - }, - "source" : { - "description" : "See definition of Dublin Core field dc:source", - "type" : "array", - "items" : { - "type" : "string", - "description" : "See definition of Dublin Core field dc:source" - } - }, - "subjects" : { - "description" : "Keywords associated to the result", - "type" : "array", - "items" : { - "type" : "object", - "properties" : { - "provenance" : { - "allOf" : [ { - "$ref" : "#/definitions/Provenance" - }, { - "description" : "Why this subject is associated to the result" - } ] + "country": { + "description": "Description of country", + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" }, - "subject" : { - "type" : "object", - "properties" : { - "scheme" : { - "type" : "string", - "description" : "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)." 
+ "label": { + "type": "string", + "description": "Description of label" + }, + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] + } + }, + "description": "Description of country" + } + }, + "coverage": { + "description": "Description of coverage", + "type": "array", + "items": { + "type": "string", + "description": "Description of coverage" + } + }, + "dateofcollection": { + "type": "string", + "description": "Description of dateofcollection" + }, + "description": { + "description": "Description of description", + "type": "array", + "items": { + "type": "string", + "description": "Description of description" + } + }, + "documentationUrl": { + "description": "Description of documentationUrl", + "type": "array", + "items": { + "type": "string", + "description": "Description of documentationUrl" + } + }, + "embargoenddate": { + "type": "string", + "description": "Description of embargoenddate" + }, + "format": { + "description": "Description of format", + "type": "array", + "items": { + "type": "string", + "description": "Description of format" + } + }, + "geolocation": { + "description": "Description of geolocation", + "type": "array", + "items": { + "type": "object", + "properties": { + "box": { + "type": "string", + "description": "Description of box" + }, + "place": { + "type": "string", + "description": "Description of place" + }, + "point": { + "type": "string", + "description": "Description of point" + } + }, + "description": "Description of geolocation" + } + }, + "id": { + "type": "string", + "description": "Description of id" + }, + "indicators": { + "type": "object", + "properties": { + "bipIndicators": { + "description": "Description of bipIndicators", + "type": "array", + "items": { + "type": "object", + "properties": { + "clazz": { + "type": "string", + "description": "Description of clazz" }, - "value" : { - "type" : "string", - "description" : "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)." 
+ "indicator": { + "type": "string", + "description": "Description of indicator" + }, + "score": { + "type": "string", + "description": "Description of score" } + }, + "description": "Description of bipIndicators" + } + }, + "usageCounts": { + "type": "object", + "properties": { + "downloads": { + "type": "string", + "description": "Description of downloads" + }, + "views": { + "type": "string", + "description": "Description of views" + } + }, + "description": "Description of usageCounts" + } + }, + "description": "Description of indicators" + }, + "instance": { + "description": "Description of instance", + "type": "array", + "items": { + "type": "object", + "properties": { + "accessright": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" + }, + "label": { + "type": "string", + "description": "Description of label" + }, + "openAccessRoute": { + "type": "string", + "enum": [ + "gold", + "green", + "hybrid", + "bronze" + ], + "description": "Description of openAccessRoute" + }, + "scheme": { + "type": "string", + "description": "Description of scheme" + } + }, + "description": "Description of accessright" + }, + "alternateIdentifier": { + "description": "Description of alternateIdentifier", + "type": "array", + "items": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" + }, + "value": { + "type": "string", + "description": "Description of value" + } + }, + "description": "Description of alternateIdentifier" + } + }, + "articleprocessingcharge": { + "type": "object", + "properties": { + "amount": { + "type": "string", + "description": "Description of amount" + }, + "currency": { + "type": "string", + "description": "Description of currency" + } + }, + "description": "Description of articleprocessingcharge" + }, + "license": { + "type": "string", + "description": "Description of license" + }, + "pid": { + "description": "Description of pid", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/ResultPid"}, + {"description": "Description of pid"} + ] + } + }, + "publicationdate": { + "type": "string", + "description": "Description of publicationdate" + }, + "refereed": { + "type": "string", + "description": "Description of refereed" + }, + "type": { + "type": "string", + "description": "Description of type" + }, + "url": { + "description": "Description of url", + "type": "array", + "items": { + "type": "string", + "description": "Description of url" } } }, - "description" : "Keywords associated to the result" + "description": "Description of instance" } }, - "subtitle" : { - "type" : "string", - "description" : "Explanatory or alternative name by which a scientific result is known." 
+ "isGreen": { + "type": "boolean", + "description": "Description of isGreen" }, - "tool" : { - "description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product", - "type" : "array", - "items" : { - "type" : "string", - "description" : "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product" + "isInDiamondJournal": { + "type": "boolean", + "description": "Description of isInDiamondJournal" + }, + "language": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Description of code" + }, + "label": { + "type": "string", + "description": "Description of label" + } + }, + "description": "Description of language" + }, + "lastupdatetimestamp": { + "type": "integer", + "description": "Description of lastupdatetimestamp" + }, + "maintitle": { + "type": "string", + "description": "Description of maintitle" + }, + "openAccessColor": { + "type": "string", + "enum": [ + "gold", + "hybrid", + "bronze" + ], + "description": "Description of openAccessColor" + }, + "originalId": { + "description": "Description of originalId", + "type": "array", + "items": { + "type": "string", + "description": "Description of originalId" } }, - "type" : { - "type" : "string", - "description" : "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)" + "pid": { + "description": "Description of pid", + "type": "array", + "items": { + "allOf": [ + {"$ref": "#/definitions/ResultPid"}, + {"description": "Description of pid"} + ] + } }, - "version" : { - "type" : "string", - "description" : "Version of the result" + "programmingLanguage": { + "type": "string", + "description": "Description of programmingLanguage" + }, + "publicationdate": { + "type": "string", + "description": "Description of publicationdate" + }, + "publiclyFunded": { + "type": "boolean", + "description": "Description of publiclyFunded" + }, + "publisher": { + "type": "string", + "description": "Description of publisher" + }, + "size": { + "type": "string", + "description": "Description of size" + }, + "source": { + "description": "Description of source", + "type": "array", + "items": { + "type": "string", + "description": "Description of source" + } + }, + "subjects": { + "description": "Description of subjects", + "type": "array", + "items": { + "type": "object", + "properties": { + "provenance": { + "allOf": [ + {"$ref": "#/definitions/Provenance"}, + {"description": "Description of provenance"} + ] + }, + "subject": { + "type": "object", + "properties": { + "scheme": { + "type": "string", + "description": "Description of scheme" + }, + "value": { + "type": "string", + "description": "Description of value" + } + }, + "description": "Description of subject" + } + }, + "description": "Description of subjects" + } + }, + "subtitle": { + "type": "string", + "description": "Description of subtitle" + }, + "tool": { + "description": "Description of tool", + "type": "array", + "items": { + "type": "string", + "description": "Description of tool" + } + }, + "type": { + "type": "string", + "description": "Description of type" + }, + "version": { + "type": "string", + "description": "Description of version" } } -} +} \ No newline at end of file diff --git a/dump-schema/src/test/java/GenerateJsonSchema.java b/dump-schema/src/test/java/GenerateJsonSchema.java index 35d20ff..c7916e2 100644 --- 
a/dump-schema/src/test/java/GenerateJsonSchema.java +++ b/dump-schema/src/test/java/GenerateJsonSchema.java @@ -9,6 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule; import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.ExecCreateSchemas; +import eu.dnetlib.dhp.oa.model.Result; import eu.dnetlib.dhp.oa.model.community.CommunityResult; import eu.dnetlib.dhp.oa.model.graph.*; @@ -24,7 +25,7 @@ class GenerateJsonSchema { configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName()); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(GraphResult.class); + JsonNode jsonSchema = generator.generateSchema(CommunityResult.class); System.out.println(jsonSchema.toString()); } @@ -41,7 +42,7 @@ class GenerateJsonSchema { .without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS); SchemaGeneratorConfig config = configBuilder.build(); SchemaGenerator generator = new SchemaGenerator(config); - JsonNode jsonSchema = generator.generateSchema(Project.class); + JsonNode jsonSchema = generator.generateSchema(Result.class); System.out.println(jsonSchema.toString()); } diff --git a/dump/miriam.baglioni@hadoop-edge3.garr-pa1.d4science.org b/dump/miriam.baglioni@hadoop-edge3.garr-pa1.d4science.org deleted file mode 100644 index 41a91b3..0000000 --- a/dump/miriam.baglioni@hadoop-edge3.garr-pa1.d4science.org +++ /dev/null @@ -1,5 +0,0 @@ -id name acronym description -04a00617ca659adc944977ac700ea14b Digital Humanities and Cultural Heritage dh-ch This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields. -3ee95893613de7450247d7fef747136f DARIAH EU dariah The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. -5fde864866ea5ded4cc873b3170b63c3 Transport Research beopen Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research. The TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323) -aa0e56dd2e9d2a0be749f5debdd2b3d8 Energy Research enermaps

-EnerMaps Open Data Management Tool aims to  improve data management  and  accessibility  in the field of  energy research  for the  renewable energy industry.
-
-EnerMaps’ tool accelerates and facilitates the energy transition offering a qualitative and user-friendly digital platform to the energy professionals.
-
-The project is based on the  FAIR data principle  which requires data to be  Findable,  Accessible,  Interoperable and  Reusable.
-
-EnerMaps project  coordinates and enriches existing energy databases to promote  trans-disciplinary research  and to develop partnerships between researchers and the energy professionals.
-
-The EnerMaps project has received funding from the European Union’s Horizon 2020 research and innovation program under   grant agreement N°884161
-
-
-
-Website:  https://enermaps.eu/
-
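Note on the regenerated dump-schema files above: every "description" in the new JSON schema reads "Description of <field>" because GenerateJsonSchema (see the test diff above) installs a description resolver that derives the text from the declared field name. The following is a minimal, self-contained sketch of that mechanism, assuming the same victools 4.x generator API used by the test; DemoBean is a hypothetical stand-in for the dumped model classes such as Result or CommunityResult:

import com.fasterxml.jackson.databind.JsonNode;
import com.github.victools.jsonschema.generator.*;

public class SchemaDescriptionDemo {

	// hypothetical model class standing in for eu.dnetlib.dhp.oa.model.Result
	static class DemoBean {
		private String maintitle;

		public String getMaintitle() {
			return maintitle;
		}
	}

	public static void main(String[] args) {
		SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(
			SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON);
		// every field gets a synthetic description derived from its declared name,
		// which is why the schema diff shows "Description of maintitle", "Description of pid", ...
		configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
		SchemaGenerator generator = new SchemaGenerator(configBuilder.build());
		JsonNode jsonSchema = generator.generateSchema(DemoBean.class);
		System.out.println(jsonSchema.toPrettyString());
	}
}

Running this prints a schema whose only description is "Description of maintitle", matching the pattern that fills the regenerated schema files.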
diff --git a/dump/pom.xml b/dump/pom.xml index 1600163..01d29b6 100644 --- a/dump/pom.xml +++ b/dump/pom.xml @@ -53,6 +53,11 @@ dump-schema 1.2.5-SNAPSHOT + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + org.apache.httpcomponents httpclient @@ -62,6 +67,12 @@ classgraph 4.8.71 + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + compile + diff --git a/dump/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dump/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 43e61e5..b72fbe5 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dump/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -10,6 +10,7 @@ import java.util.Optional; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.slf4j.Logger; @@ -20,6 +21,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class MakeTarArchive implements Serializable { private static final Logger log = LoggerFactory.getLogger(MakeTarArchive.class); + private static int index = 1; + private static String prevname = new String(); public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -154,13 +157,21 @@ public class MakeTarArchive implements Serializable { String pString = p.toString(); if (!pString.endsWith("_SUCCESS")) { String name = pString.substring(pString.lastIndexOf("/") + 1); - if (name.startsWith("part-") & name.length() > 10) { - String tmp = name.substring(0, 10); - if (name.contains(".")) { - tmp += name.substring(name.indexOf(".")); - } - name = tmp; - } +// if (name.startsWith("part-") & name.length() > 10) { +// String tmp = name.substring(0, 10); +// if (prevname.equalsIgnoreCase(tmp)) { +// tmp = tmp + "_" + index; +// index += 1; +// } else { +// prevname = tmp; +// index = 1; +// } +// if (name.contains(".")) { +// tmp += name.substring(name.indexOf(".")); +// } +// name = tmp; +// +// } if (rename) { if (name.endsWith(".txt.gz")) name = name.replace(".txt.gz", ".json.gz"); diff --git a/dump/src/main/java/eu/dnetlib/dhp/common/zenodoapi/ZenodoAPIClient.java b/dump/src/main/java/eu/dnetlib/dhp/common/zenodoapi/ZenodoAPIClient.java index 192ba8c..6a206b3 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/common/zenodoapi/ZenodoAPIClient.java +++ b/dump/src/main/java/eu/dnetlib/dhp/common/zenodoapi/ZenodoAPIClient.java @@ -8,8 +8,9 @@ import java.util.concurrent.TimeUnit; import org.apache.http.HttpHeaders; import org.apache.http.entity.ContentType; -// import org.apache.http.impl.client.HttpClients; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.gson.Gson; @@ -18,7 +19,7 @@ import eu.dnetlib.dhp.common.zenodoapi.model.ZenodoModelList; import okhttp3.*; public class ZenodoAPIClient implements Serializable { - + private static final Logger log = LoggerFactory.getLogger(ZenodoAPIClient.class); String urlString; String bucket; @@ -29,6 +30,8 @@ public class ZenodoAPIClient implements Serializable { private static final MediaType MEDIA_TYPE_ZIP = MediaType.parse("application/zip"); + private static final MediaType MEDIA_TYPE_TAR = MediaType.parse("application/octet-stream"); + public String getUrlString() { return urlString; } @@ -61,40 +64,40 @@ public class ZenodoAPIClient implements Serializable { * @return response code * 
@throws IOException */ -// public int newDeposition() throws IOException { -// String json = "{}"; -// -// URL url = new URL(urlString); -// HttpURLConnection conn = (HttpURLConnection) url.openConnection(); -// conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); -// conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); -// conn.setRequestMethod("POST"); -// conn.setDoOutput(true); -// try (OutputStream os = conn.getOutputStream()) { -// byte[] input = json.getBytes("utf-8"); -// os.write(input, 0, input.length); -// } -// -// String body = getBody(conn); -// -// int responseCode = conn.getResponseCode(); -// conn.disconnect(); -// -// if (!checkOKStatus(responseCode)) -// throw new IOException("Unexpected code " + responseCode + body); -// -// ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); -// this.bucket = newSubmission.getLinks().getBucket(); -// this.deposition_id = newSubmission.getId(); -// -// return responseCode; -// } + public int newDeposition2() throws IOException { + String json = "{}"; + + URL url = new URL(urlString); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()); + conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); + conn.setRequestMethod("POST"); + conn.setDoOutput(true); + try (OutputStream os = conn.getOutputStream()) { + byte[] input = json.getBytes("utf-8"); + os.write(input, 0, input.length); + } + + String body = getBody(conn); + + int responseCode = conn.getResponseCode(); + conn.disconnect(); + + if (!checkOKStatus(responseCode)) + throw new IOException("Unexpected code " + responseCode + body); + + ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); + this.bucket = newSubmission.getLinks().getBucket(); + this.deposition_id = newSubmission.getId(); + + return responseCode; + } public int newDeposition() throws IOException { String json = "{}"; OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); - RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON); + RequestBody body = RequestBody.create(json.getBytes()); Request request = new Request.Builder() .url(urlString) @@ -122,50 +125,6 @@ public class ZenodoAPIClient implements Serializable { } -// public int uploadIS2(InputStream is, String fileName) throws IOException { -// -// final String crlf = "\r\n"; -// final String twoHyphens = "--"; -// final String boundary = "*****"; -// -// HttpPut put = new HttpPut(bucket + "/" + fileName); -// -// put.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip"); -// put.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); -// -// put.setEntity(new InputStreamEntity(is)); -// -// int statusCode; -// try (CloseableHttpClient client = HttpClients.createDefault()) { -// CloseableHttpResponse response = client.execute(put); -// statusCode = response.getStatusLine().getStatusCode(); -// -// } -// -// if (!checkOKStatus(statusCode)) { -// throw new IOException("Unexpected code " + statusCode); -// } -// -// return statusCode; -// } - -// public int publish() throws IOException { -// String json = "{}"; -// HttpPost post = new HttpPost(urlString + "/" + deposition_id + "/actions/publish"); -// post.addHeader(HttpHeaders.CONTENT_TYPE, "application/json"); -// post.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); -// post.setEntity(new StringEntity(json)); 
-//		int statusCode;
-//		try (CloseableHttpClient client = HttpClients.createDefault()) {
-//			CloseableHttpResponse response = client.execute(post);
-//			statusCode = response.getStatusLine().getStatusCode();
-//		}
-//		if (!checkOKStatus(statusCode)) {
-//			throw new IOException("Unexpected code " + statusCode);
-//		}
-//		return statusCode;
-//	}
-
 	/**
 	 * Upload files in Zenodo.
 	 *
@@ -217,22 +176,33 @@ public class ZenodoAPIClient implements Serializable {
 	}
 
 	public int uploadIS3(InputStream is, String file_name, long len) throws IOException {
+//		String urlString = "http://checkip.amazonaws.com/";
+//		URL url = new URL(urlString);
+//		try (BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()))) {
+//			log.info(br.readLine());
+//		}
 		OkHttpClient httpClient = new OkHttpClient.Builder()
 			.writeTimeout(600, TimeUnit.SECONDS)
 			.readTimeout(600, TimeUnit.SECONDS)
 			.connectTimeout(600, TimeUnit.SECONDS)
 			.build();
+		System.out.println(bucket + "/" + file_name);
+
 		Request request = new Request.Builder()
 			.url(bucket + "/" + file_name)
-			.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
+			.addHeader(HttpHeaders.CONTENT_TYPE, "application/json") // add request headers
 			.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
-			.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
+			.put(InputStreamRequestBody.create(MEDIA_TYPE_TAR, is, len))
 			.build();
 
+		log.info("URL: " + request.url().toString());
+		// log.info("Headers: " + request.headers().toString());
+
 		try (Response response = httpClient.newCall(request).execute()) {
-			if (!response.isSuccessful())
-				throw new IOException("Unexpected code " + response + response.body().string());
+			if (!response.isSuccessful()) {
+				// read the body only once: string() consumes and closes it, a second call would fail
+				String body = response.body().string();
+				log.info("Unexpected code " + response + body);
+				System.out.println("Unexpected code " + response + body);
+			}
 			return response.code();
 		}
 	}
@@ -287,39 +257,40 @@ public class ZenodoAPIClient implements Serializable {
 	 * @throws IOException
 	 * @throws MissingConceptDoiException
 	 */
-//	public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
-//		setDepositionId(concept_rec_id, 1);
-//		String json = "{}";
-//
-//		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
-//		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-//
-//		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
-//		conn.setDoOutput(true);
-//		conn.setRequestMethod("POST");
-//
-//		try (OutputStream os = conn.getOutputStream()) {
-//			byte[] input = json.getBytes("utf-8");
-//			os.write(input, 0, input.length);
-//
-//		}
-//
-//		String body = getBody(conn);
-//
-//		int responseCode = conn.getResponseCode();
-//
-//		conn.disconnect();
-//		if (!checkOKStatus(responseCode))
-//			throw new IOException("Unexpected code " + responseCode + body);
-//
-//		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
-//		String latest_draft = zenodoModel.getLinks().getLatest_draft();
-//		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
-//		bucket = getBucket(latest_draft);
-//
-//		return responseCode;
-//
-//	}
+	public int newVersion2(String concept_rec_id) throws Exception, MissingConceptDoiException {
+		setDepositionId(concept_rec_id, 1);
+		String json = "{}";
+
+		URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
+		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+
+		conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+		conn.setDoOutput(true);
+		conn.setRequestMethod("POST");
+
+		try (OutputStream os = conn.getOutputStream()) {
+			byte[] input = json.getBytes("utf-8");
+			os.write(input, 0, input.length);
+
+		}
+
+		String body = getBody(conn);
+
+		int responseCode = conn.getResponseCode();
+
+		conn.disconnect();
+		if (!checkOKStatus(responseCode))
+			throw new IOException("Unexpected code " + responseCode + body);
+
+		ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+		String latest_draft = zenodoModel.getLinks().getLatest_draft();
+		deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
+		bucket = getBucket(latest_draft);
+
+		return responseCode;
+
+	}
+
 	public int newVersion(String concept_rec_id) throws Exception, MissingConceptDoiException {
 		setDepositionId(concept_rec_id, 1);
 		String json = "{}";
@@ -336,8 +307,10 @@ public class ZenodoAPIClient implements Serializable {
 
 		try (Response response = httpClient.newCall(request).execute()) {
 
-			if (!response.isSuccessful())
+			if (!response.isSuccessful()) {
+				// log.info("response headers " + response.headers().toString());
 				throw new IOException("Unexpected code " + response + response.body().string());
+			}
 
 			ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
 			String latest_draft = zenodoModel.getLinks().getLatest_draft();
@@ -360,7 +333,11 @@ public class ZenodoAPIClient implements Serializable {
 	 * @throws MissingConceptDoiException
 	 */
 	public int uploadOpenDeposition(String deposition_id) throws IOException, MissingConceptDoiException {
-
+//		String urlamazon = "http://checkip.amazonaws.com/";
+//		URL url = new URL(urlamazon);
+//		try (BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()))) {
+//			log.info(br.readLine());
+//		}
 		this.deposition_id = deposition_id;
 
 		OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
 
@@ -370,11 +347,15 @@ public class ZenodoAPIClient implements Serializable {
 			.addHeader("Authorization", "Bearer " + access_token)
 			.build();
 
+		log.info("URL: " + request.url().toString());
+		// log.info("Headers: " + request.headers().toString());
+
 		try (Response response = httpClient.newCall(request).execute()) {
-			if (!response.isSuccessful())
-				throw new IOException("Unexpected code " + response + response.body().string());
-
+			if (!response.isSuccessful()) {
+				// read the body only once: string() consumes and closes it, a second call would fail
+				String body = response.body().string();
+				log.info("Unexpected code " + response + body);
+				throw new IOException("Unexpected code " + response + body);
+			}
 			ZenodoModel zenodoModel = new Gson()
 				.fromJson(response.body().string(), ZenodoModel.class);
 			bucket = zenodoModel.getLinks().getBucket();
@@ -385,6 +366,11 @@ public class ZenodoAPIClient implements Serializable {
 	}
 
 	private void setDepositionId(String concept_rec_id, Integer page) throws Exception, MissingConceptDoiException {
+//		String urlString = "http://checkip.amazonaws.com/";
+//		URL url = new URL(urlString);
+//		try (BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()))) {
+//			log.info(br.readLine());
+//		}
 		ZenodoModelList zenodoModelList = new Gson()
 			.fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
 
@@ -402,27 +388,13 @@ public class ZenodoAPIClient implements Serializable {
 
 	}
 
-//	private String getPrevDepositions(String page) throws Exception {
-//
-//		HttpGet get = new HttpGet(urlString);
-//		URI uri = new URIBuilder(get.getURI()).addParameter("page", page).build();
-//
-//		get.setURI(uri);
-//
-//		get.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
-//
get.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token); -// try (CloseableHttpClient client = HttpClients.createDefault()) { -// CloseableHttpResponse response = client.execute(get); -// final String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); -// return body; -// } -// } - private String getPrevDepositions(String page) throws IOException { OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build(); - HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder(); + HttpUrl.Builder urlBuilder = HttpUrl + .parse(urlString)// + "?access_token=" + access_token + "&page=" + page) + .newBuilder(); urlBuilder.addQueryParameter("page", page); String url = urlBuilder.build().toString(); @@ -433,10 +405,15 @@ public class ZenodoAPIClient implements Serializable { .get() .build(); + log.info("URL: " + request.url().toString()); + log.info("Headers: " + request.headers().toString()); + try (Response response = httpClient.newCall(request).execute()) { - if (!response.isSuccessful()) + if (!response.isSuccessful()) { + log.info("response headers: " + response.headers()); throw new IOException("Unexpected code " + response + response.body().string()); + } return response.body().string(); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java deleted file mode 100644 index 8ca73ea..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ /dev/null @@ -1,110 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; - -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; - -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; -import eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - - private ISLookUpService isLookUp; - - private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class); - - private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and ($x//context/param[./@name = 'status']/text() = 'all') " - + - " return " + - " " + - "{$x//CONFIGURATION/context/@id}" + - "{$x//CONFIGURATION/context/@label}" + - ""; - - private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//CONFIGURATION/context[./@id=%s] " - + - " return " + - " " + - "{$x//CONFIGURATION/context/@id}" + - "{$x//CONFIGURATION/context/@label}" + - ""; - - public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) - throws ISLookUpException, DocumentException, SAXException { - if (singleCommunity) - return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'"))); - return getMap(isLookUp.quickSearchProfile(XQUERY_ALL)); - - 
} - - public ISLookUpService getIsLookUp() { - return isLookUp; - } - - public void setIsLookUp(ISLookUpService isLookUpService) { - this.isLookUp = isLookUpService; - } - - private CommunityMap getMap(List communityMap) throws DocumentException, SAXException { - final CommunityMap map = new CommunityMap(); - - for (String xml : communityMap) { - final Document doc; - final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - doc = reader.read(new StringReader(xml)); - Element root = doc.getRootElement(); - map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); - } - - return map; - } - - public List getCommunityCsv(String toString) throws ISLookUpException, SAXException, DocumentException { - List communities = new ArrayList<>(); - - for (String xml : isLookUp.quickSearchProfile(toString)) { - log.info(xml); - final Document doc; - final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - doc = reader.read(new StringReader(xml)); - Element root = doc.getRootElement(); - StringBuilder builder = new StringBuilder(); - builder.append(DHPUtils.md5(root.attribute("id").getValue())); - builder.append(Constants.SEP); - builder.append(root.attribute("label").getValue()); - builder.append(Constants.SEP); - builder.append(root.attribute("id").getValue()); - builder.append(Constants.SEP); - builder - .append( - ((Node) (root.selectNodes("//description").get(0))) - .getText() - .replace("\n", " ") - .replace("\t", " ")); - communities.add(builder.toString()); - } - return communities; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index 75f01a6..3c45e7f 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -2,13 +2,14 @@ package eu.dnetlib.dhp.oa.graph.dump; import static eu.dnetlib.dhp.oa.graph.dump.Constants.*; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; -import org.jetbrains.annotations.NotNull; import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException; import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; @@ -16,8 +17,8 @@ import eu.dnetlib.dhp.oa.model.*; import eu.dnetlib.dhp.oa.model.AccessRight; import eu.dnetlib.dhp.oa.model.Author; import eu.dnetlib.dhp.oa.model.GeoLocation; -import eu.dnetlib.dhp.oa.model.Indicator; import eu.dnetlib.dhp.oa.model.Instance; +import eu.dnetlib.dhp.oa.model.OpenAccessColor; import eu.dnetlib.dhp.oa.model.OpenAccessRoute; import eu.dnetlib.dhp.oa.model.Result; import eu.dnetlib.dhp.oa.model.Subject; @@ -30,6 +31,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; public class ResultMapper implements Serializable { + private static final String NULL = "null"; public static Result map( E in, Map communityMap, String dumpType) @@ -54,10 +56,14 @@ public class ResultMapper implements Serializable { mapCountry(out, input); mapCoverage(out, input); out.setDateofcollection(input.getDateofcollection()); + out.setGreen(input.getIsGreen()); + out.setInDiamondJournal(input.getIsInDiamondJournal()); + 
out.setPubliclyFunded(input.getPubliclyFunded()); + mapOpenAccessColor(out, input); mapDescription(out, input); mapEmbargo(out, input); mapFormat(out, input); - out.setId(input.getId().substring(3)); + out.setId(getEntityId(input.getId(), ENTITY_ID_SEPARATOR)); mapOriginalId(out, input); mapInstance(dumpType, out, input); mapLanguage(out, input); @@ -83,6 +89,23 @@ public class ResultMapper implements Serializable { } + private static void mapOpenAccessColor(Result out, eu.dnetlib.dhp.schema.oaf.Result input) { + if (Optional.ofNullable(input.getOpenAccessColor()).isPresent()) + switch (input.getOpenAccessColor()) { + case bronze: + out.setOpenAccessColor(OpenAccessColor.bronze); + break; + case gold: + out.setOpenAccessColor(OpenAccessColor.gold); + break; + case hybrid: + out.setOpenAccessColor(OpenAccessColor.hybrid); + break; + + } + + } + private static void mapContext(Map communityMap, CommunityResult out, eu.dnetlib.dhp.schema.oaf.Result input) { Set communities = communityMap.keySet(); @@ -155,7 +178,7 @@ public class ResultMapper implements Serializable { input .getCollectedfrom() .stream() - .map(cf -> CfHbKeyValue.newInstance(cf.getKey().substring(3), cf.getValue())) + .map(cf -> CfHbKeyValue.newInstance(getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), cf.getValue())) .collect(Collectors.toList())); } @@ -187,6 +210,7 @@ public class ResultMapper implements Serializable { // .getProvenanceaction() // .getClassid() // .equalsIgnoreCase("subject:sdg")))) + .filter(s -> !s.getValue().equalsIgnoreCase(NULL)) .forEach(s -> subjectList.add(getSubject(s)))); out.setSubjects(subjectList); @@ -521,14 +545,18 @@ public class ResultMapper implements Serializable { instance .setCollectedfrom( CfHbKeyValue - .newInstance(i.getCollectedfrom().getKey().substring(3), i.getCollectedfrom().getValue())); + .newInstance( + getEntityId(i.getCollectedfrom().getKey(), ENTITY_ID_SEPARATOR), + i.getCollectedfrom().getValue())); if (Optional.ofNullable(i.getHostedby()).isPresent() && Optional.ofNullable(i.getHostedby().getKey()).isPresent() && StringUtils.isNotBlank(i.getHostedby().getKey())) instance .setHostedby( - CfHbKeyValue.newInstance(i.getHostedby().getKey().substring(3), i.getHostedby().getValue())); + CfHbKeyValue + .newInstance( + getEntityId(i.getHostedby().getKey(), ENTITY_ID_SEPARATOR), i.getHostedby().getValue())); return instance; @@ -670,7 +698,9 @@ public class ResultMapper implements Serializable { if (di.isPresent()) { Provenance p = new Provenance(); p.setProvenance(di.get().getProvenanceaction().getClassname()); - p.setTrust(di.get().getTrust()); + if (!s.getQualifier().getClassid().equalsIgnoreCase("fos") && + !s.getQualifier().getClassid().equalsIgnoreCase("sdg")) + p.setTrust(di.get().getTrust()); subject.setProvenance(p); } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 51f1852..2a7324d 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -13,16 +13,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import 
eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** - * This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the + * This class connects with the community APIs for production. It saves the information about the * context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community * - research infrastructure/initiative , the value is the label of the research community - research * infrastructure/initiative. @@ -31,11 +28,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); - private final transient QueryInformationSystem queryInformationSystem; + private final transient UtilCommunityAPI queryInformationSystem; private final transient BufferedWriter writer; - public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { + public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException { final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); @@ -45,8 +42,7 @@ public class SaveCommunityMap implements Serializable { fileSystem.delete(hdfsWritePath, true); } - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + queryInformationSystem = new UtilCommunityAPI(); FSDataOutputStream fos = fileSystem.create(hdfsWritePath); writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); @@ -68,9 +64,6 @@ public class SaveCommunityMap implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); - final Boolean singleCommunity = Optional .ofNullable(parser.get("singleDeposition")) .map(Boolean::valueOf) @@ -78,14 +71,14 @@ public class SaveCommunityMap implements Serializable { final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null); - final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); + final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode); scm.saveCommunityMap(singleCommunity, community_id); } private void saveCommunityMap(boolean singleCommunity, String communityId) - throws ISLookUpException, IOException, DocumentException, SAXException { + throws IOException { final String communityMapString = Utils.OBJECT_MAPPER .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); log.info("communityMap {} ", communityMapString); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index 71c10be..29a3ab5 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -3,21 +3,31 @@ package eu.dnetlib.dhp.oa.graph.dump; import java.io.Serializable; import java.util.Optional; +import java.util.concurrent.TimeUnit; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; +import org.apache.http.HttpStatus; +import org.joda.time.DateTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
-import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
 import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
+import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
+import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
 
 public class SendToZenodoHDFS implements Serializable {
 
 	private static final String NEW = "new"; // to be used for a brand new deposition in zenodo
 	private static final String VERSION = "version"; // to be used to upload a new version of a published deposition
 	private static final String UPDATE = "update"; // to upload content to an open deposition not published
+	private static final Integer NUMBER_OF_RETRIES = 5;
+	private static final Integer DELAY = 10;
+	private static final Integer MULTIPLIER = 5;
+
+	private static final Logger log = LoggerFactory.getLogger(SendToZenodoHDFS.class);
 
 	public static void main(final String[] args) throws Exception, MissingConceptDoiException {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@@ -79,22 +89,44 @@ public class SendToZenodoHDFS implements Serializable {
 				Path p = fileStatus.getPath();
 				String pString = p.toString();
+				boolean retry = true;
+				int numberOfRetries = 0;
+
 				if (!pString.endsWith("_SUCCESS")) {
 					String name = pString.substring(pString.lastIndexOf("/") + 1);
+					log.info("Uploading: {}", name);
+					while (retry && numberOfRetries < NUMBER_OF_RETRIES) {
+						// reopen the stream at each attempt: a failed upload leaves it partially consumed
+						FSDataInputStream inputStream = fileSystem.open(p);
+						int response_code = -1;
 
-					try (FSDataInputStream inputStream = fileSystem.open(p)) {
-						zenodoApiClient.uploadIS(inputStream, name);
+						try {
+							response_code = zenodoApiClient
+								.uploadIS3(inputStream, name, fileSystem.getFileStatus(p).getLen());
+						} catch (Exception e) {
+							log.info(e.getMessage());
+							throw new RuntimeException("Error while uploading on Zenodo");
+						}
+						log.info("response code: {}", response_code);
+						if (HttpStatus.SC_OK == response_code || HttpStatus.SC_CREATED == response_code) {
+							retry = false;
+						} else {
+							numberOfRetries += 1;
+							// exponential backoff; note that ^ is XOR in Java, hence Math.pow here
+							TimeUnit.SECONDS.sleep(DELAY * (long) Math.pow(MULTIPLIER, numberOfRetries));
+						}
+					}
+					if (numberOfRetries == NUMBER_OF_RETRIES) {
+						throw new RuntimeException("reached the maximum number of retries to upload on Zenodo");
 					}
-				}
+				}
+				// log.info(DateTime.now().toDateTimeISO().toString());
+				TimeUnit.SECONDS.sleep(DELAY);
+				// log.info("Delayed: {}", DateTime.now().toDateTimeISO().toString());
 			}
 
 			if (!metadata.equals("")) {
 				zenodoApiClient.sendMretadata(metadata);
 			}
 
-			if (Boolean.TRUE.equals(publish)) {
-				zenodoApiClient.publish();
-			}
 	}
 
 }
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkCopyGraph.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkCopyGraph.java
new file mode 100644
index 0000000..77ff618
--- /dev/null
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkCopyGraph.java
@@ -0,0 +1,92 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.FilterFunction;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.*;
+import org.slf4j.Logger;
+import
org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import scala.Tuple2; + +/** + * @author miriam.baglioni + * @Date 22/09/23 + */ +public class SparkCopyGraph implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(SparkCopyGraph.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + SparkCopyGraph.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/copygraph_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String hivePath = parser.get("hivePath"); + log.info("hivePath: {}", hivePath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> + + execCopy( + spark, + hivePath, + outputPath)); + } + + private static void execCopy(SparkSession spark, String hivePath, String outputPath) { + + ModelSupport.oafTypes.entrySet().parallelStream().forEach(entry -> { + String entityType = entry.getKey(); + Class clazz = entry.getValue(); + // if (!entityType.equalsIgnoreCase("relation")) { + spark + .read() + .schema(Encoders.bean(clazz).schema()) + .parquet(hivePath + "/" + entityType) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/" + entityType); + + }); + + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java new file mode 100644 index 0000000..30737e7 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java @@ -0,0 +1,203 @@ + +package eu.dnetlib.dhp.oa.graph.dump; + +import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.communityapi.model.*; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo; +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; +import eu.dnetlib.dhp.utils.DHPUtils; + +public class UtilCommunityAPI { + + private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class); + + public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) + throws IOException { + if (singleCommunity) + return getMap(Arrays.asList(getCommunity(communityId))); + return getMap(getValidCommunities()); + + } + + private CommunityMap getMap(List communities) { + final CommunityMap map = new CommunityMap(); + communities.forEach(c -> map.put(c.getId(), c.getName())); + return map; + } + + public List getCommunityCsv(List comms) { + return comms.stream().map(c -> { + try { + CommunityModel community = getCommunity(c); + StringBuilder builder = new StringBuilder(); + 
builder.append(DHPUtils.md5(community.getId())); + builder.append(Constants.SEP); + builder.append(community.getName()); + builder.append(Constants.SEP); + builder.append(community.getId()); + builder.append(Constants.SEP); + builder + .append( + community.getDescription()); + return builder.toString(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + + } + + private List getValidCommunities() throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) + .stream() + .filter( + community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public")) + && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + + } + + private CommunityModel getCommunity(String id) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); + + } + + public List getContextInformation() throws IOException { + + return getValidCommunities() + .stream() + .map(c -> getContext(c)) + .collect(Collectors.toList()); + + } + + public ContextInfo getContext(CommunityModel c) { + + ContextInfo cinfo = new ContextInfo(); + cinfo.setId(c.getId()); + cinfo.setDescription(c.getDescription()); + CommunityModel cm = null; + try { + cm = getCommunity(c.getId()); + } catch (IOException e) { + throw new RuntimeException(e); + } + cinfo.setSubject(new ArrayList<>()); + cinfo.getSubject().addAll(cm.getSubjects()); + cinfo.setZenodocommunity(c.getZenodoCommunity()); + cinfo.setType(c.getType()); + return cinfo; + } + + public List getContextRelation() throws IOException { + return getValidCommunities().stream().map(c -> { + ContextInfo cinfo = new ContextInfo(); + cinfo.setId(c.getId()); + cinfo.setDatasourceList(getDatasourceList(c.getId())); + cinfo.setProjectList(getProjectList(c.getId())); + + return cinfo; + }).collect(Collectors.toList()); + } + + private List getDatasourceList(String id) { + List datasourceList = new ArrayList<>(); + try { + + new ObjectMapper() + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id), + DatasourceList.class) + .stream() + .forEach(ds -> { + if (Optional.ofNullable(ds.getOpenaireId()).isPresent()) { + + datasourceList.add(ds.getOpenaireId()); + } + + }); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return datasourceList; + } + + private List getProjectList(String id) { + int page = -1; + int size = 100; + ContentModel cm = null; + ; + ArrayList projectList = new ArrayList<>(); + do { + page++; + try { + cm = new ObjectMapper() + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI + .communityProjects( + id, String.valueOf(page), String.valueOf(size)), + ContentModel.class); + if (cm.getContent().size() > 0) { + cm.getContent().forEach(p -> { + if (Optional.ofNullable(p.getOpenaireId()).isPresent()) + projectList.add(p.getOpenaireId()); + + }); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + + return projectList; + } + + /** + * it returns for each organization the list of associated communities + */ + public CommunityEntityMap getCommunityOrganization() throws IOException { + CommunityEntityMap organizationMap = new CommunityEntityMap(); + getValidCommunities() + .forEach(community -> { + String id = 
community.getId(); + try { + List associatedOrgs = MAPPER + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), + OrganizationList.class); + associatedOrgs.forEach(o -> { + if (!organizationMap + .keySet() + .contains(o)) + organizationMap.put(o, new ArrayList<>()); + organizationMap.get(o).add(community.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + return organizationMap; + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index d3f41d8..c72955c 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -32,12 +33,11 @@ import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Measure; import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import scala.Tuple2; public class Utils { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static final String ENTITY_ID_SEPARATOR = "|"; private Utils() { } @@ -54,10 +54,6 @@ public class Utils { .map((MapFunction) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz)); } - public static ISLookUpService getIsLookUpService(String isLookUpUrl) { - return ISLookupClientFactory.getLookUpService(isLookUpUrl); - } - public static String getContextId(String id) { return String @@ -88,6 +84,10 @@ public class Utils { return new Gson().fromJson(sb.toString(), CommunityMap.class); } + public static String getEntityId(String id, String separator) { + return id.substring(id.indexOf(separator) + 1); + } + public static Dataset getEntitiesId(SparkSession spark, String inputPath) { Dataset dumpedIds = Utils .readPath(spark, inputPath + "/publication", GraphResult.class) @@ -162,23 +162,7 @@ public class Utils { default: getImpactMeasure(i).add(getScore(m.getId(), m.getUnit())); break; -// case IMPACT_POPULARITY: -// getImpactMeasure(i).setPopularity(getScore(m.getUnit())); -// break; -// case IMPACT_POPULARITY_ALT: -// getImpactMeasure(i).setPopularity_alt(getScore(m.getUnit())); -// break; -// case IMPACT_IMPULSE: -// getImpactMeasure(i).setImpulse(getScore(m.getUnit())); -// break; -// case IMPACT_INFLUENCE: -// getImpactMeasure(i).setInfluence(getScore(m.getUnit())); -// break; -// case IMPACT_INFLUENCE_ALT: -// getImpactMeasure(i).setInfluence_alt(getScore(m.getUnit())); -// break; -// default: -// break; + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 7fa3764..a4f9e2d 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -67,7 +67,7 @@ public class CommunitySplit implements Serializable { .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) - .text(outputPath + "/" + communities.get(c).replace(" ", "_")); + .text(outputPath + "/" + c.replace(" ", "_")); }); } diff --git 
a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 8c4faba..42fd683 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -2,6 +2,8 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId; import java.io.Serializable; import java.io.StringReader; @@ -110,7 +112,7 @@ public class SparkPrepareResultProject implements Serializable { Tuple2 first = it.next(); ResultProject rp = new ResultProject(); if (substring) - rp.setResultId(s.substring(3)); + rp.setResultId(getEntityId(s, ENTITY_ID_SEPARATOR)); else rp.setResultId(s); eu.dnetlib.dhp.schema.oaf.Project p = first._1(); @@ -142,7 +144,7 @@ public class SparkPrepareResultProject implements Serializable { private static Project getProject(eu.dnetlib.dhp.schema.oaf.Project op, Relation relation) { Project p = Project .newInstance( - op.getId().substring(3), + getEntityId(op.getId(), ENTITY_ID_SEPARATOR), op.getCode().getValue(), Optional .ofNullable(op.getAcronym()) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index 0356bd4..20bce5d 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -20,9 +20,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes on HDFS Context entities. 
It queries the Information System at the lookup url provided as parameter and @@ -52,13 +52,10 @@ public class CreateContextEntities implements Serializable { final String hdfsNameNode = parser.get("nameNode"); log.info("nameNode: {}", hdfsNameNode); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); - final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode); log.info("Processing contexts..."); - cce.execute(Process::getEntity, isLookUpUrl); + cce.execute(Process::getEntity); cce.close(); @@ -87,15 +84,14 @@ public class CreateContextEntities implements Serializable { } - public void execute(final Function producer, String isLookUpUrl) - throws ISLookUpException { + public void execute(final Function producer) + throws IOException { - QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + UtilCommunityAPI queryInformationSystem = new UtilCommunityAPI(); final Consumer consumer = ci -> writeEntity(producer.apply(ci)); - queryInformationSystem.getContextInformation(consumer); + queryInformationSystem.getContextInformation().forEach(ci -> consumer.accept(ci)); } protected void writeEntity(final R r) { diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 0708fc9..86106fb 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -12,7 +12,6 @@ import java.util.function.Function; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -22,14 +21,11 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException; import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate; -import eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB; import eu.dnetlib.dhp.oa.model.graph.*; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes the set of new Relation between the context and datasources. 
At the moment the relation between the context @@ -39,10 +35,6 @@ public class CreateContextRelation implements Serializable { private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); private final transient Configuration conf; private final transient BufferedWriter writer; - private final transient QueryInformationSystem queryInformationSystem; - - private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -68,59 +60,26 @@ public class CreateContextRelation implements Serializable { final String hdfsNameNode = parser.get("nameNode"); log.info("hdfsNameNode: {}", hdfsNameNode); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); + final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode); - final String masterDuplicatePath = parser.get("masterDuplicate"); - log.info("masterDuplicatePath: {}", masterDuplicatePath); - - final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl); - - final List masterDuplicateList = cce.readMasterDuplicate(masterDuplicatePath); - - log.info("Creating relation for datasource..."); + log.info("Creating relation for datasources and projects..."); cce .execute( - Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class), - masterDuplicateList); - - log.info("Creating relations for projects... "); - cce - .execute( - Process::getRelation, CONTEX_RELATION_PROJECT, - ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); + Process::getRelation); cce.close(); } - private List readMasterDuplicate(String masterDuplicatePath) throws IOException { - FileSystem fileSystem = FileSystem.get(conf); - Path hdfsReadPath = new Path(masterDuplicatePath); - BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(hdfsReadPath))); - List mdlist = new ArrayList<>(); - ObjectMapper mapper = new ObjectMapper(); - String line; - - while ((line = reader.readLine()) != null) { - mdlist.add(mapper.readValue(line, MasterDuplicate.class)); - } - return mdlist; - } - private void close() throws IOException { writer.close(); } - public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl) - throws IOException, ISLookUpException { + public CreateContextRelation(String hdfsPath, String hdfsNameNode) + throws IOException { this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - queryInformationSystem.execContextRelationQuery(); - FileSystem fileSystem = FileSystem.get(this.conf); Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; @@ -134,17 +93,12 @@ public class CreateContextRelation implements Serializable { } - public void execute(final Function> producer, String category, String prefix) { - - execute(producer, category, prefix, null); - } - - public void execute(final Function> producer, String category, String prefix, - List masterDuplicateList) { + public void execute(final Function> producer) throws IOException { final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); - queryInformationSystem.getContextRelation(consumer, category, prefix, masterDuplicateList); 
+ UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI(); + queryCommunityAPI.getContextRelation().forEach(ci -> consumer.accept(ci)); } protected void writeEntity(final Relation r) { diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java index 8315808..794e769 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Extractor.java @@ -2,6 +2,8 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId; import java.io.Serializable; import java.util.*; @@ -84,7 +86,7 @@ public class Extractor implements Serializable { .orElse(null)) .orElse(null); Relation r = getRelation( - value.getId().substring(3), contextId, + getEntityId(value.getId(), ENTITY_ID_SEPARATOR), contextId, Constants.RESULT_ENTITY, Constants.CONTEXT_ENTITY, ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance); @@ -94,7 +96,7 @@ public class Extractor implements Serializable { hashCodes.add(r.hashCode()); } r = getRelation( - contextId, value.getId().substring(3), + contextId, getEntityId(value.getId(), ENTITY_ID_SEPARATOR), Constants.CONTEXT_ENTITY, Constants.RESULT_ENTITY, ModelConstants.IS_RELATED_TO, @@ -163,8 +165,8 @@ public class Extractor implements Serializable { eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED, eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)); Relation r = getRelation( - value.getId().substring(3), - cf.getKey().substring(3), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY, + getEntityId(value.getId(), ENTITY_ID_SEPARATOR), + getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY, resultDatasource, ModelConstants.PROVISION, provenance); if (!hashCodes.contains(r.hashCode())) { @@ -174,7 +176,7 @@ public class Extractor implements Serializable { } r = getRelation( - cf.getKey().substring(3), value.getId().substring(3), + getEntityId(cf.getKey(), ENTITY_ID_SEPARATOR), getEntityId(value.getId(), ENTITY_ID_SEPARATOR), Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY, datasourceResult, ModelConstants.PROVISION, provenance); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index 9a46d05..8fd5b3e 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -54,38 +54,11 @@ public class Process implements Serializable { List relationList = new ArrayList<>(); ci .getDatasourceList() - .forEach(ds -> { - String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2)); - String datasourceId = ds; - if (ds.startsWith("10|") || ds.startsWith("40|")) - datasourceId = ds.substring(3); + .forEach(ds -> relationList.addAll(addRelations(ci, ds, ModelSupport.idPrefixEntity.get("10")))); - String contextId = Utils.getContextId(ci.getId()); - relationList - .add( - Relation - .newInstance( - contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY, - datasourceId, nodeType, - RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), - Provenance - .newInstance( - Constants.USER_CLAIM, - 
Constants.DEFAULT_TRUST))); - - relationList - .add( - Relation - .newInstance( - datasourceId, nodeType, - contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY, - RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), - Provenance - .newInstance( - Constants.USER_CLAIM, - Constants.DEFAULT_TRUST))); - - }); + ci + .getProjectList() + .forEach(p -> relationList.addAll(addRelations(ci, p, ModelSupport.idPrefixEntity.get("40")))); return relationList; @@ -94,4 +67,33 @@ public class Process implements Serializable { } } + private static List addRelations(ContextInfo ci, String ds, String nodeType) { + List relationList = new ArrayList<>(); + String contextId = Utils.getContextId(ci.getId()); + relationList + .add( + Relation + .newInstance( + contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY, + ds, nodeType, + RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + Provenance + .newInstance( + Constants.USER_CLAIM, + Constants.DEFAULT_TRUST))); + + relationList + .add( + Relation + .newInstance( + ds, nodeType, + contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY, + RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), + Provenance + .newInstance( + Constants.USER_CLAIM, + Constants.DEFAULT_TRUST))); + return relationList; + } + } diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java deleted file mode 100644 index b982b26..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ /dev/null @@ -1,246 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.function.Consumer; - -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; - -import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate; -import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class); - private ISLookUpService isLookUp; - private List contextRelationResult; - - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//context/param[./@name = 'status']/text() = 'all' " + - " return " + - "$x//context"; - - private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return " - + - "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " + - "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " - + - 
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)"; - - public void getContextInformation(final Consumer consumer) throws ISLookUpException { - - isLookUp - .quickSearchProfile(XQUERY_ENTITY) - .forEach(c -> { - ContextInfo cinfo = new ContextInfo(); - String[] cSplit = c.split("@@"); - cinfo.setId(cSplit[0]); - cinfo.setName(cSplit[1]); - log.info("community name : {}", cSplit[1]); - cinfo.setDescription(cSplit[2]); - if (!cSplit[3].trim().equals("")) { - cinfo.setSubject(Arrays.asList(cSplit[3].split(","))); - } - cinfo.setZenodocommunity(cSplit[4]); - cinfo.setType(cSplit[5]); - consumer.accept(cinfo); - }); - - } - - public List getContextInformation() throws ISLookUpException { - List ret = new ArrayList<>(); - isLookUp - .quickSearchProfile(XQUERY_ENTITY) - .forEach(c -> { - ContextInfo cinfo = new ContextInfo(); - String[] cSplit = c.split("@@"); - cinfo.setId(cSplit[0]); - cinfo.setName(cSplit[1]); - cinfo.setDescription(cSplit[2]); - if (!cSplit[3].trim().equals("")) { - cinfo.setSubject(Arrays.asList(cSplit[3].split(","))); - } - cinfo.setZenodocommunity(cSplit[4]); - cinfo.setType(cSplit[5]); - ret.add(cinfo); - }); - - return ret; - - } - - public List getContextRelationResult() { - return contextRelationResult; - } - - public void setContextRelationResult(List contextRelationResult) { - this.contextRelationResult = contextRelationResult; - } - - public ISLookUpService getIsLookUp() { - return isLookUp; - } - - public void setIsLookUp(ISLookUpService isLookUpService) { - this.isLookUp = isLookUpService; - } - - public void execContextRelationQuery() throws ISLookUpException { - contextRelationResult = isLookUp.quickSearchProfile(XQUERY); - - } - - public void getContextRelation(final Consumer consumer, String category, String prefix) { - getContextRelation(consumer, category, prefix, null); - } - - public void getContextRelation(final Consumer consumer, String category, String prefix, - List masterDuplicateList) { - - contextRelationResult.forEach(xml -> { - ContextInfo cinfo = new ContextInfo(); - final Document doc; - - try { - final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - doc = reader.read(new StringReader(xml)); - Element root = doc.getRootElement(); - cinfo.setId(root.attributeValue("id")); - - Iterator it = root.elementIterator(); - while (it.hasNext()) { - Element el = it.next(); - if (el.getName().equals("category")) { - String categoryId = el.attributeValue("id"); - categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2); - if (categoryId.equals(category)) { - cinfo.setDatasourceList(getCategoryList(el, prefix, masterDuplicateList)); - } - } - - } - consumer.accept(cinfo); - } catch (DocumentException | SAXException e) { - e.printStackTrace(); - } - - }); - - } - - @NotNull - private List getCategoryList(Element el, String prefix, List masterDuplicateList) { - List datasourceList = new ArrayList<>(); - for (Object node : el.selectNodes(".//concept")) { - String oid = getOpenaireId((Node) node, prefix); - if (oid != null) - if (masterDuplicateList == null) - datasourceList.add(oid); - else - datasourceList.add(getMaster(oid, masterDuplicateList)); - } - - return datasourceList; - } - - private String getMaster(String oid, List masterDuplicateList) { - for (MasterDuplicate md : masterDuplicateList) { - if (md.getDuplicate().equals(oid)) - return md.getMaster(); - } - return oid; - } - - private String getOpenaireId(Node el, String prefix) { - for 
(Object node : el.selectNodes(".//param")) { - Node n = (Node) node; - if (n.valueOf("./@name").equals("openaireId")) { - String id = n.getText(); - if (id.startsWith(prefix + "|")) - return id; - return prefix + "|" + id; - } - } - - return makeOpenaireId(el, prefix); - - } - - private String makeOpenaireId(Node el, String prefix) { - if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) { - return null; - } - String funder = ""; - String grantId = null; - String funding = null; - for (Object node : el.selectNodes(".//param")) { - Node n = (Node) node; - switch (n.valueOf("./@name")) { - case "funding": - funding = n.getText(); - break; - case "funder": - funder = n.getText(); - break; - case "CD_PROJECT_NUMBER": - grantId = n.getText(); - break; - default: - break; - } - } - String nsp = null; - - switch (funder.toLowerCase()) { - case "ec": - if (funding == null) { - return null; - } - if (funding.toLowerCase().contains("h2020")) { - nsp = "corda__h2020::"; - } else if (funding.toLowerCase().contains("he")) { - nsp = "corda_____he::"; - } else { - nsp = "corda_______::"; - } - break; - case "tubitak": - nsp = "tubitakf____::"; - break; - case "dfg": - nsp = "dfgf________::"; - break; - default: - StringBuilder bld = new StringBuilder(); - bld.append(funder.toLowerCase()); - for (int i = funder.length(); i < 12; i++) - bld.append("_"); - bld.append("::"); - nsp = bld.toString(); - } - - return prefix + "|" + nsp + DHPUtils.md5(grantId); - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java index e9ad376..218a123 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpEntitiesJob.java @@ -2,6 +2,8 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId; import java.io.Serializable; import java.io.StringReader; @@ -216,7 +218,7 @@ public class SparkDumpEntitiesJob implements Serializable { return null; Datasource datasource = new Datasource(); - datasource.setId(d.getId().substring(3)); + datasource.setId(getEntityId(d.getId(), ENTITY_ID_SEPARATOR)); Optional .ofNullable(d.getOriginalId()) @@ -406,7 +408,7 @@ public class SparkDumpEntitiesJob implements Serializable { Optional .ofNullable(p.getId()) - .ifPresent(id -> project.setId(id.substring(3))); + .ifPresent(id -> project.setId(getEntityId(id, ENTITY_ID_SEPARATOR))); Optional .ofNullable(p.getWebsiteurl()) @@ -619,7 +621,7 @@ public class SparkDumpEntitiesJob implements Serializable { Optional .ofNullable(org.getId()) - .ifPresent(value -> organization.setId(value.substring(3))); + .ifPresent(value -> organization.setId(getEntityId(value, ENTITY_ID_SEPARATOR))); Optional .ofNullable(org.getPid()) diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java index 5c84c55..0abf994 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkDumpRelationJob.java @@ -2,6 +2,8 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId; import java.io.Serializable; import java.util.Collections; @@ -85,11 +87,11 @@ public class SparkDumpRelationJob implements Serializable { .map((MapFunction) relation -> { eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation(); relNew - .setSource(relation.getSource().substring(3)); + .setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR)); relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))); relNew - .setTarget(relation.getTarget().substring(3)); + .setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR)); relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))); relNew diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 527e324..aa9b7a0 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -2,6 +2,9 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId; +import static eu.dnetlib.dhp.schema.common.ModelSupport.idPrefixMap; import java.io.Serializable; import java.util.ArrayList; @@ -21,12 +24,16 @@ import org.slf4j.LoggerFactory; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.model.Provenance; import eu.dnetlib.dhp.oa.model.graph.RelType; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; /** @@ -58,8 +65,9 @@ public class SparkOrganizationRelation implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final OrganizationMap organizationMap = new Gson() - .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class); + UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI(); + final CommunityEntityMap organizationMap = queryCommunityAPI.getCommunityOrganization(); + final String serializedOrganizationMap = new Gson().toJson(organizationMap); log.info("organization map : {}", serializedOrganizationMap); @@ -79,7 +87,7 @@ public class SparkOrganizationRelation implements Serializable { } - private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap, + private static void extractRelation(SparkSession spark, String inputPath, CommunityEntityMap organizationMap, String outputPath, String communityMapPath) { CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); @@ -100,7 +108,7 @@ public class SparkOrganizationRelation implements Serializable { 
.as(Encoders.bean(MergedRels.class)); mergedRelsDataset.map((MapFunction) mergedRels -> { - if (organizationMap.containsKey(mergedRels.getOrganizationId())) { + if (organizationMap.containsKey(getEntityId(mergedRels.getOrganizationId(), ENTITY_ID_SEPARATOR))) { return mergedRels; } return null; @@ -129,15 +137,16 @@ public class SparkOrganizationRelation implements Serializable { } @NotNull - private static Consumer getMergedRelsConsumer(OrganizationMap organizationMap, + private static Consumer getMergedRelsConsumer(CommunityEntityMap organizationMap, List relList, CommunityMap communityMap) { return mergedRels -> { - String oId = mergedRels.getOrganizationId(); + String oId = getEntityId(mergedRels.getOrganizationId(), ENTITY_ID_SEPARATOR); organizationMap .get(oId) .forEach(community -> { if (communityMap.containsKey(community)) { - addRelations(relList, community, mergedRels.getRepresentativeId()); + addRelations( + relList, community, getEntityId(mergedRels.getRepresentativeId(), ENTITY_ID_SEPARATOR)); } }); @@ -155,8 +164,8 @@ public class SparkOrganizationRelation implements Serializable { eu.dnetlib.dhp.oa.model.graph.Relation .newInstance( id, Constants.CONTEXT_ENTITY, - organization.substring(3), - ModelSupport.idPrefixEntity.get(organization.substring(0, 2)), + organization, + ModelSupport.idPrefixEntity.get(idPrefixMap.get(Organization.class)), RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), Provenance .newInstance( @@ -167,7 +176,7 @@ public class SparkOrganizationRelation implements Serializable { .add( eu.dnetlib.dhp.oa.model.graph.Relation .newInstance( - organization.substring(3), ModelSupport.idPrefixEntity.get(organization.substring(0, 2)), + organization, ModelSupport.idPrefixEntity.get(idPrefixMap.get(Organization.class)), id, Constants.CONTEXT_ENTITY, RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP), Provenance diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java index ebbadaa..2231dac 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java @@ -11,27 +11,17 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.SparkSession; -import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem; -import eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; /** * @author miriam.baglioni @@ -44,7 +34,7 @@ public class DumpCommunities implements Serializable { private final BufferedWriter writer; private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP + " description 
\n"; - private final transient QueryInformationSystem queryInformationSystem; + private final transient UtilCommunityAPI queryCommunityAPI; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -64,31 +54,19 @@ public class DumpCommunities implements Serializable { final List communities = Arrays.asList(split(parser.get("communities"), ";")); - final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl")); + final DumpCommunities dc = new DumpCommunities(outputPath, nameNode); dc.writeCommunity(communities); } private void writeCommunity(List communities) - throws IOException, ISLookUpException, DocumentException, SAXException { + throws IOException { writer.write(HEADER); writer.flush(); - String a = IOUtils - .toString( - DumpCommunities.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq")); - final String xquery = String - .format( - a, - communities - .stream() - .map(t -> String.format("$x//CONFIGURATION/context[./@id= '%s']", t)) - .collect(Collectors.joining(" or "))); - - for (String community : queryInformationSystem - .getCommunityCsv(xquery)) { + for (String community : queryCommunityAPI + .getCommunityCsv(communities)) { writer .write( community); @@ -98,10 +76,9 @@ } writer.close(); } - public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception { + public DumpCommunities(String hdfsPath, String hdfsNameNode) throws Exception { final Configuration conf = new Configuration(); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + queryCommunityAPI = new UtilCommunityAPI(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 668ae21..ad11516 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -26,6 +26,7 @@ import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.model.community.CommunityResult; import eu.dnetlib.dhp.oa.model.community.Funder; import eu.dnetlib.dhp.oa.model.community.Project; +import io.netty.util.internal.StringUtil; /** * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC @@ -95,6 +97,8 @@ public class SparkDumpFunderResults implements Serializable { Optional ofunder = Optional.ofNullable(p.getFunder()); if (ofunder.isPresent()) { String fName = ofunder.get().getShortName(); + if (StringUtil.isNullOrEmpty(fName)) + return ofunder.get().getName(); if (fName.equalsIgnoreCase("ec")) { fName += "_" + ofunder.get().getFundingStream(); }
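The hunk above adds a fallback for funders that expose no short name. Combined with the pre-existing EC special case, the folder-naming rule used by SparkDumpFunderResults amounts to the sketch below (illustrative only, not the module's code; Funder and StringUtil as imported in this class):

    static String funderFolder(Funder funder) {
        String fName = funder.getShortName();
        // new fallback: use the full name when no short name is available
        if (StringUtil.isNullOrEmpty(fName))
            return funder.getName();
        // EC results are split further by funding stream, e.g. "EC_H2020"
        if (fName.equalsIgnoreCase("ec"))
            fName += "_" + funder.getFundingStream();
        return fName;
    }

diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/organizationonly/SparkDumpOrganizationJob.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/organizationonly/SparkDumpOrganizationJob.java new 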
file mode 100644 index 0000000..c218600 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/organizationonly/SparkDumpOrganizationJob.java @@ -0,0 +1,270 @@ + +package eu.dnetlib.dhp.oa.graph.dump.organizationonly; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.ENTITY_ID_SEPARATOR; +import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntityId; + +import java.io.Serializable; +import java.io.StringReader; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.Node; +import org.dom4j.io.SAXReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Constants; +import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException; +import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException; +import eu.dnetlib.dhp.oa.model.Container; +import eu.dnetlib.dhp.oa.model.Provenance; +import eu.dnetlib.dhp.oa.model.Result; +import eu.dnetlib.dhp.oa.model.graph.*; +import eu.dnetlib.dhp.oa.model.graph.Datasource; +import eu.dnetlib.dhp.oa.model.graph.Organization; +import eu.dnetlib.dhp.oa.model.graph.Project; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; + +/** + * Spark Job that fires the dump for the entites + */ +public class SparkDumpOrganizationJob implements Serializable { + private static final Logger log = LoggerFactory + .getLogger(eu.dnetlib.dhp.oa.graph.dump.organizationonly.SparkDumpOrganizationJob.class); + public static final String COMPRESSION = "compression"; + public static final String GZIP = "gzip"; + + public static void main(String[] args) throws Exception { + + Boolean isSparkSessionManaged = Boolean.TRUE; + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = "/tmp/prod_provision/graph/20_graph_blacklisted/"; + log.info("inputPath: {}", inputPath); + + final String outputPath = "/tmp/miriam/organizationsOnly/"; + log.info("outputPath: {}", outputPath); + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + // Utils.removeOutputDir(spark, outputPath); + organizationMap(spark, inputPath, outputPath); + // relationMap2(spark, inputPath, outputPath); + }); + + } + + private static void relationMap2(SparkSession spark, String inputPath, String outputPath) { + Utils + .readPath(spark, inputPath + "relation", Relation.class) + .filter((FilterFunction) r -> r.getRelType().equalsIgnoreCase("organizationOrganization")) + .map((MapFunction) relation -> { + eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation(); + relNew + 
.setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR)); + relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))); + + relNew + .setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR)); + relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))); + + relNew + .setReltype( + RelType + .newInstance( + relation.getRelClass(), + relation.getSubRelType())); + + Optional odInfo = Optional.ofNullable(relation.getDataInfo()); + if (odInfo.isPresent()) { + DataInfo dInfo = odInfo.get(); + if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() && + Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { + relNew + .setProvenance( + Provenance + .newInstance( + dInfo.getProvenanceaction().getClassname(), + dInfo.getTrust())); + } + } + if (Boolean.TRUE.equals(relation.getValidated())) { + relNew.setValidated(relation.getValidated()); + relNew.setValidationDate(relation.getValidationDate()); + } + + return relNew; + }, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "relation"); + } + + private static void relationMap(SparkSession spark, String inputPath, String outputPath) { + Dataset organization = Utils + .readPath(spark, inputPath + "organization", eu.dnetlib.dhp.schema.oaf.Organization.class); + Dataset rels = Utils.readPath(spark, inputPath + "relation", Relation.class); + organization + .joinWith(rels, organization.col("id").equalTo(rels.col("source")), "left") + .map( + (MapFunction, Relation>) t2 -> t2._2(), + Encoders.bean(Relation.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json("/tmp/orgSource"); + + rels = Utils.readPath(spark, "/tmp/orgSource", Relation.class); + + organization + .joinWith(rels, organization.col("id").equalTo(rels.col("target")), "left") + .map( + (MapFunction, Relation>) t2 -> t2._2(), + Encoders.bean(Relation.class)) + .filter(Objects::nonNull) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json("/tmp/orgSourceTarget"); + + Utils + .readPath(spark, "/tmp/orgSourceTarget", Relation.class) + .map((MapFunction) relation -> { + eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation(); + relNew + .setSource(getEntityId(relation.getSource(), ENTITY_ID_SEPARATOR)); + relNew.setSourceType(ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))); + + relNew + .setTarget(getEntityId(relation.getTarget(), ENTITY_ID_SEPARATOR)); + relNew.setTargetType(ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))); + + relNew + .setReltype( + RelType + .newInstance( + relation.getRelClass(), + relation.getSubRelType())); + + Optional odInfo = Optional.ofNullable(relation.getDataInfo()); + if (odInfo.isPresent()) { + DataInfo dInfo = odInfo.get(); + if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() && + Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) { + relNew + .setProvenance( + Provenance + .newInstance( + dInfo.getProvenanceaction().getClassname(), + dInfo.getTrust())); + } + } + if (Boolean.TRUE.equals(relation.getValidated())) { + relNew.setValidated(relation.getValidated()); + relNew.setValidationDate(relation.getValidationDate()); + } + + return relNew; + }, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class)) + .write() + 
.mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "relation"); + } + + private static void organizationMap(SparkSession spark, String inputPath, String outputPath) { + Utils + .readPath(spark, inputPath + "organization", eu.dnetlib.dhp.schema.oaf.Organization.class) + .map( + (MapFunction) o -> mapOrganization(o), + Encoders.bean(Organization.class)) + .filter((FilterFunction) o -> o != null) + .write() + .mode(SaveMode.Overwrite) + .option(COMPRESSION, GZIP) + .json(outputPath + "/organization"); + } + + private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization( + eu.dnetlib.dhp.schema.oaf.Organization org) { + + Organization organization = new Organization(); + + Optional + .ofNullable(org.getLegalshortname()) + .ifPresent(value -> organization.setLegalshortname(value.getValue())); + + Optional + .ofNullable(org.getLegalname()) + .ifPresent(value -> organization.setLegalname(value.getValue())); + + Optional + .ofNullable(org.getWebsiteurl()) + .ifPresent(value -> organization.setWebsiteurl(value.getValue())); + + Optional + .ofNullable(org.getAlternativeNames()) + .ifPresent( + value -> organization + .setAlternativenames( + value + .stream() + .map(v -> v.getValue()) + .collect(Collectors.toList()))); + + Optional + .ofNullable(org.getCountry()) + .ifPresent( + value -> { + if (!value.getClassid().equals(eu.dnetlib.dhp.oa.graph.dump.complete.Constants.UNKNOWN)) { + organization + .setCountry( + eu.dnetlib.dhp.oa.model.Country.newInstance(value.getClassid(), value.getClassname())); + } + + }); + + Optional + .ofNullable(org.getId()) + .ifPresent(value -> organization.setId(getEntityId(value, ENTITY_ID_SEPARATOR))); + + Optional + .ofNullable(org.getPid()) + .ifPresent( + value -> organization + .setPid( + value + .stream() + .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue())) + .collect(Collectors.toList()))); + + return organization; + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkDumpResult.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkDumpResult.java index 9b082ab..2815468 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkDumpResult.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/subset/SparkDumpResult.java @@ -74,9 +74,6 @@ public class SparkDumpResult implements Serializable { final String resultClassName = parser.get("resultTableName"); log.info("resultTableName: {}", resultClassName); - final String masterDuplicatePath = parser.get("masterDuplicatePath"); - log.info("masterDuplicatePath: {}", masterDuplicatePath); - Optional pathString = Optional.ofNullable(parser.get("pathMap")); HashMap pathMap = null; if (pathString.isPresent()) { @@ -97,13 +94,13 @@ public class SparkDumpResult implements Serializable { run( isSparkSessionManaged, inputPath, outputPath, pathMap, selectionConstraints, inputClazz, - resultType, masterDuplicatePath); + resultType); } private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, HashMap pathMap, SelectionConstraints selectionConstraints, - Class inputClazz, String resultType, String masterDuplicatePath) { + Class inputClazz, String resultType) { SparkConf conf = new SparkConf(); HashMap finalPathMap = pathMap; @@ -116,7 +113,7 @@ public class SparkDumpResult implements Serializable { Utils.removeOutputDir(spark, outputPath + "/dump/" + resultType); resultDump( spark, inputPath, outputPath, inputClazz, finalPathMap, - finalSelectionConstraints, 
resultType, masterDuplicatePath); + finalSelectionConstraints, resultType); }); } @@ -128,17 +125,13 @@ public class SparkDumpResult implements Serializable { Class inputClazz, Map pathMap, SelectionConstraints selectionConstraints, - String resultType, - String masterDuplicatePath) { + String resultType) { - List masterDuplicateList = Utils - .readPath(spark, masterDuplicatePath, MasterDuplicate.class) - .collectAsList(); Utils .readPath(spark, inputPath, inputClazz) .map( (MapFunction) value -> filterResult( - value, pathMap, selectionConstraints, inputClazz, masterDuplicateList, resultType), + value, pathMap, selectionConstraints, inputClazz, resultType), Encoders.bean(inputClazz)) .filter(Objects::nonNull) .write() @@ -163,7 +156,7 @@ public class SparkDumpResult implements Serializable { } private static I filterResult(I value, Map pathMap, - SelectionConstraints selectionConstraints, Class inputClazz, List masterDuplicateList, + SelectionConstraints selectionConstraints, Class inputClazz, String resultType) { Optional odInfo = Optional.ofNullable(value.getDataInfo()); @@ -195,14 +188,7 @@ public class SparkDumpResult implements Serializable { return null; } } - if (Optional.ofNullable(value.getCollectedfrom()).isPresent()) - value.getCollectedfrom().forEach(cf -> update(cf, masterDuplicateList)); - if (Optional.ofNullable(value.getInstance()).isPresent()) { - value.getInstance().forEach(i -> { - update(i.getCollectedfrom(), masterDuplicateList); - update(i.getHostedby(), masterDuplicateList); - }); - } + return value; } @@ -210,13 +196,4 @@ public class SparkDumpResult implements Serializable { return (classid.equals(resultType) || (classid.equals("other") && resultType.equals("otherresearchproduct"))); } - private static void update(KeyValue kv, List masterDuplicateList) { - for (MasterDuplicate md : masterDuplicateList) { - if (md.getDuplicate().equals(kv.getKey())) { - kv.setKey(md.getMaster()); - return; - } - } - } - } diff --git a/dump/src/main/java/eu/dnetlib/dhp/skgif/ExtendResult.java b/dump/src/main/java/eu/dnetlib/dhp/skgif/ExtendResult.java index d7e3d69..09647df 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/skgif/ExtendResult.java +++ b/dump/src/main/java/eu/dnetlib/dhp/skgif/ExtendResult.java @@ -1,11 +1,11 @@ + package eu.dnetlib.dhp.skgif; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.skgif.model.RelationType; -import eu.dnetlib.dhp.skgif.model.Relations; -import eu.dnetlib.dhp.skgif.model.ResearchProduct; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; +import java.util.*; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -14,113 +14,121 @@ import org.apache.spark.sql.*; import org.apache.spark.sql.types.StructType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.skgif.model.RelationType; +import eu.dnetlib.dhp.skgif.model.Relations; +import eu.dnetlib.dhp.skgif.model.ResearchProduct; import scala.Tuple2; -import java.io.Serializable; -import java.util.*; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - /** * @author miriam.baglioni * @Date 05/09/23 */ public class 
ExtendResult implements Serializable { - private static final Logger log = LoggerFactory.getLogger(ExtendResult.class); + private static final Logger log = LoggerFactory.getLogger(ExtendResult.class); - public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - PrepareResultRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/extend_result_parameters.json")); + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + PrepareResultRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/extend_result_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); + SparkConf conf = new SparkConf(); - SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + extendResult(spark, inputPath, outputPath); + }); + } - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - extendResult(spark, inputPath, outputPath); - }); - } + private static void extendResult(SparkSession spark, String inputPath, String outputPath) { + ObjectMapper mapper = new ObjectMapper(); + Dataset result = spark + .read() + .json(inputPath + "/result") + .as(Encoders.bean(ResearchProduct.class)); + final StructType structureSchema = new StructType() + .fromDDL("`resultId` STRING, `target` STRING, `resultClass` STRING"); - private static void extendResult(SparkSession spark, String inputPath, String outputPath) { - ObjectMapper mapper = new ObjectMapper(); - Dataset result = spark.read().json(inputPath + "/result") - .as(Encoders.bean(ResearchProduct.class)); - final StructType structureSchema = new StructType() - .fromDDL("`resultId` STRING, `target` STRING, `resultClass` STRING"); + Dataset relations = spark + .read() + .schema(structureSchema) + .json(inputPath + "/preparedRelations"); - Dataset relations = spark - .read() - .schema(structureSchema) - .json(inputPath + "/preparedRelations"); + result + .joinWith( + relations, result + .col("localIdentifier") + .equalTo(relations.col("resultId")), + "left") + .groupByKey( + (MapFunction, String>) t2 -> t2._1().getLocalIdentifier(), + Encoders.STRING()) + .mapGroups((MapGroupsFunction, ResearchProduct>) (key, it) -> { + Tuple2 first = it.next(); + ResearchProduct rp = first._1(); + addRels(rp, first._2()); + it.forEachRemaining(t2 -> addRels(rp, t2._2())); + return rp; + }, Encoders.bean(ResearchProduct.class)) + 
.map((MapFunction<ResearchProduct, String>) r -> mapper.writeValueAsString(r), Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(outputPath); - result.joinWith(relations, result.col("localIdentifier") - .equalTo(relations.col("resultId")), "left") - .groupByKey((MapFunction, String>)t2 -> t2._1().getLocalIdentifier(),Encoders.STRING() ) - .mapGroups((MapGroupsFunction, ResearchProduct>) (key,it) -> { - Tuple2 first = it.next(); - ResearchProduct rp = first._1(); - addRels(rp, first._2()); - it.forEachRemaining(t2 -> addRels(rp, t2._2())); - return rp; - }, Encoders.bean(ResearchProduct.class)) - .map((MapFunction) r -> mapper.writeValueAsString(r), Encoders.STRING()) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .text(outputPath); + } - } + private static void addRels(ResearchProduct rp, Row row) { + String relClass = row.getAs("relClass"); + Map<String, List<String>> relations = new HashMap<>(); + if (relClass.equals(RelationType.OUTCOME.label)) { + if (!Optional.ofNullable(rp.getFunding()).isPresent()) { + rp.setFunding(new ArrayList<>()); + } + rp.getFunding().add(row.getAs("target")); + } else if (relClass.equals(RelationType.AFFILIATION.label)) { + if (!Optional.ofNullable(rp.getRelevantOrganizations()).isPresent()) + rp.setRelevantOrganizations(new ArrayList<>()); + rp.getRelevantOrganizations().add(row.getAs("target")); + } else { + if (!relations.containsKey(relClass)) { + relations.put(relClass, new ArrayList<>()); + } + relations.get(relClass).add(row.getAs("target")); + } + if (relations.size() > 0) { + if (!Optional.ofNullable(rp.getRelatedProducts()).isPresent()) + rp.setRelatedProducts(new ArrayList<>()); + for (String key : relations.keySet()) { + Relations rel = new Relations(); + rel.setRelationType(key); + rel.setProductList(relations.get(key)); + rp.getRelatedProducts().add(rel); + } + } - private static void addRels(ResearchProduct rp, Row row) { - String relClass = row.getAs("relClass"); - Map> relations = new HashMap<>(); - if(relClass.equals(RelationType.OUTCOME.label)){ - if(!Optional.ofNullable(rp.getFunding()).isPresent()){ - rp.setFunding(new ArrayList<>()); - } - rp.getFunding().add(row.getAs("target")); - }else if(relClass.equals(RelationType.AFFILIATION)){ - if(!Optional.ofNullable(rp.getRelevantOrganizations()).isPresent()) - rp.setRelevantOrganizations(new ArrayList<>()); - rp.getRelevantOrganizations().add(row.getAs("target")); - }else{ - if(!relations.containsKey(relClass)){ - relations.put(relClass, new ArrayList<>()); - } - relations.get(relClass).add(row.getAs("target")); - } - if(relations.size() > 0) { - rp.setRelatedProducts(new ArrayList<>()); - for (String key: relations.keySet()){ - Relations rel = new Relations(); - rel.setRelationType(key); - rel.setProductList(relations.get(key)); - } - } - - - - } + } }
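addRels above buckets relation targets by relation class and wraps each bucket in a Relations bean; the version in this hunk also attaches every bucket to the product, where the previous revision built the bean and then discarded it. The bucketing itself is plain map grouping; a self-contained sketch with invented values:

    import java.util.*;

    public class BucketingSketch {
        public static void main(String[] args) {
            // group targets by relation class, as addRels does per row
            Map<String, List<String>> buckets = new HashMap<>();
            String[][] rows = {
                    { "IsSupplementedBy", "product-1" },
                    { "IsSupplementedBy", "product-2" },
                    { "IsPartOf", "product-3" } };
            for (String[] row : rows)
                buckets.computeIfAbsent(row[0], k -> new ArrayList<>()).add(row[1]);
            buckets.forEach((type, targets) -> System.out.println(type + " -> " + targets));
            // prints (order may vary):
            // IsSupplementedBy -> [product-1, product-2]
            // IsPartOf -> [product-3]
        }
    }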
diff --git a/dump/src/main/java/eu/dnetlib/dhp/skgif/JournalsFromDatasources.java b/dump/src/main/java/eu/dnetlib/dhp/skgif/JournalsFromDatasources.java new file mode 100644 index 0000000..30b365c --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/skgif/JournalsFromDatasources.java @@ -0,0 +1,77 @@ +package eu.dnetlib.dhp.skgif; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.skgif.model.ResearchProduct; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +/** + * @author miriam.baglioni + * @Date 06/02/24 + */ +public class JournalsFromDatasources implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(JournalsFromDatasources.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + PrepareResultRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/journals_from_datasource_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + final String datasourcePath = parser.get("datasourcePath"); + log.info("datasourcePath: {}", datasourcePath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + extendResult(spark, inputPath, outputPath, datasourcePath); + }); + } + + // find the results having a container in the metadata + // map all the hostedby.key values of the instances associated with the result + // find a correspondence to a datasource which is a journal + // write the bibliographic information for the result + public static void extendResult(SparkSession spark, String inputPath, String outputPath, String datasourcePath) { + Dataset<Datasource> datasource = Utils.readPath(spark, datasourcePath, Datasource.class) + .filter((FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() && + d.getEosctype().getClassname().equalsIgnoreCase("Journal archive")); + + Dataset<ResearchProduct> results = Utils.readPath(spark, inputPath, ResearchProduct.class); + + + } +}
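extendResult in the new JournalsFromDatasources stops after loading the two datasets; the comments describe the intended lookup of each result's hosting datasource among the journal datasources. One possible shape for that join is sketched below — the column names ("hostedbyKey", "id") and the write step are assumptions, not the module's verified schema:

    // hypothetical continuation of extendResult (imports elided):
    // keep journal metadata next to each result hosted by a journal archive
    Dataset<Row> withJournal = results
            .join(datasource, results.col("hostedbyKey").equalTo(datasource.col("id")), "inner");
    withJournal
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);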
- public static void main(String[] args) throws Exception { - String jsonConfiguration = IOUtils - .toString( - PrepareResultRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/result_relation_parameters.json")); + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + PrepareResultRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/result_relation_parameters.json")); - final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); - parser.parseArgument(args); + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String inputPath = parser.get("sourcePath"); - log.info("inputPath: {}", inputPath); + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); - final String outputPath = parser.get("outputPath"); - log.info("outputPath: {}", outputPath); + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); - SparkConf conf = new SparkConf(); + SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - Utils.removeOutputDir(spark, outputPath); - prepareResultRelationList(spark, inputPath, outputPath); - }); - } + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + Utils.removeOutputDir(spark, outputPath); + prepareResultRelationList(spark, inputPath, outputPath); + }); + } - private static void prepareResultRelationList(SparkSession spark, String inputPath, String outputPath) { - final StructType structureSchema = new StructType() - .fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>"); - Dataset relation = spark.read().json(inputPath).as(Encoders.bean(Relation.class)) - .filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true") - .filter("relClass == 'hasAuthorInstitution' or relClass == 'outcome' or " + - "relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' " + - "relClass == IsNewVersionOf"); + private static void prepareResultRelationList(SparkSession spark, String inputPath, String outputPath) { + final StructType structureSchema = new StructType() + .fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>"); + Dataset relation = spark + .read() + .json(inputPath) + .as(Encoders.bean(Relation.class)) + .filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true") + .filter( + "relClass == 'hasAuthorInstitution' or relClass == 'outcome' or " + + "relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' or " + + "relClass == 'IsNewVersionOf'"); - org.apache.spark.sql.Dataset df = spark.createDataFrame(new ArrayList(), structureSchema); - List entities = Arrays - .asList( - "publication", "dataset", "otherresearchproduct", "software"); - for (String e : entities) - df = df - .union( - spark - .read() - .schema(structureSchema) - .json(inputPath + "/" + e)
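Because the surviving relClass filter is assembled by string concatenation, a dropped quote or a missing 'or' silently changes its meaning (Spark would try to resolve IsNewVersionOf as a column rather than a string literal). An equivalent, less fragile formulation, sketched here with the typed Column API:

    // Same selection as the SQL-string predicate, expressed with Column.isin.
    relation = relation
        .filter(
            org.apache.spark.sql.functions
                .col("relClass")
                .isin(
                    "hasAuthorInstitution", "outcome", "IsSupplementedBy",
                    "IsDocumentedBy", "IsPartOf", "IsNewVersionOf"));

-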
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")); + org.apache.spark.sql.Dataset df = spark.createDataFrame(new ArrayList(), structureSchema); + List entities = Arrays + .asList( + "publication", "dataset", "otherresearchproduct", "software"); + for (String e : entities) + df = df + .union( + spark + .read() + .schema(structureSchema) + .json(inputPath + "/" + e) + .filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")); - relation.joinWith(df, relation.col("source").equalTo(df.col("id"))) - .select( - new Column("id").as("resultId"), - new Column("target"), - new Column("relClass")) - .write() - .mode(SaveMode.Overwrite) - .option("compression","gzip") - .json(outputPath); - - - } + relation + .joinWith(df, relation.col("source").equalTo(df.col("id"))) + .select( + new Column("id").as("resultId"), + new Column("target"), + new Column("relClass")) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath); + } } diff --git a/dump/src/main/java/eu/dnetlib/dhp/skgif/ResultMapper.java b/dump/src/main/java/eu/dnetlib/dhp/skgif/ResultMapper.java index e5e927a..730c679 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/skgif/ResultMapper.java +++ b/dump/src/main/java/eu/dnetlib/dhp/skgif/ResultMapper.java @@ -1,5 +1,18 @@ + package eu.dnetlib.dhp.skgif; +import static eu.dnetlib.dhp.oa.graph.dump.ResultMapper.*; + +import java.io.Serializable; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import javax.management.RuntimeErrorException; import eu.dnetlib.dhp.oa.graph.dump.csv.AuthorResult; import eu.dnetlib.dhp.oa.model.ResultPid; @@ -13,173 +26,166 @@ import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; import scala.reflect.internal.Trees; -import javax.management.RuntimeErrorException; -import java.io.Serializable; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -import static eu.dnetlib.dhp.oa.graph.dump.ResultMapper.*; - - /** * @author miriam.baglioni * @Date 01/09/23 */ public class ResultMapper implements Serializable { - public static ResearchProduct map( - E input) - throws Exception{ + public static ResearchProduct map( + E input) + throws Exception { - ResearchProduct out = new ResearchProduct(); + ResearchProduct out = new ResearchProduct(); - Optional ort = Optional.ofNullable(input.getResulttype()); - if (ort.isPresent()) { - try { - out.setLocalIdentifier(input.getId()); - mapPid(out, input); - mapTitle(out, input); - mapAbstract(out, input); - mapType(out, input); - mapTopic(out, input); - mapContribution(out, input); + Optional ort = Optional.ofNullable(input.getResulttype()); + if (ort.isPresent()) { + try { + out.setLocalIdentifier(input.getId()); + mapPid(out, input); + mapTitle(out, input); + mapAbstract(out, input); + mapType(out, input); + mapTopic(out, input); + mapContribution(out, input); - if(!Optional.ofNullable(out.getTitles()).isPresent() || - !Optional.ofNullable(out.getContributions()).isPresent()) - return null; - //TODO map the manifestation directly from the instances - //it is not completed - mapManifestation(out, input); + if (!Optional.ofNullable(out.getTitles()).isPresent() || + !Optional.ofNullable(out.getContributions()).isPresent()) + return null; + // 
TODO map the manifestation directly from the instances + // it is not completed + mapManifestation(out, input); - - //TODO extend the mapping to consider relations between these entities and the results + // TODO extend the mapping to consider relations between these entities and the results // private List relevantOrganizations; // private List funding; // private List relatedProducts; - } catch (ClassCastException cce) { - return null; - } - } + } catch (ClassCastException cce) { + return null; + } + return out; + } - return null; + return null; - } + } - private static void mapManifestation(ResearchProduct out, E input) { - out.setManifestations( input.getInstance().stream().parallel() - .map(i -> { - try { - return getManifestation(i); - } catch (MalformedURLException e) { - throw new RuntimeException(e); - } - }).collect(Collectors.toList())); - } + private static void mapManifestation(ResearchProduct out, E input) { + out + .setManifestations( + input + .getInstance() + .stream() + .parallel() + .map(i -> { + try { + return getManifestation(i); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + }) + .collect(Collectors.toList())); + } - private static Manifestation getManifestation(Instance i) throws MalformedURLException { - Manifestation manifestation = new Manifestation(); - manifestation.setProductLocalType(i.getInstancetype().getClassname()); - manifestation.setProductLocalTypeSchema(i.getInstancetype().getSchemename()); - Dates dates = new Dates(); - dates.setType("publishing"); - dates.setValue(i.getDateofacceptance().getValue()); - manifestation.setDates(Arrays.asList(dates)); - switch(i.getRefereed().getClassid()){ - case "0000": - manifestation.setPeerReview(PeerReview.UNAVAILABLE.label); - break; - case "0001": - manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label); - break; - case "0002": - manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label); - break; - } - manifestation.setMetadataCuration(MetadataCuration.UNAVAILABLE.label); - //TODO filter out the URL that refer to pids. If nothing remains, decide what to do - manifestation.setUrl(new URL(i.getUrl().get(0))); - if(Optional.ofNullable(i.getPid()).isPresent()){ - manifestation.setPid(i.getPid().get(0).getValue()); - } - switch(i.getAccessright().getClassid()){ - case"OPEN": - case"OPEN DATA": - case "OPEN SOURCE": - manifestation.setAccessRight(AccessRight.OPEN.label); - break; - case "CLOSED": - manifestation.setAccessRight(AccessRight.CLOSED.label); - break; - case "RESTRICTED": - manifestation.setAccessRight(AccessRight.RESTRICTED.label); - break; - case"EMBARGO": - case "12MONTHS": - case "6MONTHS": - manifestation.setAccessRight(AccessRight.EMBARGO.label); - break; - default: - manifestation.setAccessRight(AccessRight.UNAVAILABLE.label); - } - if(Optional.ofNullable(i.getLicense()).isPresent()) - manifestation.setLicence(i.getLicense().getValue()); - //TODO to fill the biblio in case it is a journal, we need to join with the datasource and verify the type - Biblio biblio = null; - manifestation.setHostingDatasource(i.getHostedby().getKey()); - //TODO verify if the result is published in ojournal or conferences. In that case the venue is the identifier - //of the journal/conference. 
In case it is not, the venue is the datasource - if(biblio == null){ - manifestation.setVenue(i.getHostedby().getKey()); - }else{ - manifestation.setVenue("insert the id of the venue"); - } - return manifestation; - } + private static Manifestation getManifestation(Instance i) throws MalformedURLException { + Manifestation manifestation = new Manifestation(); + manifestation.setProductLocalType(i.getInstancetype().getClassname()); + manifestation.setProductLocalTypeSchema(i.getInstancetype().getSchemename()); + Dates dates = new Dates(); + dates.setType("publishing"); + dates.setValue(i.getDateofacceptance().getValue()); + manifestation.setDates(Arrays.asList(dates)); + switch (i.getRefereed().getClassid()) { + case "0000": + manifestation.setPeerReview(PeerReview.UNAVAILABLE.label); + break; + case "0001": + manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label); + break; + case "0002": + manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label); + break; + } + manifestation.setMetadataCuration(MetadataCuration.UNAVAILABLE.label); + // TODO filter out the URLs that refer to pids. If nothing remains, decide what to do + manifestation.setUrl(new URL(i.getUrl().get(0))); + if (Optional.ofNullable(i.getPid()).isPresent()) { + manifestation.setPid(i.getPid().get(0).getValue()); + } + switch (i.getAccessright().getClassid()) { + case "OPEN": + case "OPEN DATA": + case "OPEN SOURCE": + manifestation.setAccessRight(AccessRight.OPEN.label); + break; + case "CLOSED": + manifestation.setAccessRight(AccessRight.CLOSED.label); + break; + case "RESTRICTED": + manifestation.setAccessRight(AccessRight.RESTRICTED.label); + break; + case "EMBARGO": + case "12MONTHS": + case "6MONTHS": + manifestation.setAccessRight(AccessRight.EMBARGO.label); + break; + default: + manifestation.setAccessRight(AccessRight.UNAVAILABLE.label); + } + if (Optional.ofNullable(i.getLicense()).isPresent()) + manifestation.setLicence(i.getLicense().getValue()); + // TODO to fill the biblio in case it is a journal, we need to join with the datasource and verify the type + Biblio biblio = null; + manifestation.setHostingDatasource(i.getHostedby().getKey()); + // TODO verify if the result is published in a journal or a conference. In that case the venue is the identifier + // of the journal/conference. 
In case it is not, the venue is the datasource + if (biblio == null) { + manifestation.setVenue(i.getHostedby().getKey()); + } else { + manifestation.setVenue("insert the id of the venue"); + } + return manifestation; + } - private static Tuple2 getOrcid(List pid) { - if (!Optional.ofNullable(pid).isPresent()) - return null; - if (pid.size() == 0) - return null; - for (StructuredProperty p : pid) { - if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) { - return new Tuple2<>(p.getValue(), Boolean.TRUE); - } - } - for (StructuredProperty p : pid) { - if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) { - return new Tuple2<>(p.getValue(), Boolean.FALSE); - } - } - return null; - } - private static void mapContribution(ResearchProduct out, E input) { - if(Optional.ofNullable(input.getAuthor()).isPresent()){ - int count = 0; - for (Author a : input.getAuthor()) { - count += 1; - Contribution contribution = new Contribution(); - if(Optional.ofNullable(a.getPid()).isPresent()){ - Tuple2 orcid = getOrcid(a.getPid()); - if(orcid != null){ - contribution.setPerson(DHPUtils.md5(orcid._1() + orcid._2())); - }else{ - if(Optional.ofNullable(a.getRank()).isPresent()){ - contribution.setPerson(DHPUtils.md5(input.getId() + a.getRank())); - }else{ - contribution.setPerson(DHPUtils.md5(input.getId() + count)); - } + private static Tuple2 getOrcid(List pid) { + if (!Optional.ofNullable(pid).isPresent()) + return null; + if (pid.size() == 0) + return null; + for (StructuredProperty p : pid) { + if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) { + return new Tuple2<>(p.getValue(), Boolean.TRUE); + } + } + for (StructuredProperty p : pid) { + if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) { + return new Tuple2<>(p.getValue(), Boolean.FALSE); + } + } + return null; + } - } - } - } - } + private static void mapContribution(ResearchProduct out, E input) { + if (Optional.ofNullable(input.getAuthor()).isPresent()) { + int count = 0; + for (Author a : input.getAuthor()) { + count += 1; + Contribution contribution = new Contribution(); + if (Optional.ofNullable(a.getPid()).isPresent()) { + Tuple2 orcid = getOrcid(a.getPid()); + if (orcid != null) { + contribution.setPerson("person______::"+DHPUtils.md5(orcid._1() + orcid._2())); + } else { + if (Optional.ofNullable(a.getRank()).isPresent()) { + contribution.setPerson("person______::"+DHPUtils.md5(input.getId() + a.getRank())); + } else { + contribution.setPerson("tmp_person__::"+DHPUtils.md5(input.getId() + count)); + } + + } + } + } + } // "contributions": [ // { @@ -189,94 +195,93 @@ public class ResultMapper implements Serializable { // "roles": ["writing-original-draft", "conceptualization"] // } // ] - } + } - private static void mapTopic(ResearchProduct out, E input) { - if(Optional.ofNullable(input.getSubject()).isPresent()){ - out.setTopics(input.getSubject().stream().parallel().map(s -> { - Topic topic = new Topic(); - topic.setTopic(getIdentifier(s)); - Provenance provenance = new Provenance(); - provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust())); - provenance.setType(s.getDataInfo().getInferenceprovenance()); - topic.setProvenance(provenance); - return topic; - }).collect(Collectors.toList())); - } - } + private static void mapTopic(ResearchProduct out, E input) { + if (Optional.ofNullable(input.getSubject()).isPresent()) { + out.setTopics(input.getSubject().stream().parallel().map(s -> { + Topic topic = new Topic(); + topic.setTopic(getIdentifier(s)); + Provenance 
provenance = new Provenance(); + provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust())); + provenance.setType(s.getDataInfo().getInferenceprovenance()); + topic.setProvenance(provenance); + return topic; + }).collect(Collectors.toList())); + } + } - private static String getIdentifier(StructuredProperty s) { - return DHPUtils.md5(s.getQualifier().getClassid() + s.getValue()); - } + private static String getIdentifier(StructuredProperty s) { + return DHPUtils.md5(s.getQualifier().getClassid() + s.getValue()); + } - private static void mapType(ResearchProduct out, E input) throws NoAllowedTypeException{ - switch (input.getResulttype().getClassid()){ - case "publication": - out.setProductType(ResearchTypes.LITERATURE.label); - break; - case"dataset": - out.setProductType(ResearchTypes.RESEARCH_DATA.label); - break; - case"software": - out.setProductType(ResearchTypes.RESEARCH_SOFTWARE.label); - break; - case"other": - out.setProductType(ResearchTypes.OTHER.label); - break; - default: - throw new ClassCastException("Result type not present or not allowed"); - } - } + private static void mapType(ResearchProduct out, E input) throws NoAllowedTypeException { + switch (input.getResulttype().getClassid()) { + case "publication": + out.setProductType(ResearchTypes.LITERATURE.label); + break; + case "dataset": + out.setProductType(ResearchTypes.RESEARCH_DATA.label); + break; + case "software": + out.setProductType(ResearchTypes.RESEARCH_SOFTWARE.label); + break; + case "other": + out.setProductType(ResearchTypes.OTHER.label); + break; + default: + throw new ClassCastException("Result type not present or not allowed"); + } + } - private static void mapPid(ResearchProduct out, Result input) { - Optional - .ofNullable(input.getPid()) - .ifPresent( - value -> out - .setIdentifiers( - value - .stream() - .map( - p -> { - Identifier identifier = new Identifier(); - identifier.setValue(p.getValue()); - identifier.setScheme(p.getQualifier().getSchemeid()); - return identifier; - }) - .collect(Collectors.toList()))); - } + private static void mapPid(ResearchProduct out, Result input) { + Optional + .ofNullable(input.getPid()) + .ifPresent( + value -> out + .setIdentifiers( + value + .stream() + .map( + p -> { + Identifier identifier = new Identifier(); + identifier.setValue(p.getValue()); + identifier.setScheme(p.getQualifier().getSchemeid()); + return identifier; + }) + .collect(Collectors.toList()))); + } - private static void mapTitle(ResearchProduct out, Result input) throws NoTitleFoundException { - Optional> otitle = Optional.ofNullable(input.getTitle()); - if (otitle.isPresent()) { - List iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setTitles(Arrays.asList(iTitle.get(0).getValue())); - return; - } + private static void mapTitle(ResearchProduct out, Result input) throws NoTitleFoundException { + Optional> otitle = Optional.ofNullable(input.getTitle()); + if (otitle.isPresent()) { + List iTitle = otitle + .get() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) + .collect(Collectors.toList()); + if (!iTitle.isEmpty()) { + out.setTitles(Arrays.asList(iTitle.get(0).getValue())); + return; + } - iTitle = otitle - .get() - .stream() - .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) - .collect(Collectors.toList()); - if (!iTitle.isEmpty()) { - out.setTitles(Arrays.asList(iTitle.get(0).getValue())); - } + 
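The person and topic identifiers built in the hunks above are deterministic md5 digests, so the same input always converges on the same local id. A small illustration with hypothetical values:

    // Hypothetical ORCID: the same pid always produces the same person id,
    // letting contributions from different results point at one person record.
    String orcid = "0000-0002-1825-0097";
    String personId = "person______::" + DHPUtils.md5(orcid + Boolean.TRUE);

    // Topic ids digest the subject scheme (classid) concatenated with its value.
    String topicId = DHPUtils.md5("FOS" + "01 natural sciences");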
iTitle = otitle + .get() + .stream() + .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) + .collect(Collectors.toList()); + if (!iTitle.isEmpty()) { + out.setTitles(Arrays.asList(iTitle.get(0).getValue())); + } + } + } - } - } - - private static void mapAbstract(ResearchProduct out, Result input) { - final List descriptionList = new ArrayList<>(); - Optional - .ofNullable(input.getDescription()) - .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); - out.setAbstracts(descriptionList); - } + private static void mapAbstract(ResearchProduct out, Result input) { + final List descriptionList = new ArrayList<>(); + Optional + .ofNullable(input.getDescription()) + .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue()))); + out.setAbstracts(descriptionList); + } } diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/config-default.xml new file mode 100644 index 0000000..e5ec3d0 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/workflow.xml new file mode 100644 index 0000000..47420ed --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph/oozie_app/workflow.xml @@ -0,0 +1,110 @@ + + + + + outputPath + the output path + + + hivePath + the path of the hive database content to copy + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + yarn + cluster + Copy graph + eu.dnetlib.dhp.oa.graph.dump.SparkCopyGraph + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --hivePath${hivePath} + --outputPath${outputPath} + + + + + + + \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph_parameters.json new file mode 100644 index 0000000..cf76eb0 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/copygraph_parameters.json @@ -0,0 +1,21 @@ +[ + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store temporary output files", + "paramRequired": true + }, + { + "paramName":"hp", + "paramLongName":"hivePath", + "paramDescription": "the path of the hive database content to copy", + "paramRequired": true + }, + { + "paramName":"issm", + "paramLongName":"isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false +} +] + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml index 5d62bd9..0abe7cb 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sourcePath @@ -88,7 +88,6 @@ eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml index bacf0a3..c29798d 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml @@ -81,7 +81,6 @@ eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities --outputPath${outputPath}/community --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} --communities${communities} @@ -143,7 +142,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --workingPath${outputPath}/workingDir --resultTypepublication @@ -169,7 +167,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --workingPath${outputPath}/workingDir --resultTypedataset @@ -195,7 +192,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --workingPath${outputPath}/workingDir --resultTypeotherresearchproduct @@ -221,7 +217,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --workingPath${outputPath}/workingDir --resultTypesoftware @@ -252,9 +247,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --workingPath${outputPath}/workingDir - --outputPath${outputPath} - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json index 225819f..31d8619 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json @@ -1,12 +1,6 @@ [ - { - "paramName":"is", - "paramLongName":"isLookUpUrl",
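The copygraph parameter file above follows the same ArgumentApplicationParser convention used by every driver in this patch; a sketch of how SparkCopyGraph would presumably read it (the class body itself is not part of this diff):

    // Sketch only: reading the three copygraph parameters.
    final ArgumentApplicationParser parser = new ArgumentApplicationParser(
        IOUtils
            .toString(
                SparkCopyGraph.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/copygraph_parameters.json")));
    parser.parseArgument(args);

    Boolean isSparkSessionManaged = Optional
        .ofNullable(parser.get("isSparkSessionManaged"))
        .map(Boolean::valueOf)
        .orElse(Boolean.TRUE);
    String hivePath = parser.get("hivePath");
    String outputPath = parser.get("outputPath");

-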
"paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, { "paramName":"nn", "paramLongName":"nameNode", diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json index 2f89c84..00376d9 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_dump_csv_ste1.json @@ -13,13 +13,6 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": true }, - - { - "paramName":"ilu", - "paramLongName":"isLookUpUrl", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": true - }, { "paramName":"c", "paramLongName":"communities", diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json index 9946e94..ba359ce 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json @@ -1,11 +1,6 @@ [ - { - "paramName":"is", - "paramLongName":"isLookUpUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": false - }, + { "paramName": "hdfs", "paramLongName": "hdfsPath", @@ -17,12 +12,8 @@ "paramLongName": "nameNode", "paramDescription": "the name node", "paramRequired": true - },{ - "paramName": "md", - "paramLongName": "masterDuplicate", - "paramDescription": "the master duplicate path for datasource deduplication", - "paramRequired": false -} + } + ] diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json index c27a923..70abce6 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json @@ -1,11 +1,6 @@ [ - { - "paramName":"ocm", - "paramLongName":"organizationCommunityMap", - "paramDescription": "the organization community map association", - "paramRequired": false - }, + { "paramName":"s", "paramLongName":"sourcePath", diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/config-default.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/config-default.xml new file mode 100644 index 0000000..d262cb6 --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/workflow.xml new file mode 100644 index 0000000..41b0ebe --- /dev/null +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/organizationonly/oozie_app/workflow.xml @@ -0,0 +1,88 @@ + + + + + sparkDriverMemory + memory for driver process + + + 
sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + yarn + cluster + Dump table organization and related relations + eu.dnetlib.dhp.oa.graph.dump.organizationonly.SparkDumpOrganizationJob + dump-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath}/organization + --resultTableNameeu.dnetlib.dhp.schema.oaf.Organization + --outputPath${workingDir}/organization + --communityMapPathnoneed + + + + + + + \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml index 32226fc..fccbc3d 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml @@ -27,10 +27,6 @@ sourcePath the source path - - isLookUpUrl - the isLookup service endpoint - outputPath the output path @@ -67,11 +63,7 @@ none the depositionId of a deposition open that has to be added content - - organizationCommunityMap - none - the organization community map - + hiveDbName the target hive database name @@ -167,7 +159,6 @@ eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} --singleDeposition${singleDeposition} --communityId${communityId} @@ -175,6 +166,8 @@ + + ${wf:conf('dumpType') eq "funder"} @@ -203,22 +196,6 @@ sourcePath ${sourcePath} - - organizationCommunityMap - ${organizationCommunityMap} - - - isLookUpUrl - ${isLookUpUrl} - - - pathMap - ${pathMap} - - - selectionCriteria - ${selectionCriteria} - @@ -244,14 +221,7 @@ sourcePath ${sourcePath} - - organizationCommunityMap - ${organizationCommunityMap} - - - isLookUpUrl - ${isLookUpUrl} - + resultAggregation ${resultAggregation} @@ -327,17 +297,6 @@ - - - - - - - - - - - eu.dnetlib.dhp.oa.graph.dump.MakeTar diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml index 4b9983b..f744f6d 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml +++ 
b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml @@ -12,10 +12,7 @@ resultAggregation true if all the result type have to be dumped under result. false otherwise - - organizationCommunityMap - the organization community map - + hiveDbName @@ -85,20 +82,8 @@ - + - - - eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB - --hdfsPath${workingDir}/masterduplicate - --hdfsNameNode${nameNode} - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} - - - - @@ -349,7 +334,6 @@ eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities --hdfsPath${outputPath}/communities_infrastructures/community_infrastructure.json.gz --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} @@ -360,8 +344,6 @@ eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation --hdfsPath${workingDir}/relation/context --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - --masterDuplicate${workingDir}/masterduplicate @@ -386,7 +368,6 @@ --sourcePath${sourcePath}/relation --outputPath${workingDir}/relation/contextOrg - --organizationCommunityMap${organizationCommunityMap} --communityMapPath${communityMapPath} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml index ec18aaf..837a844 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app/workflow.xml @@ -8,10 +8,6 @@ outputPath the output path - - organizationCommunityMap - the organization community map - pathMap the path where to find the elements involved in the constraints within the json @@ -92,7 +88,7 @@ - + @@ -138,7 +134,6 @@ --pathMap${pathMap} --selectionCriteria${selectionCriteria} --resultTypepublication - --masterDuplicatePath${workingDir}/masterduplicate @@ -168,7 +163,6 @@ --pathMap${pathMap} --selectionCriteria${selectionCriteria} --resultTypedataset - --masterDuplicatePath${workingDir}/masterduplicate @@ -197,7 +191,6 @@ --pathMap${pathMap} --selectionCriteria${selectionCriteria} --resultTypeotherresearchproduct - --masterDuplicatePath${workingDir}/masterduplicate @@ -226,7 +219,6 @@ --pathMap${pathMap} --selectionCriteria${selectionCriteria} --resultTypesoftware - --masterDuplicatePath${workingDir}/masterduplicate @@ -612,7 +604,6 @@ eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities --hdfsPath${workingDir}/context/community_infrastructure.json.gz --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} @@ -648,8 +639,6 @@ eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation --hdfsPath${workingDir}/dump/relation/context --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - --masterDuplicate${workingDir}/masterduplicate @@ -674,7 +663,6 @@ --sourcePath${sourcePath}/relation --outputPath${workingDir}/dump/relation/contextOrg - --organizationCommunityMap${organizationCommunityMap} --communityMapPath${communityMapPath} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq deleted file mode 100644 index 620955c..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq +++ /dev/null @@ -1,8 +0,0 @@ -for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') -where 
$x//CONFIGURATION/context[./@type='community' or ./@type='ri'] -and ($x//context/param[./@name = 'status']/text() = 'all') -return - -{$x//CONFIGURATION/context/@id} -{$x//CONFIGURATION/context/@label} - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq deleted file mode 100644 index 7b470ca..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq +++ /dev/null @@ -1,11 +0,0 @@ -for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') -where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] -and (%s) -return - -{$x//CONFIGURATION/context/@id} -{$x//CONFIGURATION/context/@label} - -{$x//CONFIGURATION/context/param[@name='description']/text()} - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq deleted file mode 100644 index 4f257a6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq +++ /dev/null @@ -1,8 +0,0 @@ -for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') -where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] -and $x//CONFIGURATION/context[./@id=%s] -return - -{$x//CONFIGURATION/context/@id} -{$x//CONFIGURATION/context/@label} - \ No newline at end of file diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java deleted file mode 100644 index 8391c89..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ /dev/null @@ -1,117 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump; - -import static org.mockito.Mockito.lenient; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import org.dom4j.DocumentException; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.xml.sax.SAXException; - -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -@ExtendWith(MockitoExtension.class) -class QueryInformationSystemTest { - - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and ($x//context/param[./@name = 'status']/text() = 'all') " - + - " return " + - " " + - "{$x//CONFIGURATION/context/@id}" + - "{$x//CONFIGURATION/context/@label}" + - ""; - - List communityMap = Arrays - .asList( - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ""); - - @Mock - private ISLookUpService isLookUpService; - - private QueryInformationSystem queryInformationSystem; - - private Map map; - - @BeforeEach - public void setUp() throws ISLookUpException, DocumentException, SAXException { - lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); - queryInformationSystem = new QueryInformationSystem(); - 
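The XQueries deleted above selected community and ri contexts (the first of them only those whose status parameter is 'all'); against the REST model the same selection becomes a plain stream filter over the communities list fetched as sketched earlier, using the CommunityModel accessors getStatus, getType, getId and getName:

    // Sketch: rebuild the id -> label map the deleted XQueries used to return.
    Map<String, String> communityMap = communities
        .stream()
        .filter(c -> "all".equals(c.getStatus()))
        .filter(c -> "community".equals(c.getType()) || "ri".equals(c.getType()))
        .collect(Collectors.toMap(CommunityModel::getId, CommunityModel::getName));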
queryInformationSystem.setIsLookUp(isLookUpService); - map = queryInformationSystem.getCommunityMap(false, null); - } - - @Test - void testSize() throws ISLookUpException { - - Assertions.assertEquals(23, map.size()); - } - - @Test - void testContent() { - Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation")); - - Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7")); - Assertions.assertTrue(map.containsKey("fet-h2020") && map.get("fet-h2020").equals("FET H2020")); - Assertions.assertTrue(map.containsKey("clarin") && map.get("clarin").equals("CLARIN")); - Assertions.assertTrue(map.containsKey("rda") && map.get("rda").equals("Research Data Alliance")); - Assertions.assertTrue(map.containsKey("ee") && map.get("ee").equals("SDSN - Greece")); - Assertions - .assertTrue( - map.containsKey("dh-ch") && map.get("dh-ch").equals("Digital Humanities and Cultural Heritage")); - Assertions.assertTrue(map.containsKey("fam") && map.get("fam").equals("Fisheries and Aquaculture Management")); - Assertions.assertTrue(map.containsKey("ni") && map.get("ni").equals("Neuroinformatics")); - Assertions.assertTrue(map.containsKey("mes") && map.get("mes").equals("European Marine Science")); - Assertions.assertTrue(map.containsKey("instruct") && map.get("instruct").equals("Instruct-ERIC")); - Assertions.assertTrue(map.containsKey("elixir-gr") && map.get("elixir-gr").equals("ELIXIR GR")); - Assertions - .assertTrue(map.containsKey("aginfra") && map.get("aginfra").equals("Agricultural and Food Sciences")); - Assertions.assertTrue(map.containsKey("dariah") && map.get("dariah").equals("DARIAH EU")); - Assertions.assertTrue(map.containsKey("risis") && map.get("risis").equals("RISIS")); - Assertions.assertTrue(map.containsKey("epos") && map.get("epos").equals("EPOS")); - Assertions.assertTrue(map.containsKey("beopen") && map.get("beopen").equals("Transport Research")); - Assertions.assertTrue(map.containsKey("euromarine") && map.get("euromarine").equals("EuroMarine")); - Assertions.assertTrue(map.containsKey("ifremer") && map.get("ifremer").equals("Ifremer")); - Assertions.assertTrue(map.containsKey("oa-pg") && map.get("oa-pg").equals("EC Post-Grant Open Access Pilot")); - Assertions - .assertTrue( - map.containsKey("science-innovation-policy") - && map.get("science-innovation-policy").equals("Science and Innovation Policy Studies")); - Assertions.assertTrue(map.containsKey("covid-19") && map.get("covid-19").equals("COVID-19")); - Assertions.assertTrue(map.containsKey("enermaps") && map.get("enermaps").equals("Energy Research")); - } - -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 1c326eb..9c384b4 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -16,9 +16,9 @@ import org.junit.jupiter.api.Test; import com.google.gson.Gson; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.common.zenodoapi.MissingConceptDoiException; import eu.dnetlib.dhp.common.zenodoapi.ZenodoAPIClient; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; @Disabled public class ZenodoUploadTest { @@ -95,46 +95,46 @@ public class ZenodoUploadTest { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); - client.newVersion("656628"); + System.out.println(client.newVersion("3516917")); - CommunityMap 
communityMap = new CommunityMap(); - communityMap.put("ni", "Neuroinformatics"); - communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage"); - LocalFileSystem fs = FileSystem.getLocal(new Configuration()); +// CommunityMap communityMap = new CommunityMap(); +// communityMap.put("ni", "Neuroinformatics"); +// communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage"); +// LocalFileSystem fs = FileSystem.getLocal(new Configuration()); +// +// fs +// .copyFromLocalFile( +// false, new Path(getClass() +// .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni") +// .getPath()), +// new Path(workingDir + "/zenodo/ni/ni")); +// fs +// .copyFromLocalFile( +// false, new Path(getClass() +// .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch") +// .getPath()), +// new Path(workingDir + "/zenodo/dh-ch/dh-ch")); +// +// RemoteIterator fileStatusListIterator = fs +// .listFiles( +// new Path(workingDir + "/zenodo"), true); +// while (fileStatusListIterator.hasNext()) { +// LocatedFileStatus fileStatus = fileStatusListIterator.next(); +// +// String p_string = fileStatus.getPath().toString(); +// +// int index = p_string.lastIndexOf("/"); +// String community = p_string.substring(0, index); +// community = community.substring(community.lastIndexOf("/") + 1); +// String community_name = communityMap.get(community).replace(" ", "_"); +// // fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name)); +// System.out.println(community); +// +// // File f = new File("/tmp/" + community_name); +// FSDataInputStream inputStream = fs.open(fileStatus.getPath()); +// System.out.println(client.uploadIS(inputStream, community_name)); - fs - .copyFromLocalFile( - false, new Path(getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/ni") - .getPath()), - new Path(workingDir + "/zenodo/ni/ni")); - fs - .copyFromLocalFile( - false, new Path(getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/dump/zenodo/dh-ch") - .getPath()), - new Path(workingDir + "/zenodo/dh-ch/dh-ch")); - - RemoteIterator fileStatusListIterator = fs - .listFiles( - new Path(workingDir + "/zenodo"), true); - while (fileStatusListIterator.hasNext()) { - LocatedFileStatus fileStatus = fileStatusListIterator.next(); - - String p_string = fileStatus.getPath().toString(); - - int index = p_string.lastIndexOf("/"); - String community = p_string.substring(0, index); - community = community.substring(community.lastIndexOf("/") + 1); - String community_name = communityMap.get(community).replace(" ", "_"); - // fs.copyToLocalFile(fileStatus.getPath(), new Path("/tmp/" + community_name)); - System.out.println(community); - - // File f = new File("/tmp/" + community_name); - FSDataInputStream inputStream = fs.open(fileStatus.getPath()); - System.out.println(client.uploadIS(inputStream, community_name)); - - } +// } // System.out.println(client.publish()); @@ -146,7 +146,7 @@ public class ZenodoUploadTest { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); - client.newVersion("1210237"); + client.newVersion("4559725"); File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar"); // File file = new File(getClass() @@ -200,7 +200,7 @@ public class ZenodoUploadTest { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); - client.uploadOpenDeposition("8144316"); + client.uploadOpenDeposition("10037121"); File file = new File("/Users/miriam.baglioni/Desktop/EOSC_DUMP/publication.tar"); // File file = new File(getClass() @@ -209,7 +209,9 @@ public 
class ZenodoUploadTest { InputStream is = new FileInputStream(file); - Assertions.assertEquals(200, client.uploadIS3(is, "newVersion_deposition", file.length())); + Integer response_code = client.uploadIS3(is, "newVersion_deposition", file.length()); + System.out.println(response_code); + Assertions.assertEquals(201, response_code); // Assertions.assertEquals(202, client.publish()); diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 37ba576..32ed39b 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -20,6 +20,7 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.codehaus.jackson.map.ObjectMapper; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -27,57 +28,25 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.google.gson.Gson; +import eu.dnetlib.dhp.communityapi.QueryCommunityAPI; +import eu.dnetlib.dhp.communityapi.model.CommunityModel; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity; import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative; import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class CreateEntityTest { - private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + private static final String validCommunities = "[{\"id\":\"eut\",\"name\":\"EUt+\",\"description\":\"

EUt+ is an " + - "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return " - + - "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " + - "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " - + - "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)"; - - List communityMap = Arrays - .asList( - "clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri", - "ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. " - + - "SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - " - + - "Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community", - "dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community", - "fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) 
related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community", - "ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community", - "mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community", - "instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community", - "elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. 
More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri", - "aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community", - "dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri", - "epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri", - "covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community"); - - @Mock - private ISLookUpService isLookUpService; - - private QueryInformationSystem queryInformationSystem; + "alliance of 8 universities: Technological University Dublin, Riga Technical University, Cyprus University " + + "of Technology, Technical University of Cluj-Napoca, Polytechnic University of Cartagena, University of " + + "Technology of Troyes, Technical University of Sofia and Hochschule Darmstadt.

\",\"status\":\"all\"," + + "\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"knowmad\",\"name\":\"Knowmad Institut\",\"description\":\"

\\\"\\\"

The Knowmad Research Gateway enables researchers, policymakers, activists, and experts to discover publications, data, software, and other research products related to Human Rights, Sustainable Development, Drug Policy, New Technologies, Ethnobotany, and Special Populations.

The European Institute for Multidisciplinary Studies on Human Rights & Science - Knowmad Institut is an independent Think & Do Tank based in Brandenburg, Germany, promoting Human Dignity and Open Science in Public and Private Policies.

  • If you would like to suggest research inputs with bibliographic information, please use this form.

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"knowmadinstitut\"},{\"id\":\"netherlands\",\"name\":\"Netherlands Research Portal\",\"description\":\"

All Dutch Research, In One Place.

A comprehensive and open dataset of research information covering 3M publications, 500K research datasets and 100 research software items from 48 data sources, linked to 3K grants and 900 organizations.
All linked together through citations and semantics.

Welcome to the Portal of Research Output from the Netherlands, developed as part of a collaboration between UKB, SURF and OpenAIRE. The portal presents Dutch research results and research projects as a sub-section of the OpenAIRE Graph. This Research Graph is actively fed by Dutch institutional repositories, data and software repositories, and Current Research Information Systems (CRISs) that comply with the OpenAIRE metadata guidelines. In addition, the portal also includes research output and research projects from other sources that have an affiliation with one or more Dutch research performing organisations and research funders found in the OpenAIRE Graph.

Share your research.

\\\"Linking\\\"
Link your work.
Connect all your research. If you can’t find your research results in OpenAIRE, don’t worry! Use our Link Out service, which reaches out to many external sources via APIs, to connect your research results and claim them to your project.
Deposit your research.
Whether it’s publications, data or software, select an OpenAIRE-compatible repository and share using community standards. Alternatively, use Zenodo, a catch-all repository hosted by CERN. All results will be indexed, discoverable and accessible via this portal.
\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"tunet\",\"name\":\"TU-NET\",\"description\":\"

The Technological University Network (TU-NET) is an affiliation of the Technological Universities in Ireland: Technological University Dublin (TU Dublin), South East Technological University (SETU), Munster Technological University (MTU), Technological University of the Shannon: Midlands Midwest (TUS) and Atlantic Technological University (ATU). TU-NET is a network for the Irish Technological Universities to share expertise, information and resources where possible.

This portal harvests data from the following Open Access Repositories: ARROW (TU Dublin), SETU Waterford Libraries Repository and Research@THEA (SETU Carlow, TUS, ATU & IADT).

 

Background photo by Pietro Jeng on Unsplash

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"forthem\",\"name\":\"FORTHEM Alliance\",\"description\":\"FORTHEM: Fostering Outreach within European Regions, Transnational Higher Education and Mobility\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"neanias-space\",\"name\":\"NEANIAS Space Research Community\",\"description\":\"

The NEANIAS Space Research Community engages Open Science practices through the European Open Science Cloud (EOSC), targeting a wide variety of scientific and professional communities related to Astrophysics and Planetary Science, and also engaging computer scientists and software engineers interested in computer vision and machine learning. NEANIAS Space Services data and products may also have a high impact in planetary mining and robotics, space weather and mobile telecommunications.

The NEANIAS Space Research services are aimed at supporting management and analysis of large data volumes in astrophysics and planetary sciences through visualization (SPACE-VIS services), efficiently generating large multidimensional maps and mosaics (SPACE-MOS services), and, finally, supporting mechanisms for automatic detection of structures within maps through machine learning (SPACE-ML services).

The SPACE-VIS services provide an integrated operational solution for astrophysics and planetary data management aided by advanced visualization mechanisms, including visual analytics and virtual reality, and are underpinned by FAIR principles.

  • The ViaLactea service accesses astrophysical surveys to aid understanding of the star formation process of the Milky Way. ViaLactea Visual Analytics (VLVA) combines different types of visualization to perform analysis by exploring correlations managed in the ViaLactea Knowledge Base (VLKB). The VLKB includes 2D and 3D (velocity cube) surveys, numerical model outputs, and point-like and diffuse object catalogues, and allows for retrieval of all available datasets as well as cut-outs on the positional and/or velocity axis.
  • The Astra Data Navigator (ADN) is a virtual reality environment for visualizing large stellar catalogues. The first prototype has been customised to access cloud services for interactive data exploration and navigation, with the ability to explore advanced virtual reality mechanisms providing full immersion.
  • Finally, the ADAM-Space Service (Advanced Geospatial Data Management platform) accesses a large variety of environmental data and is customised in NEANIAS to access planetary data. 

The SPACE-MOS services provide tools for making high quality images from raw data (map making) and for assembling such images into custom mosaics (mosaicing). 

  • The AstroMapMerging service exploits Montage (http://montage.ipac.caltech.edu/) and is integrated with the ViaLactea service for merging adjacent datasets.  

  • The ISIS3 and ASP under ADAM-DPS service allows integration with data processing pipelines in ADAM, which offers tools for planetary data analysis and for producing cartographic products, such as Digital Elevation Models (DEMs) and 3D models from stereo imagery.

  

The SPACE-ML services provide advanced solutions for pattern and structure detection in astronomical surveys as well as in planetary surface composition, topography and morphometry. The service integrates cutting-edge machine learning algorithms to perform automatic classification of compact and extended sky structures or planetary surfaces. 

  • The CAESAR service allows users to extract and parametrize compact and extended sources from astronomical radio interferometric maps. The processing pipeline consists of a series of distinct stages that can be run on multiple cores and processors.

  • The AstroML service has been developed to integrate a deep learning mechanism that significantly improves the identification, classification, and characterization of sources in large-scale radio surveys.

  • The Latent Space Explorer service performs unsupervised representation learning of astronomical images using deep learning techniques (e.g., autoencoders) and interactive visualization of the representations, with the option to apply clustering methods in order to help domain experts understand the structure of the representation space.

Please also visit the NEANIAS Space thematic portal for more information on the Space Services and the research community.

NEANIAS web portal: https://www.neanias.eu/

Please consider acknowledging the NEANIAS project if you use the results of this service in any paper or communication: NEANIAS is funded by the European Union under the Horizon 2020 research and innovation programme via grant agreement No. 863448.

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"neanias\"},{\"id\":\"aurora\",\"name\":\"Aurora Universities Network\",\"description\":\"

Aurora consists of research-intensive universities deeply committed to the social impact of our activities, and with a history of engagement with the communities in which we operate. Our overall vision is to use our academic excellence to influence societal change through our research and education – aiming to contribute to the achievement of the United Nations’ Sustainable Development Goals.

All our research output is combined via this Aurora Connect Gateway and put on the Aurora Monitor Dashboard.

More about Aurora: https://aurora-universities.eu/

 

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"argo-france\",\"name\":\"Argo France\",\"description\":\"

Argo France brings together all French contributions to the international Argo programme, i.e. scientific and technical coordination, float purchase and deployment, data processing and interfaces with the user community (operational oceanography with Mercator Ocean and research).

\",\"status\":\"all\",\"type\":\"ri\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"dariah\",\"name\":\"DARIAH EU\",\"description\":\"The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents.\",\"status\":\"all\",\"type\":\"ri\",\"subjects\":null,\"zenodoCommunity\":\"dariah\"},{\"id\":\"north-american-studies\",\"name\":\"North American Studies\",\"description\":\"

North American Studies was born in the context of a collaboration between CISAN (the Center for Research on North America, a multi- and interdisciplinary research unit of the Universidad Nacional Autónoma de México, UNAM) and OpenAIRE. The aim of this collaboration is to strengthen the integration and automated intercommunication between digital platforms in North America and Europe and to become a leading platform for North American Studies that will facilitate communication among the community.

In this community, scholars, teachers, students, policy stakeholders, journalists and anyone interested in validated information about the region are very welcome and will be able to find research outputs on North American Studies easily and directly, enriching and updating their knowledge from very diverse perspectives and origins in a single portal.

The platform is managed by MiCISAN, the institutional repository of CISAN, whose purpose is to collect North American research findings. With OpenAIRE's collaboration, it will be possible to integrate, organise, protect and disseminate CISAN's scientific products and similar scientific sources and resources, following the OpenAIRE guidelines and FAIR Principles within the framework of Open Science.

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"redancommunity\"},{\"id\":\"dth\",\"name\":\"Digital Twins in Health\",\"description\":\"

Open Research Gateway on Digital Twins in Health.

The Virtual Human Twin (VHT) is an integrated multiscale, multi-time, and multi-discipline representation of quantitative human physiology and pathology. Its realisation, through a collaborative distributed knowledge and resource platform, is specifically designed to accelerate the development, integration, and adoption of patient-specific predictive computer models, which will be used as clinical decision support systems, for personal health forecasting, or as methodologies for the development and de-risking of personalised medical products. The vision of EDITH is to facilitate the realisation of the opportunities presented by VHTs for the benefit of patients, healthcare providers, regulatory bodies and industry, both within Europe and globally.

Objectives

 

ECOSYSTEM

EDITH has the objective to frame an inclusive ecosystem of digital twins in healthcare within the EU Member States and associated countries. It will start with a comprehensive mapping of the existing relevant actors, initiatives, resources (i.e., models, data sets, methods, good practices, infrastructures, solutions, services), and barriers in the digital twin domain. This will ensure adequate clinical representation, while fostering collaboration and integration of all relevant stakeholders, such as solution developers, technology providers, infrastructure providers, end-users (healthcare professionals and patients), regulatory agencies, and Health Technology Assessment (HTA) bodies.

 

ROADMAP

EDITH has the objective to build a roadmap with all the necessary conditions to evolve towards an integrated Virtual Human Twin (VHT). The roadmap will identify the necessary building blocks to ensure the VHT’s clinical application and formulate clear short- and mid-term policy recommendations. It will also address aspects of interoperability, computability, and health information integration from various sources. It will identify any possible implementation needs and barriers, including the development of a strategic approach for clinical deployment of the VHT model. This will foster the uptake of personalised clinical decision-making, with a particular focus on areas of unmet medical needs.

 

REPOSITORY

EDITH has the objective to develop a federated, cloud-based repository of digital twins (data, models, algorithms, and good practices) in healthcare. The repository will provide a virtual collaboration environment for multiple users and organisations that will pool together existing digital twin resources across Europe. The ecosystem will be leveraged to create a repository catalogue of available resources and to recruit resources into the repository from the consortium and external contributors. Through a federated approach, links will be established with ongoing initiatives that have complementary objectives.

 

SIMULATION PLATFORM

EDITH has the objective to develop the architecture of a simulation platform that will support the transition towards an integrated Virtual Human Twin (VHT). EDITH’s platform, based on a personal user account with specific permissions, will outline how to provide users with a one-stop shop to design, develop, test, and validate single organ digital twins, and combine them with other twins to build integrated VHT models. Five use-cases (cancer, cardiovascular, intensive care, osteoporosis, brain) have been pre-selected to be developed as prototypes, representing the variety of functionalities to be provided by the platform.

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"elixir-gr\",\"name\":\"The Greek National Node of the ESFRI European RI ELIXIR\",\"description\":\"ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.\",\"status\":\"all\",\"type\":\"ri\",\"subjects\":null,\"zenodoCommunity\":\"oaa_elixir-gr\"},{\"id\":\"rural-digital-europe\",\"name\":\"Assessing the socio-economic impact of digitalisation in rural areas\",\"description\":\"The scope of this community is to provide access to publications, research data, projects and software for assessing the socio-economic impact of digitalisation in rural areas in Europe\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"mes\",\"name\":\"European Marine Science\",\"description\":\"This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"oac_mes\"},{\"id\":\"neanias-underwater\",\"name\":\"NEANIAS Underwater Research Community\",\"description\":\"

The NEANIAS Underwater Research Community engages Open Science practices through the European Open Science Cloud (EOSC), targeting a wide variety of scientific and professional communities related to the marine environment. The NEANIAS Underwater group provides three user-friendly, cloud-based solutions addressing bathymetry processing, seafloor mosaicking and classification (https://www.neanias.eu/index.php/services/underwater). More specifically:

  • The Bathymetry Mapping from Acoustic Data service delivers an advanced, user-friendly, cloud-based version of the popular open-source MB-System software for post-processing bathymetry through Jupyter notebooks, with additional functionalities.
  • The Seafloor Mosaicing from Optical Data service provides an operational solution for large-area representation (on the order of tens of thousands of images) of the predominantly flat seafloor, also addressing visibility limitations of the underwater medium (https://marketplace.eosc-portal.eu/services/uw-mos).
  • The Seabed Classification from Multispectral, Multibeam Data service delivers a user-friendly cloud-based solution integrating cutting-edge machine learning frameworks for mapping several seabed classes, validated for archaeological, geo-hazards, energy, and other applications (https://marketplace.eosc-portal.eu/services/uw-map).

NEANIAS: https://www.neanias.eu/

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"neanias\"},{\"id\":\"neanias-atmospheric\",\"name\":\"NEANIAS Atmospheric Research Community\",\"description\":\"

The NEANIAS Atmospheric Research Community engages Open Science practices through the European Open Science Cloud (EOSC), targeting a wide variety of sectors related to the atmosphere. Its target audiences include, among others, meteorologists, industrial air pollutant emitters, ecologists, geologists, rural and urban planners, air quality authorities, and geohazard, civil protection, insurance and health agencies.

In more detail, the NEANIAS Atmospheric group offers the following services on the EOSC platform:

  • The Greenhouse Gases Flux Density Monitoring service (A1 - ATMO-FLUD) delivers an operational workflow for estimating flux densities and fluxes of gases, aerosols and energy from data obtained from specifically configured meteorological stations, validated against standardized, regularized processes.
  • The Atmospheric Perturbations and Components Monitoring service (A2, divided into two services, ATMO-STRESS and ATMO-SEISM) performs all required analyses of atmospheric and geological data in order to estimate possible correlations of gaseous and particulate components of the atmosphere with earthquake and volcanic processes.
  • The Air Quality Estimation, Monitoring and Forecasting service (A3 – ATMO-4CAST) delivers a novel cloud-based solution providing crucial information and products to a variety of stakeholders in agriculture, urban/city authorities, health and insurance agencies, and relevant governmental authorities.

NEANIAS: https://www.neanias.eu/

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"neanias\"},{\"id\":\"beopen\",\"name\":\"Transport Research\",\"description\":\"Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research. The TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"be-open-transport\"},{\"id\":\"heritage-science\",\"name\":\"Heritage Science\",\"description\":\"

The Heritage Science community gateway is managed by the IPERION HS Research Infrastructure EU project.

Heritage science is the interdisciplinary domain of scientific study of cultural or natural heritage. Heritage science draws on diverse humanities, sciences and engineering disciplines. It focuses on enhancing the understanding, care and sustainable use of heritage so it can enrich people's lives, both today and in the future. Heritage science is an umbrella term encompassing all forms of scientific enquiry into human works and the combined works of nature and humans, of value to people.

The gateway aims to include all the relevant research outcomes in this field.

 

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"ni\",\"name\":\"Neuroinformatics\",\"description\":\"The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"oac_ni\"},{\"id\":\"dh-ch\",\"name\":\"Digital Humanities and Cultural Heritage\",\"description\":\"This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"oac_dh-ch\"},{\"id\":\"eutopia\",\"name\":\"EUTOPIA Open Research Portal\",\"description\":\"

EUTOPIA is an ambitious alliance of 10 like-minded universities ready to reinvent themselves: the Babeș-Bolyai University in Cluj-Napoca (Romania), the Vrije Universiteit Brussel (Belgium), the Ca' Foscari University of Venice (Italy), CY Cergy Paris Université (France), the Technische Universität Dresden (Germany), the University of Gothenburg (Sweden), the University of Ljubljana (Slovenia), the NOVA University Lisbon (Portugal), Pompeu Fabra University (Spain) and the University of Warwick (United Kingdom). Together, these 10 pioneers join forces to build the university of the future.

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"sdsn-gr\",\"name\":\"Sustainable Development Solutions Network - Greece\",\"description\":\"The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"oac_sdsn-greece\"},{\"id\":\"covid-19\",\"name\":\"Corona Virus Disease\",\"description\":\"

This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"covid-19\"},{\"id\":\"enermaps\",\"name\":\"Welcome to EnerMaps Gateway! Find the latest scientific data.\",\"description\":\"

EnerMaps Open Data Management Tool aims to improve data management and accessibility in the field of energy research for the renewable energy industry.

EnerMaps’ tool accelerates and facilitates the energy transition by offering a high-quality, user-friendly digital platform to energy professionals.

The project is based on the FAIR data principles, which require data to be Findable, Accessible, Interoperable and Reusable.

The EnerMaps project coordinates and enriches existing energy databases to promote trans-disciplinary research and to develop partnerships between researchers and energy professionals.

The EnerMaps project has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No. 884161.

 

Website: https://enermaps.eu/

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"},{\"id\":\"eu-conexus\",\"name\":\"European University for Smart Urban Coastal Sustainability\",\"description\":\"

The European University for Smart Urban Coastal Sustainability - EU-CONEXUS promotes common European values and a strengthened European identity by bringing together a new generation of Europeans, who can cooperate and work within different European cultures, in different languages and across borders, sectors and academic disciplines. EU-CONEXUS is committed to creating a new institutional framework for higher education in Europe that allows for a truly European way of studying and carrying out research through institutional alignment and seamless mobility for all. 

Through the thematic framework of Smart Urban Coastal Sustainability (SmUCS), EU-CONEXUS focuses its research & innovation, education and knowledge sharing activities on societal challenges that are linked to a specific geographical location: urban and semi-urbanised coastal (river and ocean) regions. SmUCS is not a subject-driven thematic framework, but a challenge-driven education and research domain.

\",\"status\":\"all\",\"type\":\"community\",\"subjects\":null,\"zenodoCommunity\":\"\"}]"; private static String workingDir; - @BeforeEach - public void setUp() throws ISLookUpException { - lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(isLookUpService); - } - @BeforeAll public static void beforeAll() throws IOException { workingDir = Files @@ -86,15 +55,18 @@ public class CreateEntityTest { } @Test - void test1() throws ISLookUpException, IOException { + void test1() throws IOException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); + UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI(); + new ObjectMapper() + .readValue(validCommunities, CommunityModelList.class) + .forEach(ri -> consumer.accept(queryCommunityAPI.getContext(ri))); List riList = new ArrayList<>(); cInfoList.forEach(cInfo -> riList.add(Process.getEntity(cInfo))); - Assertions.assertEquals(12, riList.size()); + Assertions.assertEquals(25, riList.size()); riList.stream().forEach(c -> { switch (c.getAcronym()) { @@ -114,7 +86,7 @@ public class CreateEntityTest { .equals( String .format( - "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, + "%s::%s", Constants.CONTEXT_NS_PREFIX, DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes")); Assertions.assertTrue("mes".equals(c.getAcronym())); @@ -129,7 +101,7 @@ public class CreateEntityTest { .equals( String .format( - "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, + "%s::%s", Constants.CONTEXT_NS_PREFIX, DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin")); Assertions.assertTrue("clarin".equals(c.getAcronym())); @@ -144,7 +116,7 @@ public class CreateEntityTest { @Test @Disabled - void test2() throws IOException, ISLookUpException { + void test2() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); Path hdfsWritePath = new Path(workingDir + "/prova"); @@ -162,7 +134,8 @@ public class CreateEntityTest { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); + UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI(); + queryCommunityAPI.getContextInformation().forEach(ci -> consumer.accept(ci)); for (ContextInfo cInfo : cInfoList) { writer.write(new Gson().toJson(Process.getEntity(cInfo))); @@ -171,3 +144,9 @@ public class CreateEntityTest { } } + +class CommunityModelList extends ArrayList { + public CommunityModelList() { + super(); + } +} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index 9b1fb7a..d399ec7 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; +import java.io.IOException; import java.util.*; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -13,6 +14,8 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import 
com.google.gson.Gson; +import eu.dnetlib.dhp.communityapi.QueryCommunityAPI; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate; import eu.dnetlib.dhp.oa.model.graph.Relation; import eu.dnetlib.dhp.schema.common.ModelSupport; @@ -22,542 +25,25 @@ import eu.dnetlib.dhp.utils.DHPUtils; class CreateRelationTest { - List communityContext = Arrays - .asList( - "\n" + - " all\n" + - " CLARIN\n" + - " https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg\n" - + - " Common Language Resources and Technology Infrastructure\n" + - " maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " (Part of) the work reported here was made possible by using the CLARIN infrastructure.\n" - + - " The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.\n" - + - " The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n" - + - " (E.g. No 676529 for project CLARIN-PLUS.)\n" + - " oac_clarin\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " CLARIN-PLUS\n" + - " \n" + - " \n" + - " 676529\n" + - " http://www.clarin.eu\n" + - " EC\n" + - " H2020-INFRADEV-1-2015-1\n" + - " CLARIN+\n" + - " \n" + - " \n" + - " Common Language Resources and Technology Infrastructure\n" - + - " CLARIN\n" + - " 212230\n" + - " EC\n" + - " corda_______::ef782b2d85676aa3e5a907427feb18c4\n" + - " \n" + - " \n" + - " " + - "\n" + - " \n" + - " LINDAT/CLARIN repository\n" + - " LINDAT/CLARIN repository\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " CLARIN-D\n" + - " https://www.clarin-d.de/en/\n" + - " \n" + - " http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf\n" - + - " Germany\n" + - " \n" + - " \n" + - " \n" - + - " \n" + - "", - "\n" + - " all\n" + - " This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.\n" - + - " http://sanmamante.org/DH_CH_logo.png\n" + - " Digital Humanities and Cultural Heritage\n" + - " ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk\n" - + - " modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 
654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " oac_dh-ch\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " \n" + - " 654119\n" + - " http://www.parthenos-project.eu\n" + - " EC\n" + - " PARTHENOS\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " re3data_____::9ebe127e5f3a0bf401875690f3bb6b81\n" + - " The UK's largest collection of digital research data in the social sciences and humanities\n" - + - " UK Data Archive\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::c6cd4b532e12868c1d760a8d7cda6815\n" + - " Journal of Data Mining and Digital Humanities\n" + - " Journal of Data Mining and Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b\n" + - " Frontiers in Digital Humanities\n" + - " Frontiers in Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::6eb31d13b12bc06bbac06aef63cf33c9\n" + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " true\n" + - " \n" + - " \n" - + - " doajarticles::0da84e9dfdc8419576169e027baa8028\n" + - " Conservation Science in Cultural Heritage\n" + - " Conservation Science in Cultural Heritage\n" + - " true\n" + - " \n" + - " \n" - + - " re3data_____::84e123776089ce3c7a33db98d9cd15a8\n" + - " Electronic Archiving System\n" + - " EASY\n" + - " true\n" + - " \n" + - " \n" + - " openaire____::c5502a43e76feab55dd00cf50f519125\n" + - " DANS-KB Harvester\n" + - " Gemeenschappelijke Harvester DANS-KB\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::a48f09c562b247a9919acfe195549b47\n" + - " ads\n" + - " Archaeology Data Service\n" + - " true\n" + - " \n" + - " \n" + - " opendoar____::97275a23ca44226c9964043c8462be96\n" + - " KNAW Repository\n" + - " KNAW Repository\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::2899208a99aa7d142646e0a80bfeef05\n" + - " Internet Archaeology\n" + - " Internet Archaeology\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. 
The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.\n" - + - " https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png\n" - + - " Neuroinformatics\n" + - " sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it\n" - + - " brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities\n" - + - " \n" + - " oac_ni\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " \n" + - " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" + - " Formerly:OpenFMRI\n" + - " OpenNeuro\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::c7d3de67dc77af72f6747157441252ec\n" + - " Research Ideas and Outcomes\n" + - " Research Ideas and Outcomes\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::8515794670370f49c1d176c399c714f5\n" + - " Neuroimaging Informatics Tools and Resources Clearinghouse\n" - + - " NITRC\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::d640648c84b10d425f96f11c3de468f3\n" + - " Frontiers in Neuroinformatics\n" + - " Frontiers in Neuroinformatics\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\n" + - " NeuroImage: Clinical\n" + - " NeuroImage: Clinical\n" + - " true\n" + - " \n" + - " \n" + - " rest________::fb1a3d4523c95e63496e3bc7ba36244b\n" + - " NeuroVault\n" + - " NeuroVault\n" + - " true\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " Instruct-ERIC is the European Research Infrastructure for Structural Biology\n" - + - " https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png\n" - + - " Instruct-ERIC\n" + - " claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " The authors acknowledge the support and the use of resources of Instruct-ERIC.\n" - + - " The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project\n" - + - " oac_instruct\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Authentication and Authorisation For Research and Collaboration\n" - + - " \n" + - " 730941\n" + - " \n" + - " H2020-EINFRA-2016-1\n" + - " AARC2\n" + - " EC\n" + - " \n" + - " \n" - + - " Building data bridges between biological and medical infrastructures in Europe\n" - + - " \n" + - " 284209\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioMedBridges\n" + - " \n" + - " \n" - + - " Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\n" - + - " \n" + - " 283570\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioStruct-X\n" + - " \n" + - " \n" - + - " Coordinated Research Infrastructures Building Enduring Life-science services\n" - + - " \n" + - " 654248\n" + - " \n" + - " H2020-INFRADEV-1-2014-1\n" + - " EC\n" + - " CORBEL\n" + - " \n" + - " \n" - + - " Infrastructure for NMR, EM 
and X-rays for translational research\n" - + - " \n" + - " 653706\n" + - " \n" + - " H2020-INFRAIA-2014-2015\n" + - " EC\n" + - " iNEXT\n" + - " \n" + - " \n" - + - " Integrated Structural Biology Infrastructure\n" + - " \n" + - " 211252\n" + - " \n" + - " FP7-INFRASTRUCTURES-2007-1\n" + - " EC\n" + - " INSTRUCT\n" + - " \n" + - " \n" - + - " Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\n" - + - " \n" + - " 731005\n" + - " \n" + - " H2020-INFRADEV-2016-1\n" + - " EC\n" + - " INSTRUCT-ULTRA\n" + - " \n" + - " \n" - + - " Opening Synchrotron Light for Experimental Science and Applications in the Middle East\n" - + - " \n" + - " 730943\n" + - " \n" + - " H2020-INFRASUPP-2016-1\n" + - " EC\n" + - " OPEN SESAME\n" + - " \n" + - " \n" - + - " Infrastructure for Protein Production Platforms\n" - + - " \n" + - " 227764\n" + - " \n" + - " FP7-INFRASTRUCTURES-2008-1\n" + - " EC\n" + - " PCUBE\n" + - " \n" + - " \n" - + - " European Vaccine Research and Development Infrastructure\n" - + - " \n" + - " 730964\n" + - " \n" + - " H2020-INFRAIA-2016-1\n" + - " EC\n" + - " TRAMSVAC2\n" + - " \n" + - " \n" - + - " World-wide E-infrastructure for structural biology\n" - + - " \n" + - " 675858\n" + - " \n" + - " EC | H2020 | RIA\n" + - " EC\n" + - " West-Life\n" + - " \n" + - " \n" + - " Expanding research infrastructure visibility to strengthen strategic partnerships\n" - + - " RI-VIS\n" + - " 824063\n" + - " EC\n" + - " corda__h2020::af93b591b76991d8437993a8f6fc6538\n" + - " \n" + - " \n" + - " \n" - + - " \n" - + - " \n" + - " instruct\n" + - " \n" + - " \n" + - " \n" - + - " west-life\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " FRISBI\n" + - " aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==\n" - + - " aHR0cDovL2ZyaXNiaS5ldS8=\n" + - " \n" + - " \n" + - " RI-VIS\n" + - " aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=\n" - + - " aHR0cHM6Ly9yaS12aXMuZXU=\n" + - " \n" + - " \n" + - " CIISB\n" + - " aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=\n" + - " aHR0cHM6Ly93d3cuY2lpc2Iub3Jn\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. 
More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.\n" - + - " https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png\n" - + - " The Greek National Node of the ESFRI European RI ELIXIR\n" + - " vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " \n" + - " oaa_elixir-gr\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)\n" - + - " \n" + - " 1U24RR025736-01\n" + - " NIH\n" + - " \n" + - " \n" + - " COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning\n" - + - " \n" + - " 0223843\n" + - " NSF\n" + - " \n" + - " \n" + - " The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia\n" - + - " \n" + - " 5R01MH082795-05\n" + - " NIH\n" + - " \n" + - " \n" + - " Fragmented early life environmental and emotional / cognitive vulnerabilities\n" - + - " \n" + - " 1P50MH096889-01A1\n" + - " NIH\n" + - " \n" + - " \n" + - " Enhancement of the 1000 Functional Connectome Project\n" - + - " \n" + - " 1R03MH096321-01A1\n" + - " TUBITAK\n" + - " \n" + - " \n" + - " CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project\n" - + - " \n" + - " 1131441\n" + - " NSF\n" + - " \n" + - " \n" + - " Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI\n" - + - " \n" + - " 0121950\n" + - " NSF\n" + - " \n" + - " \n" + - " Transforming statistical methodology for neuroimaging meta-analysis.\n" - + - " \n" + - " 100309\n" + - " WT\n" + - " \n" + - " " + - - " \n" - + - " \n" + - " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" + - " bio.tools\n" + - " bio.tools\n" + - " false\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " \n" + - " ATHENA RC\n" + - " aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=\n" - + - " aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=\n" + - " \n" + - " \n" - + - ""); - - private QueryInformationSystem queryInformationSystem; - - private Map map; - - @BeforeEach - public void setUp() { - - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setContextRelationResult(communityContext); - } + String contextInfo1 = "{\"id\":\"eut\",\"description\":null,\"type\":null,\"zenodocommunity\":null,\"name\":null,\"projectList\":[],\"datasourceList\":[\"opendoar____::39e4973ba3321b80f37d9b55f63ed8b8\",\"opendoar____::cda72177eba360ff16b7f836e2754370\",\"opendoar____::71f6278d140af599e06ad9bf1ba03cb0\",\"opendoar____::f5c59267dae7d123f54b741a76f28f84\",\"opendoar____::532a082cc38eaffa923d73bce41b4061\",\"opendoar____::00a03ec6533ca7f5c644d198d815329c\",\"opendoar____::33a854e247155d590883b93bca53848a\"],\"subject\":null}"; + String contextInfo2 = 
"{\"id\":\"knowmad\",\"description\":null,\"type\":null,\"zenodocommunity\":null,\"name\":null,\"projectList\":[],\"datasourceList\":[\"issn___print::81db6614a3fa07becf706193d4754f30\",\"doajarticles::71eae002613af3569ea42b7093eefdbd\",\"doajarticles::daf53452e1199952d6c95ee7d0088d35\",\"issn___print::ff1a02f622ff006edc668e55fc865ae8\",\"doajarticles::14c007581eb12d843050ebf0493f2254\",\"issn__online::14c007581eb12d843050ebf0493f2254\",\"doajarticles::19a7308c7287b3784aacdb1979141c50\",\"doajarticles::c28985d2363f1ed20048dfd682ee14be\",\"issn__online::c28985d2363f1ed20048dfd682ee14be\",\"doajarticles::81db6614a3fa07becf706193d4754f30\",\"doajarticles::588fc0f98218e9f29a0dabe1182851c4\",\"doajarticles::5ccbe475390dfa642a356c7fd678b70a\",\"doajarticles::cf63b988539f8d28ed366220691f751b\",\"doajarticles::c03353a080ac66c37a1f6aa2de05ca63\",\"issn__online::efec4cc1b1143f69d7d0954a2e9b18b6\",\"doajarticles::25ddd7bb737fc8b027b455f6712111a0\",\"doajarticles::ca2e3cc507ff73298333ed53177e9916\",\"doajarticles::220ab67f748963b6932600585452ad83\",\"doajarticles::b718013941a35a86b17b9b57aca9d260\",\"doajarticles::934e4b68deaeee0781f18a6a6e4fd906\",\"doajarticles::43587737046be23d692860e3212d6966\",\"doajarticles::23a27fb45b9af1f2b57632b7ceb98448\",\"doajarticles::9d32313b109db4f024eb1c802ad4e353\",\"fairsharing_::b4944963b5c83d545c3d3022bcf03282\",\"doajarticles::b5a2cadc830f94ceb2f8035369cf66a1\",\"doajarticles::8d64614210c51d4bc9e1470bbe1b5607\",\"doajarticles::e258604e7e1132683570d4178e9bec8f\",\"doajarticles::260c4ba804c4c08842ad39a6e8fd2a97\",\"doajarticles::8c9f0a0c20cb062a0ab7823af8262d0e\",\"issn__online::ef2866d8e4561162a1b115c24ebd7887\",\"doajarticles::de16206a48e58e344ef049670a1a0134\",\"doajarticles::96621c5056169f56ff3131ea90aa07c9\",\"issn__online::4558a062701b28ec0b4a6b2f0fbe8d09\",\"issn___print::5571b62bd2e69e2311990fceb7fe2e3a\",\"doajarticles::c61ecda4a5a75b94a28b1c782008e64c\",\"doajarticles::fd825a036d04fcdf0ab805d87e16d1de\",\"issn___print::61d35d3f7e7f635fa9818eb5c5724833\",\"doajarticles::685e5ac5fb55c74ee8a2b266923e1f1d\",\"doajarticles::b99614718f7a6545b1eed9b68d3441de\",\"doajarticles::7d2d2e0095bf1ec755bf3e39d31a5bcb\",\"doajarticles::6124a1bc19d8f538c1bb45da8fda914b\",\"issn___print::6f2e006697b7a4f9d6e2e4d61d9e87c2\",\"issn___print::24d6c4cc60c9446680a4bf6464f232cc\",\"issn___print::0482ed6012f16fa029dd0434b644cb90\",\"issn___print::74d68388e8ad50f80b754f5036c80fcf\",\"issn__online::22b586e63e4962054a332a538e5d2b71\",\"issn___print::4bceffb60baed7f1e3f25b171b8fcf63\",\"issn__online::4074709fc410b3be61ea1769634ae8ff\",\"issn__online::d787423124beb54d12945b8fb6effe17\",\"issn__online::92d6a58173da7e479557acb8701da9dc\",\"issn___print::36813e86616b329b06101afab0f2d115\",\"issn___print::60f102cf59ddbfc2297dbfd2efe1c006\",\"issn__online::e2bedb8ffd0dd9a05af59c2651eb4200\",\"issn___print::3a5263873041ce993d856c3a08b87e66\",\"issn___print::c0118216b90b4ec13c4344c302eb1cc2\",\"issn___print::36cb3b21af46a23327d120d848ac5256\",\"issn___print::f6528b255fa5b915efddf5bdd4b12bef\",\"issn___print::50ae07b765e6c893d93701b63dc885eb\",\"issn___print::1efd4ac736f64f83bc537339bf112882\",\"issn___print::f46704a2eb1338c3524391c7fcdc5f50\",\"issn__online::6e823cb8d73afda60e12a0ce7ec65fe3\",\"doajarticles::df6746f5d17a6c27cf1033e2888b934d\",\"issn___print::8a8a700ac926378b330e57d7faf5948e\",\"issn___print::6fbc352ab2267a17490b922033c2ce8c\",\"issn___print::692e57ca190e52967683bca19dcc6200\",\"issn___print::4f657189126a3c51cf8f2d58410d09b8\",\"issn___print::2fd71afb352b4ec1ee58b2bfa4d536da\",\"do
ajarticles::e91e4788bfb04277b61416fd184416b2\",\"issn__online::3097c43f11470b27e74f040cf267eedf\",\"doajarticles::942ba9d488531f3a60633b716db0b05c\",\"doajarticles::1aefeb2e98d29b17473e3f4dbcc5000e\",\"issn___print::223b857892c5de7247b3f906e1e1b96a\",\"doajarticles::b64f9c27d2c8899df904930cd6d2316a\"],\"subject\":null}"; + String contextInfo3 = "{\"id\":\"ni\",\"description\":null,\"type\":null,\"zenodocommunity\":null,\"name\":null,\"projectList\":[\"nih_________::412379d1e2683a9571405e6621d3cdeb\",\"nsf_________::1e685a3d3ca1d62192bb51d42d44c6d1\",\"nsf_________::8ff441b675f2c62061f15210b05c1584\",\"nih_________::2d8bd32ed8cb2f0b0d4508df61d494dd\",\"nsf_________::e71b0a0f6a347d76816adb29b322441c\",\"nih_________::c27d60d28c9bc5cf17dd7ae7ad2d4ab4\",\"nih_________::b28919975c85cfee114e442c30c918c6\",\"wt__________::d709ac8e1ed393d036c895d239d825a4\"],\"datasourceList\":[\"re3data_____::5b9bf9171d92df854cf3c520692e9122\",\"doajarticles::c7d3de67dc77af72f6747157441252ec\",\"re3data_____::8515794670370f49c1d176c399c714f5\",\"doajarticles::d640648c84b10d425f96f11c3de468f3\",\"doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\",\"rest________::fb1a3d4523c95e63496e3bc7ba36244b\",\"opendoar____::7e7757b1e12abcb736ab9a754ffb617a\"],\"subject\":null}"; @Test - void test1() { + void test1() throws IOException { + ObjectMapper mapper = new ObjectMapper(); List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem - .getContextRelation(consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class)); - - cInfoList.forEach(c -> System.out.println(new Gson().toJson(c))); + consumer.accept(mapper.readValue(contextInfo1, ContextInfo.class)); + consumer.accept(mapper.readValue(contextInfo2, ContextInfo.class)); + consumer.accept(mapper.readValue(contextInfo3, ContextInfo.class)); List rList = new ArrayList<>(); cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add)); - rList.forEach(r -> { - try { - System.out.println(new ObjectMapper().writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); - - Assertions.assertEquals(34, rList.size()); + Assertions.assertEquals(190, rList.size()); Assertions .assertTrue( @@ -570,11 +56,11 @@ class CreateRelationTest { .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))); + DHPUtils.md5("eut")))); Assertions .assertEquals( - 10, + 7, rList .stream() .filter( @@ -586,13 +72,13 @@ class CreateRelationTest { .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) + DHPUtils.md5("eut")))) .collect(Collectors.toList()) .size()); Assertions .assertEquals( - 10, + 7, rList .stream() .filter( @@ -604,7 +90,7 @@ class CreateRelationTest { .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) + DHPUtils.md5("eut")))) .collect(Collectors.toList()) .size()); @@ -619,57 +105,22 @@ class CreateRelationTest { .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) + DHPUtils.md5("eut")))) .map(r -> r.getTarget()) .collect(Collectors.toSet()); Assertions .assertTrue( - tmp.contains("re3data_____::9ebe127e5f3a0bf401875690f3bb6b81") && - tmp.contains("doajarticles::c6cd4b532e12868c1d760a8d7cda6815") && - tmp.contains("doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") && - tmp.contains("doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") && - tmp.contains("doajarticles::0da84e9dfdc8419576169e027baa8028") && - tmp.contains("re3data_____::84e123776089ce3c7a33db98d9cd15a8") && - 
tmp.contains("openaire____::c5502a43e76feab55dd00cf50f519125") && - tmp.contains("re3data_____::a48f09c562b247a9919acfe195549b47") && - tmp.contains("opendoar____::97275a23ca44226c9964043c8462be96") && - tmp.contains("doajarticles::2899208a99aa7d142646e0a80bfeef05")); - - } - - @Test - public void test2() { - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - - queryInformationSystem - .getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class)); - - cInfoList.forEach(c -> System.out.println(new Gson().toJson(c))); - - List rList = new ArrayList<>(); - - cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add)); - - Assertions.assertEquals(44, rList.size()); + tmp.contains("opendoar____::532a082cc38eaffa923d73bce41b4061") && + tmp.contains("opendoar____::00a03ec6533ca7f5c644d198d815329c") && + tmp.contains("opendoar____::33a854e247155d590883b93bca53848a") && + tmp.contains("opendoar____::71f6278d140af599e06ad9bf1ba03cb0") && + tmp.contains("opendoar____::f5c59267dae7d123f54b741a76f28f84") && + tmp.contains("opendoar____::cda72177eba360ff16b7f836e2754370") && + tmp.contains("opendoar____::39e4973ba3321b80f37d9b55f63ed8b8")); Assertions - .assertFalse( - rList - .stream() - .map(r -> r.getSource()) - .collect(Collectors.toSet()) - .contains( - String - .format( - "%s::%s", - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))); - - Assertions - .assertEquals( - 2, + .assertTrue( rList .stream() .filter( @@ -681,13 +132,15 @@ class CreateRelationTest { .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .collect(Collectors.toList()) - .size()); + DHPUtils.md5("eut")))) + .map(r -> r.getTargetType()) + .collect(Collectors.toSet()) + .stream() + .allMatch(t -> t.equals("datasource"))); Assertions .assertEquals( - 2, + 15, rList .stream() .filter( @@ -699,144 +152,43 @@ class CreateRelationTest { .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) + DHPUtils.md5("ni")))) .collect(Collectors.toList()) .size()); - Set tmp = rList - .stream() - .filter( - r -> r - .getSource() - - .equals( - String - .format( - "%s::%s", - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("clarin")))) - .map(r -> r.getTarget()) - .collect(Collectors.toSet()); - - Assertions - .assertTrue( - tmp.contains("corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") && - tmp.contains("corda_______::ef782b2d85676aa3e5a907427feb18c4")); - - rList.forEach(rel -> { - if (rel.getSourceType().equals("project")) { - String proj = rel.getSource(); - Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12); - Assertions.assertFalse(proj.startsWith("40|")); - } - }); - - } - - @Test - void test3() { - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - - MasterDuplicate md1 = new MasterDuplicate(); - md1.setMaster("10|fake________::9ebe127e5f3a0bf401875690f3bb6b81"); - md1.setDuplicate("10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81"); - queryInformationSystem - .getContextRelation( - consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class), Arrays.asList(md1)); - - cInfoList.forEach(c -> System.out.println(new Gson().toJson(c))); - - List rList = new ArrayList<>(); - - cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add)); - - rList.forEach(r -> { - try { - System.out.println(new ObjectMapper().writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); - - 
Assertions.assertEquals(34, rList.size()); - - Assertions - .assertTrue( - rList - .stream() - .map(r -> r.getSource()) - .collect(Collectors.toSet()) - .contains( - String - .format( - "%s::%s", - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))); - Assertions .assertEquals( - 10, - rList + 7, rList .stream() .filter( r -> r .getSource() - .equals( String .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) - .collect(Collectors.toList()) - .size()); + DHPUtils.md5("ni"))) + && + r.getTargetType().equals("datasource")) + .count()); Assertions .assertEquals( - 10, - rList + 8, rList .stream() .filter( r -> r - .getTarget() - + .getSource() .equals( String .format( "%s::%s", Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) - .collect(Collectors.toList()) - .size()); - - Set<String> tmp = rList - .stream() - .filter( - r -> r - .getSource() - - .equals( - String - .format( - "%s::%s", - Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5("dh-ch")))) - .map(r -> r.getTarget()) - .collect(Collectors.toSet()); - - Assertions - .assertTrue( - tmp.contains("fake________::9ebe127e5f3a0bf401875690f3bb6b81") && - tmp.contains("doajarticles::c6cd4b532e12868c1d760a8d7cda6815") && - tmp.contains("doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") && - tmp.contains("doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") && - tmp.contains("doajarticles::0da84e9dfdc8419576169e027baa8028") && - tmp.contains("re3data_____::84e123776089ce3c7a33db98d9cd15a8") && - tmp.contains("openaire____::c5502a43e76feab55dd00cf50f519125") && - tmp.contains("re3data_____::a48f09c562b247a9919acfe195549b47") && - tmp.contains("opendoar____::97275a23ca44226c9964043c8462be96") && - tmp.contains("doajarticles::2899208a99aa7d142646e0a80bfeef05")); - + DHPUtils.md5("ni"))) + && + r.getTargetType().equals("project")) + .count()); } + } diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java deleted file mode 100644 index 08fcd49..0000000 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ /dev/null @@ -1,810 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import static org.mockito.Mockito.lenient; - -import java.util.*; -import java.util.function.Consumer; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -@ExtendWith(MockitoExtension.class) -class QueryInformationSystemTest { - - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//context/param[./@name = 'status']/text() = 'all' " + - " return " + - "$x//context"; - - private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return " - + - "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " + -
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " - + - "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)"; - - List communityMap = Arrays - .asList( - "clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri", - "ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. " - + - "SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - " - + - "Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community", - "dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community", - "fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) 
related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community", - "ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community", - "mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community", - "instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community", - "elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. 
More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri", - "aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community", - "dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri", - "epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri", - "covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. 
All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community"); - - List communityContext = Arrays - .asList( - "\n" + - " all\n" + - " CLARIN\n" + - " https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg\n" - + - " Common Language Resources and Technology Infrastructure\n" + - " maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " (Part of) the work reported here was made possible by using the CLARIN infrastructure.\n" - + - " The work reported here has received funding through <CLARIN national consortium member, e.g. CLARIN.SI>, <XYZ> project, grant no. <XYZ>.\n" - + - " The work reported here has received funding (through CLARIN ERIC) from the European Union’s Horizon 2020 research and innovation programme under grant agreement No <0-9> for project <XYZ>.\n" - + - " (E.g. No 676529 for project CLARIN-PLUS.)\n" + - " oac_clarin\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " CLARIN-PLUS\n" + - " \n" + - " \n" + - " 676529\n" + - " http://www.clarin.eu\n" + - " EC\n" + - " H2020-INFRADEV-1-2015-1\n" + - " CLARIN+\n" + - " \n" + - " \n" + - " Common Language Resources and Technology Infrastructure\n" - + - " CLARIN\n" + - " 212230\n" + - " EC\n" + - " corda_______::ef782b2d85676aa3e5a907427feb18c4\n" + - " \n" + - " \n" + - " " + - "\n" + - " \n" + - " LINDAT/CLARIN repository\n" + - " LINDAT/CLARIN repository\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - " \n" + - " CLARIN-D\n" + - " https://www.clarin-d.de/en/\n" + - " \n" + - " http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf\n" - + - " Germany\n" + - " \n" + - " \n" + - " \n" - + - " \n" + - "", - "\n" + - " all\n" + - " This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. 
This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.\n" - + - " http://sanmamante.org/DH_CH_logo.png\n" + - " Digital Humanities and Cultural Heritage\n" + - " ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk\n" - + - " modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " oac_dh-ch\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\n" - + - " The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call\n" - + - " \n" + - " 654119\n" + - " http://www.parthenos-project.eu\n" + - " EC\n" + - " PARTHENOS\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " re3data_____::9ebe127e5f3a0bf401875690f3bb6b81\n" + - " The UK's largest collection of digital research data in the social sciences and humanities\n" - + - " UK Data Archive\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::c6cd4b532e12868c1d760a8d7cda6815\n" + - " Journal of Data Mining and Digital Humanities\n" + - " Journal of Data Mining and Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b\n" + - " Frontiers in Digital Humanities\n" + - " Frontiers in Digital Humanities\n" + - " true\n" + - " \n" + - " \n" - + - " doajarticles::6eb31d13b12bc06bbac06aef63cf33c9\n" + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " Il Capitale Culturale: Studies on the Value of Cultural Heritage\n" - + - " true\n" + - " \n" + - " \n" - + - " doajarticles::0da84e9dfdc8419576169e027baa8028\n" + - " Conservation Science in Cultural Heritage\n" + - " Conservation Science in Cultural Heritage\n" + - " true\n" + - " \n" + - " \n" - + - " re3data_____::84e123776089ce3c7a33db98d9cd15a8\n" + - " Electronic Archiving System\n" + - " EASY\n" + - " true\n" + - " \n" + - " \n" + - " openaire____::c5502a43e76feab55dd00cf50f519125\n" + - " DANS-KB Harvester\n" + - " Gemeenschappelijke Harvester DANS-KB\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::a48f09c562b247a9919acfe195549b47\n" + - " ads\n" + - " Archaeology Data Service\n" + - " true\n" + - " \n" + - " \n" + - " opendoar____::97275a23ca44226c9964043c8462be96\n" + - " KNAW Repository\n" + - " KNAW Repository\n" + - " true\n" + - " \n" + - 
" \n" - + - " doajarticles::2899208a99aa7d142646e0a80bfeef05\n" + - " Internet Archaeology\n" + - " Internet Archaeology\n" + - " true\n" + - " \n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.\n" - + - " https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png\n" - + - " Neuroinformatics\n" + - " sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it\n" - + - " brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities\n" - + - " \n" + - " oac_ni\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" + - " re3data_____::5b9bf9171d92df854cf3c520692e9122\n" + - " Formerly:OpenFMRI\n" + - " OpenNeuro\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::c7d3de67dc77af72f6747157441252ec\n" + - " Research Ideas and Outcomes\n" + - " Research Ideas and Outcomes\n" + - " true\n" + - " \n" + - " \n" + - " re3data_____::8515794670370f49c1d176c399c714f5\n" + - " Neuroimaging Informatics Tools and Resources Clearinghouse\n" - + - " NITRC\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::d640648c84b10d425f96f11c3de468f3\n" + - " Frontiers in Neuroinformatics\n" + - " Frontiers in Neuroinformatics\n" + - " true\n" + - " \n" + - " \n" + - " doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a\n" + - " NeuroImage: Clinical\n" + - " NeuroImage: Clinical\n" + - " true\n" + - " \n" + - " \n" + - " rest________::fb1a3d4523c95e63496e3bc7ba36244b\n" + - " NeuroVault\n" + - " NeuroVault\n" + - " true\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " Instruct-ERIC is the European Research Infrastructure for Structural Biology\n" - + - " https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png\n" - + - " Instruct-ERIC\n" + - " claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " The authors acknowledge the support and the use of resources of Instruct-ERIC.\n" - + - " The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project\n" - + - " oac_instruct\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " Authentication and Authorisation For Research and Collaboration\n" - + - " \n" + - " 730941\n" + - " \n" + - " H2020-EINFRA-2016-1\n" + - " AARC2\n" + - " EC\n" + - " \n" + - " \n" - + - " Building data bridges between biological and medical infrastructures in Europe\n" - + - " \n" + - " 284209\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioMedBridges\n" + - " \n" + - " \n" - + - " Transnational 
access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\n" - + - " \n" + - " 283570\n" + - " \n" + - " FP7-INFRASTRUCTURES-2011-1\n" + - " EC\n" + - " BioStruct-X\n" + - " \n" + - " \n" - + - " Coordinated Research Infrastructures Building Enduring Life-science services\n" - + - " \n" + - " 654248\n" + - " \n" + - " H2020-INFRADEV-1-2014-1\n" + - " EC\n" + - " CORBEL\n" + - " \n" + - " \n" - + - " Infrastructure for NMR, EM and X-rays for translational research\n" - + - " \n" + - " 653706\n" + - " \n" + - " H2020-INFRAIA-2014-2015\n" + - " EC\n" + - " iNEXT\n" + - " \n" + - " \n" - + - " Integrated Structural Biology Infrastructure\n" + - " \n" + - " 211252\n" + - " \n" + - " FP7-INFRASTRUCTURES-2007-1\n" + - " EC\n" + - " INSTRUCT\n" + - " \n" + - " \n" - + - " Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\n" - + - " \n" + - " 731005\n" + - " \n" + - " H2020-INFRADEV-2016-1\n" + - " EC\n" + - " INSTRUCT-ULTRA\n" + - " \n" + - " \n" - + - " Opening Synchrotron Light for Experimental Science and Applications in the Middle East\n" - + - " \n" + - " 730943\n" + - " \n" + - " H2020-INFRASUPP-2016-1\n" + - " EC\n" + - " OPEN SESAME\n" + - " \n" + - " \n" - + - " Infrastructure for Protein Production Platforms\n" - + - " \n" + - " 227764\n" + - " \n" + - " FP7-INFRASTRUCTURES-2008-1\n" + - " EC\n" + - " PCUBE\n" + - " \n" + - " \n" - + - " European Vaccine Research and Development Infrastructure\n" - + - " \n" + - " 730964\n" + - " \n" + - " H2020-INFRAIA-2016-1\n" + - " EC\n" + - " TRAMSVAC2\n" + - " \n" + - " \n" - + - " World-wide E-infrastructure for structural biology\n" - + - " \n" + - " 675858\n" + - " \n" + - " H2020-EINFRA-2015-1\n" + - " EC\n" + - " West-Life\n" + - " \n" + - " \n" + - " Expanding research infrastructure visibility to strengthen strategic partnerships\n" - + - " RI-VIS\n" + - " 824063\n" + - " EC\n" + - " corda__h2020::af93b591b76991d8437993a8f6fc6538\n" + - " \n" + - " \n" + - " \n" - + - " \n" - + - " \n" + - " instruct\n" + - " \n" + - " \n" + - " \n" - + - " west-life\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " FRISBI\n" + - " aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==\n" - + - " aHR0cDovL2ZyaXNiaS5ldS8=\n" + - " \n" + - " \n" + - " RI-VIS\n" + - " aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=\n" - + - " aHR0cHM6Ly9yaS12aXMuZXU=\n" + - " \n" + - " \n" + - " CIISB\n" + - " aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=\n" + - " aHR0cHM6Ly93d3cuY2lpc2Iub3Jn\n" + - " \n" + - " \n" + - "\n", - "\n" + - " all\n" + - " ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. 
More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.\n" - + - " https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png\n" - + - " The Greek National Node of the ESFRI European RI ELIXIR\n" + - " vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it\n" - + - " \n" + - " \n" + - " oaa_elixir-gr\n" + - " 2018-03-01T12:00:00\n" + - " \n" + - " \n" - + - " \n" + - " rest________::b8e502674c3c3499d5374e9b2ea6d8d5\n" + - " bio.tools\n" + - " bio.tools\n" + - " false\n" + - " \n" + - " \n" + - " \n" + - " \n" - + - " \n" + - " \n" + - " ATHENA RC\n" + - " aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=\n" - + - " aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=\n" + - " \n" + - " \n" - + - ""); - - @Mock - private ISLookUpService isLookUpService; - - private QueryInformationSystem queryInformationSystem; - - private Map map; - - @BeforeEach - public void setUp() throws ISLookUpException { - lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap); - lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityContext); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(isLookUpService); - } - - @Test - void testSizeEntity() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); - - Assertions.assertEquals(12, cInfoList.size()); - } - - @Test - void testSizeRelation() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.execContextRelationQuery(); - queryInformationSystem - .getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); - - Assertions.assertEquals(5, cInfoList.size()); - } - - @Test - void testContentRelation() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.execContextRelationQuery(); - queryInformationSystem - .getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource")); - - cInfoList.forEach(contextInfo -> { - switch (contextInfo.getId()) { - case "elixir-gr": - Assertions.assertEquals(1, contextInfo.getDatasourceList().size()); - Assertions - .assertEquals( - "10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5", - contextInfo.getDatasourceList().get(0)); - break; - case "instruct": - Assertions.assertEquals(0, contextInfo.getDatasourceList().size()); - break; - case "ni": - Assertions.assertEquals(6, contextInfo.getDatasourceList().size()); - Assertions - .assertTrue( - contextInfo - .getDatasourceList() - .contains("10|rest________::fb1a3d4523c95e63496e3bc7ba36244b")); - break; - case "dh-ch": - Assertions.assertEquals(10, contextInfo.getDatasourceList().size()); - break; - case "clarin": - Assertions.assertEquals(0, contextInfo.getDatasourceList().size()); - break; - } - }); - } - - @Test - 
void testContentEntity() throws ISLookUpException { - - List cInfoList = new ArrayList<>(); - final Consumer consumer = ci -> cInfoList.add(ci); - queryInformationSystem.getContextInformation(consumer); - - cInfoList.forEach(context -> { - switch (context.getId()) { - case "clarin":// clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin", - Assertions - .assertEquals("Common Language Resources and Technology Infrastructure", context.getName()); - Assertions.assertEquals("CLARIN", context.getDescription()); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals("oac_clarin", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - break; - case "ee": - Assertions.assertEquals("Sustainable Development Solutions Network - Greece", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(17, context.getSubject().size()); - Assertions.assertEquals("oac_sdsn-greece", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - break; - case "dh-ch": - Assertions.assertEquals("Digital Humanities and Cultural Heritage", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(67, context.getSubject().size()); - Assertions.assertEquals("oac_dh-ch", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - break; - case "fam": - Assertions.assertEquals("Fisheries and Aquaculture Management", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith("Conservation of marine resources for sustainable development")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(19, context.getSubject().size()); - Assertions.assertEquals("fisheries", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - break; - case "ni": - Assertions.assertEquals("Neuroinformatics", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith("The neuroinformatics dashboard gathers research outputs from the")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(18, context.getSubject().size()); - Assertions.assertEquals("oac_ni", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("brain")); - break; - case "mes": - Assertions.assertEquals("European Marine Science", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "This community was initially defined to include a very broad range of topics")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(5, context.getSubject().size()); - 
Assertions.assertEquals("oac_mes", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("sea")); - Assertions.assertTrue(context.getSubject().contains("fish")); - Assertions.assertTrue(context.getSubject().contains("ocean")); - Assertions.assertTrue(context.getSubject().contains("aqua")); - Assertions.assertTrue(context.getSubject().contains("marine")); - break; - case "instruct": - Assertions.assertEquals("Instruct-ERIC", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .equals( - "Instruct-ERIC is the European Research Infrastructure for Structural Biology")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals("oac_instruct", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - - break; - case "elixir-gr": - Assertions - .assertEquals("The Greek National Node of the ESFRI European RI ELIXIR", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals("oaa_elixir-gr", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - - break; - case "aginfra": - Assertions.assertEquals("Agricultural and Food Sciences", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "The scope of this community is to provide access to publications, research data, projects and software")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(18, context.getSubject().size()); - Assertions.assertEquals("oac_aginfra", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("food distribution")); - break; - case "dariah": - Assertions.assertEquals("DARIAH EU", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support ")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - - Assertions.assertEquals("dariah", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - - break; - case "epos": - Assertions.assertEquals("European Plate Observing System", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of ")); - Assertions - .assertTrue( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - - Assertions.assertEquals("", context.getZenodocommunity()); - Assertions.assertEquals("ri", context.getType()); - - break; - case "covid-19": - 
Assertions.assertEquals("Corona Virus Disease", context.getName()); - Assertions.assertTrue(context.getDescription().length() > 0); - Assertions - .assertTrue( - context - .getDescription() - .startsWith( - "This portal provides access to publications, research data, projects and ")); - Assertions - .assertFalse( - Optional - .ofNullable(context.getSubject()) - .map(value -> false) - .orElse(true)); - Assertions.assertEquals(25, context.getSubject().size()); - Assertions.assertEquals("covid-19", context.getZenodocommunity()); - Assertions.assertEquals("community", context.getType()); - Assertions.assertTrue(context.getSubject().contains("coronavirus disease 2019")); - break; - - } - }); - - } -} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java index b9a0814..78b1374 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java @@ -5,6 +5,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; diff --git a/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance_new b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance_new new file mode 100644 index 0000000..83b735b --- /dev/null +++ b/dump/src/test/resources/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance_new @@ -0,0 +1 @@ +{"measures":[{"id":"downloads","unit":[{"key":"count","value":"0","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:usage_counts","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"views","unit":[{"key":"count","value":"1","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:usage_counts","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id": "influence", "unit": [{"value": "6.01504990349e-09", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "popularity_alt", "unit": [{"value": "2.304", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "popularity", "unit": [{"value": "1.81666032463e-08", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "influence_alt", "unit": [{"value": "8.0", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "impulse", "unit": [{"value": "8.0", "key": "score"}, {"value": "C", "key": 
"class"}]}],"collectedfrom":[{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1628257970612,"id":"50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","originalId":["50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2","10.3897/oneeco.2.e13718"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/j.triboint.2014.05.004"}],"dateofcollection":"2020-03-23T00:20:51.392Z","dateoftransformation":"2020-03-23T00:26:59.078Z","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2020-03-23T00:20:51.392Z","altered":true,"baseURL":"http%3A%2F%2Fzookeys.pensoft.net%2Foai.php","identifier":"10.3897/oneeco.2.e13718","datestamp":"2017-09-08","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"author":[{"fullname":"Nikolaidou,Charitini","name":"Charitini","surname":"Nikolaidou","rank":1,"pid":[],"affiliation":null},{"fullname":"Votsi,Nefta","name":"Nefta","surname":"Votsi","rank":2,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Sgardelis,Steanos","name":"Steanos","surname":"Sgardelis","rank":3,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Halley,John","name":"John","surname":"Halley","rank":4,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Pantis,John","name":"John","surname":"Pantis","rank":5,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null},{"fullname":"Tsiafouli,Maria","name":"Maria","surname":"Tsiafouli","rank":6,"pid":[{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"mag","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"0000-0001-6651-1178","qualifier":{"classid":"orcid_pending","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[{"dataInfo":null, "classid":"IT", "classname":"Italy","schemeid":"fake","schemename":"fake"}],"subject":[{"value":"Ecosystem Services hotspots","qualifier":{"classid":"ACM","classname":"ACM Computing Classification System","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Natura 2000","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Quiet Protected Areas","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Biodiversity","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Agriculture","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Elevation","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Slope","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","
schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Ecosystem Service trade-offs and synergies","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":" cultural services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"provisioning services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"regulating services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"supporting services","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ecosystem Service capacity is higher in areas of multiple designation types","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[],"description":[{"value":"The implementation of the Ecosystem Service (ES) concept into practice might be a challenging task as it has to take into account previous “traditional” policies and approaches that have evaluated nature and biodiversity differently. Among them the Habitat (92/43/EC) and Bird Directives (79/409/EC), the Water Framework Directive (2000/60/EC), and the Noise Directive (2002/49/EC) have led to the evaluation/designation of areas in Europe with different criteria. In this study our goal was to understand how the ES capacity of an area is related to its designation and if areas with multiple designations have higher capacity in providing ES. We selected four catchments in Greece with a great variety of characteristics covering over 25% of the national territory. Inside the catchments we assessed the ES capacity (following the methodology of Burkhard et al. 
2009) of areas designated as Natura 2000 sites, Quiet areas and Wetlands or Water bodies and found those areas that have multiple designations. Data were analyzed by GLM to reveal differences regarding the ES capacity among the different types of areas. We also investigated by PCA synergies and trade-offs among different kinds of ES and tested for correlations among landscape properties, such as elevation, aspect and slope and the ES potential. Our results show that areas with different types or multiple designations have a different capacity in providing ES. Areas of one designation type (Protected or Quiet Areas) had in general intermediate scores in most ES but scores were higher compared to areas with no designation, which displayed stronger capacity in provisioning services. Among Protected Areas and Quiet Areas the latter scored better in general. Areas that combined both designation types (Protected and Quiet Areas) showed the highest capacity in 13 out of 29 ES, that were mostly linked with natural and forest ecosystems. We found significant synergies among most regulating, supporting and cultural ES which in turn display trade-offs with provisioning services. The different ES are spatially related and display strong correlation with landscape properties, such as elevation and slope. We suggest that the designation status of an area can be used as an alternative tool for environmental policy, indicating the capacity for ES provision. Multiple designations of areas can be used as proxies for locating ES “hotspots”. This integration of “traditional” evaluation and designation and the “newer” ES concept forms a time- and cost-effective way to be adopted by stakeholders and policy-makers in order to start complying with new standards and demands for nature conservation and environmental management.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Pensoft Publishers","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[{"value":"One Ecosystem 2: e13718","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"fulltext":[],"format":[{"value":"text/html","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":null,"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"community:subject","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch"}],"externalReference":[],"instance":[{"measures":[{"id": "influence", "unit": [{"value": "6.01504990349e-09", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "popularity_alt", "unit": [{"value": "2.304", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "popularity", "unit": [{"value": "1.81666032463e-08", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "influence_alt", "unit": [{"value": "8.0", "key": "score"}, {"value": "C", "key": "class"}]}, {"id": "impulse", "unit": [{"value": "8.0", "key": "score"}, {"value": "C", "key": "class"}]}],"license":null,"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":"green"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd","value":"One Ecosystem","dataInfo":null},"url":["https://doi.org/10.3897/oneeco.2.e13718","https://oneecosystem.pensoft.net/article/13718/"],"distributionlocation":"","collectedfrom":{"key":"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db","value":"Pensoft","dataInfo":null},"pid":[],"alternateIdentifier":[{"value":"10.3897/oneeco.2.e13718","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2017-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"0001","classname":"peerReviewed","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"journal":{"name":"One Ecosystem","issnPrinted":"","issnOnline":"2367-8194","issnLinking":"","ep":"","iss":"","sp":"","vol":"","edition":"","conferenceplace":null,"conferencedate":null,"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}}
\ No newline at end of file
diff --git a/dump/wget-log b/dump/wget-log
new file mode 100644
index 0000000..11419dc
--- /dev/null
+++ b/dump/wget-log
@@ -0,0 +1,81 @@
+--2023-11-30 16:20:33-- http://10.5555/1071509.1071530
+Resolving 10.5555 (10.5555)... 10.0.21.179
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 09:02:48-- (try: 2) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 11:03:58-- (try: 3) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:04:08-- (try: 4) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:37:35-- (try: 5) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:38:55-- (try: 6) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:40:16-- (try: 7) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:41:38-- (try: 8) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:43:01-- (try: 9) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:44:25-- (try:10) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 13:45:50-- (try:11) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:35:37-- (try:12) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:37:02-- (try:13) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:38:27-- (try:14) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:39:52-- (try:15) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:41:17-- (try:16) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:42:42-- (try:17) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:44:07-- (try:18) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:45:32-- (try:19) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Retrying.
+
+--2023-12-11 14:46:57-- (try:20) http://10.5555/1071509.1071530
+Connecting to 10.5555 (10.5555)|10.0.21.179|:80... failed: Operation timed out.
+Giving up.
+
diff --git a/pom.xml b/pom.xml
index 00b5e97..6641dbf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,6 +6,7 @@
 dump-schema
 dump
+ api

@@ -102,7 +103,7 @@
 5.6.1
 3.5
 11.0.2
- [3.17.1]
+ [4.17.2]
\ No newline at end of file
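Two asides on the hunks above, for reviewers. First, the bracketed value in the final pom.xml hunk is Maven version-range syntax: [4.17.2] is a hard requirement for exactly that version, whereas a bare 4.17.2 would only be a soft preference that dependency mediation could override, so the change from [3.17.1] pins the build to the new release. Second, the committed dump/wget-log records wget being handed a bare DOI, so it treats 10.5555 as a hostname and every attempt times out; a DOI is only dereferenceable through a resolver service. Below is a minimal sketch of the conventional lookup, assuming the public https://doi.org resolver; the class name is hypothetical and the DOI is the one from the log.

import java.net.HttpURLConnection;
import java.net.URL;

public class ResolveDoiExample {

	public static void main(String[] args) throws Exception {
		// prepend the resolver: fetching the bare DOI as a URL cannot work
		String doi = "10.5555/1071509.1071530";
		URL url = new URL("https://doi.org/" + doi);

		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
		conn.setRequestMethod("GET");
		conn.setInstanceFollowRedirects(false); // doi.org answers with a redirect

		// the Location header carries the landing page registered for the DOI
		System.out.println(conn.getResponseCode());
		System.out.println(conn.getHeaderField("Location"));
		conn.disconnect();
	}
}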