merge with changeMeasure

This commit is contained in:
Miriam Baglioni 2022-12-29 15:14:20 +01:00
commit 5e36b80dc1
115 changed files with 5762 additions and 1115 deletions

View File

@ -0,0 +1,56 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
/**
 * Groups the impact-related scores of a result: influence, popularity (each with an
 * alternative variant) and impulse. Every measure is carried as a {@link Score}.
 *
 * <p>The underscore in the {@code *_alt} accessor names is intentional: bean-based
 * serializers derive the property names ({@code influence_alt}, {@code popularity_alt})
 * from them, so they must not be renamed.
 *
 * @author miriam.baglioni
 * @Date 07/11/22
 */
public class ImpactMeasures implements Serializable {
	// Fields are private, like in the other model beans; access goes through the accessors.
	private Score influence;
	private Score influence_alt;
	private Score popularity;
	private Score popularity_alt;
	private Score impulse;

	/** @return the influence score, or {@code null} when it has not been set */
	public Score getInfluence() {
		return influence;
	}

	/** @param influence the influence score to store */
	public void setInfluence(Score influence) {
		this.influence = influence;
	}

	/** @return the alternative influence score, or {@code null} when it has not been set */
	public Score getInfluence_alt() {
		return influence_alt;
	}

	/** @param influence_alt the alternative influence score to store */
	public void setInfluence_alt(Score influence_alt) {
		this.influence_alt = influence_alt;
	}

	/** @return the popularity score, or {@code null} when it has not been set */
	public Score getPopularity() {
		return popularity;
	}

	/** @param popularity the popularity score to store */
	public void setPopularity(Score popularity) {
		this.popularity = popularity;
	}

	/** @return the alternative popularity score, or {@code null} when it has not been set */
	public Score getPopularity_alt() {
		return popularity_alt;
	}

	/** @param popularity_alt the alternative popularity score to store */
	public void setPopularity_alt(Score popularity_alt) {
		this.popularity_alt = popularity_alt;
	}

	/** @return the impulse score, or {@code null} when it has not been set */
	public Score getImpulse() {
		return impulse;
	}

	/** @param impulse the impulse score to store */
	public void setImpulse(Score impulse) {
		this.impulse = impulse;
	}
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
 * Container for the indicators attached to a result: its impact measures and its
 * usage counts. Either part is left out of the JSON output when it is {@code null}
 * (see the {@code JsonInclude.Include.NON_NULL} annotations on the getters).
 */
public class Indicator implements Serializable {
	// Fields are private, like in the other model beans; access goes through the accessors.
	@JsonSchema(description = "The impact measures (i.e. popularity)")
	private ImpactMeasures impactMeasures;

	@JsonSchema(description = "The usage counts (i.e. downloads)")
	private UsageCounts usageCounts;

	/** @return the impact measures, or {@code null} when none have been set */
	@JsonInclude(JsonInclude.Include.NON_NULL)
	public ImpactMeasures getImpactMeasures() {
		return impactMeasures;
	}

	/** @param impactMeasures the impact measures to store */
	public void setImpactMeasures(ImpactMeasures impactMeasures) {
		this.impactMeasures = impactMeasures;
	}

	/** @return the usage counts, or {@code null} when none have been set */
	@JsonInclude(JsonInclude.Include.NON_NULL)
	public UsageCounts getUsageCounts() {
		return usageCounts;
	}

	/** @param usageCounts the usage counts to store */
	public void setUsageCounts(UsageCounts usageCounts) {
		this.usageCounts = usageCounts;
	}
}

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
@ -25,8 +26,8 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
*/
public class Instance implements Serializable {
@JsonSchema(description = "Measures computed for this instance, for example Bip!Finder ones")
private List<Measure> measures;
// @JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones")
// private Indicator indicators;
private List<ResultPid> pid;
@ -59,6 +60,7 @@ public class Instance implements Serializable {
"nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)")
private String refereed; // peer-review status
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getLicense() {
return license;
}
@ -67,6 +69,7 @@ public class Instance implements Serializable {
this.license = license;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public AccessRight getAccessright() {
return accessright;
}
@ -75,6 +78,7 @@ public class Instance implements Serializable {
this.accessright = accessright;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getType() {
return type;
}
@ -83,6 +87,7 @@ public class Instance implements Serializable {
this.type = type;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getUrl() {
return url;
}
@ -91,6 +96,7 @@ public class Instance implements Serializable {
this.url = url;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getPublicationdate() {
return publicationdate;
}
@ -99,6 +105,7 @@ public class Instance implements Serializable {
this.publicationdate = publicationdate;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getRefereed() {
return refereed;
}
@ -107,6 +114,7 @@ public class Instance implements Serializable {
this.refereed = refereed;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public APC getArticleprocessingcharge() {
return articleprocessingcharge;
}
@ -115,6 +123,7 @@ public class Instance implements Serializable {
this.articleprocessingcharge = articleprocessingcharge;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<ResultPid> getPid() {
return pid;
}
@ -123,6 +132,7 @@ public class Instance implements Serializable {
this.pid = pid;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<AlternateIdentifier> getAlternateIdentifier() {
return alternateIdentifier;
}
@ -131,11 +141,12 @@ public class Instance implements Serializable {
this.alternateIdentifier = alternateIdentifier;
}
public List<Measure> getMeasures() {
return measures;
}
public void setMeasures(List<Measure> measures) {
this.measures = measures;
}
// @JsonInclude(JsonInclude.Include.NON_NULL)
// public Indicator getIndicators() {
// return indicators;
// }
//
// public void setIndicators(Indicator indicators) {
// this.indicators = indicators;
// }
}

View File

@ -8,8 +8,12 @@ import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* @author miriam.baglioni
* @Date 03/08/22
*/
public class Measure implements Serializable {
@JsonSchema(description = "The measure (i.e. popularity)")
@JsonSchema(description = "The measure (i.e. class)")
private String key;
@JsonSchema(description = "The value for that measure")
@ -32,15 +36,14 @@ public class Measure implements Serializable {
}
public static Measure newInstance(String key, String value) {
Measure inst = new Measure();
inst.key = key;
inst.value = value;
return inst;
Measure mes = new Measure();
mes.key = key;
mes.value = value;
return mes;
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
}
}

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
@ -168,6 +169,19 @@ public class Result implements Serializable {
@JsonSchema(description = "Timestamp of last update of the record in OpenAIRE")
private Long lastupdatetimestamp;
@JsonSchema(description = "Indicators computed for this result, for example UsageCount ones")
private Indicator indicators;
@JsonInclude(JsonInclude.Include.NON_NULL)
public Indicator getIndicators() {
return indicators;
}
public void setIndicators(Indicator indicators) {
this.indicators = indicators;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}
@ -176,6 +190,7 @@ public class Result implements Serializable {
this.lastupdatetimestamp = lastupdatetimestamp;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getId() {
return id;
}
@ -184,6 +199,7 @@ public class Result implements Serializable {
this.id = id;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getOriginalId() {
return originalId;
}
@ -192,6 +208,7 @@ public class Result implements Serializable {
this.originalId = originalId;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<ResultPid> getPid() {
return pid;
}
@ -200,6 +217,7 @@ public class Result implements Serializable {
this.pid = pid;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getDateofcollection() {
return dateofcollection;
}
@ -208,10 +226,12 @@ public class Result implements Serializable {
this.dateofcollection = dateofcollection;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Author> getAuthor() {
return author;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getType() {
return type;
}
@ -220,6 +240,7 @@ public class Result implements Serializable {
this.type = type;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public Container getContainer() {
return container;
}
@ -232,6 +253,7 @@ public class Result implements Serializable {
this.author = author;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public Language getLanguage() {
return language;
}
@ -240,6 +262,7 @@ public class Result implements Serializable {
this.language = language;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<ResultCountry> getCountry() {
return country;
}
@ -248,6 +271,7 @@ public class Result implements Serializable {
this.country = country;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Subject> getSubjects() {
return subjects;
}
@ -256,6 +280,7 @@ public class Result implements Serializable {
this.subjects = subjects;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getMaintitle() {
return maintitle;
}
@ -264,6 +289,7 @@ public class Result implements Serializable {
this.maintitle = maintitle;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getSubtitle() {
return subtitle;
}
@ -272,6 +298,7 @@ public class Result implements Serializable {
this.subtitle = subtitle;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getDescription() {
return description;
}
@ -280,6 +307,7 @@ public class Result implements Serializable {
this.description = description;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getPublicationdate() {
return publicationdate;
}
@ -288,6 +316,7 @@ public class Result implements Serializable {
this.publicationdate = publicationdate;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getPublisher() {
return publisher;
}
@ -296,6 +325,7 @@ public class Result implements Serializable {
this.publisher = publisher;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getEmbargoenddate() {
return embargoenddate;
}
@ -304,6 +334,7 @@ public class Result implements Serializable {
this.embargoenddate = embargoenddate;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getSource() {
return source;
}
@ -312,6 +343,7 @@ public class Result implements Serializable {
this.source = source;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getFormat() {
return format;
}
@ -320,6 +352,7 @@ public class Result implements Serializable {
this.format = format;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getContributor() {
return contributor;
}
@ -328,6 +361,7 @@ public class Result implements Serializable {
this.contributor = contributor;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getCoverage() {
return coverage;
}
@ -336,6 +370,7 @@ public class Result implements Serializable {
this.coverage = coverage;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public BestAccessRight getBestaccessright() {
return bestaccessright;
}
@ -344,6 +379,7 @@ public class Result implements Serializable {
this.bestaccessright = bestaccessright;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getDocumentationUrl() {
return documentationUrl;
}
@ -352,6 +388,7 @@ public class Result implements Serializable {
this.documentationUrl = documentationUrl;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getCodeRepositoryUrl() {
return codeRepositoryUrl;
}
@ -360,6 +397,7 @@ public class Result implements Serializable {
this.codeRepositoryUrl = codeRepositoryUrl;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getProgrammingLanguage() {
return programmingLanguage;
}
@ -368,6 +406,7 @@ public class Result implements Serializable {
this.programmingLanguage = programmingLanguage;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getContactperson() {
return contactperson;
}
@ -376,6 +415,7 @@ public class Result implements Serializable {
this.contactperson = contactperson;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getContactgroup() {
return contactgroup;
}
@ -384,6 +424,7 @@ public class Result implements Serializable {
this.contactgroup = contactgroup;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getTool() {
return tool;
}
@ -392,6 +433,7 @@ public class Result implements Serializable {
this.tool = tool;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getSize() {
return size;
}
@ -400,6 +442,7 @@ public class Result implements Serializable {
this.size = size;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getVersion() {
return version;
}
@ -408,6 +451,7 @@ public class Result implements Serializable {
this.version = version;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<GeoLocation> getGeolocation() {
return geolocation;
}

View File

@ -0,0 +1,37 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonGetter;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
/**
* A single indicator value made of two string parts: the score and a companion value
* that is exposed in JSON under the property name "class".
*
* @author miriam.baglioni
* @Date 07/11/22
*/
public class Score implements Serializable {
// The score, kept as a plain string.
private String score;
// "class" is a reserved word in Java, so the field is called clazz and is mapped
// to the JSON property "class" through the Jackson annotations below.
@JsonProperty("class")
private String clazz;
/** @return the score, or null when it has not been set */
public String getScore() {
return score;
}
/** @param score the score to store */
public void setScore(String score) {
this.score = score;
}
/** @return the value of the "class" JSON property, or null when it has not been set */
@JsonGetter("class")
public String getClazz() {
return clazz;
}
/** @param clazz the value read from / written to the "class" JSON property */
@JsonSetter("class")
public void setClazz(String clazz) {
this.clazz = clazz;
}
}

View File

@ -0,0 +1,27 @@
package eu.dnetlib.dhp.oa.model;
/**
 * Usage counters of a result: number of downloads and number of views, both carried
 * as plain strings.
 *
 * <p>The class is {@link java.io.Serializable} because it is stored as a field of
 * {@code Indicator}, which is itself serializable; without this, Java serialization of
 * an indicator holding usage counts fails with {@link java.io.NotSerializableException}.
 *
 * @author miriam.baglioni
 * @Date 07/11/22
 */
public class UsageCounts implements java.io.Serializable {
	private String downloads;
	private String views;

	/** @return the downloads counter, or {@code null} when it has not been set */
	public String getDownloads() {
		return downloads;
	}

	/** @param downloads the downloads counter to store */
	public void setDownloads(String downloads) {
		this.downloads = downloads;
	}

	/** @return the views counter, or {@code null} when it has not been set */
	public String getViews() {
		return views;
	}

	/** @param views the views counter to store */
	public void setViews(String views) {
		this.views = views;
	}
}

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.oa.model.community;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Instance;
@ -22,6 +23,7 @@ public class CommunityInstance extends Instance {
@JsonSchema(description = "Information about the source from which the record has been collected")
private CfHbKeyValue collectedfrom;
@JsonInclude(JsonInclude.Include.NON_NULL)
public CfHbKeyValue getHostedby() {
return hostedby;
}
@ -30,6 +32,7 @@ public class CommunityInstance extends Instance {
this.hostedby = hostedby;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public CfHbKeyValue getCollectedfrom() {
return collectedfrom;
}

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.model.community;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Result;
@ -35,6 +36,7 @@ public class CommunityResult extends Result {
description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version")
private List<CommunityInstance> instance;
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<CommunityInstance> getInstance() {
return instance;
}
@ -43,6 +45,7 @@ public class CommunityResult extends Result {
this.instance = instance;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<CfHbKeyValue> getCollectedfrom() {
return collectedfrom;
}
@ -51,6 +54,7 @@ public class CommunityResult extends Result {
this.collectedfrom = collectedfrom;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Project> getProjects() {
return projects;
}
@ -59,6 +63,7 @@ public class CommunityResult extends Result {
this.projects = projects;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Context> getContext() {
return context;
}

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.model.graph;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
@ -12,6 +13,8 @@ import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class ResearchCommunity extends ResearchInitiative {
@JsonSchema(
description = "Only for research communities: the list of the subjects associated to the research community")
@JsonInclude(JsonInclude.Include.NON_NULL)
private List<String> subject;
public List<String> getSubject() {

View File

@ -1,20 +1,6 @@
{
"$schema":"http://json-schema.org/draft-07/schema#",
"definitions": {
"ControlledField": {
"type": "object",
"properties": {
"scheme": {
"type": "string"
},
"value": {
"type": "string"
}
},
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
}
},
"type":"object",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"accessrights": {
"type": "string",
@ -26,13 +12,14 @@
},
"citationguidelineurl": {
"type": "string",
"description":"The URL of the data source providing information on how to cite its items. As defined by re3data.org."
"description": "The URL of the data source providing information on how to cite its items. As defined by re3data.org."
},
"contenttypes": {
"description": "Types of content in the data source, as defined by OpenDOAR",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Types of content in the data source, as defined by OpenDOAR"
}
},
"databaseaccessrestriction": {
@ -40,14 +27,18 @@
"description": "Access restrictions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}"
},
"datasourcetype": {
"allOf": [
{
"$ref": "#/definitions/ControlledField"
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme used to express the value (i.e. pubsrepository::journal)"
},
{
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
"value": {
"type": "string",
"description": "The value expressed in the scheme (Journal)"
}
]
},
"description": "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies"
},
"datauploadrestriction": {
"type": "string",
@ -57,9 +48,7 @@
"type": "string",
"description": "The date of last validation against the OpenAIRE guidelines for the datasource records"
},
"description": {
"type": "string"
},
"description": {"type": "string"},
"englishname": {
"type": "string",
"description": "The English name of the datasource"
@ -71,14 +60,11 @@
"journal": {
"type": "object",
"properties": {
"conferencedate": {
"type": "string"
},
"conferenceplace": {
"type": "string"
},
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string"
"type": "string",
"description": "Edition of the journal or conference proceeding"
},
"ep": {
"type": "string",
@ -86,19 +72,14 @@
},
"iss": {
"type": "string",
"description": "Issue number"
},
"issnLinking": {
"type": "string"
},
"issnOnline": {
"type": "string"
},
"issnPrinted": {
"type": "string"
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string"
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
@ -115,15 +96,14 @@
"description": "The languages present in the data source's content, as defined by OpenDOAR.",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "The languages present in the data source's content, as defined by OpenDOAR."
}
},
"logourl": {
"type": "string"
},
"logourl": {"type": "string"},
"missionstatementurl": {
"type": "string",
"description":"The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
"description": "The URL of a mission statement describing the designated community of the data source. As defined by re3data.org"
},
"officialname": {
"type": "string",
@ -134,21 +114,29 @@
"description": "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu."
},
"originalId": {
"description": "Original identifiers for the datasource"
"description": "Original identifiers for the datasource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Original identifiers for the datasource"
}
},
"pid": {
"description": "Persistent identifiers of the datasource",
"type": "array",
"items": {
"allOf": [
{
"$ref": "#/definitions/ControlledField"
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme used to express the value "
},
"value": {
"type": "string",
"description": "The value expressed in the scheme "
}
]
},
"description": "Persistent identifiers of the datasource"
}
},
"pidsystems": {
@ -159,7 +147,8 @@
"description": "Policies of the data source, as defined in OpenDOAR.",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Policies of the data source, as defined in OpenDOAR."
}
},
"releaseenddate": {
@ -174,7 +163,8 @@
"description": "List of subjects associated to the datasource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "List of subjects associated to the datasource"
}
},
"uploadrights": {
@ -185,8 +175,6 @@
"type": "boolean",
"description": "As defined by re3data.org: 'yes' if the data source supports versioning, 'no' otherwise."
},
"websiteurl": {
"type": "string"
}
"websiteurl": {"type": "string"}
}
}

View File

@ -0,0 +1,563 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"CfHbKeyValue": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "the OpenAIRE identifier of the data source"
},
"value": {
"type": "string",
"description": "the name of the data source"
}
}
},
"Provenance": {
"type": "object",
"properties": {
"provenance": {"type": "string"},
"trust": {"type": "string"}
}
},
"ResultPid": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories"
},
"value": {
"type": "string",
"description": "The value expressed in the scheme (i.e. 10.1000/182)"
}
}
},
"Score": {
"type": "object",
"properties": {
"class": {"type": "string"},
"score": {"type": "string"}
}
}
},
"type": "object",
"properties": {
"author": {
"type": "array",
"items": {
"type": "object",
"properties": {
"fullname": {"type": "string"},
"name": {"type": "string"},
"pid": {
"type": "object",
"properties": {
"id": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'"
},
"value": {
"type": "string",
"description": "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)"
}
}
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "The reason why the pid was associated to the author"}
]
}
},
"description": "The author's persistent identifiers"
},
"rank": {"type": "integer"},
"surname": {"type": "string"}
}
}
},
"bestaccessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The openest of the access rights of this result."
},
"codeRepositoryUrl": {
"type": "string",
"description": "Only for results with type 'software': the URL to the repository with the source code"
},
"collectedfrom": {
"description": "Information about the sources from which the record has been collected",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the sources from which the record has been collected"}
]
}
},
"contactgroup": {
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource"
}
},
"contactperson": {
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource"
}
},
"container": {
"type": "object",
"properties": {
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string",
"description": "Edition of the journal or conference proceeding"
},
"ep": {
"type": "string",
"description": "End page"
},
"iss": {
"type": "string",
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
"description": "Start page"
},
"vol": {
"type": "string",
"description": "Volume"
}
},
"description": "Container has information about the conference or journal where the result has been presented or published"
},
"context": {
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "Code identifying the RI/RC"
},
"label": {
"type": "string",
"description": "Label of the RI/RC"
},
"provenance": {
"description": "Why this result is associated to the RI/RC.",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this result is associated to the RI/RC."}
]
}
}
},
"description": "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu"
}
},
"contributor": {
"description": "Contributors for the result",
"type": "array",
"items": {
"type": "string",
"description": "Contributors for the result"
}
},
"country": {
"description": "The list of countries associated to this result",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string",
"description": "The label for that code (i.e. Italy)"
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this result is associated to the country."}
]
}
},
"description": "The list of countries associated to this result"
}
},
"coverage": {
"type": "array",
"items": {"type": "string"}
},
"dateofcollection": {
"type": "string",
"description": "When OpenAIRE collected the record the last time"
},
"description": {
"type": "array",
"items": {"type": "string"}
},
"documentationUrl": {
"description": "Only for results with type 'software': URL to the software documentation",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'software': URL to the software documentation"
}
},
"embargoenddate": {
"type": "string",
"description": "Date when the embargo ends and this result turns Open Access"
},
"format": {
"type": "array",
"items": {"type": "string"}
},
"geolocation": {
"description": "Geolocation information",
"type": "array",
"items": {
"type": "object",
"properties": {
"box": {"type": "string"},
"place": {"type": "string"},
"point": {"type": "string"}
},
"description": "Geolocation information"
}
},
"id": {
"type": "string",
"description": "The OpenAIRE identifiers for this result"
},
"indicators": {
"type": "object",
"properties": {
"impactMeasures": {
"type": "object",
"properties": {
"impulse": {"$ref": "#/definitions/Score"},
"influence": {"$ref": "#/definitions/Score"},
"influence_alt": {"$ref": "#/definitions/Score"},
"popularity": {"$ref": "#/definitions/Score"},
"popularity_alt": {"$ref": "#/definitions/Score"}
},
"description": "The impact measures (i.e. popularity)"
},
"usageCounts": {
"type": "object",
"properties": {
"downloads": {"type": "string"},
"views": {"type": "string"}
},
"description": "The usage counts (i.e. downloads)"
}
},
"description": "Indicators computed for this result, for example UsageCount ones"
},
"instance": {
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version",
"type": "array",
"items": {
"type": "object",
"properties": {
"accessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"openAccessRoute": {
"type": "string",
"enum": [
"gold",
"green",
"hybrid",
"bronze"
]
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The accessRights for this materialization of the result"
},
"alternateIdentifier": {
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)",
"type": "array",
"items": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi"
},
"value": {
"type": "string",
"description": "The value expressed in the scheme"
}
},
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)"
}
},
"articleprocessingcharge": {
"type": "object",
"properties": {
"amount": {"type": "string"},
"currency": {"type": "string"}
},
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
},
"collectedfrom": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the source from which the record has been collected"}
]
},
"hostedby": {
"allOf": [
{"$ref": "#/definitions/CfHbKeyValue"},
{"description": "Information about the source from which the instance can be viewed or downloaded."}
]
},
"license": {"type": "string"},
"pid": {
"type": "array",
"items": {"$ref": "#/definitions/ResultPid"}
},
"publicationdate": {
"type": "string",
"description": "Date of the research product"
},
"refereed": {
"type": "string",
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)"
},
"type": {
"type": "string",
"description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
},
"url": {
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type": "array",
"items": {
"type": "string",
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. "
}
}
},
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version"
}
},
"language": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "alpha-3/ISO 639-2 code of the language"
},
"label": {
"type": "string",
"description": "Language label in English"
}
}
},
"lastupdatetimestamp": {
"type": "integer",
"description": "Timestamp of last update of the record in OpenAIRE"
},
"maintitle": {
"type": "string",
"description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"originalId": {
"description": "Identifiers of the record at the original sources",
"type": "array",
"items": {
"type": "string",
"description": "Identifiers of the record at the original sources"
}
},
"pid": {
"description": "Persistent identifiers of the result",
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/ResultPid"},
{"description": "Persistent identifiers of the result"}
]
}
},
"programmingLanguage": {
"type": "string",
"description": "Only for results with type 'software': the programming language"
},
"projects": {
"description": "List of projects (i.e. grants) that (co-)funded the production of the research results",
"type": "array",
"items": {
"type": "object",
"properties": {
"acronym": {
"type": "string",
"description": "The acronym of the project"
},
"code": {
"type": "string",
"description": "The grant agreement number"
},
"funder": {
"type": "object",
"properties": {
"fundingStream": {
"type": "string",
"description": "Stream of funding (e.g. for European Commission can be H2020 or FP7)"
},
"jurisdiction": {
"type": "string",
"description": "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)"
},
"name": {
"type": "string",
"description": "The name of the funder (European Commission)"
},
"shortName": {
"type": "string",
"description": "The short name of the funder (EC)"
}
},
"description": "Information about the funder funding the project"
},
"id": {
"type": "string",
"description": "The OpenAIRE id for the project"
},
"provenance": {"$ref": "#/definitions/Provenance"},
"title": {"type": "string"},
"validated": {
"type": "object",
"properties": {
"validatedByFunder": {"type": "boolean"},
"validationDate": {"type": "string"}
}
}
},
"description": "List of projects (i.e. grants) that (co-)funded the production of the research results"
}
},
"publicationdate": {
"type": "string",
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date."
},
"publisher": {
"type": "string",
"description": "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource."
},
"size": {
"type": "string",
"description": "Only for results with type 'dataset': the declared size of the dataset"
},
"source": {
"description": "See definition of Dublin Core field dc:source",
"type": "array",
"items": {
"type": "string",
"description": "See definition of Dublin Core field dc:source"
}
},
"subjects": {
"description": "Keywords associated to the result",
"type": "array",
"items": {
"type": "object",
"properties": {
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Why this subject is associated to the result"}
]
},
"subject": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)."
},
"value": {
"type": "string",
"description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."
}
}
}
},
"description": "Keywords associated to the result"
}
},
"subtitle": {
"type": "string",
"description": "Explanatory or alternative name by which a scientific result is known."
},
"tool": {
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product",
"type": "array",
"items": {
"type": "string",
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-use of the research product"
}
},
"type": {
"type": "string",
"description": "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)"
},
"version": {
"type": "string",
"description": "Version of the result"
}
}
}

View File

@ -6,7 +6,8 @@
"description": "Alternative names that identify the organisation",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Alternative names that identify the organisation"
}
},
"country": {
@ -14,25 +15,21 @@
"properties": {
"code": {
"type": "string",
"description": "The organisation country code"
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string",
"description": "The organisation country label"
"description": "The label for that code (i.e. Italy)"
}
},
"description": "The country of the organisation"
"description": "The organisation country"
},
"id": {
"type": "string",
"description": "The OpenAIRE id for the organisation"
},
"legalname": {
"type": "string"
},
"legalshortname": {
"type": "string"
},
"legalname": {"type": "string"},
"legalshortname": {"type": "string"},
"pid": {
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370",
"type": "array",
@ -45,13 +42,12 @@
},
"value": {
"type": "string",
"description": "the value in the schema (i.e. 0000000090326370)"
"description": "The value in the schema (i.e. 0000000090326370)"
}
}
},
"description": "Persistent identifiers for the organisation i.e. isni 0000000090326370"
}
},
"websiteurl": {
"type": "string"
}
"websiteurl": {"type": "string"}
}
}

View File

@ -1,30 +1,32 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"ControlledField": {
"type": "object",
"properties": {
"scheme": {
"type": "string"
},
"value": {
"type": "string"
}
},
"description": "To represent the information described by a scheme and a value in that scheme (i.e. pid)"
},
"Provenance": {
"type": "object",
"properties": {
"provenance": {
"provenance": {"type": "string"},
"trust": {"type": "string"}
}
},
"ResultPid": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The process that produced/provided the information"
"description": "The scheme of the persistent identifier for the result (i.e. doi). If the pid is here it means the information for the pid has been collected from an authority for that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories"
},
"trust": {
"type": "string"
"value": {
"type": "string",
"description": "The value expressed in the scheme (i.e. 10.1000/182)"
}
},
"description": "Indicates the process that produced (or provided) the information, and the trust associated to the information"
}
},
"Score": {
"type": "object",
"properties": {
"class": {"type": "string"},
"score": {"type": "string"}
}
}
},
"type": "object",
@ -34,55 +36,56 @@
"items": {
"type": "object",
"properties": {
"fullname": {
"type": "string"
},
"name": {
"type": "string"
},
"fullname": {"type": "string"},
"name": {"type": "string"},
"pid": {
"type": "object",
"properties": {
"id": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "The author's id and scheme. OpenAIRE currently supports 'ORCID'"}
]
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The author's pid scheme. OpenAIRE currently supports 'ORCID'"
},
"value": {
"type": "string",
"description": "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)"
}
}
},
"provenance": {
"allOf": [
{"$ref": "#/definitions/Provenance"},
{"description": "Provenance of author's pid"}
{"description": "The reason why the pid was associated to the author"}
]
}
}
},
"description": "The author's persistent identifiers"
},
"rank": {
"type": "integer"
},
"surname": {
"type": "string"
}
"rank": {"type": "integer"},
"surname": {"type": "string"}
}
}
},
"bestaccessright":{
"type":"object",
"properties":{
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"bestaccessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
},
"label": {
"type": "string",
"description": "Label for the access mode"
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
}
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
},
"description": "The openest of the access rights of this result."
},
"codeRepositoryUrl": {
"type": "string",
"description": "Only for results with type 'software': the URL to the repository with the source code"
@ -91,25 +94,23 @@
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource"
}
},
"contactperson": {
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource"
}
},
"container": {
"type": "object",
"properties": {
"conferencedate": {
"type": "string"
},
"conferenceplace": {
"type": "string"
},
"conferencedate": {"type": "string"},
"conferenceplace": {"type": "string"},
"edition": {
"type": "string",
"description": "Edition of the journal or conference proceeding"
@ -120,32 +121,28 @@
},
"iss": {
"type": "string",
"description": "Journal issue"
},
"issnLinking": {
"type": "string"
},
"issnOnline": {
"type": "string"
},
"issnPrinted": {
"type": "string"
"description": "Journal issue number"
},
"issnLinking": {"type": "string"},
"issnOnline": {"type": "string"},
"issnPrinted": {"type": "string"},
"name": {
"type": "string",
"description": "Name of the journal or conference"
},
"sp": {
"type": "string",
"description": "start page"
"description": "Start page"
},
"vol": {
"type": "string"
"type": "string",
"description": "Volume"
}
},
"description": "Container has information about the conference or journal where the result has been presented or published"
},
"contributor": {
"description": "Contributors for the result",
"type": "array",
"items": {
"type": "string",
@ -153,16 +150,18 @@
}
},
"country": {
"description": "The list of countries associated to this result",
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "ISO 3166-1 alpha-2 country code"
"description": "ISO 3166-1 alpha-2 country code (i.e. IT)"
},
"label": {
"type": "string"
"type": "string",
"description": "The label for that code (i.e. Italy)"
},
"provenance": {
"allOf": [
@ -170,14 +169,13 @@
{"description": "Why this result is associated to the country."}
]
}
}
},
"description": "The list of countries associated to this result"
}
},
"coverage": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"dateofcollection": {
"type": "string",
@ -185,15 +183,14 @@
},
"description": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"documentationUrl": {
"description": "Only for results with type 'software': URL to the software documentation",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'software': URL to the software documentation"
}
},
"embargoenddate": {
@ -202,9 +199,7 @@
},
"format": {
"type": "array",
"items": {
"type": "string"
}
"items": {"type": "string"}
},
"geolocation": {
"description": "Geolocation information",
@ -212,31 +207,51 @@
"items": {
"type": "object",
"properties": {
"box": {
"type": "string"
},
"place": {
"type": "string"
},
"point": {
"type": "string"
}
}
"box": {"type": "string"},
"place": {"type": "string"},
"point": {"type": "string"}
},
"description": "Geolocation information"
}
},
"id": {
"type": "string",
"description": "OpenAIRE Identifier"
"description": "The OpenAIRE identifiers for this result"
},
"instance":{
"description":"Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version",
"type":"array",
"items":{
"type":"object",
"properties":{
"accessright":{
"type":"object",
"properties":{
"indicators": {
"type": "object",
"properties": {
"impactMeasures": {
"type": "object",
"properties": {
"impulse": {"$ref": "#/definitions/Score"},
"influence": {"$ref": "#/definitions/Score"},
"influence_alt": {"$ref": "#/definitions/Score"},
"popularity": {"$ref": "#/definitions/Score"},
"popularity_alt": {"$ref": "#/definitions/Score"}
},
"description": "The impact measures (i.e. popularity)"
},
"usageCounts": {
"type": "object",
"properties": {
"downloads": {"type": "string"},
"views": {"type": "string"}
},
"description": "The usage counts (i.e. downloads)"
}
},
"description": "Indicators computed for this result, for example UsageCount ones"
},
"instance": {
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version",
"type": "array",
"items": {
"type": "object",
"properties": {
"accessright": {
"type": "object",
"properties": {
"code": {
"type": "string",
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
@ -245,102 +260,75 @@
"type": "string",
"description": "Label for the access mode"
},
"openAccessRoute":{
"type":"string",
"enum":[
"openAccessRoute": {
"type": "string",
"enum": [
"gold",
"green",
"hybrid",
"bronze"
],
"description":"The type of OpenAccess applied to the result"
]
},
"scheme": {
"type": "string",
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
}
}
},
"description": "The accessRights for this materialization of the result"
},
"alternateIdentifier":{
"type":"array",
"items":{
"allOf":[
{
"$ref":"#/definitions/ControlledField"
"alternateIdentifier": {
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)",
"type": "array",
"items": {
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides as identifier for the result also the doi"
},
{
"description":"All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs"
}
]
}
},
"articleprocessingcharge":{
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.",
"type":"object",
"properties":{
"amount":{
"type":"string"
},
"currency":{
"type":"string"
}
}
},
"license":{
"type":"string"
},
"measures":{
"type":"array",
"items":{
"type":"object",
"properties":{
"key":{
"type":"string",
"description":"The measure"
},
"value":{
"type":"string",
"description":"The value for the measure"
"value": {
"type": "string",
"description": "The value expressed in the scheme"
}
},
"description":"Measures computed for this instance, for example Bip!Finder ones"
"description": "All the identifiers other than pids forged by an authority for the pid type (i.e. Crossref for DOIs)"
}
},
"pid":{
"description":"The set of persistent identifiers associated to this instance that have been collected from an authority for the pid type (i.e. Crossref/Datacite for doi)",
"type":"array",
"items":{
"allOf":[
{
"$ref":"#/definitions/ControlledField"
},
{
"description":"The persistent identifier associated to the result"
}
]
}
},
"publicationdate":{
"type":"string",
"articleprocessingcharge": {
"type": "object",
"properties": {
"amount": {"type": "string"},
"currency": {"type": "string"}
},
"description": "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative."
},
"license": {"type": "string"},
"pid": {
"type": "array",
"items": {"$ref": "#/definitions/ResultPid"}
},
"publicationdate": {
"type": "string",
"description": "Date of the research product"
},
"refereed":{
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)",
"type":"string"
"refereed": {
"type": "string",
"description": "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)"
},
"type":{
"type":"string",
"description":"The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
"type": {
"type": "string",
"description": "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
},
"url":{
"description":"URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type":"array",
"items":{
"type":"string"
"url": {
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ",
"type": "array",
"items": {
"type": "string",
"description": "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. "
}
}
}
},
"description": "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version"
}
},
"language": {
@ -362,17 +350,14 @@
},
"maintitle": {
"type": "string",
"descriptio": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"subtitle": {
"type": "string",
"descriptio": "Explanatory or alternative name by which a scientific result is known."
"description": "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software."
},
"originalId": {
"description": "Identifiers of the record at the original sources",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Identifiers of the record at the original sources"
}
},
"pid": {
@ -380,8 +365,8 @@
"type": "array",
"items": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "scheme: list of available schemes are at https://api.openaire.eu/vocabularies/dnet:pid_types, value: the PID of the result. Note: the result will have a pid associated only if it was collected from an authority for that pid type. For example a doi will be among the pids for one result if the result metadata were collected from Crossref or Datacite. In all the other cases, the doi will be present among the alteranteIdentifiers for the result "}
{"$ref": "#/definitions/ResultPid"},
{"description": "Persistent identifiers of the result"}
]
}
},
@ -391,7 +376,7 @@
},
"publicationdate": {
"type": "string",
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because its the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date."
"description": "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because it\u2019s the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the \u201cwinning date\u201d because it becomes the most frequent well-formatted date."
},
"publisher": {
"type": "string",
@ -405,7 +390,8 @@
"description": "See definition of Dublin Core field dc:source",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "See definition of Dublin Core field dc:source"
}
},
"subjects": {
@ -421,19 +407,32 @@
]
},
"subject": {
"allOf": [
{"$ref": "#/definitions/ControlledField"},
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}
]
"type": "object",
"properties": {
"scheme": {
"type": "string",
"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies)."
},
"value": {
"type": "string",
"description": "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."
}
}
}
}
},
"description": "Keywords associated to the result"
}
},
"subtitle": {
"type": "string",
"description": "Explanatory or alternative name by which a scientific result is known."
},
"tool": {
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product",
"type": "array",
"items": {
"type": "string"
"type": "string",
"description": "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product"
}
},
"type": {

View File

@ -1,5 +1,6 @@
import java.io.IOException;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
@ -9,7 +10,10 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.ExecCreateSchemas;
import eu.dnetlib.dhp.oa.model.graph.Datasource;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Organization;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
//@Disabled
class GenerateJsonSchema {

View File

@ -53,7 +53,11 @@
<artifactId>dump-schema</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>io.github.classgraph</groupId>
<artifactId>classgraph</artifactId>
<version>4.8.71</version>
</dependency>
</dependencies>

View File

@ -27,6 +27,14 @@ public class Constants {
public static final String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";
public static final String USAGE_COUNT_DOWNLOADS = "downloads";
public static final String USAGE_COUNT_VIEWS = "views";
public static final String IMPACT_POPULARITY = "popularity";
public static final String IMPACT_POPULARITY_ALT = "popularity_alt";
public static final String IMPACT_INFLUENCE = "influence";
public static final String IMPACT_INFLUENCE_ALT = "influence_alt";
public static final String IMPACT_IMPULSE = "impulse";
static {
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);
ACCESS_RIGHTS_COAR_MAP.put("RESTRICTED", "c_16ec");

View File

@ -1,111 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
 * It fires the execution of the actual dump for result entities. If the dump is for RC/RI products it checks, for
 * each result, that it belongs to at least one RC/RI before "asking" for its mapping.
 */
public class DumpProducts implements Serializable {

	/**
	 * Removes the output directory and then dumps the entities found at the input path.
	 *
	 * @param isSparkSessionManaged passed to {@code runWithSparkSession} together with a fresh {@link SparkConf}
	 * @param inputPath             the path the input entities are read from
	 * @param outputPath            the path the dump is written to; it is removed before dumping
	 * @param communityMapPath      the path the community map is loaded from
	 * @param inputClazz            the class of the entities to read
	 * @param outputClazz           the class of the dumped results
	 * @param dumpType              the type of dump to perform
	 */
	public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
		Class<? extends OafEntity> inputClazz,
		Class<? extends Result> outputClazz,
		String dumpType) {

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				execDump(
					spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
			});
	}

	/**
	 * Reads the entities, maps each of them to its dump representation, drops the entities that were not mapped
	 * and writes the remaining ones as gzip-compressed JSON, overwriting the output path.
	 *
	 * @param spark            the Spark session used to read and write
	 * @param inputPath        the path the input entities are read from
	 * @param outputPath       the path the dump is written to
	 * @param communityMapPath the path the community map is loaded from
	 * @param inputClazz       the class of the entities to read
	 * @param outputClazz      the class of the dumped results
	 * @param dumpType         the type of dump to perform
	 */
	public static <I extends OafEntity, O extends Result> void execDump(
		SparkSession spark,
		String inputPath,
		String outputPath,
		String communityMapPath,
		Class<I> inputClazz,
		Class<O> outputClazz,
		String dumpType) {

		CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);

		Utils
			.readPath(spark, inputPath, inputClazz)
			.map((MapFunction<I, O>) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz))
			// execMap returns null for the entities that must not be dumped
			.filter((FilterFunction<O>) value -> value != null)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}

	/**
	 * Maps one entity to its dump representation.
	 *
	 * @return the mapped result, or {@code null} when the entity has no data info, is flagged as deleted by
	 *         inference or invisible, or (community dump only) is not associated to any community of the map
	 */
	private static <I extends OafEntity, O extends Result> O execMap(I value,
		CommunityMap communityMap,
		String dumpType) throws NoAvailableEntityTypeException, CardinalityTooHighException {

		DataInfo dataInfo = value.getDataInfo();
		// Boolean.TRUE.equals avoids a NullPointerException when one of the flags has not been set
		if (dataInfo == null
			|| Boolean.TRUE.equals(dataInfo.getDeletedbyinference())
			|| Boolean.TRUE.equals(dataInfo.getInvisible())) {
			return null;
		}

		// the cast to Result is evaluated only for community dumps, because of the short-circuit
		if (Constants.DUMPTYPE.COMMUNITY.getType().equals(dumpType)
			&& !belongsToCommunity((eu.dnetlib.dhp.schema.oaf.Result) value, communityMap.keySet())) {
			return null;
		}

		return (O) ResultMapper.map(value, communityMap, dumpType);
	}

	/**
	 * Checks whether at least one context of the result refers to one of the given communities. A context matches
	 * when its id, or the part of its id preceding the first "::", is among the community identifiers.
	 */
	private static boolean belongsToCommunity(eu.dnetlib.dhp.schema.oaf.Result result, Set<String> communities) {
		List<Context> contexts = result.getContext();
		if (contexts == null) {
			return false;
		}
		for (Context context : contexts) {
			String id = context == null ? null : context.getId();
			if (id == null) {
				// a context without identifier cannot match any community
				continue;
			}
			if (communities.contains(id)) {
				return true;
			}
			int separator = id.indexOf("::");
			if (separator >= 0 && communities.contains(id.substring(0, separator))) {
				return true;
			}
		}
		return false;
	}
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.oa.graph.dump.Constants.*;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
@ -15,8 +17,8 @@ import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.model.AccessRight;
import eu.dnetlib.dhp.oa.model.Author;
import eu.dnetlib.dhp.oa.model.GeoLocation;
import eu.dnetlib.dhp.oa.model.Indicator;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.Measure;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue;
@ -222,12 +224,32 @@ public class ResultMapper implements Serializable {
.ofNullable(input.getSubject())
.ifPresent(
value -> value
.stream()
.filter(
s -> !((s.getQualifier().getClassid().equalsIgnoreCase("fos") &&
Optional.ofNullable(s.getDataInfo()).isPresent()
&& Optional.ofNullable(s.getDataInfo().getProvenanceaction()).isPresent() &&
s.getDataInfo().getProvenanceaction().getClassid().equalsIgnoreCase("subject:fos"))
||
(s.getQualifier().getClassid().equalsIgnoreCase("sdg") &&
Optional.ofNullable(s.getDataInfo()).isPresent()
&& Optional.ofNullable(s.getDataInfo().getProvenanceaction()).isPresent() &&
s
.getDataInfo()
.getProvenanceaction()
.getClassid()
.equalsIgnoreCase("subject:sdg"))))
.forEach(s -> subjectList.add(getSubject(s))));
out.setSubjects(subjectList);
out.setType(input.getResulttype().getClassid());
if (Optional.ofNullable(input.getMeasures()).isPresent() && input.getMeasures().size() > 0) {
out.setIndicators(getIndicator(input.getMeasures()));
}
if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
((CommunityResult) out)
.setCollectedfrom(
@ -301,7 +323,7 @@ public class ResultMapper implements Serializable {
}
}
} catch (ClassCastException cce) {
return out;
return null;
}
}
@ -309,6 +331,79 @@ public class ResultMapper implements Serializable {
}
/**
 * Converts the measures attached to a graph result into the {@link Indicator} written in the dump.
 * <p>
 * Usage-count measures contribute the value of their first unit; impact measures contribute the
 * {@link Score} built from all of their units. The {@link UsageCounts} and {@link ImpactMeasures}
 * containers are created only when a matching measure is found, so an indicator with no usable
 * measure is returned with both parts unset.
 *
 * @param measures the measures of the input result; must not be null
 * @return a new indicator, never null
 */
private static Indicator getIndicator(List<eu.dnetlib.dhp.schema.oaf.Measure> measures) {
    Indicator i = new Indicator();
    for (eu.dnetlib.dhp.schema.oaf.Measure m : measures) {
        // A switch on a null String throws, and the usage counts read the first unit positionally:
        // measures lacking an id or any unit carry no information and are skipped.
        if (m == null || m.getId() == null || m.getUnit() == null || m.getUnit().isEmpty()) {
            continue;
        }
        switch (m.getId()) {
            case USAGE_COUNT_DOWNLOADS:
                usageCountsOf(i).setDownloads(m.getUnit().get(0).getValue());
                break;
            case USAGE_COUNT_VIEWS:
                usageCountsOf(i).setViews(m.getUnit().get(0).getValue());
                break;
            case IMPACT_POPULARITY:
                impactMeasuresOf(i).setPopularity(getScore(m.getUnit()));
                break;
            case IMPACT_POPULARITY_ALT:
                impactMeasuresOf(i).setPopularity_alt(getScore(m.getUnit()));
                break;
            case IMPACT_IMPULSE:
                impactMeasuresOf(i).setImpulse(getScore(m.getUnit()));
                break;
            case IMPACT_INFLUENCE:
                impactMeasuresOf(i).setInfluence(getScore(m.getUnit()));
                break;
            case IMPACT_INFLUENCE_ALT:
                impactMeasuresOf(i).setInfluence_alt(getScore(m.getUnit()));
                break;
            default:
                // measure ids that are not part of the dump model are ignored
                break;
        }
    }
    return i;
}

/** Returns the usage counts of the indicator, creating and attaching them on first use. */
private static UsageCounts usageCountsOf(Indicator indicator) {
    if (indicator.getUsageCounts() == null) {
        indicator.setUsageCounts(new UsageCounts());
    }
    return indicator.getUsageCounts();
}

/** Returns the impact measures of the indicator, creating and attaching them on first use. */
private static ImpactMeasures impactMeasuresOf(Indicator indicator) {
    if (indicator.getImpactMeasures() == null) {
        indicator.setImpactMeasures(new ImpactMeasures());
    }
    return indicator.getImpactMeasures();
}
/**
 * Builds a {@link Score} from the units of an impact measure.
 * <p>
 * The unit whose key is {@code "score"} provides the score value; the value of any other unit is
 * stored as the class of the score.
 *
 * @param unit the key/value units of the measure; a null list yields an empty score
 * @return a new score, never null
 */
private static Score getScore(List<KeyValue> unit) {
    Score s = new Score();
    if (unit == null) {
        return s;
    }
    for (KeyValue u : unit) {
        // constant-first comparison: a unit without a key must not raise a NullPointerException
        if ("score".equals(u.getKey())) {
            s.setScore(u.getValue());
        } else {
            s.setClazz(u.getValue());
        }
    }
    return s;
}
private static void addTypeSpecificInformation(Result out, eu.dnetlib.dhp.schema.oaf.Result input,
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort) throws NoAvailableEntityTypeException {
switch (ort.get().getClassid()) {
@ -453,14 +548,6 @@ public class ResultMapper implements Serializable {
Constants.COAR_ACCESS_RIGHT_SCHEMA));
Optional<List<eu.dnetlib.dhp.schema.oaf.Measure>> mes = Optional.ofNullable(i.getMeasures());
if (mes.isPresent()) {
List<Measure> measure = new ArrayList<>();
mes
.get()
.forEach(
m -> m.getUnit().forEach(u -> measure.add(Measure.newInstance(m.getId(), u.getValue()))));
instance.setMeasures(measure);
}
if (opAr.get().getOpenAccessRoute() != null) {
switch (opAr.get().getOpenAccessRoute()) {

View File

@ -10,6 +10,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
@ -18,9 +19,13 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
public class Utils {
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -74,4 +79,65 @@ public class Utils {
return new Gson().fromJson(sb.toString(), CommunityMap.class);
}
/**
 * Collects the identifiers of every entity found in the dump located under {@code inputPath}.
 * <p>
 * The four result folders (publication, dataset, software, otherresearchproduct) are read as
 * {@link GraphResult}; organization, project, datasource and communities_infrastructures are read
 * with their own dump classes. The identifiers are combined with {@code union}, so duplicates are
 * not removed.
 *
 * @param spark     the active Spark session
 * @param inputPath the root folder containing one sub-folder per entity type
 * @return the dataset of the identifiers of all the dumped entities
 */
public static Dataset<String> getEntitiesId(SparkSession spark, String inputPath) {
    MapFunction<GraphResult, String> resultId = GraphResult::getId;

    Dataset<String> publicationIds = Utils
        .readPath(spark, inputPath + "/publication", GraphResult.class)
        .map(resultId, Encoders.STRING());
    Dataset<String> datasetIds = Utils
        .readPath(spark, inputPath + "/dataset", GraphResult.class)
        .map(resultId, Encoders.STRING());
    Dataset<String> softwareIds = Utils
        .readPath(spark, inputPath + "/software", GraphResult.class)
        .map(resultId, Encoders.STRING());
    Dataset<String> otherProductIds = Utils
        .readPath(spark, inputPath + "/otherresearchproduct", GraphResult.class)
        .map(resultId, Encoders.STRING());
    Dataset<String> organizationIds = Utils
        .readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
        .map(
            (MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) org -> org.getId(),
            Encoders.STRING());
    Dataset<String> projectIds = Utils
        .readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
        .map(
            (MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) prj -> prj.getId(),
            Encoders.STRING());
    Dataset<String> datasourceIds = Utils
        .readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
        .map(
            (MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) ds -> ds.getId(),
            Encoders.STRING());
    Dataset<String> communityIds = Utils
        .readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class)
        .map((MapFunction<ResearchCommunity, String>) community -> community.getId(), Encoders.STRING());

    return publicationIds
        .union(datasetIds)
        .union(softwareIds)
        .union(otherProductIds)
        .union(organizationIds)
        .union(projectIds)
        .union(datasourceIds)
        .union(communityIds);
}
/**
 * Keeps only the relations whose source and target identifiers both appear in {@code entitiesIds}.
 * <p>
 * The filtering is done with two inner joins: relations are first keyed by source id and joined
 * with the identifiers, then the survivors are re-keyed by target id and joined again.
 *
 * @param spark       the active Spark session (not used by the current implementation)
 * @param relations   the relations to validate
 * @param entitiesIds the identifiers of the entities that are part of the dump
 * @return the relations having both ends among the given identifiers
 */
public static Dataset<Relation> getValidRelations(SparkSession spark, Dataset<Relation> relations,
    Dataset<String> entitiesIds) {
    MapFunction<Relation, Tuple2<String, Relation>> keyBySource = rel -> new Tuple2<>(
        rel.getSource().getId(), rel);
    MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>> rekeyByTarget = joined -> {
        Relation rel = joined._1()._2();
        return new Tuple2<>(rel.getTarget().getId(), rel);
    };
    MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation> dropKey = joined -> joined._1()._2();

    // step 1: the source of the relation must be a dumped entity
    Dataset<Tuple2<String, Relation>> bySource = relations
        .map(keyBySource, Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
    Dataset<Tuple2<String, Relation>> byTarget = bySource
        .joinWith(entitiesIds, bySource.col("_1").equalTo(entitiesIds.col("value")))
        .map(rekeyByTarget, Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));

    // step 2: the target of the relation must be a dumped entity as well
    return byTarget
        .joinWith(entitiesIds, byTarget.col("_1").equalTo(entitiesIds.col("value")))
        .map(dropKey, Encoders.bean(Relation.class));
}
}

View File

@ -9,10 +9,14 @@ import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Context;
@ -60,10 +64,13 @@ public class CommunitySplit implements Serializable {
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
communityProducts
.map(
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
.text(outputPath);
}

View File

@ -1,16 +1,35 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
@ -48,20 +67,79 @@ public class SparkDumpCommunityProducts implements Serializable {
String communityMapPath = parser.get("communityMapPath");
final String dumpType = Optional
.ofNullable(parser.get("dumpType"))
.map(String::valueOf)
.orElse("community");
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
DumpProducts dump = new DumpProducts();
SparkConf conf = new SparkConf();
dump
.run(
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
dumpType);
String finalCommunityMapPath = communityMapPath;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
resultDump(
spark, inputPath, outputPath, finalCommunityMapPath, inputClazz);
});
}
/**
 * Maps the results stored under {@code inputPath} to {@link CommunityResult} and writes them as
 * gzip-compressed JSON lines under {@code outputPath}, overwriting any previous content.
 * <p>
 * Results for which {@code execMap} returns null are dropped before serialization.
 *
 * @param spark            the active Spark session
 * @param inputPath        the location of the serialized input results
 * @param outputPath       the location where the dumped results are written
 * @param communityMapPath the location of the serialized community map
 * @param inputClazz       the class of the input results
 */
public static <I extends OafEntity> void resultDump(
    SparkSession spark,
    String inputPath,
    String outputPath,
    String communityMapPath,
    Class<I> inputClazz) {

    CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);

    Utils
        .readPath(spark, inputPath, inputClazz)
        .map(
            (MapFunction<I, CommunityResult>) value -> execMap(value, communityMap),
            Encoders.bean(CommunityResult.class))
        .filter((FilterFunction<CommunityResult>) Objects::nonNull)
        // Reuse the shared mapper instead of allocating a new ObjectMapper for every record.
        // It is reached through the static field, so it is not captured in the closure.
        .map(
            (MapFunction<CommunityResult, String>) r -> Utils.OBJECT_MAPPER.writeValueAsString(r),
            Encoders.STRING())
        .write()
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .text(outputPath);
}
/**
 * Maps a single input result to its community dump representation.
 * <p>
 * Returns null (so that the caller can filter the record out) when the result has no provenance
 * information, is deleted by inference or invisible, has no context, or none of its contexts refers
 * to a community of the map. A context matches either with its full id or with the part of the id
 * preceding the first {@code "::"}.
 *
 * @param value        the input entity; it is cast to {@link eu.dnetlib.dhp.schema.oaf.Result}
 * @param communityMap the communities the dump is produced for
 * @return the mapped result, or null when the input must not be dumped
 */
private static <I extends OafEntity, O extends eu.dnetlib.dhp.oa.model.Result> O execMap(I value,
    CommunityMap communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException {

    DataInfo dataInfo = value.getDataInfo();
    if (dataInfo == null
        || Boolean.TRUE.equals(dataInfo.getDeletedbyinference())
        || Boolean.TRUE.equals(dataInfo.getInvisible())) {
        return null;
    }

    List<Context> contexts = ((eu.dnetlib.dhp.schema.oaf.Result) value).getContext();
    if (contexts == null) {
        return null;
    }

    Set<String> communities = communityMap.keySet();
    // Only the existence of a matching community matters, so stop at the first hit instead of
    // materializing the list of matches. Null entries and null ids are ignored rather than failing.
    boolean relatedToCommunity = contexts
        .stream()
        .filter(Objects::nonNull)
        .map(Context::getId)
        .filter(Objects::nonNull)
        .anyMatch(
            id -> communities.contains(id)
                || (id.contains("::") && communities.contains(id.substring(0, id.indexOf("::")))));
    if (!relatedToCommunity) {
        return null;
    }

    return (O) ResultMapper.map(value, communityMap, Constants.DUMPTYPE.COMMUNITY.getType());
}
}

View File

@ -71,7 +71,7 @@ public class SparkUpdateProjectInfo implements Serializable {
String preparedInfoPath) {
Dataset<CommunityResult> result = Utils.readPath(spark, inputPath, CommunityResult.class);
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
result
Dataset<CommunityResult> tmp = result
.joinWith(
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
"left")
@ -79,11 +79,16 @@ public class SparkUpdateProjectInfo implements Serializable {
CommunityResult r = value._1();
Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
return r;
}, Encoders.bean(CommunityResult.class))
}, Encoders.bean(CommunityResult.class));
long count = tmp.count();
tmp
.map(
(MapFunction<CommunityResult, String>) cr -> new ObjectMapper().writeValueAsString(cr),
Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Append)
.json(outputPath);
.text(outputPath);
}

View File

@ -1,520 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.oa.model.graph.Funder;
import eu.dnetlib.dhp.oa.model.graph.Project;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
* Dumps of entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
* Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below
*/
public class DumpGraphEntities implements Serializable {
/**
 * Dumps the entities of type {@code inputClazz}, choosing the mapping from the id prefix that
 * {@code ModelSupport.idPrefixMap} associates with the class.
 * <p>
 * Prefix "50" is delegated to {@link DumpProducts} (producing {@link GraphResult}); prefixes "40",
 * "20" and "10" are mapped here as projects, organizations and datasources respectively, after
 * removing the output directory. Any other prefix is silently ignored.
 *
 * @param isSparkSessionManaged passed through to the Spark session support
 * @param inputPath             the location of the serialized input entities
 * @param outputPath            the location where the dumped entities are written
 * @param inputClazz            the class of the input entities
 * @param communityMapPath      the location of the community map (used only for prefix "50")
 */
public void run(Boolean isSparkSessionManaged,
String inputPath,
String outputPath,
Class<? extends OafEntity> inputClazz,
String communityMapPath) {
SparkConf conf = new SparkConf();
// NOTE(review): a class missing from idPrefixMap yields null and the switch would throw — confirm callers only pass mapped classes
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
case "50":
// results: handled by the shared product dump with the COMPLETE dump type
DumpProducts d = new DumpProducts();
d
.run(
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, GraphResult.class,
eu.dnetlib.dhp.oa.graph.dump.Constants.DUMPTYPE.COMPLETE.getType());
break;
case "40":
// projects
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
projectMap(spark, inputPath, outputPath, inputClazz);
});
break;
case "20":
// organizations
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
organizationMap(spark, inputPath, outputPath, inputClazz);
});
break;
case "10":
// datasources
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
datasourceMap(spark, inputPath, outputPath, inputClazz);
});
break;
}
}
/**
 * Reads the datasources stored under {@code inputPath}, maps them to the dump model and writes
 * them as gzip-compressed JSON under {@code outputPath}, overwriting any previous content.
 *
 * @param spark      the active Spark session
 * @param inputPath  the location of the serialized input datasources
 * @param outputPath the location where the dumped datasources are written
 * @param inputClazz the class of the input entities
 */
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
    Class<E> inputClazz) {
    Utils
        .readPath(spark, inputPath, inputClazz)
        .map(
            (MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
            Encoders.bean(Datasource.class))
        // explicit target type, as in organizationMap: a bare method reference can be ambiguous
        // between the FilterFunction and the Scala Function1 overloads of Dataset.filter
        .filter((FilterFunction<Datasource>) Objects::nonNull)
        .write()
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath);
}
/**
 * Reads the projects stored under {@code inputPath}, maps each of them with {@code mapProject} and
 * writes the outcome as gzip-compressed JSON under {@code outputPath}, replacing previous content.
 *
 * @param spark      the active Spark session
 * @param inputPath  the location of the serialized input projects
 * @param outputPath the location where the dumped projects are written
 * @param inputClazz the class of the input entities
 */
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
    Class<E> inputClazz) {
    MapFunction<E, Project> toDumpProject = entity -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) entity);

    Utils
        .readPath(spark, inputPath, inputClazz)
        .map(toDumpProject, Encoders.bean(Project.class))
        .write()
        .option("compression", "gzip")
        .mode(SaveMode.Overwrite)
        .json(outputPath);
}
/**
 * Maps a datasource of the internal graph model to the datasource of the dump model.
 * <p>
 * The id is always copied; every other property is copied only when the corresponding value of the
 * input is not null, so properties missing in the input stay unset in the output.
 *
 * @param d the datasource to be mapped
 * @return the mapped datasource, never null
 */
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
Datasource datasource = new Datasource();
datasource.setId(d.getId());
// null entries of the original identifiers are discarded
Optional
.ofNullable(d.getOriginalId())
.ifPresent(
oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
// each pid is reduced to the pair (class id of its qualifier, value)
Optional
.ofNullable(d.getPid())
.ifPresent(
pids -> datasource
.setPid(
pids
.stream()
.map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
Optional
.ofNullable(d.getDatasourcetype())
.ifPresent(
dsType -> datasource
.setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname())));
Optional
.ofNullable(d.getOpenairecompatibility())
.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
Optional
.ofNullable(d.getOfficialname())
.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
Optional
.ofNullable(d.getEnglishname())
.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
Optional
.ofNullable(d.getWebsiteurl())
.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
Optional
.ofNullable(d.getLogourl())
.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
Optional
.ofNullable(d.getDateofvalidation())
.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
Optional
.ofNullable(d.getDescription())
.ifPresent(dex -> datasource.setDescription(dex.getValue()));
Optional
.ofNullable(d.getSubjects())
.ifPresent(
sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
// NOTE(review): setPolicies is called here and again further down from getPolicies(); when both
// inputs are present the later call overwrites this value — confirm which source should win
Optional
.ofNullable(d.getOdpolicies())
.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
Optional
.ofNullable(d.getOdlanguages())
.ifPresent(
langs -> datasource
.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getOdcontenttypes())
.ifPresent(
ctypes -> datasource
.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getReleasestartdate())
.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
Optional
.ofNullable(d.getReleaseenddate())
.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
Optional
.ofNullable(d.getMissionstatementurl())
.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
// the database access type of the input is exposed as "accessrights"
Optional
.ofNullable(d.getDatabaseaccesstype())
.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
// the data upload type of the input is exposed as "uploadrights"
Optional
.ofNullable(d.getDatauploadtype())
.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
Optional
.ofNullable(d.getDatabaseaccessrestriction())
.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
Optional
.ofNullable(d.getDatauploadrestriction())
.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
Optional
.ofNullable(d.getVersioning())
.ifPresent(v -> datasource.setVersioning(v.getValue()));
Optional
.ofNullable(d.getCitationguidelineurl())
.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
Optional
.ofNullable(d.getPidsystems())
.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
Optional
.ofNullable(d.getCertificates())
.ifPresent(c -> datasource.setCertificates(c.getValue()));
// second assignment of the policies, replacing the one derived from getOdpolicies() above
Optional
.ofNullable(d.getPolicies())
.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getJournal())
.ifPresent(j -> datasource.setJournal(getContainer(j)));
return datasource;
}
/**
 * Copies the journal information into a {@link Container}.
 * <p>
 * Each property is transferred only when the journal provides a non-null value for it.
 *
 * @param j the journal to read from
 * @return a new container holding the non-null properties of the journal
 */
private static Container getContainer(Journal j) {
    Container container = new Container();

    Optional.ofNullable(j.getName()).ifPresent(container::setName);
    Optional.ofNullable(j.getIssnPrinted()).ifPresent(container::setIssnPrinted);
    Optional.ofNullable(j.getIssnOnline()).ifPresent(container::setIssnOnline);
    Optional.ofNullable(j.getIssnLinking()).ifPresent(container::setIssnLinking);
    Optional.ofNullable(j.getEp()).ifPresent(container::setEp);
    Optional.ofNullable(j.getIss()).ifPresent(container::setIss);
    Optional.ofNullable(j.getSp()).ifPresent(container::setSp);
    Optional.ofNullable(j.getVol()).ifPresent(container::setVol);
    Optional.ofNullable(j.getEdition()).ifPresent(container::setEdition);
    Optional.ofNullable(j.getConferencedate()).ifPresent(container::setConferencedate);
    Optional.ofNullable(j.getConferenceplace()).ifPresent(container::setConferenceplace);

    return container;
}
/**
 * Maps a project of the internal graph model to the project of the dump model.
 * <p>
 * Simple properties are copied when present. The open access mandate for publications is true when
 * either the "oamandatepublications" or the "ecsc39" flag equals {@code "true"}; the mandate for
 * datasets follows the "ecarticle29_3" flag. The grant is set only when both currency and funded
 * amount are available. Subjects, H2020 programmes and funding default to empty lists.
 *
 * @param p the project to be mapped
 * @return the mapped project, never null
 * @throws DocumentException when one of the funding trees is not well-formed XML
 */
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
    Project project = new Project();

    Optional.ofNullable(p.getId()).ifPresent(project::setId);
    Optional.ofNullable(p.getWebsiteurl()).ifPresent(w -> project.setWebsiteurl(w.getValue()));
    Optional.ofNullable(p.getCode()).ifPresent(code -> project.setCode(code.getValue()));
    Optional.ofNullable(p.getAcronym()).ifPresent(acronym -> project.setAcronym(acronym.getValue()));
    Optional.ofNullable(p.getTitle()).ifPresent(title -> project.setTitle(title.getValue()));
    Optional.ofNullable(p.getStartdate()).ifPresent(sdate -> project.setStartdate(sdate.getValue()));
    Optional.ofNullable(p.getEnddate()).ifPresent(edate -> project.setEnddate(edate.getValue()));
    Optional.ofNullable(p.getCallidentifier()).ifPresent(cide -> project.setCallidentifier(cide.getValue()));
    Optional.ofNullable(p.getKeywords()).ifPresent(key -> project.setKeywords(key.getValue()));

    // a flag field may exist without a value: isFlagSet treats that as "not set" instead of failing
    project
        .setOpenaccessmandateforpublications(
            isFlagSet(p.getOamandatepublications()) || isFlagSet(p.getEcsc39()));
    project.setOpenaccessmandatefordataset(isFlagSet(p.getEcarticle29_3()));

    project
        .setSubject(
            Optional
                .ofNullable(p.getSubjects())
                .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
                .orElseGet(ArrayList::new));

    Optional.ofNullable(p.getSummary()).ifPresent(summary -> project.setSummary(summary.getValue()));

    // the grant needs at least currency and funded amount; the total cost is optional
    Field<String> currency = p.getCurrency();
    Float fundedAmount = p.getFundedamount();
    Float totalCost = p.getTotalcost();
    if (currency != null && fundedAmount != null) {
        if (totalCost != null) {
            project.setGranted(Granted.newInstance(currency.getValue(), totalCost, fundedAmount));
        } else {
            project.setGranted(Granted.newInstance(currency.getValue(), fundedAmount));
        }
    }

    project
        .setH2020programme(
            Optional
                .ofNullable(p.getH2020classification())
                .map(
                    classification -> classification
                        .stream()
                        .map(
                            c -> Programme
                                .newInstance(
                                    c.getH2020Programme().getCode(), c.getH2020Programme().getDescription()))
                        .collect(Collectors.toList()))
                .orElseGet(ArrayList::new));

    // getFunder throws a checked exception, hence the plain loop instead of a stream
    List<Funder> funList = new ArrayList<>();
    List<Field<String>> fundingTrees = p.getFundingtree();
    if (fundingTrees != null) {
        for (Field<String> fundingtree : fundingTrees) {
            funList.add(getFunder(fundingtree.getValue()));
        }
    }
    project.setFunding(funList);

    return project;
}

/** Tells whether the given field is present and its value is exactly {@code "true"}. */
private static boolean isFlagSet(Field<String> flag) {
    return flag != null && "true".equals(flag.getValue());
}
/**
 * Parses the XML of a funding tree and extracts the funder together with its funding stream.
 * <p>
 * Short name, name and jurisdiction come from the {@code funder} element. The funding levels
 * ({@code funding_level_0}, {@code funding_level_1}, ...) are visited in increasing order until a
 * level with no element is found: the descriptions are joined with {@code " - "} and the id of
 * the last visited element is kept, stripped of the prefix ending at the first {@code "::"}.
 * The funding stream is set only when a non-empty id was found.
 *
 * @param fundingtree the XML serialization of the funding tree
 * @return the funder described by the funding tree
 * @throws DocumentException when the funding tree is not well-formed XML
 */
public static Funder getFunder(String fundingtree) throws DocumentException {
    Funder f = new Funder();
    final Document doc = new SAXReader().read(new StringReader(fundingtree));
    f.setShortName(((Node) doc.selectNodes("//funder/shortname").get(0)).getText());
    f.setName(((Node) doc.selectNodes("//funder/name").get(0)).getText());
    f.setJurisdiction(((Node) doc.selectNodes("//funder/jurisdiction").get(0)).getText());

    String id = "";
    StringJoiner description = new StringJoiner(" - ");
    int level = 0;
    List<?> nodes = doc.selectNodes("//funding_level_" + level);
    while (!nodes.isEmpty()) {
        for (Object current : nodes) {
            Node n = (Node) current;
            String rawId = ((Node) n.selectNodes("./id").get(0)).getText();
            // Strip the prefix only when the separator is there: indexOf returns -1 otherwise and
            // the unconditional substring would silently drop the first character of the id.
            int separator = rawId.indexOf("::");
            id = separator >= 0 ? rawId.substring(separator + 2) : rawId;
            description.add(((Node) n.selectNodes("./description").get(0)).getText());
        }
        level += 1;
        nodes = doc.selectNodes("//funding_level_" + level);
    }

    if (!id.isEmpty()) {
        Fundings fundings = new Fundings();
        fundings.setId(id);
        fundings.setDescription(description.toString().trim());
        f.setFunding_stream(fundings);
    }
    return f;
}
/**
 * Reads the organizations stored under {@code inputPath}, maps each of them with
 * {@code mapOrganization}, discards the ones mapped to null and writes the rest as gzip-compressed
 * JSON under {@code outputPath}, replacing previous content.
 *
 * @param spark      the active Spark session
 * @param inputPath  the location of the serialized input organizations
 * @param outputPath the location where the dumped organizations are written
 * @param inputClazz the class of the input entities
 */
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
    Class<E> inputClazz) {
    MapFunction<E, Organization> toDumpOrganization = entity -> mapOrganization(
        (eu.dnetlib.dhp.schema.oaf.Organization) entity);
    FilterFunction<Organization> isMapped = organization -> organization != null;

    Utils
        .readPath(spark, inputPath, inputClazz)
        .map(toDumpOrganization, Encoders.bean(Organization.class))
        .filter(isMapped)
        .write()
        .option("compression", "gzip")
        .mode(SaveMode.Overwrite)
        .json(outputPath);
}
/**
 * Maps an organization of the internal graph model to the organization of the dump model.
 * <p>
 * Organizations flagged as deleted by inference are not dumped. An organization without provenance
 * information, or without the flag, is treated as not deleted. The country is set only when its
 * class id differs from {@code Constants.UNKNOWN}.
 *
 * @param org the organization to be mapped
 * @return the mapped organization, or null when the input is deleted by inference
 */
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
    eu.dnetlib.dhp.schema.oaf.Organization org) {
    // null-safe check: unboxing a missing flag (or dereferencing missing provenance) would throw
    if (org.getDataInfo() != null && Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) {
        return null;
    }

    Organization organization = new Organization();

    Optional
        .ofNullable(org.getLegalshortname())
        .ifPresent(value -> organization.setLegalshortname(value.getValue()));

    Optional
        .ofNullable(org.getLegalname())
        .ifPresent(value -> organization.setLegalname(value.getValue()));

    Optional
        .ofNullable(org.getWebsiteurl())
        .ifPresent(value -> organization.setWebsiteurl(value.getValue()));

    Optional
        .ofNullable(org.getAlternativeNames())
        .ifPresent(
            value -> organization
                .setAlternativenames(
                    value
                        .stream()
                        .map(v -> v.getValue())
                        .collect(Collectors.toList())));

    Optional
        .ofNullable(org.getCountry())
        .ifPresent(
            value -> {
                if (!value.getClassid().equals(Constants.UNKNOWN)) {
                    organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname()));
                }
            });

    Optional
        .ofNullable(org.getId())
        .ifPresent(value -> organization.setId(value));

    // each pid is reduced to the pair (class id of its qualifier, value)
    Optional
        .ofNullable(org.getPid())
        .ifPresent(
            value -> organization
                .setPid(
                    value
                        .stream()
                        .map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
                        .collect(Collectors.toList())));

    return organization;
}
}

View File

@ -44,7 +44,6 @@ public class Extractor implements Serializable {
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extractRelationResult(
spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
});
@ -116,7 +115,7 @@ public class Extractor implements Serializable {
}, Encoders.bean(Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.mode(SaveMode.Append)
.json(outputPath);
}

View File

@ -14,15 +14,18 @@ import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem {
private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);
private ISLookUpService isLookUp;
private List<String> contextRelationResult;
@ -51,6 +54,7 @@ public class QueryInformationSystem {
String[] cSplit = c.split("@@");
cinfo.setId(cSplit[0]);
cinfo.setName(cSplit[1]);
log.info("community name : {}", cSplit[1]);
cinfo.setDescription(cSplit[2]);
if (!cSplit[3].trim().equals("")) {
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
@ -62,6 +66,28 @@ public class QueryInformationSystem {
}
/**
 * Queries the information system with {@code XQUERY_ENTITY} and converts every returned profile
 * into a {@link ContextInfo}.
 * <p>
 * Each profile is a string whose fields are separated by {@code "@@"}, in the order: id, name,
 * description, comma separated subjects, Zenodo community, type.
 *
 * @return the list of the contexts found, in the order returned by the information system
 * @throws ISLookUpException when the lookup service fails
 */
public List<ContextInfo> getContextInformation() throws ISLookUpException {
    List<ContextInfo> ret = new ArrayList<>();
    for (String c : isLookUp.quickSearchProfile(XQUERY_ENTITY)) {
        // Negative limit: keep trailing empty fields. Otherwise a profile ending with an empty
        // type or Zenodo community yields a shorter array and the positional reads below fail.
        String[] cSplit = c.split("@@", -1);
        ContextInfo cinfo = new ContextInfo();
        cinfo.setId(cSplit[0]);
        cinfo.setName(cSplit[1]);
        cinfo.setDescription(cSplit[2]);
        if (!cSplit[3].trim().isEmpty()) {
            cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
        }
        cinfo.setZenodocommunity(cSplit[4]);
        cinfo.setType(cSplit[5]);
        ret.add(cinfo);
    }
    return ret;
}
/**
 * @return the current value of the {@code contextRelationResult} field (the internal list itself, not a copy)
 */
public List<String> getContextRelationResult() {
return contextRelationResult;
}

View File

@ -8,16 +8,22 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import it.unimi.dsi.fastutil.objects.Object2BooleanMap;
import scala.Tuple2;
/**
* Reads all the entities of the same type (Relation / Results) and saves them in the same folder
@ -73,10 +79,12 @@ public class SparkCollectAndSave implements Serializable {
.union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class))
.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class))
.union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class))
.map(
(MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath + "/result");
.text(outputPath + "/result");
} else {
write(
Utils
@ -89,7 +97,7 @@ public class SparkCollectAndSave implements Serializable {
write(
Utils
.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class),
outputPath + "/otheresearchproduct");
outputPath + "/otherresearchproduct");
write(
Utils
.readPath(spark, inputPath + "/result/software", GraphResult.class),
@ -97,14 +105,27 @@ public class SparkCollectAndSave implements Serializable {
}
Utils
// Dataset<String> dumpedIds = Utils.getEntitiesId(spark, outputPath);
Dataset<Relation> relations = Utils
.readPath(spark, inputPath + "/relation/publication", Relation.class)
.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class));
Utils.getValidRelations(spark, relations, Utils.getEntitiesId(spark, outputPath))
// Dataset<Relation> relJoinSource = relations
// .joinWith(dumpedIds, relations.col("source.id").equalTo(dumpedIds.col("value")))
// .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
// Encoders.bean(Relation.class));
//
// relJoinSource
// .joinWith(dumpedIds, relJoinSource.col("target.id").equalTo(dumpedIds.col("value")))
// .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(),
// Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
@ -114,9 +135,11 @@ public class SparkCollectAndSave implements Serializable {
private static void write(Dataset<GraphResult> dataSet, String outputPath) {
dataSet
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
.text(outputPath);
}
}

View File

@ -1,21 +1,58 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.Container;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.oa.model.graph.Datasource;
import eu.dnetlib.dhp.oa.model.graph.Organization;
import eu.dnetlib.dhp.oa.model.graph.Project;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
/**
* Spark Job that fires the dump for the entities
*/
public class SparkDumpEntitiesJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);
public static final String COMPRESSION = "compression";
public static final String GZIP = "gzip";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@ -42,13 +79,542 @@ public class SparkDumpEntitiesJob implements Serializable {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String communityMapPath = parser.get("communityMapPath");
Optional<String> communityMap = Optional.ofNullable(parser.get("communityMapPath"));
String communityMapPath = null;
if (communityMap.isPresent())
communityMapPath = communityMap.get();
Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);
DumpGraphEntities dg = new DumpGraphEntities();
dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
run(isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz);
}
/**
 * Selects the dump routine from the id prefix registered in {@code ModelSupport.idPrefixMap}
 * for {@code inputClazz} and runs it inside a Spark session. In every handled case the
 * output directory is removed before the dump is written.
 *
 * @param isSparkSessionManaged passed through to {@code runWithSparkSession}
 * @param inputPath             path of the entities to dump
 * @param outputPath            path the dump is written to
 * @param communityMapPath      path of the community map; only used by the result dump
 * @param inputClazz            class of the entities to dump
 */
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
    Class<? extends OafEntity> inputClazz) {

    final SparkConf sparkConf = new SparkConf();
    final String entityPrefix = ModelSupport.idPrefixMap.get(inputClazz);

    switch (entityPrefix) {
        case "50":
            // dumped through resultDump
            runWithSparkSession(sparkConf, isSparkSessionManaged, session -> {
                Utils.removeOutputDir(session, outputPath);
                resultDump(session, inputPath, outputPath, communityMapPath, inputClazz);
            });
            break;
        case "40":
            // dumped through projectMap
            runWithSparkSession(sparkConf, isSparkSessionManaged, session -> {
                Utils.removeOutputDir(session, outputPath);
                projectMap(session, inputPath, outputPath, inputClazz);
            });
            break;
        case "20":
            // dumped through organizationMap
            runWithSparkSession(sparkConf, isSparkSessionManaged, session -> {
                Utils.removeOutputDir(session, outputPath);
                organizationMap(session, inputPath, outputPath, inputClazz);
            });
            break;
        case "10":
            // dumped through datasourceMap
            runWithSparkSession(sparkConf, isSparkSessionManaged, session -> {
                Utils.removeOutputDir(session, outputPath);
                datasourceMap(session, inputPath, outputPath, inputClazz);
            });
            break;
    }
}
/**
 * Reads the entities of type {@code inputClazz} from {@code inputPath}, maps each of them
 * through {@code execMap}, drops the ones mapped to {@code null} and writes the remaining
 * ones as gzip-compressed text, one Jackson-serialized JSON string per line.
 *
 * @param spark            the active Spark session
 * @param inputPath        path of the entities to dump
 * @param outputPath       path the dump is written to (overwritten)
 * @param communityMapPath path of the community map; when empty no community map is loaded
 * @param inputClazz       class of the entities to dump
 */
public static <I extends OafEntity> void resultDump(
    SparkSession spark,
    String inputPath,
    String outputPath,
    String communityMapPath,
    Class<I> inputClazz) {

    final CommunityMap communities = StringUtils.isEmpty(communityMapPath)
        ? null
        : Utils.getCommunityMap(spark, communityMapPath);

    Dataset<GraphResult> dumped = Utils
        .readPath(spark, inputPath, inputClazz)
        .map(
            (MapFunction<I, GraphResult>) entity -> execMap(entity, communities),
            Encoders.bean(GraphResult.class))
        .filter((FilterFunction<GraphResult>) Objects::nonNull);

    dumped
        .map(
            (MapFunction<GraphResult, String>) gr -> new ObjectMapper().writeValueAsString(gr),
            Encoders.STRING())
        .write()
        .option(COMPRESSION, GZIP)
        .mode(SaveMode.Overwrite)
        .text(outputPath);
}
/**
 * Maps a single entity to its dump representation.
 *
 * @param value        the entity to map
 * @param communityMap the community map handed to {@code ResultMapper.map}
 * @return {@code null} when the entity has no data info, or when it is flagged as deleted by
 *         inference or invisible; otherwise the outcome of {@code ResultMapper.map} for the
 *         complete dump type
 */
private static <I extends OafEntity, O extends Result> O execMap(I value,
    CommunityMap communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException {

    final DataInfo info = value.getDataInfo();
    if (info == null) {
        return null;
    }

    final boolean deleted = Boolean.TRUE.equals(info.getDeletedbyinference());
    final boolean invisible = Boolean.TRUE.equals(info.getInvisible());
    if (deleted || invisible) {
        return null;
    }

    return (O) ResultMapper.map(value, communityMap, Constants.DUMPTYPE.COMPLETE.getType());
}
/**
 * Reads the datasources from {@code inputPath}, maps each of them through
 * {@code mapDatasource}, drops the {@code null} outcomes and writes the rest as
 * gzip-compressed JSON.
 *
 * @param spark      the active Spark session
 * @param inputPath  path of the datasources to dump
 * @param outputPath path the dump is written to (overwritten)
 * @param inputClazz class used to read the input; every record is cast to
 *                   {@code eu.dnetlib.dhp.schema.oaf.Datasource}
 */
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
    Class<E> inputClazz) {
    Utils
        .readPath(spark, inputPath, inputClazz)
        .map(
            (MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
            Encoders.bean(Datasource.class))
        // explicit target type: a bare method reference is ambiguous between the
        // FilterFunction and the scala Function1 overloads of Dataset.filter
        .filter((FilterFunction<Datasource>) Objects::nonNull)
        .write()
        .mode(SaveMode.Overwrite)
        .option(COMPRESSION, GZIP)
        .json(outputPath);
}
/**
 * Reads the projects from {@code inputPath}, maps each of them through {@code mapProject}
 * and writes the outcome as gzip-compressed JSON.
 *
 * @param spark      the active Spark session
 * @param inputPath  path of the projects to dump
 * @param outputPath path the dump is written to (overwritten)
 * @param inputClazz class used to read the input; every record is cast to
 *                   {@code eu.dnetlib.dhp.schema.oaf.Project}
 */
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
    Class<E> inputClazz) {

    Dataset<Project> dumpedProjects = Utils
        .readPath(spark, inputPath, inputClazz)
        .map(
            (MapFunction<E, Project>) entity -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) entity),
            Encoders.bean(Project.class));

    dumpedProjects
        .write()
        .option(COMPRESSION, GZIP)
        .mode(SaveMode.Overwrite)
        .json(outputPath);
}
/**
 * Copies the information of a graph datasource into its dump representation. The id is
 * always copied; every other property is copied only when the corresponding getter on the
 * source does not return {@code null}.
 *
 * @param d the graph datasource
 * @return the dump representation of {@code d}
 */
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
    final Datasource dumped = new Datasource();

    dumped.setId(d.getId());

    if (d.getOriginalId() != null) {
        dumped.setOriginalId(d.getOriginalId().stream().filter(Objects::nonNull).collect(Collectors.toList()));
    }

    if (d.getPid() != null) {
        dumped
            .setPid(
                d
                    .getPid()
                    .stream()
                    .map(pid -> DatasourcePid.newInstance(pid.getQualifier().getClassid(), pid.getValue()))
                    .collect(Collectors.toList()));
    }

    if (d.getDatasourcetype() != null) {
        dumped
            .setDatasourcetype(
                DatasourceSchemeValue
                    .newInstance(d.getDatasourcetype().getClassid(), d.getDatasourcetype().getClassname()));
    }

    if (d.getOpenairecompatibility() != null) {
        dumped.setOpenairecompatibility(d.getOpenairecompatibility().getClassname());
    }

    if (d.getOfficialname() != null) {
        dumped.setOfficialname(d.getOfficialname().getValue());
    }

    if (d.getEnglishname() != null) {
        dumped.setEnglishname(d.getEnglishname().getValue());
    }

    if (d.getWebsiteurl() != null) {
        dumped.setWebsiteurl(d.getWebsiteurl().getValue());
    }

    if (d.getLogourl() != null) {
        dumped.setLogourl(d.getLogourl().getValue());
    }

    if (d.getDateofvalidation() != null) {
        dumped.setDateofvalidation(d.getDateofvalidation().getValue());
    }

    if (d.getDescription() != null) {
        dumped.setDescription(d.getDescription().getValue());
    }

    if (d.getSubjects() != null) {
        dumped.setSubjects(d.getSubjects().stream().map(subject -> subject.getValue()).collect(Collectors.toList()));
    }

    // NOTE(review): policies is set here from odpolicies and set again further down from
    // getPolicies(); when both are present the later assignment wins - confirm this is intended.
    if (d.getOdpolicies() != null) {
        dumped.setPolicies(Arrays.asList(d.getOdpolicies().getValue()));
    }

    if (d.getOdlanguages() != null) {
        dumped
            .setLanguages(d.getOdlanguages().stream().map(language -> language.getValue()).collect(Collectors.toList()));
    }

    if (d.getOdcontenttypes() != null) {
        dumped
            .setContenttypes(
                d.getOdcontenttypes().stream().map(contentType -> contentType.getValue()).collect(Collectors.toList()));
    }

    if (d.getReleasestartdate() != null) {
        dumped.setReleasestartdate(d.getReleasestartdate().getValue());
    }

    if (d.getReleaseenddate() != null) {
        dumped.setReleaseenddate(d.getReleaseenddate().getValue());
    }

    if (d.getMissionstatementurl() != null) {
        dumped.setMissionstatementurl(d.getMissionstatementurl().getValue());
    }

    if (d.getDatabaseaccesstype() != null) {
        dumped.setAccessrights(d.getDatabaseaccesstype().getValue());
    }

    if (d.getDatauploadtype() != null) {
        dumped.setUploadrights(d.getDatauploadtype().getValue());
    }

    if (d.getDatabaseaccessrestriction() != null) {
        dumped.setDatabaseaccessrestriction(d.getDatabaseaccessrestriction().getValue());
    }

    if (d.getDatauploadrestriction() != null) {
        dumped.setDatauploadrestriction(d.getDatauploadrestriction().getValue());
    }

    if (d.getVersioning() != null) {
        dumped.setVersioning(d.getVersioning().getValue());
    }

    if (d.getCitationguidelineurl() != null) {
        dumped.setCitationguidelineurl(d.getCitationguidelineurl().getValue());
    }

    if (d.getPidsystems() != null) {
        dumped.setPidsystems(d.getPidsystems().getValue());
    }

    if (d.getCertificates() != null) {
        dumped.setCertificates(d.getCertificates().getValue());
    }

    if (d.getPolicies() != null) {
        dumped.setPolicies(d.getPolicies().stream().map(policy -> policy.getValue()).collect(Collectors.toList()));
    }

    if (d.getJournal() != null) {
        dumped.setJournal(getContainer(d.getJournal()));
    }

    return dumped;
}
/**
 * Builds a {@link Container} out of a journal, copying every property for which the journal
 * getter returns a non-null value.
 *
 * @param j the journal to copy from
 * @return the container holding the non-null properties of {@code j}
 */
private static Container getContainer(Journal j) {
    final Container container = new Container();

    Optional.ofNullable(j.getName()).ifPresent(container::setName);
    Optional.ofNullable(j.getIssnPrinted()).ifPresent(container::setIssnPrinted);
    Optional.ofNullable(j.getIssnOnline()).ifPresent(container::setIssnOnline);
    Optional.ofNullable(j.getIssnLinking()).ifPresent(container::setIssnLinking);
    Optional.ofNullable(j.getEp()).ifPresent(container::setEp);
    Optional.ofNullable(j.getIss()).ifPresent(container::setIss);
    Optional.ofNullable(j.getSp()).ifPresent(container::setSp);
    Optional.ofNullable(j.getVol()).ifPresent(container::setVol);
    Optional.ofNullable(j.getEdition()).ifPresent(container::setEdition);
    Optional.ofNullable(j.getConferencedate()).ifPresent(container::setConferencedate);
    Optional.ofNullable(j.getConferenceplace()).ifPresent(container::setConferenceplace);

    return container;
}
/**
 * Copies the information of a graph project into its dump representation.
 * <p>
 * Scalar properties are copied only when the corresponding getter does not return
 * {@code null}. The open access mandate for publications is set when either
 * {@code oamandatepublications} or {@code ecsc39} carries the value {@code "true"}; the
 * mandate for datasets follows {@code ecarticle29_3}. The grant is set only when both the
 * currency and the funded amount are available. One {@link Funder} is produced for every
 * funding tree of the project.
 *
 * @param p the graph project
 * @return the dump representation of {@code p}
 * @throws DocumentException when a funding tree cannot be parsed
 */
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
    Project project = new Project();

    Optional
        .ofNullable(p.getId())
        .ifPresent(id -> project.setId(id));
    Optional
        .ofNullable(p.getWebsiteurl())
        .ifPresent(w -> project.setWebsiteurl(w.getValue()));
    Optional
        .ofNullable(p.getCode())
        .ifPresent(code -> project.setCode(code.getValue()));
    Optional
        .ofNullable(p.getAcronym())
        .ifPresent(acronym -> project.setAcronym(acronym.getValue()));
    Optional
        .ofNullable(p.getTitle())
        .ifPresent(title -> project.setTitle(title.getValue()));
    Optional
        .ofNullable(p.getStartdate())
        .ifPresent(sdate -> project.setStartdate(sdate.getValue()));
    Optional
        .ofNullable(p.getEnddate())
        .ifPresent(edate -> project.setEnddate(edate.getValue()));
    Optional
        .ofNullable(p.getCallidentifier())
        .ifPresent(cide -> project.setCallidentifier(cide.getValue()));
    Optional
        .ofNullable(p.getKeywords())
        .ifPresent(key -> project.setKeywords(key.getValue()));

    // The flags are compared null-safely: a field wrapper with no value counts as "not set"
    // instead of raising a NullPointerException.
    project
        .setOpenaccessmandateforpublications(
            isFlagSet(p.getOamandatepublications()) || isFlagSet(p.getEcsc39()));
    project.setOpenaccessmandatefordataset(isFlagSet(p.getEcarticle29_3()));

    project
        .setSubject(
            Optional
                .ofNullable(p.getSubjects())
                .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
                .orElse(new ArrayList<>()));

    Optional
        .ofNullable(p.getSummary())
        .ifPresent(summary -> project.setSummary(summary.getValue()));

    Field<String> currency = p.getCurrency();
    Float fundedAmount = p.getFundedamount();
    Float totalCost = p.getTotalcost();
    // No grant without a currency and a funded amount; the total cost is optional.
    if (currency != null && fundedAmount != null) {
        if (totalCost != null) {
            project.setGranted(Granted.newInstance(currency.getValue(), totalCost, fundedAmount));
        } else {
            project.setGranted(Granted.newInstance(currency.getValue(), fundedAmount));
        }
    }

    project
        .setH2020programme(
            Optional
                .ofNullable(p.getH2020classification())
                .map(
                    classification -> classification
                        .stream()
                        .map(
                            c -> Programme
                                .newInstance(
                                    c.getH2020Programme().getCode(), c.getH2020Programme().getDescription()))
                        .collect(Collectors.toList()))
                .orElse(new ArrayList<>()));

    List<Funder> funList = new ArrayList<>();
    List<Field<String>> fundingTrees = p.getFundingtree();
    if (fundingTrees != null) {
        for (Field<String> fundingtree : fundingTrees) {
            funList.add(getFunder(fundingtree.getValue()));
        }
    }
    project.setFunding(funList);

    return project;
}

/** Tells whether the field is present and its value is exactly the string {@code "true"}. */
private static boolean isFlagSet(Field<String> flag) {
    return flag != null && "true".equals(flag.getValue());
}
/**
 * Parses a funding tree (XML) and builds the corresponding {@link Funder}.
 * <p>
 * Short name, name and jurisdiction are read from the first node matching
 * {@code //funder/shortname}, {@code //funder/name} and {@code //funder/jurisdiction}. The
 * funding stream is derived from the {@code funding_level_N} nodes, visited for N = 0, 1, ...
 * until a level with no nodes is found: its id is the id of the last node visited (without
 * the part up to and including the first {@code "::"}), its description joins the
 * descriptions of all the visited nodes with {@code " - "}.
 *
 * @param fundingtree the XML of the funding tree
 * @return the funder; the funding stream is set only when a non-empty id was found
 * @throws DocumentException when the XML cannot be parsed
 */
public static Funder getFunder(String fundingtree) throws DocumentException {
    final Funder funder = new Funder();
    final Document tree = new SAXReader().read(new StringReader(fundingtree));

    funder.setShortName(((Node) tree.selectNodes("//funder/shortname").get(0)).getText());
    funder.setName(((Node) tree.selectNodes("//funder/name").get(0)).getText());
    funder.setJurisdiction(((Node) tree.selectNodes("//funder/jurisdiction").get(0)).getText());

    String streamId = "";
    final StringBuilder joinedDescriptions = new StringBuilder();

    for (int depth = 0;; depth++) {
        List<Node> levelNodes = tree.selectNodes("//funding_level_" + depth);
        if (levelNodes.isEmpty()) {
            break;
        }
        for (Node levelNode : levelNodes) {
            String rawId = ((Node) levelNode.selectNodes("./id").get(0)).getText();
            streamId = rawId.substring(rawId.indexOf("::") + 2);

            String levelDescription = ((Node) levelNode.selectNodes("./description").get(0)).getText();
            joinedDescriptions.append(levelDescription).append(" - ");
        }
    }

    final String description = joinedDescriptions.toString();
    if (!streamId.isEmpty()) {
        final Fundings stream = new Fundings();
        stream.setId(streamId);
        // strip the trailing " - " separator
        stream.setDescription(description.substring(0, description.length() - 3).trim());
        funder.setFunding_stream(stream);
    }

    return funder;
}
/**
 * Reads the organizations from {@code inputPath}, maps each of them through
 * {@code mapOrganization}, drops the ones mapped to {@code null} and writes the rest as
 * gzip-compressed JSON.
 *
 * @param spark      the active Spark session
 * @param inputPath  path of the organizations to dump
 * @param outputPath path the dump is written to (overwritten)
 * @param inputClazz class used to read the input; every record is cast to
 *                   {@code eu.dnetlib.dhp.schema.oaf.Organization}
 */
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
    Class<E> inputClazz) {

    Dataset<Organization> dumpedOrganizations = Utils
        .readPath(spark, inputPath, inputClazz)
        .map(
            (MapFunction<E, Organization>) entity -> mapOrganization(
                (eu.dnetlib.dhp.schema.oaf.Organization) entity),
            Encoders.bean(Organization.class))
        .filter((FilterFunction<Organization>) Objects::nonNull);

    dumpedOrganizations
        .write()
        .option(COMPRESSION, GZIP)
        .mode(SaveMode.Overwrite)
        .json(outputPath);
}
/**
 * Copies the information of a graph organization into its dump representation.
 *
 * @param org the graph organization
 * @return {@code null} when the organization is flagged as deleted by inference; otherwise
 *         the dump representation, where every property is copied only when the
 *         corresponding getter does not return {@code null} and the country is skipped when
 *         its class id equals the {@code UNKNOWN} constant
 */
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
    eu.dnetlib.dhp.schema.oaf.Organization org) {

    if (Boolean.TRUE.equals(org.getDataInfo().getDeletedbyinference())) {
        return null;
    }

    final Organization dumped = new Organization();

    if (org.getLegalshortname() != null) {
        dumped.setLegalshortname(org.getLegalshortname().getValue());
    }

    if (org.getLegalname() != null) {
        dumped.setLegalname(org.getLegalname().getValue());
    }

    if (org.getWebsiteurl() != null) {
        dumped.setWebsiteurl(org.getWebsiteurl().getValue());
    }

    if (org.getAlternativeNames() != null) {
        dumped
            .setAlternativenames(
                org
                    .getAlternativeNames()
                    .stream()
                    .map(alternative -> alternative.getValue())
                    .collect(Collectors.toList()));
    }

    if (org.getCountry() != null
        && !org.getCountry().getClassid().equals(eu.dnetlib.dhp.oa.graph.dump.complete.Constants.UNKNOWN)) {
        dumped
            .setCountry(
                eu.dnetlib.dhp.oa.model.Country
                    .newInstance(org.getCountry().getClassid(), org.getCountry().getClassname()));
    }

    if (org.getId() != null) {
        dumped.setId(org.getId());
    }

    if (org.getPid() != null) {
        dumped
            .setPid(
                org
                    .getPid()
                    .stream()
                    .map(pid -> OrganizationPid.newInstance(pid.getQualifier().getClassid(), pid.getValue()))
                    .collect(Collectors.toList()));
    }

    return dumped;
}
}

View File

@ -127,7 +127,7 @@ public class SparkDumpRelationJob implements Serializable {
}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.mode(SaveMode.Append)
.json(outputPath);
}

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import java.io.Serializable;
/**
 * Serializable bean associating the identifier of a duplicate with the identifier of its
 * master.
 *
 * @author miriam.baglioni
 * @Date 21/07/22
 */
public class MasterDuplicate implements Serializable {

    /** Identifier of the duplicate. */
    private String duplicate;

    /** Identifier of the master the duplicate refers to. */
    private String master;

    /**
     * @return the identifier of the duplicate
     */
    public String getDuplicate() {
        return this.duplicate;
    }

    /**
     * @param duplicate the identifier of the duplicate
     */
    public void setDuplicate(final String duplicate) {
        this.duplicate = duplicate;
    }

    /**
     * @return the identifier of the master
     */
    public String getMaster() {
        return this.master;
    }

    /**
     * @param master the identifier of the master
     */
    public void setMaster(final String master) {
        this.master = master;
    }
}

View File

@ -0,0 +1,97 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.DBCursor;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
/**
 * Reads the (master, duplicate) pairs of the datasources from the database and stores them
 * on HDFS, one JSON-serialized {@link MasterDuplicate} per line.
 */
public class ReadMasterDuplicateFromDB {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static final String QUERY = "SELECT id as master, duplicate FROM dsm_dedup_services; ";

    /**
     * Entry point. The arguments are described in {@code datasourcemaster_parameters.json}:
     * connection parameters for the database, the HDFS name node and the HDFS path of the
     * file to write.
     *
     * @param args the command line arguments
     * @throws Exception when the arguments cannot be parsed or the file cannot be written
     */
    public static void main(final String[] args) throws Exception {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(
            IOUtils
                .toString(
                    ReadMasterDuplicateFromDB.class
                        .getResourceAsStream(
                            "/eu/dnetlib/dhp/oa/graph/dump/datasourcemaster_parameters.json")));

        parser.parseArgument(args);

        final String dbUrl = parser.get("postgresUrl");
        final String dbUser = parser.get("postgresUser");
        final String dbPassword = parser.get("postgresPassword");
        final String hdfsPath = parser.get("hdfsPath");
        final String hdfsNameNode = parser.get("hdfsNameNode");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsNameNode);
        FileSystem fileSystem = FileSystem.get(conf);
        Path hdfsWritePath = new Path(hdfsPath);
        FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsWritePath);

        execute(dbUrl, dbUser, dbPassword, fsDataOutputStream);
    }

    /**
     * Runs the query and writes one line per row on the given stream. The stream is always
     * closed before returning, also when the database client cannot be created.
     *
     * @param dbUrl      url of the database
     * @param dbUser     user of the database
     * @param dbPassword password of the database user
     * @param fos        the stream the lines are written to
     * @throws IOException when closing the writer or the database client fails; it is
     *                     propagated so that the caller does not report success on an
     *                     incomplete file
     */
    private static void execute(String dbUrl, String dbUser, String dbPassword, FSDataOutputStream fos)
        throws IOException {
        // the writer is opened first, so the output stream is released even if the
        // construction of the database client fails
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
            DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
            dbClient.processResults(QUERY, rs -> writeMap(datasourceMasterMap(rs), writer));
        }
    }

    /**
     * Builds the master/duplicate association for the current row of the result set. Both
     * identifiers are turned into OpenAIRE identifiers with prefix 10.
     *
     * @param rs the result set positioned on the row to read
     * @return the association read from the row
     * @throws RuntimeException wrapping the {@link SQLException} raised while reading the row
     */
    public static MasterDuplicate datasourceMasterMap(ResultSet rs) {
        try {
            MasterDuplicate dm = new MasterDuplicate();
            String duplicate = rs.getString("duplicate");
            dm.setDuplicate(OafMapperUtils.createOpenaireId(10, duplicate, true));
            String master = rs.getString("master");
            dm.setMaster(OafMapperUtils.createOpenaireId(10, master, true));

            return dm;
        } catch (final SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes the association as a JSON string followed by a line separator.
     *
     * @param dm     the association to write
     * @param writer the writer to write to
     * @throws RuntimeException wrapping the {@link IOException} raised while writing
     */
    protected static void writeMap(final MasterDuplicate dm, BufferedWriter writer) {
        try {
            writer.write(OBJECT_MAPPER.writeValueAsString(dm));
            writer.newLine();
        } catch (final IOException e) {
            throw new RuntimeException(e);
        }
    }
}

View File

@ -0,0 +1,222 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.Param;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.SelectionConstraints;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.schema.oaf.*;
/**
* Spark Job that fires the dump for the entities
*/
public class SparkDumpResult implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkDumpResult.class);
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
public static final String COMPRESSION = "compression";
public static final String GZIP = "gzip";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SparkDumpResult.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String resultType = parser.get("resultType");
log.info("resultType: {}", resultType);
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final String masterDuplicatePath = parser.get("masterDuplicatePath");
log.info("masterDuplicatePath: {}", masterDuplicatePath);
Optional<String> pathString = Optional.ofNullable(parser.get("pathMap"));
HashMap<String, String> pathMap = null;
if (pathString.isPresent()) {
pathMap = new Gson().fromJson(parser.get("pathMap"), HashMap.class);
log.info("pathMap: {}", new Gson().toJson(pathMap));
}
final Optional<String> parameter = Optional.ofNullable(parser.get("selectionCriteria"));
SelectionConstraints selectionConstraints = null;
if (parameter.isPresent()) {
selectionConstraints = new ObjectMapper().readValue(parameter.get(), SelectionConstraints.class);
selectionConstraints.addResolver(resolver);
}
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz = (Class<? extends eu.dnetlib.dhp.schema.oaf.Result>) Class
.forName(resultClassName);
run(
isSparkSessionManaged, inputPath, outputPath, pathMap, selectionConstraints, inputClazz,
resultType, masterDuplicatePath);
}
private static void run(Boolean isSparkSessionManaged, String inputPath, String outputPath,
HashMap<String, String> pathMap, SelectionConstraints selectionConstraints,
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz, String resultType, String masterDuplicatePath) {
SparkConf conf = new SparkConf();
HashMap<String, String> finalPathMap = pathMap;
SelectionConstraints finalSelectionConstraints = selectionConstraints;
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath + "/original/" + resultType);
Utils.removeOutputDir(spark, outputPath + "/dump/" + resultType);
resultDump(
spark, inputPath, outputPath, inputClazz, finalPathMap,
finalSelectionConstraints, resultType, masterDuplicatePath);
});
}
public static <I extends eu.dnetlib.dhp.schema.oaf.Result> void resultDump(
SparkSession spark,
String inputPath,
String outputPath,
Class<I> inputClazz,
Map<String, String> pathMap,
SelectionConstraints selectionConstraints,
String resultType,
String masterDuplicatePath) {
List<MasterDuplicate> masterDuplicateList = Utils
.readPath(spark, masterDuplicatePath, MasterDuplicate.class)
.collectAsList();
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<I, I>) value -> filterResult(
value, pathMap, selectionConstraints, inputClazz, masterDuplicateList, resultType),
Encoders.bean(inputClazz))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/original/" + resultType);
Utils
.readPath(spark, outputPath + "/original/" + resultType, inputClazz)
.map(
(MapFunction<I, GraphResult>) value -> (GraphResult) ResultMapper
.map(
value, null,
Constants.DUMPTYPE.COMPLETE.getType()),
Encoders.bean(GraphResult.class))
.map((MapFunction<GraphResult, String>) r -> new ObjectMapper().writeValueAsString(r), Encoders.STRING())
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
.text(outputPath + "/dump/" + resultType);
}
private static <I extends eu.dnetlib.dhp.schema.oaf.Result> I filterResult(I value, Map<String, String> pathMap,
SelectionConstraints selectionConstraints, Class<I> inputClazz, List<MasterDuplicate> masterDuplicateList,
String resultType) {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
if (Boolean.FALSE.equals(odInfo.isPresent())) {
return null;
}
if (Boolean.TRUE.equals(odInfo.get().getDeletedbyinference())
|| Boolean.TRUE.equals(odInfo.get().getInvisible())) {
return null;
}
if (!isCompatible(value.getResulttype().getClassid(), resultType)) {
return null;
}
if (selectionConstraints != null) {
Param param = new Param();
String json = new Gson().toJson(value, inputClazz);
DocumentContext jsonContext = JsonPath.parse(json);
for (String key : pathMap.keySet()) {
try {
param.insert(key, jsonContext.read(pathMap.get(key)));
} catch (com.jayway.jsonpath.PathNotFoundException e) {
param.insert(key, new ArrayList<>());
}
}
if (!selectionConstraints.verifyCriteria(param)) {
return null;
}
}
if (Optional.ofNullable(value.getCollectedfrom()).isPresent())
value.getCollectedfrom().forEach(cf -> update(cf, masterDuplicateList));
if (Optional.ofNullable(value.getInstance()).isPresent()) {
value.getInstance().forEach(i -> {
update(i.getCollectedfrom(), masterDuplicateList);
update(i.getHostedby(), masterDuplicateList);
});
}
return value;
}
private static boolean isCompatible(String classid, String resultType) {
return (classid.equals(resultType) || (classid.equals("other") && resultType.equals("otherresearchproduct")));
}
private static void update(KeyValue kv, List<MasterDuplicate> masterDuplicateList) {
for (MasterDuplicate md : masterDuplicateList) {
if (md.getDuplicate().equals(kv.getKey())) {
kv.setKey(md.getMaster());
return;
}
}
}
}

View File

@ -0,0 +1,328 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import javax.print.attribute.standard.MediaSize;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.*;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Function1;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 11/11/22
*/
public class SparkSelectSubset implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkSelectSubset.class);
/**
 * Entry point. The arguments are described in {@code input_relationdump_parameters.json}.
 * The optional {@code removeSet} argument is a semicolon-separated list of values; it is
 * split into a set that is handed to {@code selectSubset}.
 *
 * @param args the command line arguments
 * @throws Exception when the arguments cannot be parsed or the job fails
 */
public static void main(String[] args) throws Exception {
    final ArgumentApplicationParser parser = new ArgumentApplicationParser(
        IOUtils
            .toString(
                SparkSelectSubset.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json")));
    parser.parseArgument(args);

    final Boolean isSparkSessionManaged = Optional
        .ofNullable(parser.get("isSparkSessionManaged"))
        .map(Boolean::valueOf)
        .orElse(Boolean.TRUE);
    log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

    final String inputPath = parser.get("sourcePath");
    log.info("inputPath: {}", inputPath);

    final String outputPath = parser.get("outputPath");
    log.info("outputPath: {}", outputPath);

    final Set<String> removeSet = new HashSet<>();
    Optional
        .ofNullable(parser.get("removeSet"))
        .ifPresent(list -> Collections.addAll(removeSet, list.split(";")));

    runWithSparkSession(
        new SparkConf(),
        isSparkSessionManaged,
        spark -> selectSubset(spark, inputPath, outputPath, removeSet));
}
/**
 * Completes the subset stored under {@code outputPath + "/original"} with the relations and the non-result
 * entities (organizations, datasources, projects) connected to the results already selected there.
 * Everything is written as gzip-compressed JSON.
 * <p>
 * The steps are order dependent, because later steps read back what earlier steps wrote:
 * <ol>
 * <li>result -> result relations are written (Overwrite) to {@code /original/relation};</li>
 * <li>result -> other and other -> result relations are appended to the same path, where "other" is an
 * organization, project or datasource of the input graph not deleted by inference;</li>
 * <li>organizations, datasources and projects appearing as source or target of the relations written so far
 * are written (Overwrite) to their own {@code /original/...} folder;</li>
 * <li>datasources referenced as hostedby or collectedfrom by the selected results, and not already written
 * in the previous step, are appended to {@code /original/datasource};</li>
 * <li>relations whose source and target are both among the selected projects, organizations and datasources
 * are appended to {@code /original/relation}.</li>
 * </ol>
 *
 * @param spark the Spark session used to read and write the datasets
 * @param inputPath root of the input graph (reads {@code /relation}, {@code /organization}, {@code /project},
 *            {@code /datasource})
 * @param outputPath root of the subset; the selected results are read from
 *            {@code /original/{publication,dataset,software,otherresearchproduct}}
 * @param removeSet relation classes ({@code relClass}) that must not be part of the subset
 */
private static void selectSubset(SparkSession spark, String inputPath, String outputPath, Set<String> removeSet) {
    // relations not deleted by inference and whose relClass is not explicitly excluded
    Dataset<Relation> relation = Utils
        .readPath(spark, inputPath + "/relation", Relation.class)
        .filter(
            (FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference()
                && !removeSet.contains(r.getRelClass()));

    // identifiers of the results already selected for the subset
    Dataset<String> resultIds = Utils
        .readPath(spark, outputPath + "/original/publication", Publication.class)
        .map((MapFunction<Publication, String>) p -> p.getId(), Encoders.STRING())
        .union(
            Utils
                .readPath(spark, outputPath + "/original/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class)
                .map((MapFunction<eu.dnetlib.dhp.schema.oaf.Dataset, String>) d -> d.getId(), Encoders.STRING()))
        .union(
            Utils
                .readPath(spark, outputPath + "/original/software", Software.class)
                .map((MapFunction<Software, String>) s -> s.getId(), Encoders.STRING()))
        .union(
            Utils
                .readPath(spark, outputPath + "/original/otherresearchproduct", OtherResearchProduct.class)
                .map((MapFunction<OtherResearchProduct, String>) o -> o.getId(), Encoders.STRING()));

    // relations whose source is a selected result: computed once and shared by the
    // result -> result and the result -> other selections below
    Dataset<Relation> relFromResult = relation
        .joinWith(resultIds, relation.col("source").equalTo(resultIds.col("value")))
        .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));

    // select result -> result relations
    relFromResult
        .joinWith(resultIds, relFromResult.col("target").equalTo(resultIds.col("value")))
        .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
        .write()
        .option("compression", "gzip")
        .mode(SaveMode.Overwrite)
        .json(outputPath + "/original/relation");

    // save the relations among other entities and the results
    Dataset<String> otherIds = Utils
        .readPath(spark, inputPath + "/organization", Organization.class)
        .filter((FilterFunction<Organization>) e -> !e.getDataInfo().getDeletedbyinference())
        .map((MapFunction<Organization, String>) o -> o.getId(), Encoders.STRING())
        .union(
            Utils
                .readPath(spark, inputPath + "/project", Project.class)
                .filter((FilterFunction<Project>) e -> !e.getDataInfo().getDeletedbyinference())
                .map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING()))
        .union(
            Utils
                .readPath(spark, inputPath + "/datasource", Datasource.class)
                .filter((FilterFunction<Datasource>) e -> !e.getDataInfo().getDeletedbyinference())
                .map((MapFunction<Datasource, String>) d -> d.getId(), Encoders.STRING()));

    // result -> other
    relFromResult
        .joinWith(otherIds, relFromResult.col("target").equalTo(otherIds.col("value")))
        .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
        .write()
        .mode(SaveMode.Append)
        .option("compression", "gzip")
        .json(outputPath + "/original/relation");

    // other -> result
    Dataset<Relation> relOtherResult = relation
        .joinWith(resultIds, relation.col("target").equalTo(resultIds.col("value")))
        .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
    relOtherResult
        .joinWith(otherIds, relOtherResult.col("source").equalTo(otherIds.col("value")))
        .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
        .write()
        .mode(SaveMode.Append)
        .option("compression", "gzip")
        .json(outputPath + "/original/relation");

    // every identifier appearing as source or target of the relations written so far
    Dataset<String> relAll = Utils
        .readPath(spark, outputPath + "/original/relation", Relation.class)
        .flatMap(
            (FlatMapFunction<Relation, String>) r -> Arrays.asList(r.getSource(), r.getTarget()).iterator(),
            Encoders.STRING())
        .distinct();

    // Save the entities in relations with at least one result
    Dataset<Organization> organization = Utils
        .readPath(spark, inputPath + "/organization", Organization.class)
        .filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference());
    organization
        .joinWith(relAll, organization.col("id").equalTo(relAll.col("value")))
        .map(
            (MapFunction<Tuple2<Organization, String>, Organization>) t2 -> t2._1(),
            Encoders.bean(Organization.class))
        .groupByKey((MapFunction<Organization, String>) v -> v.getId(), Encoders.STRING())
        .mapGroups(
            (MapGroupsFunction<String, Organization, Organization>) (k, it) -> it.next(),
            Encoders.bean(Organization.class))
        .write()
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath + "/original/organization");

    Dataset<Datasource> datasource = Utils
        .readPath(spark, inputPath + "/datasource", Datasource.class)
        .filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference());
    datasource
        .joinWith(relAll, datasource.col("id").equalTo(relAll.col("value")))
        .map((MapFunction<Tuple2<Datasource, String>, Datasource>) t2 -> t2._1(), Encoders.bean(Datasource.class))
        .groupByKey((MapFunction<Datasource, String>) v -> v.getId(), Encoders.STRING())
        .mapGroups(
            (MapGroupsFunction<String, Datasource, Datasource>) (k, it) -> it.next(),
            Encoders.bean(Datasource.class))
        .write()
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath + "/original/datasource");

    // plus we need to dump all the datasource in collectedfrom hostedby
    Dataset<String> cfhbOrig = Utils
        .readPath(spark, outputPath + "/original/publication", Publication.class)
        .flatMap(
            (FlatMapFunction<Publication, String>) p -> {
                List<String> ret = new ArrayList<>();
                p.getInstance().stream().forEach(i -> {
                    if (Optional.ofNullable(i.getHostedby()).isPresent()
                        && Optional.ofNullable(i.getHostedby().getKey()).isPresent())
                        ret.add(i.getHostedby().getKey());
                });
                if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
                    p.getCollectedfrom().stream().forEach(cf -> {
                        if (Optional.ofNullable(cf.getKey()).isPresent())
                            ret.add(cf.getKey());
                    });
                }
                return ret.iterator();
            }, Encoders.STRING())
        .union(
            Utils
                .readPath(spark, outputPath + "/original/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class)
                .flatMap(
                    (FlatMapFunction<eu.dnetlib.dhp.schema.oaf.Dataset, String>) p -> {
                        List<String> ret = new ArrayList<>();
                        p.getInstance().stream().forEach(i -> {
                            if (Optional.ofNullable(i.getHostedby()).isPresent()
                                && Optional.ofNullable(i.getHostedby().getKey()).isPresent())
                                ret.add(i.getHostedby().getKey());
                        });
                        if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
                            p.getCollectedfrom().stream().forEach(cf -> {
                                if (Optional.ofNullable(cf.getKey()).isPresent())
                                    ret.add(cf.getKey());
                            });
                        }
                        return ret.iterator();
                    }, Encoders.STRING()))
        .union(
            Utils
                .readPath(spark, outputPath + "/original/software", Software.class)
                .flatMap(
                    (FlatMapFunction<Software, String>) p -> {
                        List<String> ret = new ArrayList<>();
                        p.getInstance().stream().forEach(i -> {
                            if (Optional.ofNullable(i.getHostedby()).isPresent()
                                && Optional.ofNullable(i.getHostedby().getKey()).isPresent())
                                ret.add(i.getHostedby().getKey());
                        });
                        if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
                            p.getCollectedfrom().stream().forEach(cf -> {
                                if (Optional.ofNullable(cf.getKey()).isPresent())
                                    ret.add(cf.getKey());
                            });
                        }
                        return ret.iterator();
                    }, Encoders.STRING()))
        .union(
            Utils
                .readPath(spark, outputPath + "/original/otherresearchproduct", OtherResearchProduct.class)
                .flatMap(
                    (FlatMapFunction<OtherResearchProduct, String>) p -> {
                        List<String> ret = new ArrayList<>();
                        p.getInstance().stream().forEach(i -> {
                            if (Optional.ofNullable(i.getHostedby()).isPresent()
                                && Optional.ofNullable(i.getHostedby().getKey()).isPresent())
                                ret.add(i.getHostedby().getKey());
                        });
                        if (Optional.ofNullable(p.getCollectedfrom()).isPresent()) {
                            p.getCollectedfrom().stream().forEach(cf -> {
                                if (Optional.ofNullable(cf.getKey()).isPresent())
                                    ret.add(cf.getKey());
                            });
                        }
                        return ret.iterator();
                    }, Encoders.STRING()))
        .filter((FilterFunction<String>) s -> !s.equals(ModelConstants.UNKNOWN_REPOSITORY.getKey()))
        .distinct();

    // A datasource that is also source/target of a selected relation has already been written above:
    // appending it again would duplicate the record in /original/datasource.
    Dataset<String> cfhbNotInRelations = cfhbOrig.except(relAll);
    datasource
        .joinWith(cfhbNotInRelations, datasource.col("id").equalTo(cfhbNotInRelations.col("value")))
        .map((MapFunction<Tuple2<Datasource, String>, Datasource>) t2 -> t2._1(), Encoders.bean(Datasource.class))
        .write()
        .mode(SaveMode.Append)
        .option("compression", "gzip")
        .json(outputPath + "/original/datasource");

    Dataset<Project> project = Utils
        .readPath(spark, inputPath + "/project", Project.class)
        .filter((FilterFunction<Project>) d -> !d.getDataInfo().getDeletedbyinference());
    project
        .joinWith(relAll, project.col("id").equalTo(relAll.col("value")))
        .map((MapFunction<Tuple2<Project, String>, Project>) t2 -> t2._1(), Encoders.bean(Project.class))
        .groupByKey((MapFunction<Project, String>) v -> v.getId(), Encoders.STRING())
        .mapGroups((MapGroupsFunction<String, Project, Project>) (k, it) -> it.next(), Encoders.bean(Project.class))
        .write()
        .mode(SaveMode.Overwrite)
        .option("compression", "gzip")
        .json(outputPath + "/original/project");

    // save the relations among entities different from the result
    Dataset<String> selectedIDs = Utils
        .readPath(spark, outputPath + "/original/project", Project.class)
        .map((MapFunction<Project, String>) p -> p.getId(), Encoders.STRING())
        .union(
            Utils
                .readPath(spark, outputPath + "/original/organization", Organization.class)
                .map((MapFunction<Organization, String>) o -> o.getId(), Encoders.STRING()))
        .union(
            Utils
                .readPath(spark, outputPath + "/original/datasource", Datasource.class)
                .map((MapFunction<Datasource, String>) d -> d.getId(), Encoders.STRING()))
        // a repeated id would multiply the rows produced by the two joins below
        .distinct();

    Dataset<Relation> relOtherOther = relation
        .joinWith(selectedIDs, relation.col("source").equalTo(selectedIDs.col("value")))
        .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
    relOtherOther
        .joinWith(selectedIDs, relOtherOther.col("target").equalTo(selectedIDs.col("value")))
        .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
        .write()
        .mode(SaveMode.Append)
        .option("compression", "gzip")
        .json(outputPath + "/original/relation");
}
}

View File

@ -0,0 +1,133 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
/**
 * Spark job that keeps, among the research communities read from {@code contextPath}, only those actually
 * referenced in the context of at least one result found under {@code sourcePath}.
 * <p>
 * Arguments (described in {@code input_select_context.json}): {@code sourcePath}, {@code contextPath},
 * {@code communityMapPath}, {@code outputPath} and the optional {@code isSparkSessionManaged}.
 *
 * @author miriam.baglioni
 * @Date 15/11/22
 */
public class SparkSelectValidContext implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SparkSelectValidContext.class);

    /**
     * Parses the job arguments and runs the selection inside a Spark session.
     *
     * @param args command line arguments, parsed against {@code input_select_context.json}
     * @throws Exception if the argument parsing or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkSelectValidContext.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_select_context.json"));
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);
        // defaults to a managed session when the argument is not provided
        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);
        final String contextPath = parser.get("contextPath");
        log.info("contextPath: {}", contextPath);
        final String communityMapPath = parser.get("communityMapPath");
        log.info("communityMapPath: {}", communityMapPath);
        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);
        SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                selectValidContext(spark, inputPath, contextPath, communityMapPath, outputPath);
            });
    }

    /**
     * Collects the community identifiers referenced by the results and writes, as gzip-compressed JSON, the
     * {@link ResearchCommunity} records whose {@code acronym} equals one of them.
     *
     * @param spark the Spark session
     * @param inputPath root containing {@code /publication}, {@code /dataset}, {@code /software} and
     *            {@code /otherresearchproduct}
     * @param contextPath path of the {@link ResearchCommunity} records to filter
     * @param communityMapPath path of the community map whose keys are the admitted community identifiers
     * @param outputPath where the selected communities are written (Overwrite)
     */
    private static void selectValidContext(SparkSession spark, String inputPath, String contextPath,
        String communityMapPath, String outputPath) {
        // copied into a plain list so that it can be shipped to the executors inside the lambdas
        List<String> keys = Arrays
            .asList(Utils.getCommunityMap(spark, communityMapPath).keySet().stream().toArray(String[]::new));
        Dataset<String> context = getFilter(spark, inputPath + "/publication", keys, Publication.class)
            .union(getFilter(spark, inputPath + "/dataset", keys, eu.dnetlib.dhp.schema.oaf.Dataset.class))
            .union(getFilter(spark, inputPath + "/software", keys, Software.class))
            .union(getFilter(spark, inputPath + "/otherresearchproduct", keys, OtherResearchProduct.class))
            .distinct();
        // NOTE: the former debug "foreach(System.out.println)" was dropped on purpose: it executed the whole
        // pipeline one extra time only to print the identifiers on the executors' standard output.
        Dataset<ResearchCommunity> researchCommunity = Utils.readPath(spark, contextPath, ResearchCommunity.class);
        researchCommunity
            .joinWith(context, researchCommunity.col("acronym").equalTo(context.col("value")))
            .map(
                (MapFunction<Tuple2<ResearchCommunity, String>, ResearchCommunity>) t2 -> t2._1(),
                Encoders.bean(ResearchCommunity.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    /**
     * Reads the results of type {@code inputClazz} and returns the admitted community identifiers found in
     * their context (one entry per occurrence, duplicates included).
     *
     * @param spark the Spark session
     * @param inputPath path of the results to read
     * @param keys admitted community identifiers
     * @param inputClazz concrete result class used to deserialize the input
     * @return the community identifiers referenced by the results
     */
    private static <I extends Result> Dataset<String> getFilter(SparkSession spark, String inputPath,
        List<String> keys, Class<I> inputClazz) {
        return Utils
            .readPath(spark, inputPath, inputClazz)
            .filter((FilterFunction<I>) r -> isPresentContext(r))
            .flatMap(
                (FlatMapFunction<I, String>) r -> r
                    .getContext()
                    .stream()
                    .map(c -> extract(c.getId(), keys))
                    // context entries not matching any key are discarded here, before reaching the encoder
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList())
                    .iterator(),
                Encoders.STRING());
    }

    /** Tells whether the result has a (possibly empty) context list. */
    private static <I extends Result> boolean isPresentContext(I r) {
        return Optional.ofNullable(r.getContext()).isPresent();
    }

    /**
     * Maps a context identifier to the admitted community identifier it refers to.
     *
     * @param c the context identifier, possibly of the form {@code community:subpart}
     * @param keySet admitted community identifiers
     * @return {@code c} itself when admitted, otherwise the part before the first {@code ':'} when that part is
     *         admitted, otherwise {@code null} (also for a {@code null} identifier)
     */
    private static String extract(String c, List<String> keySet) {
        if (c == null) {
            return null;
        }
        if (keySet.contains(c)) {
            return c;
        }
        final int separator = c.indexOf(":");
        if (separator >= 0) {
            final String prefix = c.substring(0, separator);
            if (keySet.contains(prefix)) {
                return prefix;
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,116 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getEntitiesId;
import static eu.dnetlib.dhp.oa.graph.dump.Utils.getValidRelations;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import scala.Tuple2;
/**
 * Spark job that filters the relations found under {@code relationPath} through
 * {@code Utils.getValidRelations}, using the entity identifiers returned by {@code Utils.getEntitiesId} for
 * {@code sourcePath}, and stores the outcome in {@code sourcePath + "/relation"}.
 * <p>
 * NOTE(review): presumably a relation is valid when both its ends belong to the dumped entities; confirm
 * against {@code Utils.getValidRelations}.
 *
 * @author miriam.baglioni
 * @Date 15/11/22
 */
public class SparkSelectValidRelation implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelation.class);

    /**
     * Reads the job arguments ({@code sourcePath}, {@code relationPath} and the optional
     * {@code isSparkSessionManaged}) and runs the selection.
     *
     * @param args command line arguments, parsed against {@code input_select_valid_relation_parameters.json}
     * @throws Exception if the argument parsing or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        final String parametersDefinition = IOUtils
            .toString(
                SparkSelectValidRelation.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_parameters.json"));
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(parametersDefinition);
        parser.parseArgument(args);

        final Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        // results dumped
        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        // all relations plus those produced via context and extracted from results
        final String relationPath = parser.get("relationPath");
        log.info("relationPath: {}", relationPath);

        final SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                selectValidRelation(spark, inputPath, relationPath);
            });
    }

    /**
     * Writes to {@code inputPath + "/relation"} (Overwrite, gzip-compressed JSON) the relations selected by
     * {@code getValidRelations}.
     *
     * @param spark the Spark session
     * @param inputPath root of the dumped entities, also the root of the output relation folder
     * @param relationPath path of the candidate relations
     */
    private static void selectValidRelation(SparkSession spark, String inputPath,
        String relationPath) {
        final Dataset<Relation> candidates = Utils.readPath(spark, relationPath, Relation.class);
        getValidRelations(spark, candidates, getEntitiesId(spark, inputPath))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(inputPath + "/relation");
    }
}

View File

@ -0,0 +1,153 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import scala.Tuple2;
/**
 * Spark job that appends to {@code sourcePath + "/relation"} the context relations (read from
 * {@code contextRelationPath + "/context"} and {@code "/contextOrg"}) linking a dumped entity with one of the
 * communities stored in {@code sourcePath + "/communities_infrastructures"}, in either direction.
 *
 * @author miriam.baglioni
 * @Date 15/11/22
 */
public class SparkSelectValidRelationContext implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationContext.class);

    /**
     * Reads the job arguments ({@code sourcePath}, {@code contextRelationPath} and the optional
     * {@code isSparkSessionManaged}) and runs the selection.
     *
     * @param args command line arguments, parsed against
     *            {@code input_select_valid_relation_context_parameters.json}
     * @throws Exception if the argument parsing or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        final String parametersDefinition = IOUtils
            .toString(
                SparkSelectValidRelationContext.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_context_parameters.json"));
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(parametersDefinition);
        parser.parseArgument(args);

        final Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        // results dumped
        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String contextRelationPath = parser.get("contextRelationPath");
        log.info("contextRelationPath: {}", contextRelationPath);

        final SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                selectValidRelation(spark, inputPath, contextRelationPath);
            });
    }

    /**
     * Selects the context relations having a dumped entity on one side and an allowed community on the other
     * and appends them, as gzip-compressed JSON, to {@code inputPath + "/relation"}.
     *
     * @param spark the Spark session
     * @param inputPath root of the dumped entities and of the output relation folder
     * @param contextRelationPath root of the context relations ({@code /context} and {@code /contextOrg})
     */
    private static void selectValidRelation(SparkSession spark, String inputPath,
        String contextRelationPath) {
        // identifiers of every dumped entity: the four result types first, then the other entities
        final Dataset<String> dumpedIds = graphResultIds(spark, inputPath + "/publication")
            .union(graphResultIds(spark, inputPath + "/dataset"))
            .union(graphResultIds(spark, inputPath + "/software"))
            .union(graphResultIds(spark, inputPath + "/otherresearchproduct"))
            .union(
                Utils
                    .readPath(spark, inputPath + "/organization", eu.dnetlib.dhp.oa.model.graph.Organization.class)
                    .map(
                        (MapFunction<eu.dnetlib.dhp.oa.model.graph.Organization, String>) o -> o.getId(),
                        Encoders.STRING()))
            .union(
                Utils
                    .readPath(spark, inputPath + "/project", eu.dnetlib.dhp.oa.model.graph.Project.class)
                    .map(
                        (MapFunction<eu.dnetlib.dhp.oa.model.graph.Project, String>) o -> o.getId(),
                        Encoders.STRING()))
            .union(
                Utils
                    .readPath(spark, inputPath + "/datasource", eu.dnetlib.dhp.oa.model.graph.Datasource.class)
                    .map(
                        (MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
                        Encoders.STRING()));

        // context relations keyed by the identifier of their source
        final Dataset<Tuple2<String, Relation>> relationSource = Utils
            .readPath(spark, contextRelationPath + "/context", Relation.class)
            .union(Utils.readPath(spark, contextRelationPath + "/contextOrg", Relation.class))
            .map(
                (MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource().getId(), r),
                Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));

        final Dataset<ResearchCommunity> allowedContext = Utils
            .readPath(spark, inputPath + "/communities_infrastructures", ResearchCommunity.class);

        // direction 1: dumped entity (source) -> allowed community (target)
        final Dataset<Tuple2<String, Relation>> sourceDumpedByTarget = relationSource
            .joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
            .map(
                (MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
                    t2._1()._2().getTarget().getId(), t2._1()._2()),
                Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
        sourceDumpedByTarget
            .joinWith(allowedContext, sourceDumpedByTarget.col("_1").equalTo(allowedContext.col("id")))
            .map(
                (MapFunction<Tuple2<Tuple2<String, Relation>, ResearchCommunity>, Relation>) t2 -> t2._1()._2(),
                Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
            .write()
            .mode(SaveMode.Append)
            .option("compression", "gzip")
            .json(inputPath + "/relation");

        // direction 2: allowed community (source) -> dumped entity (target)
        final Dataset<Tuple2<String, Relation>> sourceAllowedByTarget = relationSource
            .joinWith(allowedContext, relationSource.col("_1").equalTo(allowedContext.col("id")))
            .map(
                (MapFunction<Tuple2<Tuple2<String, Relation>, ResearchCommunity>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
                    t2._1()._2().getTarget().getId(), t2._1()._2()),
                Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
        sourceAllowedByTarget
            .joinWith(dumpedIds, sourceAllowedByTarget.col("_1").equalTo(dumpedIds.col("value")))
            .map(
                (MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
                Encoders.bean(Relation.class))
            .write()
            .mode(SaveMode.Append)
            .option("compression", "gzip")
            .json(inputPath + "/relation");
    }

    /** Reads the {@link GraphResult} records stored at {@code path} and returns their identifiers. */
    private static Dataset<String> graphResultIds(SparkSession spark, String path) {
        return Utils
            .readPath(spark, path, GraphResult.class)
            .map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING());
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "contains"}: a value is accepted when it contains the
 * configured parameter as a substring (case sensitive).
 */
@VerbClass("contains")
public class ContainsVerb implements Selection, Serializable {

    /** The substring to look for. */
    private String param;

    public ContainsVerb() {
    }

    public ContainsVerb(final String param) {
        this.param = param;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} contains the parameter
     */
    @Override
    public boolean apply(String value) {
        final int position = value.indexOf(param);
        return position >= 0;
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "contains_ignorecase"}: a value is accepted when it
 * contains the configured parameter as a substring, ignoring the case.
 */
@VerbClass("contains_ignorecase")
public class ContainsVerbIgnoreCase implements Selection, Serializable {

    /** The substring to look for. */
    private String param;

    public ContainsVerbIgnoreCase() {
    }

    public ContainsVerbIgnoreCase(final String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when the lower-cased {@code value} contains the lower-cased parameter
     */
    @Override
    public boolean apply(String value) {
        // Locale.ROOT keeps the comparison independent from the JVM default locale
        // (e.g. with a Turkish locale "I".toLowerCase() is not "i")
        return value
            .toLowerCase(java.util.Locale.ROOT)
            .contains(param.toLowerCase(java.util.Locale.ROOT));
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "equals"}: a value is accepted when it is equal to the
 * configured parameter (case sensitive).
 */
@VerbClass("equals")
public class EqualVerb implements Selection, Serializable {

    /** The term of comparison. */
    private String param;

    public EqualVerb() {
    }

    public EqualVerb(final String param) {
        this.param = param;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} equals the parameter
     */
    @Override
    public boolean apply(String value) {
        final boolean same = value.equals(param);
        return same;
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "equals_ignorecase"}: a value is accepted when it is
 * equal to the configured parameter, ignoring the case.
 */
@VerbClass("equals_ignorecase")
public class EqualVerbIgnoreCase implements Selection, Serializable {

    /** The term of comparison. */
    private String param;

    public EqualVerbIgnoreCase() {
    }

    public EqualVerbIgnoreCase(final String param) {
        this.param = param;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} equals the parameter, ignoring the case
     */
    @Override
    public boolean apply(String value) {
        final boolean same = value.equalsIgnoreCase(param);
        return same;
    }
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "greater_than"}: a value is accepted when it follows
 * the configured parameter according to {@link String#compareTo(String)}, i.e. the comparison is
 * lexicographic, not numeric.
 *
 * @author miriam.baglioni
 * @Date 11/11/22
 */
@VerbClass("greater_than")
public class GreatThanVerb implements Selection, Serializable {

    /** The term of comparison. */
    private String param;

    public GreatThanVerb() {
    }

    public GreatThanVerb(final String param) {
        this.param = param;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} is lexicographically greater than the parameter
     */
    @Override
    public boolean apply(String value) {
        final int order = value.compareTo(param);
        return order > 0;
    }
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.lang.reflect.Type;
import com.google.gson.*;
public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
private static final String CLASSNAME = "CLASSNAME";
private static final String DATA = "DATA";
public Object deserialize(
JsonElement jsonElement,
Type type,
JsonDeserializationContext jsonDeserializationContext)
throws JsonParseException {
JsonObject jsonObject = jsonElement.getAsJsonObject();
JsonPrimitive prim = (JsonPrimitive) jsonObject.get(CLASSNAME);
String className = prim.getAsString();
Class klass = getObjectClass(className);
return jsonDeserializationContext.deserialize(jsonObject.get(DATA), klass);
}
public JsonElement serialize(
Object jsonElement, Type type, JsonSerializationContext jsonSerializationContext) {
JsonObject jsonObject = new JsonObject();
jsonObject.addProperty(CLASSNAME, jsonElement.getClass().getName());
jsonObject.add(DATA, jsonSerializationContext.serialize(jsonElement));
return jsonObject;
}
/** **** Helper method to get the className of the object to be deserialized **** */
public Class getObjectClass(String className) {
try {
return Class.forName(className);
} catch (ClassNotFoundException e) {
// e.printStackTrace();
throw new JsonParseException(e.getMessage());
}
}
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "lesser_than"}: a value is accepted when it precedes
 * the configured parameter according to {@link String#compareTo(String)}, i.e. the comparison is
 * lexicographic, not numeric.
 *
 * @author miriam.baglioni
 * @Date 11/11/22
 */
@VerbClass("lesser_than")
public class LessThanVerb implements Selection, Serializable {

    /** The term of comparison. */
    private String param;

    public LessThanVerb() {
    }

    public LessThanVerb(final String param) {
        this.param = param;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} is lexicographically lesser than the parameter
     */
    @Override
    public boolean apply(String value) {
        final int order = value.compareTo(param);
        return order < 0;
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "not_contains"}: a value is accepted when it does not
 * contain the configured parameter as a substring (case sensitive).
 */
@VerbClass("not_contains")
public class NotContainsVerb implements Selection, Serializable {

    /** The substring that must be absent. */
    private String param;

    public NotContainsVerb() {
    }

    public NotContainsVerb(final String param) {
        this.param = param;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} does not contain the parameter
     */
    @Override
    public boolean apply(String value) {
        final int position = value.indexOf(param);
        return position < 0;
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "not_contains_ignorecase"}: a value is accepted when
 * it does not contain the configured parameter as a substring, ignoring the case.
 */
@VerbClass("not_contains_ignorecase")
public class NotContainsVerbIgnoreCase implements Selection, Serializable {

    /** The substring that must be absent. */
    private String param;

    public NotContainsVerbIgnoreCase() {
    }

    public NotContainsVerbIgnoreCase(final String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when the lower-cased {@code value} does not contain the lower-cased parameter
     */
    @Override
    public boolean apply(String value) {
        // Locale.ROOT keeps the comparison independent from the JVM default locale
        // (e.g. with a Turkish locale "I".toLowerCase() is not "i")
        return !value
            .toLowerCase(java.util.Locale.ROOT)
            .contains(param.toLowerCase(java.util.Locale.ROOT));
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "not_equals"}: a value is accepted when it differs
 * from the configured parameter (case sensitive).
 */
@VerbClass("not_equals")
public class NotEqualVerb implements Selection, Serializable {

    /** The term of comparison. */
    private String param;

    public NotEqualVerb() {
    }

    public NotEqualVerb(final String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} is not equal to the parameter
     */
    @Override
    public boolean apply(String value) {
        final boolean same = value.equals(param);
        return !same;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * Selection criterion registered under the verb {@code "not_equals_ignorecase"}: a value is accepted when it
 * differs from the configured parameter, ignoring the case.
 */
@VerbClass("not_equals_ignorecase")
public class NotEqualVerbIgnoreCase implements Selection, Serializable {

    /** The term of comparison. */
    private String param;

    public NotEqualVerbIgnoreCase() {
    }

    public NotEqualVerbIgnoreCase(final String param) {
        this.param = param;
    }

    /**
     * @param value the value to check
     * @return {@code true} when {@code value} is not equal to the parameter, ignoring the case
     */
    @Override
    public boolean apply(String value) {
        final boolean same = value.equalsIgnoreCase(param);
        return !same;
    }

    public String getParam() {
        return param;
    }

    public void setParam(String param) {
        this.param = param;
    }
}

View File

@ -0,0 +1,9 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
/**
 * A serializable selection criterion evaluated against a single string value.
 */
public interface Selection extends Serializable {

    /**
     * Evaluates the criterion.
     *
     * @param value the value to check
     * @return {@code true} when the value satisfies the criterion
     */
    boolean apply(String value);
}

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Marks a class as the implementation of a selection verb. The annotation is retained at runtime and can
 * only be placed on types.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
@interface VerbClass {

    /** The name of the verb implemented by the annotated class. */
    String value();
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.Map;
import java.util.stream.Collectors;
import io.github.classgraph.ClassGraph;
import io.github.classgraph.ClassInfoList;
import io.github.classgraph.ScanResult;
/**
 * Registry of the available selection verbs: associates the name declared by each {@code @VerbClass}
 * annotated class found in the package {@code eu.dnetlib.dhp.oa.graph.dump.subset.criteria} with the class
 * itself, and instantiates the criteria on request.
 */
public class VerbResolver implements Serializable {

    /** Verb name -> class implementing the verb. */
    private Map<String, Class<Selection>> map = null;

    /**
     * Scans the criteria package with ClassGraph and registers every class annotated with {@code @VerbClass}.
     */
    @SuppressWarnings("unchecked") // the annotated classes are expected to implement Selection
    public VerbResolver() {
        final ClassGraph classgraph = new ClassGraph();
        try (ScanResult scanResult = // closed automatically at the end of the block
            classgraph
                .verbose() // logs the scan details to stderr
                .enableAllInfo() // scan classes, methods, fields, annotations
                .whitelistPackages(
                    "eu.dnetlib.dhp.oa.graph.dump.subset.criteria") // scan this package and its subpackages
                .scan()) { // perform the scan and return a ScanResult
            ClassInfoList routeClassInfoList = scanResult
                .getClassesWithAnnotation(
                    "eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbClass");
            this.map = routeClassInfoList
                .stream()
                .collect(
                    Collectors
                        .toMap(
                            // key: the value of the first annotation found on the class
                            value -> (String) value
                                .getAnnotationInfo()
                                .get(0)
                                .getParameterValues()
                                .get(0)
                                .getValue(),
                            value -> (Class<Selection>) value.loadClass()));
        }
    }

    /**
     * Instantiates the selection criterion registered for a verb.
     *
     * @param name the name of the verb
     * @param param the parameter handed to the single-String constructor of the criterion
     * @return the new criterion
     * @throws IllegalArgumentException when no class is registered for {@code name}
     * @throws NoSuchMethodException when the class has no constructor taking a single String
     * @throws IllegalAccessException when the constructor is not accessible
     * @throws InvocationTargetException when the constructor throws
     * @throws InstantiationException when the class cannot be instantiated
     */
    public Selection getSelectionCriteria(String name, String param)
        throws NoSuchMethodException, IllegalAccessException, InvocationTargetException,
        InstantiationException {
        final Class<Selection> verbClass = map.get(name);
        if (verbClass == null) {
            // an explicit message instead of a bare NullPointerException on a misspelled verb
            throw new IllegalArgumentException("Unknown selection verb: " + name);
        }
        return verbClass.getDeclaredConstructor((String.class)).newInstance(param);
    }
}

View File

@ -0,0 +1,13 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.criteria;
/**
 * Static factory for {@link VerbResolver}; not meant to be instantiated.
 */
public class VerbResolverFactory {

    private VerbResolverFactory() {
    }

    /**
     * @return a new {@link VerbResolver}, built with its no-argument constructor
     */
    public static VerbResolver newInstance() {
        final VerbResolver resolver = new VerbResolver();
        return resolver;
    }
}

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.Selection;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
/**
 * A single selection constraint: the {@code verb} to apply, the {@code field} it refers to and the
 * {@code value} used as parameter of the verb. The verb is turned into an executable {@link Selection} by
 * {@link #setSelection(VerbResolver)}, which must be called before {@link #verifyCriteria(String)}.
 */
public class Constraint implements Serializable {

    private String verb;
    private String field;
    private String value;
    /** Executable form of the constraint; unset until {@link #setSelection(VerbResolver)} is called. */
    private Selection selection;

    public String getVerb() {
        return verb;
    }

    public void setVerb(String verb) {
        this.verb = verb;
    }

    public String getField() {
        return field;
    }

    public void setField(String field) {
        this.field = field;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    /**
     * Builds the selection criterion for the configured verb and value.
     *
     * @param resolver the resolver providing the criterion implementation
     * @throws InvocationTargetException propagated from {@link VerbResolver#getSelectionCriteria}
     * @throws NoSuchMethodException propagated from {@link VerbResolver#getSelectionCriteria}
     * @throws InstantiationException propagated from {@link VerbResolver#getSelectionCriteria}
     * @throws IllegalAccessException propagated from {@link VerbResolver#getSelectionCriteria}
     */
    public void setSelection(VerbResolver resolver)
        throws InvocationTargetException, NoSuchMethodException, InstantiationException,
        IllegalAccessException {
        final Selection resolved = resolver.getSelectionCriteria(verb, value);
        this.selection = resolved;
    }

    /**
     * @param metadata the value to check
     * @return the outcome of the selection criterion applied to {@code metadata}
     */
    public boolean verifyCriteria(String metadata) {
        final boolean satisfied = selection.apply(metadata);
        return satisfied;
    }
}

View File

@ -0,0 +1,65 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Type;
import java.util.Collection;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
/**
 * A list of {@link Constraint} evaluated in AND: all of them must be verified.
 * Created by miriam on 02/08/2018.
 */
public class Constraints implements Serializable {
    private static final Log log = LogFactory.getLog(Constraints.class);

    private List<Constraint> constraint;

    public List<Constraint> getConstraint() {
        return constraint;
    }

    public void setConstraint(List<Constraint> constraint) {
        this.constraint = constraint;
    }

    /**
     * Replaces the constraints with those parsed from the given json array.
     *
     * @param json the json serialization of a collection of {@link Constraint}
     */
    public void setSc(String json) {
        Type collectionType = new TypeToken<Collection<Constraint>>() {
        }.getType();
        constraint = new Gson().fromJson(json, collectionType);
    }

    /**
     * Resolves the selection of every constraint. A constraint whose selection cannot be
     * instantiated is logged (with its cause) and left unresolved.
     *
     * @param resolver the resolver used to look up the selection of each verb
     */
    void setSelection(VerbResolver resolver) {
        for (Constraint st : constraint) {
            try {
                st.setSelection(resolver);
            } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException
                | InstantiationException e) {
                // pass the throwable so that cause and stack trace are not lost
                log.error("Unable to resolve the selection for verb '" + st.getVerb() + "'", e);
            }
        }
    }

    /**
     * Verifies the constraints in AND: each constraint must be satisfied by at least one of the
     * values associated to its field.
     *
     * @param param the values to check, indexed by field name
     * @return true if every constraint is verified, false otherwise (a field with no entry in
     *         {@code param} makes its constraint not verified)
     */
    public boolean verifyCriteria(final Param param) {
        for (Constraint sc : constraint) {
            final List<String> values = param.get(sc.getField());
            if (values == null || !anyValueMatches(sc, values)) {
                return false;
            }
        }
        return true;
    }

    /** Returns true as soon as one non-null value (trimmed) satisfies the constraint. */
    private static boolean anyValueMatches(Constraint sc, List<String> values) {
        for (String value : values) {
            if (value != null && sc.verifyCriteria(value.trim())) {
                return true;
            }
        }
        return false;
    }
}

View File

@ -0,0 +1,32 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.function.BiFunction;
import scala.xml.PrettyPrinter;
/**
 * Map from a key to a list of string values, with a helper accepting either a single string
 * or a ready-made list.
 *
 * @author miriam.baglioni
 * @Date 11/11/22
 */
public class Param extends HashMap<String, ArrayList<String>> implements Serializable {

    public Param() {
        super();
    }

    /**
     * Associates {@code value} to {@code key}.
     * A {@code String} is wrapped in a new single-element list; an {@code ArrayList} is stored as it
     * is (same instance); any other value, null included, is ignored.
     *
     * @param key the key the value is associated to
     * @param value a String or an ArrayList
     */
    @SuppressWarnings("unchecked")
    public void insert(String key, Object value) {
        if (value instanceof String) {
            ArrayList<String> single = new ArrayList<>();
            single.add((String) value);
            super.put(key, single);
            return;
        }
        if (value instanceof ArrayList) {
            super.put(key, (ArrayList<String>) value);
        }
    }
}

View File

@ -0,0 +1,15 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.util.HashMap;
/**
 * Serializable map from String to String.
 * NOTE(review): presumably holds the "pathMap" parameter (field name to path in the json) - confirm
 * against the callers.
 *
 * @author miriam.baglioni
 * @Date 11/11/22
 */
public class ProtoMap extends HashMap<String, String> implements Serializable {

    /** Creates an empty map. */
    public ProtoMap() {
    }
}

View File

@ -0,0 +1,47 @@
package eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints;
import java.io.Serializable;
import java.lang.reflect.Type;
import java.util.Collection;
import java.util.List;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
/**
 * A list of {@link Constraints} evaluated in OR: it is enough that one of them is verified.
 */
public class SelectionConstraints implements Serializable {
    private List<Constraints> criteria;

    public List<Constraints> getCriteria() {
        return criteria;
    }

    public void setCriteria(List<Constraints> criteria) {
        this.criteria = criteria;
    }

    /**
     * Replaces the criteria with those parsed from the given json array.
     *
     * @param json the json serialization of a collection of {@link Constraints}
     */
    public void setSc(String json) {
        final Type listOfConstraints = new TypeToken<Collection<Constraints>>() {
        }.getType();
        final Gson gson = new Gson();
        this.criteria = gson.fromJson(json, listOfConstraints);
    }

    /**
     * Verifies the criteria in OR.
     *
     * @param param the values to check, indexed by field name
     * @return true if at least one of the {@link Constraints} is verified, false otherwise
     */
    public boolean verifyCriteria(final Param param) {
        return criteria.stream().anyMatch(group -> group.verifyCriteria(param));
    }

    /**
     * Hands the resolver to every {@link Constraints} so that the selections can be resolved.
     *
     * @param resolver the resolver used to look up the selections
     */
    public void addResolver(VerbResolver resolver) {
        criteria.forEach(group -> group.setSelection(resolver));
    }
}

View File

@ -0,0 +1,32 @@
[
{
"paramName":"hdp",
"paramLongName":"hdfsPath",
"paramDescription": "the hdfs path where the master-duplicate association read from the database is written",
"paramRequired": true
},
{
"paramName": "pp",
"paramLongName": "postgresPassword",
"paramDescription": "the password of the postgres user",
"paramRequired": true
},
{
"paramName": "pu",
"paramLongName": "postgresUser",
"paramDescription": "the postgres user name",
"paramRequired": true
},
{
"paramName": "purl",
"paramLongName": "postgresUrl",
"paramDescription": "the url of the postgres database",
"paramRequired": true
},
{
"paramName": "hdnn",
"paramLongName": "hdfsNameNode",
"paramDescription": "the hdfs name node",
"paramRequired": true
}
]

View File

@ -4,7 +4,7 @@
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": true
"paramRequired": false
},
{
"paramName":"s",
@ -35,12 +35,40 @@
"paramLongName":"dumpType",
"paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project",
"paramRequired": false
}, {
},
{
"paramName":"cid",
"paramLongName":"communityId",
"paramDescription": "the id of the community to be dumped",
"paramRequired": false
}
},
{
"paramName":"sc",
"paramLongName":"selectionCriteria",
"paramDescription": "the selection criteria to choose the results",
"paramRequired": false
},
{
"paramName":"pm",
"paramLongName":"pathMap",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
},
{
"paramName":"rt",
"paramLongName":"resultType",
"paramDescription": "the type of the result to be dumped (publication, dataset, otherresearchproduct, software)",
"paramRequired": false
},
{
"paramName":"md",
"paramLongName":"masterDuplicatePath",
"paramDescription": "the path where the master-duplicate association is stored",
"paramRequired": false
}
]

View File

@ -25,6 +25,18 @@
"paramLongName": "removeSet",
"paramDescription": "the list of classname relations, split by ';', not to be dumped",
"paramRequired": false
},
{
"paramName": "wd",
"paramLongName": "workingDir",
"paramDescription": "the path of the working directory",
"paramRequired": false
},
{
"paramName": "mdp",
"paramLongName": "masterDuplicatePath",
"paramDescription": "the path where the master-duplicate association is stored",
"paramRequired": false
}
]

View File

@ -0,0 +1,37 @@
[
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": false
},
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"cp",
"paramLongName":"contextPath",
"paramDescription": "the path of the context entities (communities and infrastructures)",
"paramRequired": true
}
]

View File

@ -0,0 +1,27 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"crp",
"paramLongName":"contextRelationPath",
"paramDescription": "the path of the relations created from the contexts",
"paramRequired": false
}
]

View File

@ -0,0 +1,27 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequential file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"rp",
"paramLongName":"relationPath",
"paramDescription": "the path of the relations",
"paramRequired": false
}
]

View File

@ -1,4 +1,5 @@
## This is a classpath-based import file (this header is required)
dump_complete classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app
dump_funder classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/funder/oozie_app
dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app
dump_community classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/community/oozie_app
dump_subset classpath eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/subset/oozie_app

View File

@ -3,10 +3,12 @@
<parameters>
<property>
<name>singleDeposition</name>
<value>false</value>
<description>Indicates if it is a single community deposition</description>
</property>
<property>
<name>communityId</name>
<value>none</value>
<description>the id of the community to be dumped if a dump for a single community should be done</description>
</property>
<property>
@ -35,6 +37,7 @@
</property>
<property>
<name>resultAggregation</name>
<value>false</value>
<description>true if all the result type have to be dumped under result. false otherwise</description>
</property>
<property>
@ -47,6 +50,7 @@
</property>
<property>
<name>metadata</name>
<value>""</value>
<description> the metadata associated to the deposition</description>
</property>
<property>
@ -55,17 +59,19 @@
</property>
<property>
<name>conceptRecordId</name>
<value>none</value>
<description>for new version, the id of the record for the old deposition</description>
</property>
<property>
<name>depositionId</name>
<value>none</value>
<description>the depositionId of a deposition open that has to be added content</description>
</property>
<property>
<name>organizationCommunityMap</name>
<value>none</value>
<description>the organization community map</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
@ -173,10 +179,52 @@
<switch>
<case to="dump_funder">${wf:conf('dumpType') eq "funder"}</case>
<case to="dump_community">${wf:conf('dumpType') eq "community"}</case>
<case to="dump_subset">${wf:conf('dumpType') eq "subset"}</case>
<default to="dump_complete"/>
</switch>
</decision>
<!-- Sub-workflow which runs the dump for a subset of the graph -->
<action name="dump_subset">
<sub-workflow>
<app-path>${wf:appPath()}/dump_subset
</app-path>
<propagate-configuration/>
<configuration>
<property>
<name>communityMapPath</name>
<value>${workingDir}/communityMap</value>
</property>
<property>
<name>outputPath</name>
<value>${outputPath}</value>
</property>
<property>
<name>sourcePath</name>
<value>${sourcePath}</value>
</property>
<property>
<name>organizationCommunityMap</name>
<value>${organizationCommunityMap}</value>
</property>
<property>
<name>isLookUpUrl</name>
<value>${isLookUpUrl}</value>
</property>
<property>
<name>pathMap</name>
<value>${pathMap}</value>
</property>
<property>
<name>selectionCriteria</name>
<value>${selectionCriteria}</value>
</property>
</configuration>
</sub-workflow>
<ok to="make_archive" />
<error to="Kill" />
</action>
<!-- Sub-workflow which runs the dump for the complete graph -->
<action name="dump_complete">
<sub-workflow>
@ -214,7 +262,7 @@
<error to="Kill" />
</action>
<!-- Sub-workflow which runs the dump for the complete graph -->
<!-- Sub-workflow which runs the dump for the communities -->
<action name="dump_community">
<sub-workflow>
<app-path>${wf:appPath()}/dump_community
@ -278,6 +326,17 @@
<error to="Kill"/>
</action>
<!-- <action name="make_archive">-->
<!-- <java>-->
<!-- <main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>-->
<!-- <arg>&#45;&#45;hdfsPath</arg><arg>${outputPath}/tar</arg>-->
<!-- <arg>&#45;&#45;nameNode</arg><arg>${nameNode}</arg>-->
<!-- <arg>&#45;&#45;sourcePath</arg><arg>${outputPath}/dump</arg>-->
<!-- </java>-->
<!-- <ok to="should_upload"/>-->
<!-- <error to="Kill"/>-->
<!-- </action>-->
<decision name="should_upload">
<switch>
<case to="send_zenodo">${wf:conf('upload') eq true}</case>

View File

@ -120,7 +120,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/publication</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -146,7 +145,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/dataset</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -172,7 +170,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -198,7 +195,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/result/software</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -224,7 +220,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/organization</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--outputPath</arg><arg>${outputPath}/organization</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -250,7 +245,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${outputPath}/project</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -276,7 +270,6 @@
<arg>--sourcePath</arg><arg>${sourcePath}/datasource</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--outputPath</arg><arg>${outputPath}/datasource</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
@ -343,7 +336,7 @@
<action name="create_entities_fromcontext">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/communities_infrastructure.json.gz</arg>
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/community_infrastructure.json.gz</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>

View File

@ -0,0 +1,30 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,611 @@
<workflow-app name="sub-dump_subset" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>organizationCommunityMap</name>
<description>the organization community map</description>
</property>
<property>
<name>pathMap</name>
<description>the path where to find the elements involved in the constraints within the json</description>
</property>
<property>
<name>selectionCriteria</name>
<description>the selection criteria used to select the results</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="get_master_duplicate" />
<action name="get_master_duplicate">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/masterduplicate</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
</java>
<ok to="fork_select_and_dump"/>
<error to="Kill"/>
</action>
<fork name="fork_select_and_dump">
<path start="select_and_dump_publication"/>
<path start="select_and_dump_dataset"/>
<path start="select_and_dump_orp"/>
<path start="select_and_dump_software"/>
</fork>
<action name="select_and_dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>publication</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_and_dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>dataset</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_and_dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_and_dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--selectionCriteria</arg><arg>${selectionCriteria}</arg>
<arg>--resultType</arg><arg>software</arg>
<arg>--masterDuplicatePath</arg><arg>${workingDir}/masterduplicate</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="select_subset"/>
<action name="select_subset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select valid table relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectSubset</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--removeSet</arg><arg>${removeSet}</arg>
</spark>
<ok to="fork_dump_otherentities"/>
<error to="Kill"/>
</action>
<fork name="fork_dump_otherentities">
<path start="dump_organization"/>
<path start="dump_project"/>
<path start="dump_datasource"/>
</fork>
<action name="dump_organization">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table organization </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/organization</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/organization</arg>
</spark>
<ok to="join_dump_otherentities"/>
<error to="Kill"/>
</action>
<action name="dump_project">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table project </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/project</arg>
</spark>
<ok to="join_dump_otherentities"/>
<error to="Kill"/>
</action>
<action name="dump_datasource">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table datasource </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/datasource</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/datasource</arg>
</spark>
<ok to="join_dump_otherentities"/>
<error to="Kill"/>
</action>
<join name="join_dump_otherentities" to="fork_context"/>
<fork name="fork_context">
<path start="create_entities_fromcontext"/>
<path start="create_relation_fromcontext"/>
<path start="create_relation_fromorgs"/>
</fork>
<action name="create_entities_fromcontext">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/context/community_infrastructure.json.gz</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="select_valid_context"/>
<error to="Kill"/>
</action>
<action name="select_valid_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select valid context </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidContext</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original</arg>
<arg>--contextPath</arg><arg>${workingDir}/context/community_infrastructure.json.gz</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/communities_infrastructures</arg>
</spark>
<ok to="join_context"/>
<error to="Kill"/>
</action>
<action name="create_relation_fromcontext">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/dump/relation/context</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="join_context"/>
<error to="Kill"/>
</action>
<action name="create_relation_fromorgs">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkOrganizationRelation</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/relation/contextOrg</arg>
<arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_context"/>
<error to="Kill"/>
</action>
<join name="join_context" to="filter_relation_context"/>
<action name="filter_relation_context">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select valid relation context </name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidRelationContext</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
<arg>--contextRelationPath</arg><arg>${workingDir}/dump/relation</arg> <!-- new relations from context -->
</spark>
<ok to="dump_relation"/>
<error to="Kill"/>
</action>
<!--
  dump_relation: runs SparkDumpRelationJob on ${outputPath}/original/relation and writes to
  ${workingDir}/relation, passing the workflow parameter ${removeSet} as removeSet.
  On success the workflow continues with rels_from_pubs; any failure goes to Kill.
  NOTE(review): the rels_from_* actions that follow use the same outputPath - confirm how the
  jobs combine their output in that directory.
-->
<action name="dump_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpRelationJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/relation</arg>
<arg>--removeSet</arg><arg>${removeSet}</arg>
</spark>
<ok to="rels_from_pubs"/>
<error to="Kill"/>
</action>
<!--
  rels_from_pubs: runs SparkExtractRelationFromEntities for the Publication table
  (${outputPath}/original/publication), using the community map at ${communityMapPath},
  and writes to ${workingDir}/relation.
  On success the workflow continues with rels_from_dats; any failure goes to Kill.
-->
<action name="rels_from_pubs">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extract Relations from publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="rels_from_dats"/>
<error to="Kill"/>
</action>
<!--
  rels_from_dats: runs SparkExtractRelationFromEntities for the Dataset table
  (${outputPath}/original/dataset), using the community map at ${communityMapPath},
  and writes to ${workingDir}/relation.
  On success the workflow continues with rels_from_orp; any failure goes to Kill.
-->
<action name="rels_from_dats">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <!-- Job name fixed: it used to read "Dump table dataset", but this job extracts relations -->
        <name>Extract Relations from dataset</name>
        <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
        <jar>dump-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-memory=${sparkExecutorMemory}
            --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkDriverMemory}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
        </spark-opts>
        <arg>--sourcePath</arg><arg>${outputPath}/original/dataset</arg>
        <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
        <arg>--outputPath</arg><arg>${workingDir}/relation</arg>
        <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
    </spark>
    <ok to="rels_from_orp"/>
    <error to="Kill"/>
</action>
<!--
  rels_from_orp: runs SparkExtractRelationFromEntities for the OtherResearchProduct table
  (${outputPath}/original/otherresearchproduct), using the community map at
  ${communityMapPath}, and writes to ${workingDir}/relation.
  On success the workflow continues with rels_from_sw; any failure goes to Kill.
-->
<action name="rels_from_orp">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <!-- Job name fixed: it used to read "Dump table ORP", but this job extracts relations -->
        <name>Extract Relations from otherresearchproduct</name>
        <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
        <jar>dump-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-memory=${sparkExecutorMemory}
            --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkDriverMemory}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
        </spark-opts>
        <arg>--sourcePath</arg><arg>${outputPath}/original/otherresearchproduct</arg>
        <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
        <arg>--outputPath</arg><arg>${workingDir}/relation</arg>
        <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
    </spark>
    <ok to="rels_from_sw"/>
    <error to="Kill"/>
</action>
<!--
  rels_from_sw: runs SparkExtractRelationFromEntities for the Software table
  (${outputPath}/original/software), using the community map at ${communityMapPath},
  and writes to ${workingDir}/relation.
  On success the workflow continues with filter_relation; any failure goes to Kill.
-->
<action name="rels_from_sw">
    <spark xmlns="uri:oozie:spark-action:0.2">
        <master>yarn</master>
        <mode>cluster</mode>
        <!-- Job name fixed: it used to read "Dump table software", but this job extracts relations -->
        <name>Extract Relations from software</name>
        <class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkExtractRelationFromEntities</class>
        <jar>dump-${projectVersion}.jar</jar>
        <spark-opts>
            --executor-memory=${sparkExecutorMemory}
            --executor-cores=${sparkExecutorCores}
            --driver-memory=${sparkDriverMemory}
            --conf spark.extraListeners=${spark2ExtraListeners}
            --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
            --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
            --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
        </spark-opts>
        <arg>--sourcePath</arg><arg>${outputPath}/original/software</arg>
        <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
        <arg>--outputPath</arg><arg>${workingDir}/relation</arg>
        <arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
    </spark>
    <ok to="filter_relation"/>
    <error to="Kill"/>
</action>
<!--
  filter_relation: last action of the workflow. Runs SparkSelectValidRelation with
    sourcePath   = ${outputPath}/dump
    relationPath = ${workingDir}/relation
  On success the workflow reaches End; any failure goes to Kill.
  NOTE(review): presumably keeps only relations whose endpoints are among the dumped
  entities - confirm against the Spark class.
-->
<action name="filter_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select valid relations</name>
<class>eu.dnetlib.dhp.oa.graph.dump.subset.SparkSelectValidRelation</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
<arg>--relationPath</arg><arg>${workingDir}/relation</arg> <!-- output path of dump_relation and of the rels_from_* actions -->
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<!--
  Failure sink: the <error> transition of every action above points here; the message
  embeds the Oozie error text via ${wf:errorMessage()}.
  NOTE(review): the message says "dump complete" while some actions above run classes from
  the ...dump.subset package - confirm the wording matches this workflow.
-->
<kill name="Kill">
<message>Sub-workflow dump complete failed with error message ${wf:errorMessage()}
</message>
</kill>
<end name="End" />
</workflow-app>

View File

@ -4,8 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
@ -26,8 +25,14 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolver;
import eu.dnetlib.dhp.oa.graph.dump.subset.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.ProtoMap;
import eu.dnetlib.dhp.oa.graph.dump.subset.selectionconstraints.SelectionConstraints;
import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
@ -76,41 +81,6 @@ public class DumpJobTest {
}
List<String> communityMap = Arrays
.asList(
"<community id=\"egi\" label=\"EGI Federation\"/>",
"<community id=\"fet-fp7\" label=\"FET FP7\"/>",
"<community id=\"fet-h2020\" label=\"FET H2020\"/>",
"<community id=\"clarin\" label=\"CLARIN\"/>",
"<community id=\"rda\" label=\"Research Data Alliance\"/>",
"<community id=\"ee\" label=\"SDSN - Greece\"/>",
"<community id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\"/>",
"<community id=\"fam\" label=\"Fisheries and Aquaculture Management\"/>",
"<community id=\"ni\" label=\"Neuroinformatics\"/>",
"<community id=\"mes\" label=\"European Marine Science\"/>",
"<community id=\"instruct\" label=\"Instruct-ERIC\"/>",
"<community id=\"elixir-gr\" label=\"ELIXIR GR\"/>",
"<community id=\"aginfra\" label=\"Agricultural and Food Sciences\"/>",
"<community id=\"dariah\" label=\"DARIAH EU\"/>",
"<community id=\"risis\" label=\"RISIS\"/>",
"<community id=\"epos\" label=\"EPOS\"/>",
"<community id=\"beopen\" label=\"Transport Research\"/>",
"<community id=\"euromarine\" label=\"EuroMarine\"/>",
"<community id=\"ifremer\" label=\"Ifremer\"/>",
"<community id=\"oa-pg\" label=\"EC Post-Grant Open Access Pilot\"/>",
"<community id=\"science-innovation-policy\" label=\"Science and Innovation Policy Studies\"/>",
"<community id=\"covid-19\" label=\"COVID-19\"/>",
"<community id=\"enermaps\" label=\"Energy Research\"/>");
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" return " +
"<community> " +
"{$x//CONFIGURATION/context/@id}" +
"{$x//CONFIGURATION/context/@label}" +
"</community>";
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(DumpJobTest.class.getSimpleName());
@ -145,21 +115,25 @@ public class DumpJobTest {
}
@Test
public void testPublicationDump() {
public void testDumpIndicators() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publicationWithMeasures")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -174,6 +148,47 @@ public class DumpJobTest {
GraphResult gr = verificationDataset.first();
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent());
Assertions.assertFalse(Optional.ofNullable(gr.getIndicators().getImpactMeasures()).isPresent());
}
@Test
public void testPublicationDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(1, verificationDataset.count());
GraphResult gr = verificationDataset.first();
System.out.println(new ObjectMapper().writeValueAsString(gr));
Assertions.assertEquals(6, gr.getAuthor().size());
Assertions
.assertTrue(
@ -356,6 +371,45 @@ public class DumpJobTest {
Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateofcollection());
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent());
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getImpactMeasures()).isPresent());
Assertions
.assertTrue(gr.getIndicators().getImpactMeasures().getInfluence() != null);
Assertions
.assertTrue(gr.getIndicators().getImpactMeasures().getPopularity_alt() != null);
Assertions
.assertTrue(gr.getIndicators().getImpactMeasures().getPopularity() != null);
Assertions
.assertTrue(gr.getIndicators().getImpactMeasures().getInfluence_alt() != null);
Assertions
.assertTrue(gr.getIndicators().getImpactMeasures().getImpulse() != null);
Assertions
.assertTrue(gr.getIndicators().getUsageCounts() != null);
Assertions
.assertTrue(Integer.valueOf(gr.getIndicators().getUsageCounts().getDownloads()) >= 0);
Assertions
.assertTrue(Integer.valueOf(gr.getIndicators().getUsageCounts().getViews()) >= 0);
Assertions.assertEquals("6.01504990349e-09", gr.getIndicators().getImpactMeasures().getInfluence().getScore());
Assertions.assertEquals("C", gr.getIndicators().getImpactMeasures().getInfluence().getClazz());
Assertions.assertEquals("2.304", gr.getIndicators().getImpactMeasures().getPopularity_alt().getScore());
Assertions.assertEquals("C", gr.getIndicators().getImpactMeasures().getPopularity_alt().getClazz());
Assertions.assertEquals("1.81666032463e-08", gr.getIndicators().getImpactMeasures().getPopularity().getScore());
Assertions.assertEquals("C", gr.getIndicators().getImpactMeasures().getPopularity().getClazz());
Assertions.assertEquals("8.0", gr.getIndicators().getImpactMeasures().getInfluence_alt().getScore());
Assertions.assertEquals("C", gr.getIndicators().getImpactMeasures().getInfluence_alt().getClazz());
Assertions.assertEquals("8.0", gr.getIndicators().getImpactMeasures().getImpulse().getScore());
Assertions.assertEquals("C", gr.getIndicators().getImpactMeasures().getImpulse().getClazz());
Assertions.assertEquals("0", gr.getIndicators().getUsageCounts().getDownloads());
Assertions.assertEquals("1", gr.getIndicators().getUsageCounts().getViews());
Assertions.assertEquals(1, gr.getInstance().size());
Instance instance = gr.getInstance().get(0);
@ -388,7 +442,7 @@ public class DumpJobTest {
}
@Test
public void testDatasetDump() {
public void testDatasetDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance")
.getPath();
@ -397,12 +451,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result",
communityMapPath, Dataset.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -469,7 +527,7 @@ public class DumpJobTest {
}
@Test
public void testSoftwareDump() {
public void testSoftwareDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance")
.getPath();
@ -478,12 +536,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result",
communityMapPath, Software.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -519,7 +581,7 @@ public class DumpJobTest {
}
@Test
public void testOrpDump() {
public void testOrpDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance")
.getPath();
@ -528,12 +590,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result",
communityMapPath, OtherResearchProduct.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -572,7 +638,7 @@ public class DumpJobTest {
}
@Test
public void testPublicationDumpCommunity() throws JsonProcessingException {
public void testPublicationDumpCommunity() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
@ -582,11 +648,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -632,7 +703,7 @@ public class DumpJobTest {
}
@Test
public void testDataset() {
public void testDataset() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
@ -642,11 +713,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -690,7 +766,7 @@ public class DumpJobTest {
}
@Test
public void testDataset2All() {
public void testDataset2All() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
@ -700,12 +776,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -721,7 +801,7 @@ public class DumpJobTest {
}
@Test
public void testDataset2Communities() {
public void testDataset2Communities() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
@ -731,11 +811,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -751,7 +836,7 @@ public class DumpJobTest {
}
@Test
public void testPublication() {
public void testPublication() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
@ -761,12 +846,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -785,7 +874,7 @@ public class DumpJobTest {
}
@Test
public void testSoftware() {
public void testSoftware() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
@ -795,12 +884,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -818,7 +911,7 @@ public class DumpJobTest {
}
@Test
public void testORP() {
public void testORP() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
@ -828,12 +921,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -851,7 +948,7 @@ public class DumpJobTest {
}
@Test
public void testRecord() {
public void testRecord() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
.getPath();
@ -860,12 +957,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
@ -883,7 +984,7 @@ public class DumpJobTest {
}
@Test
public void testArticlePCA() {
public void testArticlePCA() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
.getPath();
@ -892,11 +993,16 @@ public class DumpJobTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -968,4 +1074,35 @@ public class DumpJobTest {
.getString(2));
}
/**
 * Runs {@code SparkDumpEntitiesJob} on the {@code resultNotDumped.json} fixture, read as a
 * Publication table, and verifies that the job writes no record to the output path.
 *
 * @throws Exception if the Spark job or reading the output back fails
 */
@Test
public void testresultNotDumped() throws Exception {
    final String dumpOutput = workingDir.toString() + "/result";
    final String communityMap = getClass()
        .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
        .getPath();
    final String input = getClass()
        .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/resultNotDumped.json")
        .getPath();

    final String[] jobArgs = {
        "-isSparkSessionManaged", Boolean.FALSE.toString(),
        "-sourcePath", input,
        "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
        "-outputPath", dumpOutput,
        "-communityMapPath", communityMap
    };
    SparkDumpEntitiesJob.main(jobArgs);

    // Any line found in the output must parse as a GraphResult; none is expected for this fixture.
    final long dumpedRecords = JavaSparkContext
        .fromSparkContext(spark.sparkContext())
        .textFile(dumpOutput)
        .map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class))
        .count();

    Assertions.assertEquals(0, dumpedRecords);
}
}

View File

@ -61,6 +61,29 @@ public class SplitForCommunityTest {
spark.stop();
}
/**
 * Runs {@code CommunitySplit} on the {@code communityResult} fixture and sets up a read of the
 * {@code Digital_Humanities_and_Cultural_Heritage} split as {@code CommunityResult} records.
 *
 * NOTE(review): the RDD built at the end is never evaluated (no action such as count() is
 * invoked) and nothing is asserted, so this test only checks that the split job itself
 * completes without throwing. Consider adding an assertion on the split content.
 */
@Test
void testCommunitySplit2() {
    final String splitOutput = workingDir.toString() + "/split";
    final String communityMap = getClass()
        .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
        .getPath();
    final String input = getClass()
        .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityResult")
        .getPath();

    new CommunitySplit().run(false, input, splitOutput, communityMap);

    final JavaSparkContext javaContext = JavaSparkContext.fromSparkContext(spark.sparkContext());

    // Lazy transformation only: no Spark action follows, so the split directory is not read here.
    JavaRDD<CommunityResult> dhchRecords = javaContext
        .textFile(splitOutput + "/Digital_Humanities_and_Cultural_Heritage")
        .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
}
@Test
void testCommunitySplit() {

View File

@ -9,6 +9,8 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.model.graph.Relation;
@ -546,6 +548,14 @@ class CreateRelationTest {
cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));
rList.forEach(r -> {
try {
System.out.println(new ObjectMapper().writeValueAsString(r));
} catch (JsonProcessingException e) {
e.printStackTrace();
}
});
Assertions.assertEquals(34, rList.size());
Assertions

View File

@ -76,9 +76,15 @@ public class DumpOrganizationProjectDatasourceTest {
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/organization")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Organization",
"-outputPath", workingDir.toString() + "/dump"
dg.run(false, sourcePath, workingDir.toString() + "/dump", Organization.class, null);
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -99,15 +105,21 @@ public class DumpOrganizationProjectDatasourceTest {
}
@Test
public void dumpProjectTest() throws NoAvailableEntityTypeException {
public void dumpProjectTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Project",
"-outputPath", workingDir.toString() + "/dump"
dg.run(false, sourcePath, workingDir.toString() + "/dump", Project.class, null);
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -128,14 +140,20 @@ public class DumpOrganizationProjectDatasourceTest {
}
@Test
public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
public void dumpDatasourceTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Datasource",
"-outputPath", workingDir.toString() + "/dump"
dg.run(false, sourcePath, workingDir.toString() + "/dump", Datasource.class, null);
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -154,4 +172,37 @@ public class DumpOrganizationProjectDatasourceTest {
.println(OBJECT_MAPPER.writeValueAsString(o)));
}
/**
 * Runs {@code SparkDumpEntitiesJob} on the {@code datasourcenotdumped} fixture as a Datasource
 * table, reads the output back as dump-model Datasource beans, checks that exactly one record
 * was written and prints every record as JSON.
 *
 * NOTE(review): the test name suggests nothing should be dumped, yet the assertion expects one
 * record - presumably the fixture mixes one dumpable datasource with non-dumpable ones; confirm.
 *
 * @throws Exception if the Spark job or reading the output back fails
 */
@Test
public void dumpDatasourceNotDumpedTest() throws Exception {
    final String dumpOutput = workingDir.toString() + "/dump";
    final String input = getClass()
        .getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasourcenotdumped")
        .getPath();

    final String[] jobArgs = {
        "-isSparkSessionManaged", Boolean.FALSE.toString(),
        "-sourcePath", input,
        "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Datasource",
        "-outputPath", dumpOutput
    };
    SparkDumpEntitiesJob.main(jobArgs);

    final JavaSparkContext javaContext = JavaSparkContext.fromSparkContext(spark.sparkContext());

    // Parse each output line into the dump-model Datasource (not the OAF one).
    JavaRDD<eu.dnetlib.dhp.oa.model.graph.Datasource> dumped = javaContext
        .textFile(dumpOutput)
        .map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Datasource.class));

    org.apache.spark.sql.Dataset<eu.dnetlib.dhp.oa.model.graph.Datasource> dumpedDataset = spark
        .createDataset(dumped.rdd(), Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Datasource.class));

    Assertions.assertEquals(1, dumpedDataset.count());

    // Print the dumped records to ease manual inspection of the test output.
    dumpedDataset
        .foreach(
            (ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Datasource>) record -> System.out
                .println(OBJECT_MAPPER.writeValueAsString(record)));
}
}

View File

@ -172,4 +172,31 @@ public class ExtractRelationFromEntityTest {
.getType());
}
/**
 * Runs {@code Extractor} on the {@code sdsn-gr_publication.json} fixture as a Publication
 * table and prints every relation written to the output path as JSON.
 *
 * NOTE(review): the test contains no assertion; it only fails if the extraction or the
 * read-back throws.
 */
@Test
void sdsdTest() {
    final String relationOutput = workingDir.toString() + "/relation";
    final String communityMap = getClass()
        .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
        .getPath();
    final String input = getClass()
        .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/sdsn-gr_publication.json")
        .getPath();

    new Extractor()
        .run(false, input, relationOutput, eu.dnetlib.dhp.schema.oaf.Publication.class, communityMap);

    final JavaSparkContext javaContext = JavaSparkContext.fromSparkContext(spark.sparkContext());

    JavaRDD<Relation> extracted = javaContext
        .textFile(relationOutput)
        .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

    // foreach is a Spark action, so here the output is really read and parsed.
    extracted.foreach(relation -> System.out.println(OBJECT_MAPPER.writeValueAsString(relation)));
}
}

View File

@ -23,7 +23,7 @@ class FunderParsingTest {
"Computer &amp; Information Science &amp; Engineering</description><name>Directorate for Computer &amp; " +
"Information Science &amp; Engineering</name><parent/><class>nsf:fundingStream</class></funding_level_0></parent></funding_level_1></fundingtree>";
Funder f = DumpGraphEntities.getFunder(funding_Stream);
Funder f = SparkDumpEntitiesJob.getFunder(funding_Stream);
Assertions.assertEquals("NSF", f.getShortName());
Assertions.assertEquals("National Science Foundation", f.getName());
@ -54,7 +54,7 @@ class FunderParsingTest {
"<description>Horizon 2020 Framework Programme</description><parent/>" +
"<class>ec:h2020fundings</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>";
Funder f = DumpGraphEntities.getFunder(funding_stream);
Funder f = SparkDumpEntitiesJob.getFunder(funding_stream);
Assertions.assertEquals("EC", f.getShortName());
Assertions.assertEquals("European Commission", f.getName());

View File

@ -134,6 +134,8 @@ public class SplitPerFunderTest {
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
Assertions.assertEquals(3, tmp.count());
tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
// MZOS 1
tmp = sc
.textFile(workingDir.toString() + "/split/MZOS")

View File

@ -0,0 +1,521 @@
package eu.dnetlib.dhp.oa.graph.dump.subset;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Extractor;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.*;
/**
* @author miriam.baglioni
* @Date 16/11/22
*/
public class DumpSubsetTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(DumpSubsetTest.class);
private static final CommunityMap map = new CommunityMap();
static {
map.put("egi", "EGI Federation");
map.put("fet-fp7", "FET FP7");
map.put("fet-h2020", "FET H2020");
map.put("clarin", "CLARIN");
map.put("fam", "Fisheries and Aquaculture Management");
map.put("ni", "Neuroinformatics");
map.put("mes", "European Marine Scinece");
map.put("instruct", "Instruct-Eric");
map.put("rda", "Research Data Alliance");
map.put("elixir-gr", "ELIXIR GR");
map.put("aginfra", "Agricultural and Food Sciences");
map.put("dariah", "DARIAH EU");
map.put("risis", "RISI");
map.put("ee", "SDSN - Greece");
map.put("oa-pg", "EC Post-Grant Open Access Pilot");
map.put("beopen", "Transport Research");
map.put("euromarine", "Euromarine");
map.put("ifremer", "Ifremer");
map.put("dh-ch", "Digital Humanities and Cultural Heritage");
map.put("science-innovation-policy", "Science and Innovation Policy Studies");
map.put("covid-19", "COVID-19");
map.put("enrmaps", "Energy Research");
map.put("epos", "EPOS");
}
/**
 * Creates the temporary working directory and the local Spark session used by the tests of this
 * class.
 *
 * @throws IOException if the temporary directory cannot be created
 */
@BeforeAll
public static void beforeAll() throws IOException {
	final String appName = DumpSubsetTest.class.getSimpleName();

	workingDir = Files.createTempDirectory(appName);
	log.info("using work dir {}", workingDir);

	// local, UI-less Spark whose warehouse folders live inside the working directory
	final SparkConf conf = new SparkConf()
		.setAppName(appName)
		.setMaster("local[*]")
		.set("spark.driver.host", "localhost")
		.set("hive.metastore.local", "true")
		.set("spark.ui.enabled", "false")
		.set("spark.sql.warehouse.dir", workingDir.toString())
		.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

	spark = SparkSession.builder().appName(appName).config(conf).getOrCreate();
}
/**
 * Empties the shared working directory after each test.
 *
 * The tests of this class write to the same sub-folders of the working directory (for example
 * {@code dump/publication}), in some cases through {@code saveAsTextFile}, which refuses to
 * write into an existing folder. Without this cleanup the outcome of a test depends on which
 * tests ran before it.
 *
 * @throws IOException if the content of the working directory cannot be removed
 */
@AfterEach
public void cleanWorkingDir() throws IOException {
	FileUtils.cleanDirectory(workingDir.toFile());
}

/**
 * Removes the working directory and stops the Spark session.
 *
 * @throws IOException if the working directory cannot be deleted
 */
@AfterAll
public static void afterAll() throws IOException {
	try {
		FileUtils.deleteDirectory(workingDir.toFile());
	} finally {
		// stop the session even when the directory could not be deleted
		spark.stop();
	}
}
/**
 * Step 1: runs {@code SparkDumpResult} on the publication fixture with a selection criteria on
 * {@code dateofacceptance} ({@code greater_than 2006-12-31} and {@code lesser_than 2023-01-01}).
 *
 * Checks that 16 records are written under both {@code dump/publication} and
 * {@code original/publication}. It also checks that, for result
 * {@code 50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8}, the {@code collectedfrom} entry
 * valued ZENODO has key {@code 10|openaire____::806360c771262b4d6770e7cdf04b5c5a} in the input
 * and key {@code 10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8} in the selected output
 * (presumably rewritten through the correspondence passed as {@code -masterDuplicatePath}).
 *
 * @throws Exception if the job fails
 */
@Test // Step 1
void testSelectionConstraints() throws Exception {
	// every entry is a JSONPath rooted at '$'; the context entry used to miss the root
	final String pathMap = "{\"author\" : \"$['author'][*]['fullname']\", " +
		"\"title\" : \"$['title'][*]['value']\", \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\", "
		+
		"\"contributor\" : \"$['contributor'][*]['value']\", \"description\" : \"$['description'][*]['value']\", "
		+
		"\"dateofacceptance\" : \"$['dateofacceptance']['value']\", " +
		"\"context\": \"$['context'][*]['id']\"}";

	final String constraint = "{\"criteria\":[{\"constraint\":[{\"verb\":\"lesser_than\",\"field\":\"dateofacceptance\",\"value\":\"2023-01-01\"},{\"verb\":\"greater_than\",\"field\":\"dateofacceptance\",\"value\":\"2006-12-31\"}]}]}";

	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/input/publication")
		.getPath();

	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();

	SparkDumpResult
		.main(
			new String[] {
				"-isSparkSessionManaged", Boolean.FALSE.toString(),
				"-sourcePath", sourcePath,
				"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
				"-outputPath", workingDir.toString(),
				"-communityMapPath", communityMapPath,
				"-pathMap", pathMap,
				"-selectionCriteria", constraint,
				"-resultType", "publication",
				"-masterDuplicatePath", getClass()
					.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/masterDuplicate/correspondence")
					.getPath()
			});

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

	JavaRDD<GraphResult> tmp = sc
		.textFile(workingDir.toString() + "/dump/publication")
		.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));

	Assertions.assertEquals(16, tmp.count());

	JavaRDD<Publication> tmp_pubs = sc
		.textFile(workingDir.toString() + "/original/publication")
		.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));

	JavaRDD<Publication> input = sc
		.textFile(sourcePath)
		.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));

	// in the input the ZENODO entry carries the openaire key
	Assertions
		.assertTrue(
			input
				.filter(r -> r.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
				.first()
				.getCollectedfrom()
				.stream()
				.anyMatch(
					cf -> cf.getKey().equals("10|openaire____::806360c771262b4d6770e7cdf04b5c5a")
						&& cf.getValue().equals("ZENODO")));

	Assertions.assertEquals(16, tmp_pubs.count());

	// in the selected output the same entry carries the fairsharing key
	Assertions
		.assertTrue(
			tmp_pubs
				.filter(r -> r.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
				.first()
				.getCollectedfrom()
				.stream()
				.anyMatch(
					cf -> cf.getKey().equals("10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8")
						&& cf.getValue().equals("ZENODO")));

	tmp_pubs.foreach(p -> System.out.println(OBJECT_MAPPER.writeValueAsString(p)));
}
/**
 * Step 1: runs {@code SparkDumpResult} on the publication fixture selecting the results whose
 * {@code context} equals {@code dh-ch}, and checks that 17 records are written under both
 * {@code dump/publication} and {@code original/publication}, all of them having a context with
 * id {@code dh-ch}.
 *
 * @throws Exception if the job fails
 */
@Test // Step 1
void testSelectionConstraintsCommunity() throws Exception {
	final String pathMap = "{\"author\" : \"$['author'][*]['fullname']\", "
		+ "\"title\" : \"$['title'][*]['value']\", "
		+ "\"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\", "
		+ "\"contributor\" : \"$['contributor'][*]['value']\", "
		+ "\"description\" : \"$['description'][*]['value']\", "
		+ "\"dateofacceptance\" : \"$['dateofacceptance']['value']\", "
		+ "\"context\": \"$['context'][*]['id']\"}";

	final String constraint = "{\"criteria\":[{\"constraint\":[{\"verb\":\"equals\",\"field\":\"context\",\"value\":\"dh-ch\"}]}]}";

	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/input/publication")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();

	final String[] jobArgs = {
		"-isSparkSessionManaged", Boolean.FALSE.toString(),
		"-sourcePath", sourcePath,
		"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
		"-outputPath", workingDir.toString(),
		"-communityMapPath", communityMapPath,
		"-pathMap", pathMap,
		"-selectionCriteria", constraint,
		"-resultType", "publication",
		"-masterDuplicatePath", getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/masterDuplicate/empty")
			.getPath()
	};
	SparkDumpResult.main(jobArgs);

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

	// dumped (model) version of the selected results
	final JavaRDD<GraphResult> dumped = sc
		.textFile(workingDir.toString() + "/dump/publication")
		.map(line -> OBJECT_MAPPER.readValue(line, GraphResult.class));
	Assertions.assertEquals(17, dumped.count());

	// original (OAF) version of the selected results
	final JavaRDD<Publication> selected = sc
		.textFile(workingDir.toString() + "/original/publication")
		.map(line -> OBJECT_MAPPER.readValue(line, Publication.class));
	Assertions.assertEquals(17, selected.count());

	final long inCommunity = selected
		.filter(pub -> pub.getContext().stream().anyMatch(ctx -> ctx.getId().equals("dh-ch")))
		.count();
	Assertions.assertEquals(17, inCommunity);
}
/**
 * Step 2: copies the {@code original/*} and {@code dump/*} result fixtures into the working
 * directory, runs {@code SparkSelectSubset} and checks the number of relations (overall and per
 * source/target id prefix), datasources, organizations and projects found under
 * {@code original/} in the working directory afterwards.
 *
 * @throws Exception if the job fails
 */
@Test // Step2
void testSelectSubset() throws Exception {
	final String resourceRoot = "/eu/dnetlib/dhp/oa/graph/dump/subset/";

	final String sourcePath = getClass()
		.getResource(resourceRoot + "input/")
		.getPath();

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

	// each fixture folder is copied to the folder with the same relative path in the working dir
	final String[] fixtures = {
		"original/publication", "original/software", "original/dataset", "original/otherresearchproduct",
		"dump/publication", "dump/software", "dump/dataset", "dump/otherresearchproduct"
	};
	for (String fixture : fixtures) {
		sc
			.textFile(getClass().getResource(resourceRoot + fixture).getPath())
			.saveAsTextFile(workingDir.toString() + "/" + fixture);
	}

	SparkSelectSubset
		.main(
			new String[] {
				"-isSparkSessionManaged", Boolean.FALSE.toString(),
				"-sourcePath", sourcePath,
				"-outputPath", workingDir.toString()
			});

	JavaRDD<Relation> tmp = sc
		.textFile(workingDir.toString() + "/original/relation")
		.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

	Assertions.assertEquals(20, tmp.count());

	// relation counts by (source prefix, target prefix)
	Assertions
		.assertEquals(
			6, tmp.filter(r -> r.getSource().startsWith("50|") && r.getTarget().startsWith("50|")).count());
	Assertions
		.assertEquals(
			3, tmp.filter(r -> r.getSource().startsWith("50|") && r.getTarget().startsWith("20|")).count());
	Assertions
		.assertEquals(
			3, tmp.filter(r -> r.getSource().startsWith("20|") && r.getTarget().startsWith("50|")).count());
	Assertions
		.assertEquals(
			4, tmp.filter(r -> r.getSource().startsWith("40|") && r.getTarget().startsWith("50|")).count());
	Assertions
		.assertEquals(
			1, tmp.filter(r -> r.getSource().startsWith("10|") && r.getTarget().startsWith("20|")).count());
	Assertions
		.assertEquals(
			1, tmp.filter(r -> r.getSource().startsWith("20|") && r.getTarget().startsWith("10|")).count());
	Assertions
		.assertEquals(
			1, tmp.filter(r -> r.getSource().startsWith("20|") && r.getTarget().startsWith("40|")).count());
	Assertions
		.assertEquals(
			1, tmp.filter(r -> r.getSource().startsWith("40|") && r.getTarget().startsWith("20|")).count());

	JavaRDD<eu.dnetlib.dhp.schema.oaf.Datasource> tmp_datasource = sc
		.textFile(workingDir.toString() + "/original/datasource")
		.map(item -> OBJECT_MAPPER.readValue(item, Datasource.class));
	Assertions.assertEquals(5, tmp_datasource.count());

	// this datasource must not be part of the selection
	Assertions
		.assertEquals(
			0,
			tmp_datasource
				.filter(d -> d.getId().equals("10|issn___print::0a79337eaf5145faa478785423273355"))
				.count());

	JavaRDD<Organization> tmp_organization = sc
		.textFile(workingDir.toString() + "/original/organization")
		.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
	Assertions.assertEquals(3, tmp_organization.count());

	JavaRDD<Project> tmp_project = sc
		.textFile(workingDir.toString() + "/original/project")
		.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
	Assertions.assertEquals(3, tmp_project.count());
}
/**
 * Runs {@code SparkSelectValidContext} on the {@code subset/original} fixture and checks that
 * six research communities are written to {@code dump/community_infrastructure}, one for each
 * of the expected acronyms.
 *
 * @throws Exception if the job fails
 */
@Test
public void selectValidContextTest() throws Exception {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/communityMap")
		.getPath();
	final String contextPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/context/community_infrastructure")
		.getPath();

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	final String outputPath = workingDir.toString() + "/dump/community_infrastructure";

	final String[] jobArgs = {
		"-isSparkSessionManaged", Boolean.FALSE.toString(),
		"-sourcePath", sourcePath,
		"-outputPath", outputPath,
		"-communityMapPath", communityMapPath,
		"-contextPath", contextPath
	};
	SparkSelectValidContext.main(jobArgs);

	final JavaRDD<ResearchCommunity> communities = sc
		.textFile(outputPath)
		.map(line -> OBJECT_MAPPER.readValue(line, ResearchCommunity.class));

	Assertions.assertEquals(6, communities.count());

	// each expected acronym must appear exactly once
	final String[] expectedAcronyms = {
		"enermaps", "eutopia", "dh-ch", "beopen", "neanias-underwater", "sdsn-gr"
	};
	for (String acronym : expectedAcronyms) {
		Assertions.assertEquals(1, communities.filter(cr -> cr.getAcronym().equals(acronym)).count());
	}
}
/**
 * Copies the {@code subset/dump/*} fixtures into {@code dump/} in the working directory, runs
 * {@code SparkSelectValidRelationContext} and checks the number of relations found under
 * {@code dump/relation}, overall and per source/target id prefix.
 *
 * @throws Exception if the job fails
 */
@Test
public void selectValidRelationContextTest() throws Exception {
	final String dumpResources = "/eu/dnetlib/dhp/oa/graph/dump/subset/dump/";

	final String contextRelationPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/working/relation")
		.getPath();

	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

	// each dumped entity fixture is copied to the folder with the same name under <workingDir>/dump
	final String[] entities = {
		"publication", "software", "dataset", "otherresearchproduct",
		"organization", "datasource", "project", "community_infrastructure"
	};
	for (String entity : entities) {
		sc
			.textFile(getClass().getResource(dumpResources + entity).getPath())
			.saveAsTextFile(workingDir.toString() + "/dump/" + entity);
	}

	SparkSelectValidRelationContext
		.main(
			new String[] {
				"-isSparkSessionManaged", Boolean.FALSE.toString(),
				"-sourcePath", workingDir.toString() + "/dump",
				"-contextRelationPath", contextRelationPath
			});

	JavaRDD<eu.dnetlib.dhp.oa.model.graph.Relation> tmp = sc
		.textFile(workingDir.toString() + "/dump/relation")
		.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class));

	Assertions.assertEquals(10, tmp.count());

	// for every id prefix the same number of relations is expected as source and as target
	Assertions.assertEquals(5, tmp.filter(r -> r.getSource().getId().startsWith("00")).count());
	Assertions.assertEquals(5, tmp.filter(r -> r.getTarget().getId().startsWith("00")).count());

	Assertions.assertEquals(2, tmp.filter(r -> r.getSource().getId().startsWith("10")).count());
	Assertions.assertEquals(2, tmp.filter(r -> r.getTarget().getId().startsWith("10")).count());

	Assertions.assertEquals(1, tmp.filter(r -> r.getSource().getId().startsWith("40")).count());
	Assertions.assertEquals(1, tmp.filter(r -> r.getTarget().getId().startsWith("40")).count());

	Assertions.assertEquals(2, tmp.filter(r -> r.getSource().getId().startsWith("20")).count());
	Assertions.assertEquals(2, tmp.filter(r -> r.getTarget().getId().startsWith("20")).count());
}
/**
 * Runs the {@code Extractor} on the {@code subset/original/publication} fixture and checks the
 * number of relations written to {@code relation} in the working directory: 94 overall, of
 * which 47 have a source id starting with {@code 50|}, 36 with {@code 10|} and 11 with
 * {@code 00|}.
 */
@Test
public void extractRelationFromResultTest() {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/publication")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/communityMap")
		.getPath();
	final String relationPath = workingDir.toString() + "/relation";

	new Extractor()
		.run(
			false, sourcePath, relationPath,
			eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath);

	final JavaRDD<eu.dnetlib.dhp.oa.model.graph.Relation> relations = JavaSparkContext
		.fromSparkContext(spark.sparkContext())
		.textFile(relationPath)
		.map(line -> OBJECT_MAPPER.readValue(line, eu.dnetlib.dhp.oa.model.graph.Relation.class));

	Assertions.assertEquals(94, relations.count());

	// breakdown by the prefix of the source id
	final long fromResults = relations.filter(rel -> rel.getSource().getId().startsWith("50|")).count();
	final long fromDatasources = relations.filter(rel -> rel.getSource().getId().startsWith("10|")).count();
	final long fromContexts = relations.filter(rel -> rel.getSource().getId().startsWith("00|")).count();

	Assertions.assertEquals(47, fromResults);
	Assertions.assertEquals(36, fromDatasources);
	Assertions.assertEquals(11, fromContexts);
}
}

View File

@ -1 +1 @@
{"ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}
{"sdsn-gr":"SDSN - Greece","ee":"SDSN - Greece","epos":"EPOS","enrmaps":"Energy Research","fet-h2020":"FET H2020","instruct":"Instruct-Eric","egi":"EGI Federation","euromarine":"Euromarine","covid-19":"COVID-19","dariah":"DARIAH EU","rda":"Research Data Alliance","clarin":"CLARIN","aginfra":"Agricultural and Food Sciences","risis":"RISI","fam":"Fisheries and Aquaculture Management","beopen":"Transport Research","elixir-gr":"ELIXIR GR","fet-fp7":"FET FP7","ifremer":"Ifremer","science-innovation-policy":"Science and Innovation Policy Studies","mes":"European Marine Scinece","oa-pg":"EC Post-Grant Open Access Pilot","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}

View File

@ -0,0 +1 @@
{"pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "contributor": [], "collectedfrom": [{"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, {"key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a", "value": "Microsoft Academic Graph"}], "id": "50|doi_________::0027accd79214af151336e8237a2b084", "container": {"issnPrinted": "1607-6729", "conferencedate": null, "vol": "385", "conferenceplace": null, "name": "Doklady Biochemistry and Biophysics", "iss": null, "sp": "228", "edition": null, "issnOnline": null, "ep": "234", "issnLinking": null}, "lastupdatetimestamp": 1649039791345, "author": [{"surname": null, "fullname": "Vladimir S. Saakov", "pid": null, "name": null, "rank": 1}], "instance": [{"refereed": "UNKNOWN", "hostedby": {"key": "10|issn___print::55156520c3996f4d887f858c089d1e5f", "value": "Doklady Biochemistry and Biophysics"}, "url": ["https://doi.org/10.1023/a:1019971625315"], "pid": [{"scheme": "doi", "value": "10.1023/a:1019971625315"}], "publicationdate": "2002-01-01", "collectedfrom": {"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value": "Crossref"}, "type": "Article"}], "subjects": [{"provenance": null, "subject": {"scheme": "keyword", "value": "General Chemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biochemistry"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "General Medicine"}}, {"provenance": null, "subject": {"scheme": "keyword", "value": "Biophysics"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photosystem II"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Ion"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Soil salinity"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Analytical chemistry"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Function (biology)"}}, {"provenance": null, 
"subject": {"scheme": "MAG", "value": "Pulse (signal processing)"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Fluorescence"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Phototroph"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Kinetic energy"}}, {"provenance": null, "subject": {"scheme": "MAG", "value": "Photochemistry"}}], "publicationdate": "2002-01-01", "indicators": {"impactMeasures": {"influence": {"score": "4.901964E-9", "class": "C"}, "popularity": {"score": "6.185583E-10", "class": "C"}, "influence_alt": {"score": "3", "class": "C"}, "impulse": {"score": "0", "class": "C"}, "popularity_alt": {"score": "0.03722029", "class": "C"}}}, "dateofcollection": "2022-04-04T02:36:31Z", "type": "publication", "description": [], "format": [], "coverage": [], "publisher": "Springer Science and Business Media LLC", "language": {"code": "und", "label": "Undetermined"}, "country": [], "originalId": ["453197", "10.1023/a:1019971625315", "314096869"], "source": ["Crossref", null], "context": [{"code": "enermaps", "provenance": [{"provenance": "Inferred by OpenAIRE", "trust": "0.8"}], "label": "Energy Research"}]}

View File

@ -2,4 +2,4 @@
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-25","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen 
vuosikirja"},"extraInfo":[],"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj1798355X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen vuosikirja"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1798-355X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geography. Anthropology. Recreation: Recreation. Leisure | Science: Mathematics: Instruments and machines: Electronic computers. Computer science: Computer software"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.pelitutkimus.fi"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy 
Journal"},"extraInfo":[],"id":"10|doajarticles::a5314b60f79b869cb5d3a2709167bc3a","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj0322788X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy Journal"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a 
compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0322-788X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Statistics"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.czso.cz/statistika_journal"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development 
Finance"},"extraInfo":[],"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj18799337"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development Finance"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1879-9337",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Industries. Land use. Labor: Economic growth, development, planning | Social Sciences: Finance"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.journals.elsevier.com/review-of-development-finance/"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-28","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation 
Technology"},"extraInfo":[],"id":"10|issn___print::0a79337eaf5145faa478785423273355","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"jrnl12269026"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation Technology"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1226-9026",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-28","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation 
Technology"},"extraInfo":[],"id":"10|issn___print::0a79337eaf5145faa478785423273355","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"jrnl12269026"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation Technology"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1226-9026",null],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":null,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"re3data","classname":"re3data","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"r3d100012161"}],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false}}

View File

@ -0,0 +1 @@
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"consenttermsofuse":false,"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Arachnology"},"eoscdatasourcetype":{"classid":"Journal archive","classname":"Journal Archive","schemeid":"dnet:eosc_datasource_types","schemename":"dnet:eosc_datasource_types"},"eosctype":{"classid":"Data Source","classname":"Data 
Source","schemeid":"","schemename":""},"extraInfo":[],"fulltextdownload":false,"id":"10|issn___print::2d7299a5fd9d7e3db4e6b4c0245fd7c3","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2050-9936","issnPrinted":"2050-9928","name":"Arachnology"},"languages":[],"lastupdatetimestamp":1668505479963,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20509928"},"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Arachnology"}
,"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2050-9928"],"pid":[],"policies":[],"researchentitytypes":["Literature"],"subjects":[],"thematic":false,"versioncontrol":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"eut":"European University of Technology","covid-19":"COVID-19","dariah":"DARIAH EU","aurora":"Aurora Universities Network","neanias-space":"NEANIAS Space Research Community","eutopia":"EUTOPIA Alliance","neanias-underwater":"NEANIAS Underwater Research Community","neanias-atmospheric":"NEANIAS Atmospheric Research Community","beopen":"Transport Research","sdsn-gr":"SDSN - Greece","elixir-gr":"ELIXIR GR","rural-digital-europe":"Rural Digital Europe","north-america-studies":"North America Studies","mes":"European Marine Science","enermaps":"Energy Research","ni":"Neuroinformatics","dh-ch":"Digital Humanities and Cultural Heritage"}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,6 @@
{"id":"00|context_____::e15922110564cf669aaed346e871bc01","acronym":"eutopia","name":"EUTOPIA Open Research Portal","type":"Research Community","description":"<p style=text-align:justify>EUTOPIA is an ambitious alliance of 10 like-minded universities ready to reinvent themselves: the Babeș-Bolyai University in Cluj-Napoca (Romania), the Vrije Universiteit Brussels (Belgium), the Ca&#39;Foscari University of Europe (Italy), CY Cergy Paris Universit&eacute; (France), the Technische Universit&auml;t Dresden (Germany), the University of Gothenburg (Sweden), the University of Ljubljana (Slovenia), the NOVA University Lisbon (Portugal), the University of Pompeu Fabra (Spain) and the University of Warwick (United Kingdom). Together, these 10 pioneers join forces to build the university of the future.</p>","zenodo_community":null}
{"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","acronym":"enermaps","name":"Welcome to EnerMaps Gateway! Find the latest scientific data.","type":"Research Community","description":"","zenodo_community":null,"subject":[]}
{"id":"00|context_____::6f567d9abd1c6603b0c0205a832bc757","acronym":"neanias-underwater","name":"NEANIAS Underwater Research Community","type":"Research Community","description":"","zenodo_community":null,"subject":["Ocean mapping","Multibeam Backscatter","Bathymetry","Seabed classification","Submarine Geomorphology","Underwater Photogrammetry"]}
{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","acronym":"dh-ch","name":"Digital Humanities and Cultural Heritage","type":"Research Community","description":"This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.","zenodo_community":"https://zenodo.org/communities/oac_dh-ch","subject":["modern art","monuments","europeana data model","field walking","frescoes","LIDO metadata schema","art history","excavation","Arts and Humanities General","coins","temples","numismatics","lithics","environmental archaeology","digital cultural heritage","archaeological reports","history","CRMba","churches","cultural heritage","archaeological stratigraphy","religious art","digital humanities","archaeological sites","linguistic studies","bioarchaeology","architectural orders","palaeoanthropology","fine arts","europeana","CIDOC CRM","decorations","classic art","stratigraphy","digital archaeology","intangible cultural heritage","walls","chapels","CRMtex","Language and Literature","paintings","archaeology","mosaics","burials","medieval art","castles","CARARE metadata schema","statues","natural language processing","inscriptions","CRMsci","vaults","contemporary art","Arts and Humanities","CRMarchaeo","pottery"]}
{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","acronym":"beopen","name":"Transport Research","type":"Research Community","description":"Welcome to the Open Research Gateway for Transport Research. This gateway is part of the TOPOS Observatory (https://www.topos-observatory.eu). The TOPOS aims to showcase the status and progress of open science uptake in transport research. It focuses on promoting territorial and cross border cooperation and contributing in the optimization of open science in transport research.\nThe TOPOS Observatory is supported by the EC H2020 BEOPEN project (824323)","zenodo_community":"https://zenodo.org/communities/be-open-transport","subject":["Green Transport","City mobility systems","Vulnerable road users","Traffic engineering","Transport electrification","Intermodal freight transport","Clean vehicle fleets","Intelligent mobility","Inflight refueling","District mobility systems","Navigation and control systems for optimised planning and routing","European Space Technology Platform","European Transport networks","Green cars","Inter-modality infrastructures","Advanced Take Off and Landing Ideas","Sustainable urban systems","port-area railway networks","Innovative forms of urban transport","Alliance for Logistics Innovation through Collaboration in Europe","Advisory Council for Aeronautics Research in Europe","Mobility services for people and goods","Guidance and traffic management","Passenger mobility","Smart mobility and services","transport innovation","high-speed railway","Vehicle design","Inland shipping","public transportation","aviations climate impact","Road transport","On-demand public transport","Personal Air Transport","Pipeline transport","European Association of Aviation Training and Education Organisations","Defrosting of railway infrastructure","Inclusive and affordable transport","River Information Services","jel:L92","Increased use of public transport","Seamless mobility","STRIA","trolleybus transport","Intelligent 
Transport System","Low-emission alternative energy for transport","Shared mobility for people and goods","Business model for urban mobility","Interoperability of transport systems","Cross-border train slot booking","Air transport","Transport pricing","Sustainable transport","European Rail Transport Research Advisory Council","Alternative aircraft configurations","Railways applications","urban transport","Environmental impact of transport","urban freight delivery systems","Automated Road Transport","Alternative fuels in public transport","Active LIDAR-sensor for GHG-measurements","Autonomous logistics operations","Rational use of motorised transport","Network and traffic management systems","electrification of railway wagons","Single European Sky","Electrified road systems","Railway dynamics","Motorway of the Sea","smart railway communications","Maritime transport","Environmental- friendly transport","Combined transport","Connected automated driving technology","Innovative freight logistics services","automated and shared vehicles","Alternative Aircraft Systems","Land-use and transport interaction","Public transport system","Business plan for shared mobility","Shared mobility","Growing of mobility demand","European Road Transport Research Advisory Council","WATERBORNE ETP","Effective transport management system","Short Sea Shipping","air traffic management","Sea hubs and the motorways of the sea","Urban mobility solutions","Smart city planning","Maritime spatial planning","EUropean rail Research Network of Excellence","ENERGY CONSUMPTION BY THE TRANSPORT SECTOR","Integrated urban plan","inland waterway services","European Conference of Transport Research Institutes","air vehicles","E-freight","Automated Driving","Automated ships","pricing for cross-border passenger transport","Vehicle efficiency","Railway transport","Electric vehicles","Road traffic monitoring","Deep sea shipping","Circular economy in transport","Traffic congestion","air transport system","Urban 
logistics","Rail transport","OpenStreetMap","high speed rail","Transportation engineering","Intermodal travel information","Flight Data Recorders","Advanced driver assistance systems","long distance freight transport","Inland waterway transport","Smart mobility","Mobility integration","Personal Rapid Transit system","Safety measures & requirements for roads","Green rail transport","Vehicle manufacturing","Future Airport Layout","Rail technologies","European Intermodal Research Advisory Council","inland navigation","Automated urban vehicles","ECSS-standards","Traveller services","Polluting transport","Air Traffic Control","Cooperative and connected and automated transport","Innovative powertrains","Quality of transport system and services","door-to- door logistics chain","Inter-modal aspects of urban mobility","Innovative freight delivery systems","urban freight delivery infrastructures"]}
{"id":"00|context_____::a38bf77184799906a6ce86b9eb761c80","acronym":"sdsn-gr","name":"Sustainable Development Solutions Network - Greece","type":"Research Community","description":"The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.","zenodo_community":"https://zenodo.org/communities/oac_sdsn-greece","subject":["SDG13 - Climate action","SDG8 - Decent work and economic\n\t\t\t\t\tgrowth","SDG15 - Life on land","SDG2 - Zero hunger","SDG17 - Partnerships for the\n\t\t\t\t\tgoals","SDG10 - Reduced inequalities","SDG5 - Gender equality","SDG12 - Responsible\n\t\t\t\t\tconsumption and production","SDG14 - Life below water","SDG6 - Clean water and\n\t\t\t\t\tsanitation","SDG11 - Sustainable cities and communities","SDG1 - No poverty","SDG3 -\n\t\t\t\t\tGood health and well being","SDG7 - Affordable and clean energy","SDG4 - Quality\n\t\t\t\t\teducation","SDG9 - Industry innovation and infrastructure","SDG16 - Peace justice\n\t\t\t\t\tand strong institutions"]}

View File

@ -0,0 +1,4 @@
{"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","originalId":["doajarticles::1798-355X"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"not available","officialname":"Pelitutkimuksen vuosikirja","englishname":"Pelitutkimuksen vuosikirja","websiteurl":"http://www.pelitutkimus.fi","logourl":null,"dateofvalidation":null,"description":null,"subjects":["Geography. Anthropology. Recreation: Recreation. Leisure | Science: Mathematics: Instruments and machines: Electronic computers. Computer science: Computer software"],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}
{"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","originalId":["doajarticles::1879-9337"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"collected from a compatible aggregator","officialname":"Review of Development Finance","englishname":"Review of Development Finance","websiteurl":"http://www.journals.elsevier.com/review-of-development-finance/","logourl":null,"dateofvalidation":null,"description":null,"subjects":["Social Sciences: Industries. Land use. Labor: Economic growth, development, planning | Social Sciences: Finance"],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}
{"id":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","originalId":["doajarticles::1048-9533"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"collected from a compatible aggregator","officialname":"Journal of Applied Mathematics and Stochastic Analysis","englishname":"Journal of Applied Mathematics and Stochastic Analysis","websiteurl":"https://www.hindawi.com/journals/jamsa","logourl":null,"dateofvalidation":null,"description":null,"subjects":[],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}
{"id":"10|doajarticles::a5314b60f79b869cb5d3a2709167bc3a","originalId":["doajarticles::0322-788X"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"collected from a compatible aggregator","officialname":"Statistika: Statistics and Economy Journal","englishname":"Statistika: Statistics and Economy Journal","websiteurl":"http://www.czso.cz/statistika_journal","logourl":null,"dateofvalidation":null,"description":null,"subjects":["Social Sciences: Statistics"],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}

View File

@ -0,0 +1,3 @@
{"legalshortname":"SHoF","legalname":"Swedish House of Finance","websiteurl":"http://houseoffinance.se/","alternativenames":["SHoF"],"country":{"code":"SE","label":"Sweden"},"id":"20|grid________::87698402476531ba39e61f1df38f2a91","pid":[{"scheme":"grid","value":"grid.451954.8"}]}
{"legalshortname":"Korean Elementary Moral Education Society","legalname":"Korean Elementary Moral Education Society","websiteurl":"http://www.ethics.or.kr/","alternativenames":["한국초등도덕교육학회"],"country":{"code":"KR","label":"Korea (Republic of)"},"id":"20|grid________::bd5cbea5dc434b8fd811a880cb9d4a05","pid":[{"scheme":"grid","value":"grid.496778.3"}]}
{"legalshortname":"NHC","legalname":"National Health Council","websiteurl":"http://www.nationalhealthcouncil.org/","alternativenames":["NHC"],"country":{"code":"US","label":"United States"},"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","pid":[{"scheme":"grid","value":"grid.487707.b"}]}

View File

@ -0,0 +1,3 @@
{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,5 @@
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal of Applied Mathematics and Stochastic 
Analysis"},"extraInfo":[],"id":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj10489533"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal of Applied Mathematics and Stochastic Analysis"},"openairecompatibility":{"classid":"hostedBy","classname":"collected 
from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1048-9533",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"https://www.hindawi.com/journals/jamsa"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-25","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen 
vuosikirja"},"extraInfo":[],"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj1798355X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pelitutkimuksen vuosikirja"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not 
available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1798-355X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geography. Anthropology. Recreation: Recreation. Leisure | Science: Mathematics: Instruments and machines: Electronic computers. Computer science: Computer software"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.pelitutkimus.fi"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy 
Journal"},"extraInfo":[],"id":"10|doajarticles::a5314b60f79b869cb5d3a2709167bc3a","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj0322788X"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Statistika: Statistics and Economy Journal"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a 
compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0322-788X",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Statistics"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.czso.cz/statistika_journal"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2018-06-05","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development 
Finance"},"extraInfo":[],"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"doaj18799337"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Review of Development Finance"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1879-9337",null],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Social Sciences: Industries. Land use. Labor: Economic growth, development, planning | Social Sciences: Finance"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://www.journals.elsevier.com/review-of-development-finance/"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-05-28","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation 
Technology"},"extraInfo":[],"id":"10|fairsharing_::cd0f74b5955dc87fd0605745c4b49ee8","lastupdatetimestamp":1592688952862,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"jrnl12269026"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"The Journal of Advanced Navigation Technology"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1226-9026",null],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":null,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"re3data","classname":"re3data","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"r3d100012161"}],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":false}}

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff Show More