Remove all classes not needed for this branch; keep only those related to the EOSC.

This commit is contained in:
Miriam Baglioni 2023-02-28 08:55:38 +01:00
parent da2e0bb1db
commit ec1dac5847
103 changed files with 941 additions and 7727 deletions

View File

@ -12,7 +12,7 @@ import com.fasterxml.jackson.databind.SerializationFeature;
import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.oa.model.graph.*;
public class ExecCreateSchemas {
@ -60,14 +60,8 @@ public class ExecCreateSchemas {
ExecCreateSchemas ecs = new ExecCreateSchemas();
ecs.init();
ecs.generate(GraphResult.class, DIRECTORY, "result_schema.json");
ecs.generate(ResearchCommunity.class, DIRECTORY, "community_infrastructure_schema.json");
ecs.generate(Datasource.class, DIRECTORY, "datasource_schema.json");
ecs.generate(Project.class, DIRECTORY, "project_schema.json");
ecs.generate(Relation.class, DIRECTORY, "relation_schema.json");
ecs.generate(Organization.class, DIRECTORY, "organization_schema.json");
ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json");
ecs.generate(Result.class, DIRECTORY, "eosc_result_schema.json");
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
/**
* AccessRight. Used to represent the result access rights. It extends the eu.dnet.lib.dhp.schema.dump.oaf.BestAccessRight

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -1,8 +1,10 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**

View File

@ -1,8 +1,10 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
@ -36,15 +38,15 @@ public class AuthorPid implements Serializable {
public static AuthorPid newInstance(AuthorPidSchemeValue pid, Provenance provenance) {
AuthorPid p = new AuthorPid();
p.id = pid;
p.provenance = provenance;
p.setId(pid);
p.setProvenance(provenance);
return p;
}
public static AuthorPid newInstance(AuthorPidSchemeValue pid) {
AuthorPid p = new AuthorPid();
p.id = pid;
p.setId(pid);
return p;
}

View File

@ -1,8 +1,10 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class AuthorPidSchemeValue implements Serializable {
@ -37,4 +39,5 @@ public class AuthorPidSchemeValue implements Serializable {
return cf;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model.community;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
@ -32,16 +32,15 @@ public class CfHbKeyValue implements Serializable {
this.value = value;
}
public static CfHbKeyValue newInstance(String key, String value) {
CfHbKeyValue inst = new CfHbKeyValue();
inst.key = key;
inst.value = value;
return inst;
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
}
public static CfHbKeyValue newInstance(String key, String value) {
CfHbKeyValue inst = new CfHbKeyValue();
inst.setKey(key);
inst.setValue(value);
return inst;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model.community;
package eu.dnetlib.dhp.eosc.model;
import java.util.List;
import java.util.Objects;
@ -8,8 +8,6 @@ import java.util.stream.Collectors;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
/**
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
* OpenAIRE. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a parameter provenance of type

View File

@ -1,8 +1,10 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
@ -43,5 +45,4 @@ public class Country implements Serializable {
c.setLabel(label);
return c;
}
}

View File

@ -1,71 +0,0 @@
package eu.dnetlib.dhp.eosc.model;
import java.util.List;
import java.util.Map;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
/**
* @author miriam.baglioni
* @Date 29/07/22
*/
public class EoscResult extends CommunityResult {

	// NOTE: field declaration order is kept as-is; reflection-based serializers
	// may rely on it for property ordering.
	@JsonSchema(description = "Describes a reference to the EOSC Interoperability Framework (IF) Guidelines")
	private List<EoscInteroperabilityFramework> eoscIF;

	@JsonSchema(description = "The subject dumped by type associated to the result")
	private Map<String, List<Subject>> subject;

	@JsonSchema(description = "The list of keywords associated to the result")
	private List<String> keywords;

	@JsonSchema(description = "The list of organizations the result is affiliated to")
	private List<Organization> affiliation;

	@JsonSchema(description = "The indicators for this result")
	private Indicator indicator;

	/** @return the EOSC Interoperability Framework guidelines referenced by this result */
	public List<EoscInteroperabilityFramework> getEoscIF() {
		return eoscIF;
	}

	public void setEoscIF(List<EoscInteroperabilityFramework> eoscIF) {
		this.eoscIF = eoscIF;
	}

	/** @return the subjects associated to the result, grouped by subject type */
	public Map<String, List<Subject>> getSubject() {
		return subject;
	}

	public void setSubject(Map<String, List<Subject>> subject) {
		this.subject = subject;
	}

	/** @return the free-text keywords associated to the result */
	public List<String> getKeywords() {
		return keywords;
	}

	public void setKeywords(List<String> keywords) {
		this.keywords = keywords;
	}

	/** @return the organizations the result is affiliated to */
	public List<Organization> getAffiliation() {
		return affiliation;
	}

	public void setAffiliation(List<Organization> affiliation) {
		this.affiliation = affiliation;
	}

	/** @return the usage indicators (e.g. views/downloads) computed for this result */
	public Indicator getIndicator() {
		return indicator;
	}

	public void setIndicator(Indicator indicator) {
		this.indicator = indicator;
	}
}

View File

@ -0,0 +1,58 @@
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* @author miriam.baglioni
* @Date 26/01/23
*/
public class Funder implements Serializable {

	// All fields grouped together; declaration order (shortName, name,
	// jurisdiction, fundingStream) matches the original source order, so
	// reflection-based property ordering is unchanged.
	@JsonSchema(description = "The short name of the funder (EC)")
	private String shortName;

	@JsonSchema(description = "The name of the funder (European Commission)")
	private String name;

	@JsonSchema(
		description = "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)")
	private String jurisdiction;

	@JsonSchema(description = "Stream of funding (e.g. for European Commission can be H2020 or FP7)")
	private String fundingStream;

	/** @return the short name of the funder, e.g. "EC" */
	public String getShortName() {
		return shortName;
	}

	public void setShortName(String shortName) {
		this.shortName = shortName;
	}

	/** @return the full name of the funder, e.g. "European Commission" */
	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	/** @return the geographical jurisdiction of the funder, e.g. "EU" */
	public String getJurisdiction() {
		return jurisdiction;
	}

	public void setJurisdiction(String jurisdiction) {
		this.jurisdiction = jurisdiction;
	}

	/** @return the funding stream, e.g. "H2020" */
	public String getFundingStream() {
		return fundingStream;
	}

	public void setFundingStream(String fundingStream) {
		this.fundingStream = fundingStream;
	}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -10,18 +10,6 @@ import java.io.Serializable;
public class Indicator implements Serializable {
private UsageCounts usageCounts;
public static Indicator newInstance(UsageCounts uc) {
Indicator i = new Indicator();
i.usageCounts = uc;
return i;
}
public static Indicator newInstance(String downloads, String views) {
Indicator i = new Indicator();
i.usageCounts = UsageCounts.newInstance(views, downloads);
return i;
}
public UsageCounts getUsageCounts() {
return usageCounts;
}
@ -29,4 +17,17 @@ public class Indicator implements Serializable {
public void setUsageCounts(UsageCounts usageCounts) {
this.usageCounts = usageCounts;
}
public static Indicator newInstance(UsageCounts uc) {
Indicator i = new Indicator();
i.setUsageCounts(uc);
return i;
}
public static Indicator newInstance(String downloads, String views) {
Indicator i = new Indicator();
i.setUsageCounts(UsageCounts.newInstance(views, downloads));
return i;
}
}

View File

@ -1,30 +1,35 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
* versions are two manifestations of the same research result. It has the following parameters: - license of type
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
* (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type
* List<String> list of locations where the instance is accessible. It corresponds to url of the instance to be dumped -
* publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type
* String to store information abour the review status of the instance. Possible values are 'Unknown',
* 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped
* - articleprocessingcharge of type APC to store the article processing charges possibly associated to the instance
* -pid of type List<ControlledField> that is the list of pids associated to the result coming from authoritative sources for that pid
* -alternateIdentifier of type List<ControlledField> that is the list of pids associated to the result coming from NON authoritative
* sources for that pid
* -measure list<KeyValue> to represent the measure computed for this instance (for example the Bip!Finder ones). It corresponds to measures in the model
* @author miriam.baglioni
* @Date 02/02/23
*/
/**
* It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. In the Result dump this
* information is not present because it is dumped as a set of relations between the result and the datasource. -
* hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the
* instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and -
* key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
* collectedfrom.key - value corresponds to collectedfrom.value
*/
public class Instance implements Serializable {
@JsonSchema(description = "Information about the source from which the instance can be viewed or downloaded.")
private CfHbKeyValue hostedby;
@JsonSchema(description = "Information about the source from which the record has been collected")
@JsonInclude(JsonInclude.Include.NON_NULL)
private CfHbKeyValue collectedfrom;
@JsonSchema(description = "Measures computed for this instance, for example Bip!Finder ones")
private List<Measure> measures;
@ -138,4 +143,20 @@ public class Instance implements Serializable {
public void setMeasures(List<Measure> measures) {
this.measures = measures;
}
public CfHbKeyValue getHostedby() {
return hostedby;
}
public void setHostedby(CfHbKeyValue hostedby) {
this.hostedby = hostedby;
}
public CfHbKeyValue getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(CfHbKeyValue collectedfrom) {
this.collectedfrom = collectedfrom;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
/**
* This Enum models the OpenAccess status, currently including only the values from Unpaywall

View File

@ -1,19 +1,27 @@
package eu.dnetlib.dhp.oa.model.community;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
/**
* To store information about the project related to the result. This information is not directly mapped from the result
* represented in the internal model because it is not there. The mapped result will be enriched with project
* information derived by relation between results and projects. Project extends eu.dnetlib.dhp.schema.dump.oaf.Project
* with the following parameters: - funder of type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information
* about the funder funding the project - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store
* information about the. provenance of the association between the result and the project
* @author miriam.baglioni
* @Date 26/01/23
*/
public class Project extends eu.dnetlib.dhp.oa.model.Project {
public class Project implements Serializable {
@JsonSchema(description = "The OpenAIRE id for the project")
protected String id;// OpenAIRE id
@JsonSchema(description = "The grant agreement number")
protected String code;
@JsonSchema(description = "The acronym of the project")
protected String acronym;
protected String title;
@JsonSchema(description = "Information about the funder funding the project")
private Funder funder;
@ -46,6 +54,38 @@ public class Project extends eu.dnetlib.dhp.oa.model.Project {
this.funder = funders;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
Project project = new Project();
project.setAcronym(acronym);
@ -55,4 +95,5 @@ public class Project extends eu.dnetlib.dhp.oa.model.Project {
project.setTitle(title);
return project;
}
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
/**
* @author miriam.baglioni
* @Date 26/01/23
*/
public class Provenance implements Serializable {

	// Name of the provenance action (free-text in this dump model).
	private String provenance;

	// Trust level associated with the provenance action, kept as a String
	// as elsewhere in the dump model.
	private String trust;

	/** @return the provenance action name */
	public String getProvenance() {
		return provenance;
	}

	public void setProvenance(String provenance) {
		this.provenance = provenance;
	}

	/** @return the trust level associated with the provenance */
	public String getTrust() {
		return trust;
	}

	public void setTrust(String trust) {
		this.trust = trust;
	}

	/**
	 * Static factory building a fully-populated instance.
	 *
	 * @param provenance the provenance action name
	 * @param trust      the associated trust level
	 * @return a new {@link Provenance} carrying the given values
	 */
	public static Provenance newInstance(String provenance, String trust) {
		Provenance p = new Provenance();
		p.setProvenance(provenance);
		p.setTrust(trust);
		return p;
	}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model.graph;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -6,10 +6,6 @@ import java.util.Objects;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
/**
* To represent the gereric relation between two entities. It has the following parameters: - private Node source to
* represent the entity source of the relation - private Node target to represent the entity target of the relation -

View File

@ -1,76 +1,49 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
* Initiative/Infrastructures. It has the following parameters:
* - author of type
* List<eu.dnetlib.dhp.schema.dump.oaf.Author> to describe the authors of a result. For each author in the result
* represented in the internal model one author in the external model is produced.
* - type of type String to represent
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
* resulttype.classname of the dumped result
* - language of type eu.dnetlib.dhp.schema.dump.oaf.Language to store
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
* corresponds to language.classname
* - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
* list to which the result is associated. For each country in the result respresented in the internal model one country
* in the external model is produces - subjects of type List<eu.dnetlib.dhp.dump.oaf.Subject> to store the subjects for
* the result. For each subject in the result represented in the internal model one subject in the external model is
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
* title in the resul to be dumped having classid equals to "main title" - subtitle of type String to store the subtitle
* of the result. It corresponds to the value of the first title in the resul to be dumped having classid equals to
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
* description.value in the result represented in the internal model - publicationdate of type String to store the
* pubblication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
* represented in the intrenal model - embargoenddate of type String to store the embargo end date. It corresponds to
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
* list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store informatin about the openest access right associated to the
* manifestations of this research results. It corresponds to the same parameter in the result represented in the
* internal model - container of type eu.dnetlib.dhp.schema/dump.oaf.Container (only for result of type publication). It
* corresponds to the parameter journal of the result represented in the internal model - documentationUrl of type
* List<String> (only for results of type software) to store the URLs to the software documentation. It corresponds to
* the list of documentationUrl.value of the result represented in the internal model - codeRepositoryUrl of type String
* (only for results of type software) to store the URL to the repository with the source code. It corresponds to
* codeRepositoryUrl.value of the result represented in the internal model - programmingLanguage of type String (only
* for results of type software) to store the programming language. It corresponds to programmingLanguaga.classid of the
* result represented in the internal model - contactperson of type List<String> (only for results of type other) to
* store the contact person for this result. It corresponds to the list of contactperson.value of the result represented
* in the internal model - contactgroup of type List<String> (only for results of type other) to store the information
* for the contact group. It corresponds to the list of contactgroup.value of the result represented in the internal
* model - tool of type List<String> (only for results of type other) to store information about tool useful for the
* interpretation and/or re-use of the research product. It corresponds to the list of tool.value in the result
* represented in the internal model - size of type String (only for results of type dataset) to store the size of the
* dataset. It corresponds to size.value in the result represented in the internal model - version of type String (only
* for results of type dataset) to store the version. It corresponds to version.value of the result represented in the
* internal model - geolocation fo type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type
* dataset) to store geolocation information. For each geolocation element in the result represented in the internal
* model a GeoLocation in the external model il produced - id of type String to store the OpenAIRE id of the result. It
* corresponds to the id of the result represented in the internal model - originalId of type List<String> to store the
* original ids of the result. It corresponds to the originalId of the result represented in the internal model - pid of
* type List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For
* each pid in the results represented in the internal model one pid in the external model is produced. The value
* correspondence is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model -
* value corresponds to the pid.value of the result represented in the internal model - dateofcollection of type String
* to store information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
* represented in the internal model - lasteupdatetimestamp of type String to store the timestamp of the last update of
* the record. It corresponds to lastupdatetimestamp of the resord represented in the internal model
*
* @author miriam.baglioni
* @Date 29/07/22
*/
public class Result implements Serializable {
@JsonSchema(description = "Describes a reference to the EOSC Interoperability Framework (IF) Guidelines")
private List<EoscInteroperabilityFramework> eoscIF;
@JsonSchema(description = "The subject dumped by type associated to the result")
private Map<String, List<Subject>> subject;
@JsonSchema(description = "The list of keywords associated to the result")
private List<String> keywords;
@JsonSchema(description = "The list of organizations the result is affiliated to")
private List<Organization> affiliation;
@JsonSchema(description = "The indicators for this result")
private Indicator indicator;
@JsonSchema(description = "List of projects (i.e. grants) that (co-)funded the production ofn the research results")
private List<Project> projects;
@JsonSchema(
description = "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu")
private List<Context> context;
@JsonSchema(description = "Information about the sources from which the record has been collected")
@JsonInclude(JsonInclude.Include.NON_NULL)
protected List<CfHbKeyValue> collectedfrom;
@JsonSchema(
description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version")
private List<Instance> instance;
private List<Author> author;
// resulttype allows subclassing results into publications | datasets | software
@ -84,9 +57,6 @@ public class Result implements Serializable {
@JsonSchema(description = "The list of countries associated to this result")
private List<ResultCountry> country;
@JsonSchema(description = "Keywords associated to the result")
private List<Subject> subjects;
@JsonSchema(
description = "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software.")
private String maintitle;
@ -169,6 +139,9 @@ public class Result implements Serializable {
@JsonSchema(description = "Timestamp of last update of the record in OpenAIRE")
private Long lastupdatetimestamp;
@JsonSchema(description = "The set of relations associated to this result")
private List<Relation> relations;
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}
@ -249,15 +222,6 @@ public class Result implements Serializable {
this.country = country;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Subject> getSubjects() {
return subjects;
}
public void setSubjects(List<Subject> subjects) {
this.subjects = subjects;
}
public String getMaintitle() {
return maintitle;
}
@ -418,4 +382,83 @@ public class Result implements Serializable {
this.geolocation = geolocation;
}
public List<Instance> getInstance() {
return instance;
}
public void setInstance(List<Instance> instance) {
this.instance = instance;
}
public List<CfHbKeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<CfHbKeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public List<Project> getProjects() {
return projects;
}
public void setProjects(List<Project> projects) {
this.projects = projects;
}
public List<Context> getContext() {
return context;
}
public void setContext(List<Context> context) {
this.context = context;
}
public List<Relation> getRelations() {
return relations;
}
public void setRelations(List<Relation> relations) {
this.relations = relations;
}
public Indicator getIndicator() {
return indicator;
}
public void setIndicator(Indicator indicator) {
this.indicator = indicator;
}
public List<String> getKeywords() {
return keywords;
}
public void setKeywords(List<String> keywords) {
this.keywords = keywords;
}
public List<EoscInteroperabilityFramework> getEoscIF() {
return eoscIF;
}
public void setEoscIF(List<EoscInteroperabilityFramework> eoscIF) {
this.eoscIF = eoscIF;
}
public Map<String, List<Subject>> getSubject() {
return subject;
}
public void setSubject(Map<String, List<Subject>> subject) {
this.subject = subject;
}
public List<Organization> getAffiliation() {
return affiliation;
}
public void setAffiliation(List<Organization> affiliation) {
this.affiliation = affiliation;
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
@ -38,4 +38,5 @@ public class ResultCountry extends Country {
public static ResultCountry newInstance(String code, String label, String provenance, String trust) {
return newInstance(code, label, Provenance.newInstance(provenance, trust));
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.model;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;

View File

@ -5,8 +5,6 @@ import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
/**
* @author miriam.baglioni
* @Date 10/08/22

View File

@ -3,6 +3,8 @@ package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
/**
* @author miriam.baglioni
* @Date 04/11/22
@ -29,8 +31,13 @@ public class UsageCounts implements Serializable {
public static UsageCounts newInstance(String views, String downloads) {
UsageCounts uc = new UsageCounts();
uc.views = views;
uc.downloads = downloads;
uc.setViews(views);
uc.setDownloads(downloads);
return uc;
}
public boolean isEmpty() {
return StringUtils.isEmpty(this.downloads) || StringUtils.isEmpty(this.views);
}
}

View File

@ -1,13 +1,13 @@
package eu.dnetlib.dhp.oa.model.community;
package eu.dnetlib.dhp.eosc.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
/**
* To store information about the funder funding the project related to the result. It has the following parameters: -
* shortName of type String to store the funder short name (e.c. AKA). - name of type String to store the funder name
* (e.c. Akademy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to
* store the jurisdiction of the funder
* @author miriam.baglioni
* @Date 26/01/23
*/
public class Validated implements Serializable {
@ -32,8 +32,9 @@ public class Validated implements Serializable {
public static Validated newInstance(Boolean validated, String validationDate) {
Validated v = new Validated();
v.validatedByFunder = validated;
v.validationDate = validationDate;
v.setValidatedByFunder(validated);
v.setValidationDate(validationDate);
return v;
}
}

View File

@ -1,40 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To represent keywords associated to the result. It has two parameters:
* - subject of type eu.dnetlib.dhp.schema.dump.oaf.SubjectSchemeValue to describe the subject. It mapped as:
* - schema it corresponds to qualifier.classid of the dumped subject
* - value it corresponds to the subject value
* - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
* is not null. In this case:
* - provenance corresponds to dataInfo.provenanceaction.classname
* - trust corresponds to dataInfo.trust
*/
public class Subject implements Serializable {

	// The subject itself as a (scheme, value) pair.
	private SubjectSchemeValue subject;

	@JsonSchema(description = "Why this subject is associated to the result")
	private Provenance provenance;

	/** @return the (scheme, value) pair describing this subject */
	public SubjectSchemeValue getSubject() {
		return subject;
	}

	public void setSubject(SubjectSchemeValue subject) {
		this.subject = subject;
	}

	/** @return the provenance explaining why the subject is associated to the result */
	public Provenance getProvenance() {
		return provenance;
	}

	public void setProvenance(Provenance provenance) {
		this.provenance = provenance;
	}
}

View File

@ -1,42 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class SubjectSchemeValue implements Serializable {

	@JsonSchema(
		description = "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies).")
	private String scheme;

	@JsonSchema(
		description = "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary).")
	private String value;

	/** @return the classification scheme the subject value belongs to */
	public String getScheme() {
		return scheme;
	}

	public void setScheme(String scheme) {
		this.scheme = scheme;
	}

	/** @return the subject value within the selected scheme */
	public String getValue() {
		return value;
	}

	public void setValue(String value) {
		this.value = value;
	}

	/**
	 * Static factory building a fully-populated instance.
	 *
	 * @param scheme the classification scheme
	 * @param value  the subject value in that scheme
	 * @return a new {@link SubjectSchemeValue} carrying the given values
	 */
	public static SubjectSchemeValue newInstance(String scheme, String value) {
		SubjectSchemeValue instance = new SubjectSchemeValue();
		instance.setScheme(scheme);
		instance.setValue(value);
		return instance;
	}
}

View File

@ -1,42 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Instance;
/**
 * Extends {@link Instance} with source information used by the community dump
 * (in the plain result dump this information is expressed as relations
 * instead).
 * <p>
 * {@code hostedby} maps the hostedby parameter of the dumped instance
 * (key/value), i.e. where the instance can be viewed or downloaded;
 * {@code collectedfrom} maps the collectedfrom parameter (key/value), i.e. the
 * source the record was harvested from.
 */
public class CommunityInstance extends Instance {
	@JsonSchema(description = "Information about the source from which the instance can be viewed or downloaded.")
	private CfHbKeyValue hostedby;

	@JsonSchema(description = "Information about the source from which the record has been collected")
	@JsonInclude(JsonInclude.Include.NON_NULL)
	private CfHbKeyValue collectedfrom;

	/** @return the datasource hosting this instance */
	public CfHbKeyValue getHostedby() {
		return hostedby;
	}

	public void setHostedby(CfHbKeyValue value) {
		this.hostedby = value;
	}

	/** @return the datasource the record was collected from (may be null) */
	public CfHbKeyValue getCollectedfrom() {
		return collectedfrom;
	}

	public void setCollectedfrom(CfHbKeyValue value) {
		this.collectedfrom = value;
	}
}

View File

@ -1,72 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Result;
/**
 * Extends {@code Result} with community-dump specific information:
 * <ul>
 * <li>{@code projects} — grants that (co-)funded the result, added after the
 * result is mapped to the external model</li>
 * <li>{@code context} — the RC/RI contexts related to the result, one external
 * context per context in the internal model</li>
 * <li>{@code collectedfrom} — the sources from which the record has been
 * collected, one entry per internal collectedfrom</li>
 * <li>{@code instance} — all the instances associated to the result, as in the
 * internal model</li>
 * </ul>
 */
public class CommunityResult extends Result {

	// NOTE: description typos fixed ("production ofn" -> "production of")
	@JsonSchema(description = "List of projects (i.e. grants) that (co-)funded the production of the research results")
	private List<Project> projects;

	@JsonSchema(
		description = "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu")
	private List<Context> context;

	@JsonSchema(description = "Information about the sources from which the record has been collected")
	@JsonInclude(JsonInclude.Include.NON_NULL)
	protected List<CfHbKeyValue> collectedfrom;

	// NOTE: description typos fixed ("three instance" -> "three instances", "te" -> "the")
	@JsonSchema(
		description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version")
	private List<CommunityInstance> instance;

	public List<CommunityInstance> getInstance() {
		return instance;
	}

	public void setInstance(List<CommunityInstance> instance) {
		this.instance = instance;
	}

	public List<CfHbKeyValue> getCollectedfrom() {
		return collectedfrom;
	}

	public void setCollectedfrom(List<CfHbKeyValue> collectedfrom) {
		this.collectedfrom = collectedfrom;
	}

	public List<Project> getProjects() {
		return projects;
	}

	public void setProjects(List<Project> projects) {
		this.projects = projects;
	}

	public List<Context> getContext() {
		return context;
	}

	public void setContext(List<Context> context) {
		this.context = context;
	}
}

View File

@ -1,24 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
 * Community-dump specialisation of the funder description. On top of the base
 * class fields (short name such as AKA, full name such as Academy of Finland,
 * jurisdiction) it adds the funding stream of the grant.
 */
public class Funder extends eu.dnetlib.dhp.oa.model.Funder {
	@JsonSchema(description = "Stream of funding (e.g. for European Commission can be H2020 or FP7)")
	private String fundingStream;

	/** @return the funding stream, e.g. H2020 or FP7 */
	public String getFundingStream() {
		return fundingStream;
	}

	public void setFundingStream(String stream) {
		this.fundingStream = stream;
	}
}

View File

@ -6,7 +6,7 @@ import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Container;
import eu.dnetlib.dhp.eosc.model.Container;
/**
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -

View File

@ -1,28 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.Result;
/**
 * Extends {@code Result} with {@code instance}, the list of all instances
 * associated to the result; it corresponds to the same parameter in the result
 * represented in the internal model.
 */
public class GraphResult extends Result {

	// NOTE: description typos fixed ("three instance" -> "three instances", "te" -> "the")
	@JsonSchema(
		description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instances: one is the pre-print, one is the post-print, one is the published version")
	private List<Instance> instance;

	public List<Instance> getInstance() {
		return instance;
	}

	public void setInstance(List<Instance> instance) {
		this.instance = instance;
	}
}

View File

@ -6,7 +6,7 @@ import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Country;
import eu.dnetlib.dhp.eosc.model.Country;
/**
* To represent the generic organizaiton. It has the following parameters:

View File

@ -1,99 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import java.util.Objects;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
/**
 * Represents a generic relation between two entities of the graph.
 * <p>
 * {@code source} and {@code target} are the two endpoints, {@code reltype}
 * carries the semantics of the relation, {@code provenance} records why
 * OpenAIRE holds it, and the validation fields tell whether the relation was
 * confirmed by an authoritative source (e.g. the funder) and when it was
 * collected.
 */
public class Relation implements Serializable {
	@JsonSchema(description = "The node source in the relation")
	private Node source;

	@JsonSchema(description = "The node target in the relation")
	private Node target;

	@JsonSchema(description = "To represent the semantics of a relation between two entities")
	private RelType reltype;

	@JsonSchema(description = "The reason why OpenAIRE holds the relation ")
	private Provenance provenance;

	@JsonSchema(
		description = "True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)")
	private boolean validated;

	@JsonSchema(description = "The date when the relation was collected from OpenAIRE")
	private String validationDate;

	public Node getSource() {
		return source;
	}

	public void setSource(Node source) {
		this.source = source;
	}

	public Node getTarget() {
		return target;
	}

	public void setTarget(Node target) {
		this.target = target;
	}

	public RelType getReltype() {
		return reltype;
	}

	public void setReltype(RelType reltype) {
		this.reltype = reltype;
	}

	public Provenance getProvenance() {
		return provenance;
	}

	public void setProvenance(Provenance provenance) {
		this.provenance = provenance;
	}

	public void setValidated(boolean validated) {
		this.validated = validated;
	}

	public boolean getValidated() {
		return validated;
	}

	public void setValidationDate(String validationDate) {
		this.validationDate = validationDate;
	}

	public String getValidationDate() {
		return validationDate;
	}

	@Override
	public int hashCode() {
		return Objects.hash(source.getId(), target.getId(), reltype.getType() + ":" + reltype.getName());
	}

	/**
	 * Equality consistent with {@link #hashCode()}: two relations are equal when
	 * they connect the same source and target ids with the same relation type
	 * and name. FIX: the original class overrode hashCode only, violating the
	 * equals/hashCode contract and making hash-based deduplication unreliable.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof Relation)) {
			return false;
		}
		Relation other = (Relation) obj;
		return Objects.equals(source.getId(), other.source.getId())
			&& Objects.equals(target.getId(), other.target.getId())
			&& Objects.equals(reltype.getType(), other.reltype.getType())
			&& Objects.equals(reltype.getName(), other.reltype.getName());
	}

	/**
	 * Factory building a relation from its four mandatory components; the
	 * validation fields are left at their defaults.
	 */
	public static Relation newInstance(Node source, Node target, RelType reltype, Provenance provenance) {
		Relation relation = new Relation();
		relation.source = source;
		relation.target = target;
		relation.reltype = reltype;
		relation.provenance = provenance;
		return relation;
	}
}

View File

@ -1,24 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
 * Represents an RC entity. Extends {@link ResearchInitiative} by adding the
 * list of subjects associated to the research community.
 */
public class ResearchCommunity extends ResearchInitiative {
	@JsonSchema(
		description = "Only for research communities: the list of the subjects associated to the research community")
	private List<String> subject;

	/** @return the subjects associated to the community */
	public List<String> getSubject() {
		return subject;
	}

	public void setSubject(List<String> subjects) {
		this.subject = subjects;
	}
}

View File

@ -1,89 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
 * Represents an entity of type RC/RI, mostly derived from the context profile:
 * <ul>
 * <li>{@code id} — the OpenAIRE id for the entity, built as
 * 00|context_____::md5(originalId)</li>
 * <li>{@code acronym} — the context id as provided in the profile (e.g.
 * mes)</li>
 * <li>{@code name} — the context name (the label attribute of the context
 * definition)</li>
 * <li>{@code type} — research initiative or research community</li>
 * <li>{@code description} — the description given in the profile</li>
 * <li>{@code zenodo_community} — the main Zenodo community associated to the
 * context</li>
 * </ul>
 */
public class ResearchInitiative implements Serializable {
	@JsonSchema(description = "The OpenAIRE id for the community/research infrastructure")
	private String id; // openaireId

	@JsonSchema(description = "The acronym of the community")
	private String acronym; // context id

	@JsonSchema(description = "The long name of the community")
	private String name; // context name

	@JsonSchema(description = "One of {Research Community, Research infrastructure}")
	private String type; // context type: research initiative or research community

	@JsonSchema(description = "Description of the research community/research infrastructure")
	private String description;

	@JsonSchema(
		description = "The URL of the Zenodo community associated to the Research community/Research infrastructure")
	private String zenodo_community;

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getAcronym() {
		return acronym;
	}

	public void setAcronym(String acronym) {
		this.acronym = acronym;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = type;
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = description;
	}

	public String getZenodo_community() {
		return zenodo_community;
	}

	public void setZenodo_community(String zenodo_community) {
		this.zenodo_community = zenodo_community;
	}
}

View File

@ -1,6 +1,5 @@
import java.io.IOException;
import eu.dnetlib.dhp.eosc.model.Relation;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
@ -10,43 +9,12 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.ExecCreateSchemas;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.eosc.model.Relation;
import eu.dnetlib.dhp.eosc.model.Result;
//@Disabled
class GenerateJsonSchema {
@Test
void generateSchema() {
SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7,
OptionPreset.PLAIN_JSON)
.with(Option.SCHEMA_VERSION_INDICATOR)
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
System.out.println(jsonSchema.toString());
}
@Test
void generateSchema2() {
ObjectMapper objectMapper = new ObjectMapper();
AddonModule module = new AddonModule();
SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper,
SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
.with(module)
.with(Option.SCHEMA_VERSION_INDICATOR)
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
System.out.println(jsonSchema.toString());
}
@Test
void generateSchema3() throws JsonProcessingException {
@ -59,7 +27,7 @@ class GenerateJsonSchema {
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(EoscResult.class);
JsonNode jsonSchema = generator.generateSchema(Result.class);
System.out.println(new ObjectMapper().writeValueAsString(jsonSchema));
}
@ -70,13 +38,13 @@ class GenerateJsonSchema {
ObjectMapper objectMapper = new ObjectMapper();
AddonModule module = new AddonModule();
SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(objectMapper,
SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
.with(module)
.with(Option.SCHEMA_VERSION_INDICATOR)
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(Relation.class);
JsonNode jsonSchema = generator.generateSchema(Result.class);
System.out.println(new ObjectMapper().writeValueAsString(jsonSchema));
}

View File

@ -4,11 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
@ -17,11 +13,11 @@ import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.oa.graph.dump.eosc.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.eosc.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
@ -32,9 +28,7 @@ import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class DumpProducts implements Serializable {
public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath,
Class<? extends OafEntity> inputClazz,
Class<? extends Result> outputClazz,
String dumpType) {
Class<? extends OafEntity> inputClazz) {
SparkConf conf = new SparkConf();
@ -44,25 +38,23 @@ public class DumpProducts implements Serializable {
spark -> {
Utils.removeOutputDir(spark, outputPath);
execDump(
spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
spark, inputPath, outputPath, communityMapPath, inputClazz);
});
}
public static <I extends OafEntity, O extends Result> void execDump(
public static <I extends OafEntity> void execDump(
SparkSession spark,
String inputPath,
String outputPath,
String communityMapPath,
Class<I> inputClazz,
Class<O> outputClazz,
String dumpType) {
Class<I> inputClazz) {
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
Utils
.readPath(spark, inputPath, inputClazz)
.map((MapFunction<I, O>) value -> execMap(value, communityMap, dumpType), Encoders.bean(outputClazz))
.filter((FilterFunction<O>) value -> value != null)
.map((MapFunction<I, Result>) value -> execMap(value, communityMap), Encoders.bean(Result.class))
.filter((FilterFunction<Result>) value -> value != null)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
@ -70,9 +62,8 @@ public class DumpProducts implements Serializable {
}
private static <I extends OafEntity, O extends Result> O execMap(I value,
CommunityMap communityMap,
String dumpType) throws NoAvailableEntityTypeException, CardinalityTooHighException {
private static <I extends OafEntity> Result execMap(I value,
CommunityMap communityMap) throws NoAvailableEntityTypeException, CardinalityTooHighException {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
if (odInfo.isPresent()) {
@ -83,29 +74,7 @@ public class DumpProducts implements Serializable {
return null;
}
if (Constants.DUMPTYPE.COMMUNITY.getType().equals(dumpType)) {
Set<String> communities = communityMap.keySet();
Optional<List<Context>> inputContext = Optional
.ofNullable(((eu.dnetlib.dhp.schema.oaf.Result) value).getContext());
if (!inputContext.isPresent()) {
return null;
}
List<String> toDumpFor = inputContext.get().stream().map(c -> {
if (communities.contains(c.getId())) {
return c.getId();
}
if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
return c.getId().substring(0, c.getId().indexOf("::"));
}
return null;
}).filter(Objects::nonNull).collect(Collectors.toList());
if (toDumpFor.isEmpty()) {
return null;
}
}
return (O) ResultMapper.map(value, communityMap, dumpType);
return ResultMapper.map(value, communityMap);
}
}

View File

@ -4,32 +4,22 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.FilterFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.eosc.model.EoscInteroperabilityFramework;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.eosc.model.Indicator;
import eu.dnetlib.dhp.eosc.model.UsageCounts;
import eu.dnetlib.dhp.eosc.model.*;
import eu.dnetlib.dhp.eosc.model.AccessRight;
import eu.dnetlib.dhp.eosc.model.Author;
import eu.dnetlib.dhp.eosc.model.Context;
import eu.dnetlib.dhp.eosc.model.GeoLocation;
import eu.dnetlib.dhp.eosc.model.Measure;
import eu.dnetlib.dhp.eosc.model.OpenAccessRoute;
import eu.dnetlib.dhp.eosc.model.Provenance;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.model.AccessRight;
import eu.dnetlib.dhp.oa.model.Author;
import eu.dnetlib.dhp.oa.model.GeoLocation;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.Measure;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.community.CfHbKeyValue;
import eu.dnetlib.dhp.oa.model.community.CommunityInstance;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Context;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
@ -37,17 +27,10 @@ public class ResultMapper implements Serializable {
private static final Logger log = LoggerFactory.getLogger(ResultMapper.class);
public static <E extends eu.dnetlib.dhp.schema.oaf.OafEntity> Result map(
E in, Map<String, String> communityMap, String dumpType)
E in, Map<String, String> communityMap)
throws NoAvailableEntityTypeException, CardinalityTooHighException {
Result out;
if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
out = new GraphResult();
} else if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
out = new EoscResult();
} else {
out = new CommunityResult();
}
Result out = new Result();
eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
@ -126,15 +109,31 @@ public class ResultMapper implements Serializable {
.ofNullable(input.getDescription())
.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
out.setDescription(descriptionList);
Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
if (oStr.isPresent()) {
out.setEmbargoenddate(oStr.get().getValue());
if (Optional.ofNullable(input.getEmbargoenddate()).isPresent()) {
out.setEmbargoenddate(input.getEmbargoenddate().getValue());
}
if (Optional.ofNullable(input.getMeasures()).isPresent()) {
Indicator i = new Indicator();
UsageCounts uc = new UsageCounts();
input.getMeasures().forEach(m -> {
if (m.getId().equals("downloads")) {
uc.setDownloads(m.getUnit().get(0).getValue());
}
if (m.getId().equals("views")) {
uc.setViews(m.getUnit().get(0).getValue());
}
});
if (!uc.isEmpty()) {
i.setUsageCounts(uc);
out.setIndicator(i);
}
}
final List<String> formatList = new ArrayList<>();
Optional
.ofNullable(input.getFormat())
.ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
.ifPresent(value -> value.forEach(f -> formatList.add(f.getValue())));
out.setFormat(formatList);
out.setId(input.getId());
out.setOriginalId(new ArrayList<>());
@ -149,38 +148,31 @@ public class ResultMapper implements Serializable {
.filter(s -> !s.startsWith("50|"))
.collect(Collectors.toList())));
Optional<List<eu.dnetlib.dhp.schema.oaf.Instance>> oInst = Optional
.ofNullable(input.getInstance());
if (oInst.isPresent()) {
if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
((GraphResult) out)
.setInstance(
oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList()));
} else {
((CommunityResult) out)
.setInstance(
oInst
.get()
.stream()
.map(i -> getCommunityInstance(i, dumpType))
.collect(Collectors.toList()));
}
if (Optional
.ofNullable(input.getInstance())
.isPresent()) {
out
.setInstance(
input
.getInstance()
.stream()
.map(i -> getCommunityInstance(i))
.collect(Collectors.toList()));
}
Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
if (oL.isPresent()) {
eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
out.setLanguage(Language.newInstance(language.getClassid(), language.getClassname()));
if (Optional.ofNullable(input.getLanguage()).isPresent()) {
out
.setLanguage(
Language.newInstance(input.getLanguage().getClassid(), input.getLanguage().getClassname()));
}
Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
if (oLong.isPresent()) {
out.setLastupdatetimestamp(oLong.get());
if (Optional.ofNullable(input.getLastupdatetimestamp()).isPresent()) {
out.setLastupdatetimestamp(input.getLastupdatetimestamp());
}
Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
if (otitle.isPresent()) {
List<StructuredProperty> iTitle = otitle
.get()
if (Optional.ofNullable(input.getTitle()).isPresent()) {
List<StructuredProperty> iTitle = input
.getTitle()
.stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
.collect(Collectors.toList());
@ -188,8 +180,8 @@ public class ResultMapper implements Serializable {
out.setMaintitle(iTitle.get(0).getValue());
}
iTitle = otitle
.get()
iTitle = input
.getTitle()
.stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
.collect(Collectors.toList());
@ -211,13 +203,12 @@ public class ResultMapper implements Serializable {
.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
oStr = Optional.ofNullable(input.getDateofacceptance());
if (oStr.isPresent()) {
out.setPublicationdate(oStr.get().getValue());
if (Optional.ofNullable(input.getDateofacceptance()).isPresent()) {
out.setPublicationdate(input.getDateofacceptance().getValue());
}
oStr = Optional.ofNullable(input.getPublisher());
if (oStr.isPresent()) {
out.setPublisher(oStr.get().getValue());
if (Optional.ofNullable(input.getPublisher()).isPresent()) {
out.setPublisher(input.getPublisher().getValue());
}
Optional
@ -225,123 +216,102 @@ public class ResultMapper implements Serializable {
.ifPresent(
value -> out.setSource(value.stream().map(Field::getValue).collect(Collectors.toList())));
if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
List<Subject> subjectList = new ArrayList<>();
Optional
.ofNullable(input.getSubject())
.ifPresent(
value -> value
.forEach(s -> subjectList.add(getSubject(s))));
if (Optional.ofNullable(input.getSubject()).isPresent()) {
out.setSubject(createSubjectMap(input));
out
.setKeywords(
input
.getSubject()
.stream()
.filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") &&
!s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
.map(s -> s.getValue())
.collect(Collectors.toList()));
out.setSubjects(subjectList);
} else {
if (Optional.ofNullable(input.getSubject()).isPresent()) {
((EoscResult) out).setSubject(createSubjectMap(input));
((EoscResult) out)
.setKeywords(
if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) {
out
.setEoscIF(
input
.getSubject()
.getEoscifguidelines()
.stream()
.filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("keyword") &&
!s.getValue().equalsIgnoreCase("EOSC::RO-crate"))
.map(s -> s.getValue())
.map(
eig -> EoscInteroperabilityFramework
.newInstance(
eig.getCode(), eig.getLabel(), eig.getUrl(),
eig.getSemanticRelation()))
.collect(Collectors.toList()));
if (Optional.ofNullable(input.getEoscifguidelines()).isPresent()) {
((EoscResult) out)
.setEoscIF(
input
.getEoscifguidelines()
.stream()
.map(
eig -> EoscInteroperabilityFramework
.newInstance(
eig.getCode(), eig.getLabel(), eig.getUrl(),
eig.getSemanticRelation()))
.collect(Collectors.toList()));
}
}
}
out.setType(input.getResulttype().getClassid());
if (!Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) {
if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
((CommunityResult) out)
.setCollectedfrom(
input
.getCollectedfrom()
.stream()
.map(cf -> CfHbKeyValue.newInstance(cf.getKey(), cf.getValue()))
.collect(Collectors.toList()));
}
Set<String> communities = communityMap.keySet();
List<Context> contextList = Optional
.ofNullable(
input
.getContext())
.map(
value -> value
.stream()
.map(c -> {
String communityId = c.getId();
if (communityId.contains("::")) {
communityId = communityId.substring(0, communityId.indexOf("::"));
}
if (communities.contains(communityId)) {
Context context = new Context();
context.setCode(communityId);
context.setLabel(communityMap.get(communityId));
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
if (dataInfo.isPresent()) {
List<Provenance> provenance = new ArrayList<>();
provenance
.addAll(
dataInfo
.get()
.stream()
.map(
di -> Optional
.ofNullable(di.getProvenanceaction())
.map(
provenanceaction -> Provenance
.newInstance(
provenanceaction.getClassname(),
di.getTrust()))
.orElse(null))
.filter(Objects::nonNull)
.collect(Collectors.toSet()));
Set<String> communities = communityMap.keySet();
List<Context> contextList = Optional
.ofNullable(
input
.getContext())
.map(
value -> value
.stream()
.map(c -> {
String communityId = c.getId();
if (communityId.contains("::")) {
communityId = communityId.substring(0, communityId.indexOf("::"));
}
if (communities.contains(communityId)) {
Context context = new Context();
context.setCode(communityId);
context.setLabel(communityMap.get(communityId));
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
if (dataInfo.isPresent()) {
List<Provenance> provenance = new ArrayList<>();
provenance
.addAll(
dataInfo
.get()
.stream()
.map(
di -> Optional
.ofNullable(di.getProvenanceaction())
.map(
provenanceaction -> Provenance
.newInstance(
provenanceaction.getClassname(),
di.getTrust()))
.orElse(null))
.filter(Objects::nonNull)
.collect(Collectors.toSet()));
try {
context.setProvenance(getUniqueProvenance(provenance));
} catch (NoAvailableEntityTypeException e) {
e.printStackTrace();
}
try {
context.setProvenance(getUniqueProvenance(provenance));
} catch (NoAvailableEntityTypeException e) {
e.printStackTrace();
}
return context;
}
return null;
})
.filter(Objects::nonNull)
.collect(Collectors.toList()))
.orElse(new ArrayList<>());
return context;
}
return null;
})
.filter(Objects::nonNull)
.collect(Collectors.toList()))
.orElse(new ArrayList<>());
if (!contextList.isEmpty()) {
Set<Integer> hashValue = new HashSet<>();
List<Context> remainigContext = new ArrayList<>();
contextList.forEach(c -> {
if (!hashValue.contains(c.hashCode())) {
remainigContext.add(c);
hashValue.add(c.hashCode());
}
});
((CommunityResult) out).setContext(remainigContext);
}
if (!contextList.isEmpty()) {
Set<Integer> hashValue = new HashSet<>();
List<Context> remainigContext = new ArrayList<>();
contextList.forEach(c -> {
if (!hashValue.contains(c.hashCode())) {
remainigContext.add(c);
hashValue.add(c.hashCode());
}
});
out.setContext(remainigContext);
}
} catch (ClassCastException cce) {
return out;
return null;
}
}
@ -476,8 +446,8 @@ public class ResultMapper implements Serializable {
}
}
private static Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
Instance instance = new Instance();
private static eu.dnetlib.dhp.eosc.model.Instance getGraphInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
eu.dnetlib.dhp.eosc.model.Instance instance = new eu.dnetlib.dhp.eosc.model.Instance();
setCommonValue(i, instance);
@ -485,18 +455,11 @@ public class ResultMapper implements Serializable {
}
private static CommunityInstance getCommunityInstance(eu.dnetlib.dhp.schema.oaf.Instance i, String dumpType) {
CommunityInstance instance = new CommunityInstance();
private static eu.dnetlib.dhp.eosc.model.Instance getCommunityInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
eu.dnetlib.dhp.eosc.model.Instance instance = new eu.dnetlib.dhp.eosc.model.Instance();
setCommonValue(i, instance);
if (!Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
instance
.setCollectedfrom(
CfHbKeyValue
.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
}
instance
.setHostedby(
CfHbKeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
@ -505,7 +468,8 @@ public class ResultMapper implements Serializable {
}
private static <I extends Instance> void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i,
eu.dnetlib.dhp.eosc.model.Instance instance) {
Optional<eu.dnetlib.dhp.schema.oaf.AccessRight> opAr = Optional.ofNullable(i.getAccessright());
if (opAr.isPresent() && Constants.ACCESS_RIGHTS_COAR_MAP.containsKey(opAr.get().getClassid())) {
@ -655,17 +619,6 @@ public class ResultMapper implements Serializable {
return null;
}
/**
 * Maps an OAF StructuredProperty onto the dump Subject model, carrying over
 * the subject scheme/value pair and, when available, its provenance.
 */
private static Subject getSubject(StructuredProperty s) {
	final Subject mapped = new Subject();
	mapped
		.setSubject(
			SubjectSchemeValue
				.newInstance(s.getQualifier().getClassid(), s.getValue()));
	Optional
		.ofNullable(getProvenance(s))
		.ifPresent(mapped::setProvenance);
	return mapped;
}
private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
Author a = new Author();
a.setFullname(oa.getFullname());
@ -704,7 +657,8 @@ public class ResultMapper implements Serializable {
AuthorPidSchemeValue
.newInstance(
pid.getQualifier().getClassid(),
pid.getValue())
pid.getValue()),
null
);
}

View File

@ -1,81 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Context;
/**
* This class splits the dumped results according to the research community - research initiative/infrastructure they
* are related to. The information about the community is found in the element "context.id" in the result. Since the
* context that can be found in the result can be associated not only to communities, a community Map is provided. It
* will guide the splitting process. Note: the repartition(1) just before writing the results related to a community.
* This is a choice due to uploading constraints (just one file for each community) As soon as a better solution will be
* in place remove the repartition
*/
public class CommunitySplit implements Serializable {

	/**
	 * Entry point: removes any previous output, loads the community map and writes
	 * one dataset of results per community under outputPath.
	 *
	 * @param isSparkSessionManaged whether the spark session is managed by the caller
	 * @param inputPath             path holding the dumped results (publication/dataset/orp/software)
	 * @param outputPath            base path for the per-community outputs
	 * @param communityMapPath      path of the serialized community map
	 */
	public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, String communityMapPath) {
		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
				execSplit(spark, inputPath, outputPath, communityMap);
			});
	}

	// Unions the four result types and writes, for each community in the map, the
	// subset of results whose context contains that community identifier. The
	// community label (spaces replaced by underscores) names the output directory.
	private static void execSplit(SparkSession spark, String inputPath, String outputPath,
		CommunityMap communities) {

		Dataset<CommunityResult> result = Utils
			.readPath(spark, inputPath + "/publication", CommunityResult.class)
			.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
			.union(Utils.readPath(spark, inputPath + "/orp", CommunityResult.class))
			.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));

		communities
			.keySet()
			.forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_")));
	}

	// Writes the gzip-compressed JSON dump of the results belonging to community c.
	private static void printResult(String c, Dataset<CommunityResult> result, String outputPath) {
		Dataset<CommunityResult> communityProducts = result
			.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));

		communityProducts
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
	}

	// True when the result's context contains community c. Uses anyMatch instead
	// of collecting all context codes into a List just to call contains on it.
	private static boolean containsCommunity(CommunityResult r, String c) {
		return Optional
			.ofNullable(r.getContext())
			.map(
				ctx -> ctx
					.stream()
					.map(Context::getCode)
					.anyMatch(c::equals))
			.orElse(false);
	}
}

View File

@ -1,67 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
* Spark action to trigger the dump of results associated to research community - reseach initiative/infrasctructure The
* actual dump if performed via the class DumpProducts that is used also for the entire graph dump
*/
public class SparkDumpCommunityProducts implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);

	/**
	 * Parses the CLI arguments (source/output paths, result table class, community
	 * map path and optional dump type) and delegates the actual dump to
	 * DumpProducts, which is shared with the complete-graph dump.
	 *
	 * @param args spark action arguments, described by input_parameters.json
	 * @throws Exception if argument parsing or the reflective class lookup fails
	 */
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpCommunityProducts.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		String communityMapPath = parser.get("communityMapPath");

		// defaults to the community dump when no explicit dumpType is passed
		final String dumpType = Optional
			.ofNullable(parser.get("dumpType"))
			.map(String::valueOf)
			.orElse("community");

		// Class.forName returns a raw Class<?>; the cast is safe by contract because
		// resultTableName must name a Result subclass (a misconfiguration fails fast
		// at runtime either way).
		@SuppressWarnings("unchecked")
		Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);

		DumpProducts dump = new DumpProducts();
		dump
			.run(
				isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, CommunityResult.class,
				dumpType);
	}
}

View File

@ -1,50 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
/**
* Spark job to trigger the split of results associated to research community - reseach initiative/infrasctructure. The
* actual split is performed by the class CommunitySplit
*/
public class SparkSplitForCommunity implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkSplitForCommunity.class);

	/**
	 * CLI entry point: reads the job parameters and hands the per-community split
	 * over to CommunitySplit.
	 *
	 * @param args spark action arguments, described by split_parameters.json
	 * @throws Exception if argument parsing fails
	 */
	public static void main(String[] args) throws Exception {

		final String paramDescription = IOUtils
			.toString(
				SparkSplitForCommunity.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(paramDescription);
		parser.parseArgument(args);

		final Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String communityMapPath = parser.get("communityMapPath");

		new CommunitySplit().run(isSparkSessionManaged, inputPath, outputPath, communityMapPath);
	}
}

View File

@ -1,84 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.Serializable;
import java.util.List;
/**
* Deserialization of the information in the context needed to create Context Entities, and relations between context
* entities and datasources and projects
*/
public class ContextInfo implements Serializable {
	private String id; // context identifier
	private String description;
	private String type; // context type, community or ri
	private String zenodocommunity; // associated Zenodo community
	private String name; // context label
	private List<String> projectList; // openaire ids of the related projects
	private List<String> datasourceList; // openaire ids of the related datasources
	private List<String> subject; // subjects associated to the context

	public String getId() {
		return id;
	}

	public void setId(String id) {
		this.id = id;
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = description;
	}

	public String getType() {
		return type;
	}

	public void setType(String type) {
		this.type = type;
	}

	public String getZenodocommunity() {
		return zenodocommunity;
	}

	public void setZenodocommunity(String zenodocommunity) {
		this.zenodocommunity = zenodocommunity;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public List<String> getProjectList() {
		return projectList;
	}

	public void setProjectList(List<String> projectList) {
		this.projectList = projectList;
	}

	public List<String> getDatasourceList() {
		return datasourceList;
	}

	public void setDatasourceList(List<String> datasourceList) {
		this.datasourceList = datasourceList;
	}

	public List<String> getSubject() {
		return subject;
	}

	public void setSubject(List<String> subject) {
		this.subject = subject;
	}
}

View File

@ -1,110 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
* collects the general information for contexes of type community or ri. The general information is the id of the
* context, its label, the subjects associated to the context, its zenodo community, description and type. This
* information is used to create a new Context Entity
*/
public class CreateContextEntities implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class);
	// HDFS configuration pointing at the name node passed on the command line
	private final transient Configuration conf;
	// gzip-compressed, UTF-8, JSON-lines writer over the HDFS output file
	private final transient BufferedWriter writer;

	/**
	 * Parses the job parameters (hdfsPath, nameNode, isLookUpUrl), then queries the
	 * Information System and writes one JSON line per context entity to HDFS.
	 *
	 * @param args action arguments, described by input_entity_parameter.json
	 * @throws Exception if parsing, HDFS setup or the IS lookup fails
	 */
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				CreateContextEntities.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		final String hdfsPath = parser.get("hdfsPath");
		log.info("hdfsPath: {}", hdfsPath);

		final String hdfsNameNode = parser.get("nameNode");
		log.info("nameNode: {}", hdfsNameNode);

		final String isLookUpUrl = parser.get("isLookUpUrl");
		log.info("isLookUpUrl: {}", isLookUpUrl);

		final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);

		log.info("Processing contexts...");
		// Process::getEntity maps each ContextInfo to its ResearchInitiative entity
		cce.execute(Process::getEntity, isLookUpUrl);

		cce.close();
	}

	// Flushes and closes the underlying HDFS stream; must be called once writing is done.
	private void close() throws IOException {
		writer.close();
	}

	/**
	 * Opens (or appends to) the HDFS output file and wraps it in a gzip-compressed
	 * UTF-8 writer.
	 *
	 * @param hdfsPath     output file on HDFS
	 * @param hdfsNameNode name node URI used as fs.defaultFS
	 * @throws IOException if the file system or the output stream cannot be opened
	 */
	public CreateContextEntities(String hdfsPath, String hdfsNameNode) throws IOException {
		this.conf = new Configuration();
		this.conf.set("fs.defaultFS", hdfsNameNode);
		FileSystem fileSystem = FileSystem.get(this.conf);
		Path hdfsWritePath = new Path(hdfsPath);
		FSDataOutputStream fsDataOutputStream = null;
		// append when the file already exists — presumably to support re-runs that
		// accumulate into the same dump file; TODO confirm this is intended
		if (fileSystem.exists(hdfsWritePath)) {
			fsDataOutputStream = fileSystem.append(hdfsWritePath);
		} else {
			fsDataOutputStream = fileSystem.create(hdfsWritePath);
		}
		CompressionCodecFactory factory = new CompressionCodecFactory(conf);

		CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec");

		this.writer = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream),
			StandardCharsets.UTF_8));
	}

	/**
	 * Streams every ContextInfo found in the Information System through the given
	 * producer and writes the produced entity to the output file.
	 *
	 * @param producer    maps a ContextInfo to the entity type being dumped
	 * @param isLookUpUrl address of the IS lookup service
	 * @throws ISLookUpException if the IS query fails
	 */
	public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
		throws ISLookUpException {
		QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
		queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));

		final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));

		queryInformationSystem.getContextInformation(consumer);
	}

	// Serializes the entity as one JSON line; IO failures are rethrown unchecked
	// because this runs inside a Consumer that cannot declare checked exceptions.
	protected <R extends ResearchInitiative> void writeEntity(final R r) {
		try {
			writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
			writer.newLine();
		} catch (final IOException e) {
			throw new IllegalArgumentException(e);
		}
	}
}

View File

@ -1,128 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
* and the project is not created because of a low coverage in the profiles of openaire ids related to projects
*/
public class CreateContextRelation implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
	// HDFS configuration pointing at the name node passed on the command line
	private final transient Configuration conf;
	// plain UTF-8 JSON-lines writer over the HDFS output file (not compressed)
	private final transient BufferedWriter writer;
	// IS client; the context-relation query is executed eagerly in the constructor
	private final transient QueryInformationSystem queryInformationSystem;

	// categories of the context profile holding the related entity ids
	private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
	private static final String CONTEX_RELATION_PROJECT = "projects";

	/**
	 * Parses the job parameters (hdfsPath, nameNode, isLookUpUrl) and writes the
	 * context-to-datasource and context-to-project relations to HDFS.
	 *
	 * @param args action arguments, described by input_entity_parameter.json
	 * @throws Exception if parsing, HDFS setup or the IS query fails
	 */
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				Objects
					.requireNonNull(
						CreateContextRelation.class
							.getResourceAsStream(
								"/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json")));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		// parsed and logged only; no spark session is created by this action
		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String hdfsPath = parser.get("hdfsPath");
		log.info("hdfsPath: {}", hdfsPath);

		final String hdfsNameNode = parser.get("nameNode");
		log.info("nameNode: {}", hdfsNameNode);

		final String isLookUpUrl = parser.get("isLookUpUrl");
		log.info("isLookUpUrl: {}", isLookUpUrl);

		final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);

		log.info("Creating relation for datasource...");
		cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));

		log.info("Creating relations for projects... ");
		cce
			.execute(
				Process::getRelation, CONTEX_RELATION_PROJECT,
				ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));

		cce.close();
	}

	// Flushes and closes the underlying HDFS stream; must be called once writing is done.
	private void close() throws IOException {
		writer.close();
	}

	/**
	 * Opens (or appends to) the HDFS output file and runs the IS context-relation
	 * query so that execute() can iterate over its results.
	 *
	 * @param hdfsPath     output file on HDFS
	 * @param hdfsNameNode name node URI used as fs.defaultFS
	 * @param isLookUpUrl  address of the IS lookup service
	 * @throws IOException       if the file system or the output stream cannot be opened
	 * @throws ISLookUpException if the IS query fails
	 */
	public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
		throws IOException, ISLookUpException {
		this.conf = new Configuration();
		this.conf.set("fs.defaultFS", hdfsNameNode);

		queryInformationSystem = new QueryInformationSystem();
		queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
		queryInformationSystem.execContextRelationQuery();

		FileSystem fileSystem = FileSystem.get(this.conf);
		Path hdfsWritePath = new Path(hdfsPath);
		FSDataOutputStream fsDataOutputStream = null;
		// append when the file already exists — presumably to accumulate relations
		// from successive invocations into the same file; TODO confirm intended
		if (fileSystem.exists(hdfsWritePath)) {
			fsDataOutputStream = fileSystem.append(hdfsWritePath);
		} else {
			fsDataOutputStream = fileSystem.create(hdfsWritePath);
		}

		this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
	}

	/**
	 * Writes all relations obtained by applying the producer to every ContextInfo
	 * of the given category.
	 *
	 * @param producer maps a ContextInfo to the list of relations to dump
	 * @param category context profile category to read ("contentproviders" or "projects")
	 * @param prefix   id prefix of the related entity type
	 */
	public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {

		final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);

		queryInformationSystem.getContextRelation(consumer, category, prefix);
	}

	// Serializes the relation as one JSON line; failures are rethrown unchecked
	// because this runs inside a Consumer that cannot declare checked exceptions.
	protected void writeEntity(final Relation r) {
		try {
			writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r));
			writer.newLine();
		} catch (final Exception e) {
			throw new MyRuntimeException(e);
		}
	}
}

View File

@ -1,520 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.*;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.oa.model.graph.Funder;
import eu.dnetlib.dhp.oa.model.graph.Project;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
* Dumps of entities in the model defined in eu.dnetlib.dhp.schema.dump.oaf.graph. Results are dumped using the same
* Mapper as for eu.dnetlib.dhp.schema.dump.oaf.community, while for the other entities the mapping is defined below
*/
public class DumpGraphEntities implements Serializable {
public void run(Boolean isSparkSessionManaged,
String inputPath,
String outputPath,
Class<? extends OafEntity> inputClazz,
String communityMapPath) {
SparkConf conf = new SparkConf();
switch (ModelSupport.idPrefixMap.get(inputClazz)) {
case "50":
DumpProducts d = new DumpProducts();
d
.run(
isSparkSessionManaged, inputPath, outputPath, communityMapPath, inputClazz, GraphResult.class,
eu.dnetlib.dhp.oa.graph.dump.Constants.DUMPTYPE.COMPLETE.getType());
break;
case "40":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
projectMap(spark, inputPath, outputPath, inputClazz);
});
break;
case "20":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
organizationMap(spark, inputPath, outputPath, inputClazz);
});
break;
case "10":
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
datasourceMap(spark, inputPath, outputPath, inputClazz);
});
break;
}
}
private static <E extends OafEntity> void datasourceMap(SparkSession spark, String inputPath, String outputPath,
Class<E> inputClazz) {
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Datasource>) d -> mapDatasource((eu.dnetlib.dhp.schema.oaf.Datasource) d),
Encoders.bean(Datasource.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath,
Class<E> inputClazz) {
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Project>) p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p),
Encoders.bean(Project.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
private static Datasource mapDatasource(eu.dnetlib.dhp.schema.oaf.Datasource d) {
Datasource datasource = new Datasource();
datasource.setId(d.getId());
Optional
.ofNullable(d.getOriginalId())
.ifPresent(
oId -> datasource.setOriginalId(oId.stream().filter(Objects::nonNull).collect(Collectors.toList())));
Optional
.ofNullable(d.getPid())
.ifPresent(
pids -> datasource
.setPid(
pids
.stream()
.map(p -> DatasourcePid.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
Optional
.ofNullable(d.getDatasourcetype())
.ifPresent(
dsType -> datasource
.setDatasourcetype(DatasourceSchemeValue.newInstance(dsType.getClassid(), dsType.getClassname())));
Optional
.ofNullable(d.getOpenairecompatibility())
.ifPresent(v -> datasource.setOpenairecompatibility(v.getClassname()));
Optional
.ofNullable(d.getOfficialname())
.ifPresent(oname -> datasource.setOfficialname(oname.getValue()));
Optional
.ofNullable(d.getEnglishname())
.ifPresent(ename -> datasource.setEnglishname(ename.getValue()));
Optional
.ofNullable(d.getWebsiteurl())
.ifPresent(wsite -> datasource.setWebsiteurl(wsite.getValue()));
Optional
.ofNullable(d.getLogourl())
.ifPresent(lurl -> datasource.setLogourl(lurl.getValue()));
Optional
.ofNullable(d.getDateofvalidation())
.ifPresent(dval -> datasource.setDateofvalidation(dval.getValue()));
Optional
.ofNullable(d.getDescription())
.ifPresent(dex -> datasource.setDescription(dex.getValue()));
Optional
.ofNullable(d.getSubjects())
.ifPresent(
sbjs -> datasource.setSubjects(sbjs.stream().map(sbj -> sbj.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getOdpolicies())
.ifPresent(odp -> datasource.setPolicies(Arrays.asList(odp.getValue())));
Optional
.ofNullable(d.getOdlanguages())
.ifPresent(
langs -> datasource
.setLanguages(langs.stream().map(lang -> lang.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getOdcontenttypes())
.ifPresent(
ctypes -> datasource
.setContenttypes(ctypes.stream().map(ctype -> ctype.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getReleasestartdate())
.ifPresent(rd -> datasource.setReleasestartdate(rd.getValue()));
Optional
.ofNullable(d.getReleaseenddate())
.ifPresent(ed -> datasource.setReleaseenddate(ed.getValue()));
Optional
.ofNullable(d.getMissionstatementurl())
.ifPresent(ms -> datasource.setMissionstatementurl(ms.getValue()));
Optional
.ofNullable(d.getDatabaseaccesstype())
.ifPresent(ar -> datasource.setAccessrights(ar.getValue()));
Optional
.ofNullable(d.getDatauploadtype())
.ifPresent(dut -> datasource.setUploadrights(dut.getValue()));
Optional
.ofNullable(d.getDatabaseaccessrestriction())
.ifPresent(dar -> datasource.setDatabaseaccessrestriction(dar.getValue()));
Optional
.ofNullable(d.getDatauploadrestriction())
.ifPresent(dur -> datasource.setDatauploadrestriction(dur.getValue()));
Optional
.ofNullable(d.getVersioning())
.ifPresent(v -> datasource.setVersioning(v.getValue()));
Optional
.ofNullable(d.getCitationguidelineurl())
.ifPresent(cu -> datasource.setCitationguidelineurl(cu.getValue()));
Optional
.ofNullable(d.getPidsystems())
.ifPresent(ps -> datasource.setPidsystems(ps.getValue()));
Optional
.ofNullable(d.getCertificates())
.ifPresent(c -> datasource.setCertificates(c.getValue()));
Optional
.ofNullable(d.getPolicies())
.ifPresent(ps -> datasource.setPolicies(ps.stream().map(p -> p.getValue()).collect(Collectors.toList())));
Optional
.ofNullable(d.getJournal())
.ifPresent(j -> datasource.setJournal(getContainer(j)));
return datasource;
}
private static Container getContainer(Journal j) {
Container c = new Container();
Optional
.ofNullable(j.getName())
.ifPresent(n -> c.setName(n));
Optional
.ofNullable(j.getIssnPrinted())
.ifPresent(issnp -> c.setIssnPrinted(issnp));
Optional
.ofNullable(j.getIssnOnline())
.ifPresent(issno -> c.setIssnOnline(issno));
Optional
.ofNullable(j.getIssnLinking())
.ifPresent(isnl -> c.setIssnLinking(isnl));
Optional
.ofNullable(j.getEp())
.ifPresent(ep -> c.setEp(ep));
Optional
.ofNullable(j.getIss())
.ifPresent(iss -> c.setIss(iss));
Optional
.ofNullable(j.getSp())
.ifPresent(sp -> c.setSp(sp));
Optional
.ofNullable(j.getVol())
.ifPresent(vol -> c.setVol(vol));
Optional
.ofNullable(j.getEdition())
.ifPresent(edition -> c.setEdition(edition));
Optional
.ofNullable(j.getConferencedate())
.ifPresent(cdate -> c.setConferencedate(cdate));
Optional
.ofNullable(j.getConferenceplace())
.ifPresent(cplace -> c.setConferenceplace(cplace));
return c;
}
private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) throws DocumentException {
Project project = new Project();
Optional
.ofNullable(p.getId())
.ifPresent(id -> project.setId(id));
Optional
.ofNullable(p.getWebsiteurl())
.ifPresent(w -> project.setWebsiteurl(w.getValue()));
Optional
.ofNullable(p.getCode())
.ifPresent(code -> project.setCode(code.getValue()));
Optional
.ofNullable(p.getAcronym())
.ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));
Optional
.ofNullable(p.getTitle())
.ifPresent(title -> project.setTitle(title.getValue()));
Optional
.ofNullable(p.getStartdate())
.ifPresent(sdate -> project.setStartdate(sdate.getValue()));
Optional
.ofNullable(p.getEnddate())
.ifPresent(edate -> project.setEnddate(edate.getValue()));
Optional
.ofNullable(p.getCallidentifier())
.ifPresent(cide -> project.setCallidentifier(cide.getValue()));
Optional
.ofNullable(p.getKeywords())
.ifPresent(key -> project.setKeywords(key.getValue()));
Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
boolean mandate = false;
if (omandate.isPresent()) {
if (omandate.get().getValue().equals("true")) {
mandate = true;
}
}
if (oecsc39.isPresent()) {
if (oecsc39.get().getValue().equals("true")) {
mandate = true;
}
}
project.setOpenaccessmandateforpublications(mandate);
project.setOpenaccessmandatefordataset(false);
Optional
.ofNullable(p.getEcarticle29_3())
.ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("true")));
project
.setSubject(
Optional
.ofNullable(p.getSubjects())
.map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
.orElse(new ArrayList<>()));
Optional
.ofNullable(p.getSummary())
.ifPresent(summary -> project.setSummary(summary.getValue()));
Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());
if (ocurrency.isPresent()) {
if (ofundedamount.isPresent()) {
if (ototalcost.isPresent()) {
project
.setGranted(
Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
} else {
project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
}
}
}
project
.setH2020programme(
Optional
.ofNullable(p.getH2020classification())
.map(
classification -> classification
.stream()
.map(
c -> Programme
.newInstance(
c.getH2020Programme().getCode(), c.getH2020Programme().getDescription()))
.collect(Collectors.toList()))
.orElse(new ArrayList<>()));
Optional<List<Field<String>>> ofundTree = Optional
.ofNullable(p.getFundingtree());
List<Funder> funList = new ArrayList<>();
if (ofundTree.isPresent()) {
for (Field<String> fundingtree : ofundTree.get()) {
funList.add(getFunder(fundingtree.getValue()));
}
}
project.setFunding(funList);
return project;
}
public static Funder getFunder(String fundingtree) throws DocumentException {
Funder f = new Funder();
final Document doc;
doc = new SAXReader().read(new StringReader(fundingtree));
f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
// f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());
String id = "";
String description = "";
// List<Levels> fundings = new ArrayList<>();
int level = 0;
List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
while (nodes.size() > 0) {
for (org.dom4j.Node n : nodes) {
List node = n.selectNodes("./id");
id = ((org.dom4j.Node) node.get(0)).getText();
id = id.substring(id.indexOf("::") + 2);
node = n.selectNodes("./description");
description += ((Node) node.get(0)).getText() + " - ";
}
level += 1;
nodes = doc.selectNodes("//funding_level_" + level);
}
if (!id.equals("")) {
Fundings fundings = new Fundings();
fundings.setId(id);
fundings.setDescription(description.substring(0, description.length() - 3).trim());
f.setFunding_stream(fundings);
}
return f;
}
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
Class<E> inputClazz) {
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
Encoders.bean(Organization.class))
.filter((FilterFunction<Organization>) o -> o != null)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath);
}
private static eu.dnetlib.dhp.oa.model.graph.Organization mapOrganization(
eu.dnetlib.dhp.schema.oaf.Organization org) {
if (org.getDataInfo().getDeletedbyinference())
return null;
Organization organization = new Organization();
Optional
.ofNullable(org.getLegalshortname())
.ifPresent(value -> organization.setLegalshortname(value.getValue()));
Optional
.ofNullable(org.getLegalname())
.ifPresent(value -> organization.setLegalname(value.getValue()));
Optional
.ofNullable(org.getWebsiteurl())
.ifPresent(value -> organization.setWebsiteurl(value.getValue()));
Optional
.ofNullable(org.getAlternativeNames())
.ifPresent(
value -> organization
.setAlternativenames(
value
.stream()
.map(v -> v.getValue())
.collect(Collectors.toList())));
Optional
.ofNullable(org.getCountry())
.ifPresent(
value -> {
if (!value.getClassid().equals(Constants.UNKNOWN)) {
organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname()));
}
});
Optional
.ofNullable(org.getId())
.ifPresent(value -> organization.setId(value));
Optional
.ofNullable(org.getPid())
.ifPresent(
value -> organization
.setPid(
value
.stream()
.map(p -> OrganizationPid.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList())));
return organization;
}
}

View File

@ -1,201 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
* Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity. The
* new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context related
* to communities and research initiative/infrastructures. For collectedfrom elements it creates: datasource -> provides
* -> result and result -> isProvidedBy -> datasource For hostedby elements it creates: datasource -> hosts -> result
* and result -> isHostedBy -> datasource For context elements it creates: context <-> isRelatedTo <-> result. Note for
* context: it gets the first provenance in the dataInfo. If more than one is present the others are not dumped
*/
public class Extractor implements Serializable {

	/**
	 * Sets up the Spark session, removes a possibly pre-existing output directory and fires the
	 * extraction of the relations implied by the dumped results.
	 *
	 * @param isSparkSessionManaged whether the spark session is managed by the framework
	 * @param inputPath             path of the serialized graph results to read
	 * @param outputPath            path where the extracted relations are written
	 * @param inputClazz            concrete Result subtype to deserialize
	 * @param communityMapPath      path of the serialized community map
	 */
	public void run(Boolean isSparkSessionManaged,
		String inputPath,
		String outputPath,
		Class<? extends Result> inputClazz,
		String communityMapPath) {
		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				extractRelationResult(
					spark, inputPath, outputPath, inputClazz, Utils.getCommunityMap(spark, communityMapPath));
			});
	}

	/**
	 * For every result it emits the datasource relations derived from the instances
	 * (collectedfrom / hostedby) and, for each context matching a community in the map, the pair
	 * of result &lt;-&gt; context isRelatedTo relations. Relations are deduplicated by their
	 * hashCode, accumulated in {@code hashCodes}.
	 */
	private <R extends Result> void extractRelationResult(SparkSession spark,
		String inputPath,
		String outputPath,
		Class<R> inputClazz,
		CommunityMap communityMap) {

		// hash codes of the relations emitted so far, used to avoid duplicates
		Set<Integer> hashCodes = new HashSet<>();

		Utils
			.readPath(spark, inputPath, inputClazz)
			.flatMap((FlatMapFunction<R, Relation>) value -> {
				List<Relation> relationList = new ArrayList<>();
				extractRelationsFromInstance(hashCodes, value, relationList);
				Set<String> communities = communityMap.keySet();
				Optional
					.ofNullable(value.getContext())
					.ifPresent(contexts -> contexts.forEach(context -> {
						String id = context.getId();
						// context ids may carry a category suffix ("community::category"):
						// keep only the community part for the lookup
						if (id.contains(":")) {
							id = id.substring(0, id.indexOf(":"));
						}
						// dump only contexts corresponding to a community in the map
						if (communities.contains(id)) {
							String contextId = Utils.getContextId(id);
							// NOTE: only the first provenance in the dataInfo list is dumped
							Provenance provenance = Optional
								.ofNullable(context.getDataInfo())
								.map(
									dinfo -> Optional
										.ofNullable(dinfo.get(0).getProvenanceaction())
										.map(
											paction -> Provenance
												.newInstance(
													paction.getClassid(),
													dinfo.get(0).getTrust()))
										.orElse(null))
								.orElse(null);
							// direct relation: result -> context
							Relation r = getRelation(
								value.getId(), contextId,
								Constants.RESULT_ENTITY,
								Constants.CONTEXT_ENTITY,
								ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP, provenance);
							if (!hashCodes.contains(r.hashCode())) {
								relationList
									.add(r);
								hashCodes.add(r.hashCode());
							}
							// inverse relation: context -> result
							r = getRelation(
								contextId, value.getId(),
								Constants.CONTEXT_ENTITY,
								Constants.RESULT_ENTITY,
								ModelConstants.IS_RELATED_TO,
								ModelConstants.RELATIONSHIP, provenance);
							if (!hashCodes.contains(r.hashCode())) {
								relationList
									.add(
										r);
								hashCodes.add(r.hashCode());
							}
						}
					}));
				return relationList.iterator();
			}, Encoders.bean(Relation.class))
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
	}

	/**
	 * Walks the instances of the given result and emits datasource relations for the
	 * collectedfrom (provides / isProvidedBy) and hostedby (hosts / isHostedBy) elements.
	 */
	private <R extends Result> void extractRelationsFromInstance(Set<Integer> hashCodes, R value,
		List<Relation> relationList) {
		Optional
			.ofNullable(value.getInstance())
			.ifPresent(inst -> inst.forEach(instance -> {
				Optional
					.ofNullable(instance.getCollectedfrom())
					.ifPresent(
						cf -> getRelatioPair(
							value, relationList, cf,
							ModelConstants.IS_PROVIDED_BY, ModelConstants.PROVIDES, hashCodes));
				Optional
					.ofNullable(instance.getHostedby())
					.ifPresent(
						hb -> getRelatioPair(
							value, relationList, hb,
							Constants.IS_HOSTED_BY, Constants.HOSTS, hashCodes));
			}));
	}

	/**
	 * Adds the result &lt;-&gt; datasource relation pair for a collectedfrom/hostedby element,
	 * skipping relations whose hashCode was already emitted. When no provenance information is
	 * available a default HARVESTED provenance with default trust is used.
	 * NOTE(review): here the provenance uses {@code getClassname()} while the context relations
	 * above use {@code getClassid()} — confirm whether the difference is intentional.
	 */
	private static <R extends Result> void getRelatioPair(R value, List<Relation> relationList, KeyValue cf,
		String resultDatasource, String datasourceResult,
		Set<Integer> hashCodes) {
		Provenance provenance = Optional
			.ofNullable(cf.getDataInfo())
			.map(
				dinfo -> Optional
					.ofNullable(dinfo.getProvenanceaction())
					.map(
						paction -> Provenance
							.newInstance(
								paction.getClassname(),
								dinfo.getTrust()))
					.orElse(
						Provenance
							.newInstance(
								eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
								eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)))
			.orElse(
				Provenance
					.newInstance(
						eu.dnetlib.dhp.oa.graph.dump.Constants.HARVESTED,
						eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST));
		// direct relation: result -> datasource
		Relation r = getRelation(
			value.getId(),
			cf.getKey(), Constants.RESULT_ENTITY, Constants.DATASOURCE_ENTITY,
			resultDatasource, ModelConstants.PROVISION,
			provenance);
		if (!hashCodes.contains(r.hashCode())) {
			relationList
				.add(r);
			hashCodes.add(r.hashCode());
		}
		// inverse relation: datasource -> result
		r = getRelation(
			cf.getKey(), value.getId(),
			Constants.DATASOURCE_ENTITY, Constants.RESULT_ENTITY,
			datasourceResult, ModelConstants.PROVISION,
			provenance);
		if (!hashCodes.contains(r.hashCode())) {
			relationList
				.add(r);
			hashCodes.add(r.hashCode());
		}
	}

	/**
	 * Builds a dump Relation between the given source and target nodes with the given relation
	 * name/type and provenance.
	 */
	private static Relation getRelation(String source, String target, String sourceType, String targetType,
		String relName, String relType, Provenance provenance) {
		Relation r = new Relation();
		r.setSource(Node.newInstance(source, sourceType));
		r.setTarget(Node.newInstance(target, targetType));
		r.setReltype(RelType.newInstance(relName, relType));
		r.setProvenance(provenance);
		return r;
	}
}

View File

@ -1,21 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * Map from an organization identifier to the list of community identifiers associated to it.
 * Lookups through {@link #get(String)} never return {@code null}: a missing (or null-valued)
 * key yields a fresh, empty, mutable list.
 * <p>
 * NOTE: {@code get(String)} <em>overloads</em> rather than overrides
 * {@link HashMap#get(Object)}; callers holding a plain {@code Map} reference still get the
 * standard null-returning behaviour.
 */
public class OrganizationMap extends HashMap<String, List<String>> {

	public OrganizationMap() {
		super();
	}

	/**
	 * @param key the organization identifier
	 * @return the communities mapped to {@code key}, or a new empty list when absent or null
	 */
	public List<String> get(String key) {
		// single lookup instead of the original double super.get() call;
		// preserves the "null value -> empty list" behaviour
		final List<String> communities = super.get(key);
		return communities == null ? new ArrayList<>() : communities;
	}
}

View File

@ -1,99 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
/**
* It process the ContextInfo information to produce a new Context Entity or a set of Relations between the generic
* context entity and datasource/projects related to the context.
*/
public class Process implements Serializable {

	/**
	 * Creates the dump entity for a context: a {@link ResearchCommunity} (subjects included) when
	 * the context type is "community", a plain {@link ResearchInitiative} otherwise. The entity id
	 * is the masked context id, while the acronym keeps the original context id.
	 *
	 * @param ci  the context information gathered from the profiles
	 * @param <R> the concrete ResearchInitiative subtype returned
	 * @return the populated context entity
	 * @throws MyRuntimeException wrapping any failure raised while building the entity
	 */
	@SuppressWarnings("unchecked")
	public static <R extends ResearchInitiative> R getEntity(ContextInfo ci) {
		try {
			ResearchInitiative ri;
			if (ci.getType().equals("community")) {
				ri = new ResearchCommunity();
				((ResearchCommunity) ri).setSubject(ci.getSubject());
				ri.setType(Constants.RESEARCH_COMMUNITY);
			} else {
				ri = new ResearchInitiative();
				ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
			}
			ri.setId(Utils.getContextId(ci.getId()));
			ri.setAcronym(ci.getId());
			ri.setDescription(ci.getDescription());
			ri.setName(ci.getName());
			// the Zenodo community link is optional; when present it is prefixed with the base URL
			if (StringUtils.isNotEmpty(ci.getZenodocommunity())) {
				ri.setZenodo_community(Constants.ZENODO_COMMUNITY_PREFIX + ci.getZenodocommunity());
			}
			return (R) ri;
		} catch (final Exception e) {
			throw new MyRuntimeException(e);
		}
	}

	/**
	 * Creates the bidirectional isRelatedTo relations between the context entity and each
	 * datasource/project listed in the context info. Both directions are emitted with USER_CLAIM
	 * provenance and default trust.
	 *
	 * @param ci the context information carrying the related datasource/project identifiers
	 * @return the list of relations (two per related entity)
	 * @throws MyRuntimeException wrapping any failure raised while building the relations
	 */
	public static List<Relation> getRelation(ContextInfo ci) {
		try {
			List<Relation> relationList = new ArrayList<>();
			ci
				.getDatasourceList()
				.forEach(ds -> {
					// the node type is derived from the 2-char id prefix (e.g. 10 -> datasource)
					String nodeType = ModelSupport.idPrefixEntity.get(ds.substring(0, 2));
					String contextId = Utils.getContextId(ci.getId());
					// context -> entity
					relationList
						.add(
							Relation
								.newInstance(
									Node
										.newInstance(
											contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY),
									Node.newInstance(ds, nodeType),
									RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
									Provenance
										.newInstance(
											Constants.USER_CLAIM,
											Constants.DEFAULT_TRUST)));
					// entity -> context
					relationList
						.add(
							Relation
								.newInstance(
									Node.newInstance(ds, nodeType),
									Node
										.newInstance(
											contextId, eu.dnetlib.dhp.oa.model.graph.Constants.CONTEXT_ENTITY),
									RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
									Provenance
										.newInstance(
											Constants.USER_CLAIM,
											Constants.DEFAULT_TRUST)));
				});
			return relationList;
		} catch (final Exception e) {
			throw new MyRuntimeException(e);
		}
	}
}

View File

@ -1,198 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem {

	// client used to query the Information Service profiles
	private ISLookUpService isLookUp;
	// raw XML context profiles returned by execContextRelationQuery()
	private List<String> contextRelationResult;

	// selects the full context profiles of communities/research initiatives whose status is 'all'
	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+
		" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
		" and $x//context/param[./@name = 'status']/text() = 'all' " +
		" return " +
		"$x//context";

	// returns one '@@'-separated record per context: id, name, description, subjects,
	// zenodo community, type
	private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+
		"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
		+
		"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
		"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
		+
		"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";

	/**
	 * Runs XQUERY_ENTITY and feeds one populated {@link ContextInfo} per context to the consumer.
	 * The subject list is set only when the subject field is non blank.
	 *
	 * @param consumer receives each parsed context
	 * @throws ISLookUpException when the IS lookup fails
	 */
	public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
		isLookUp
			.quickSearchProfile(XQUERY_ENTITY)
			.forEach(c -> {
				ContextInfo cinfo = new ContextInfo();
				// record layout: id @@ name @@ description @@ subjects @@ zenodoCommunity @@ type
				String[] cSplit = c.split("@@");
				cinfo.setId(cSplit[0]);
				cinfo.setName(cSplit[1]);
				cinfo.setDescription(cSplit[2]);
				if (!cSplit[3].trim().equals("")) {
					cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
				}
				cinfo.setZenodocommunity(cSplit[4]);
				cinfo.setType(cSplit[5]);
				consumer.accept(cinfo);
			});
	}

	public List<String> getContextRelationResult() {
		return contextRelationResult;
	}

	public void setContextRelationResult(List<String> contextRelationResult) {
		this.contextRelationResult = contextRelationResult;
	}

	public ISLookUpService getIsLookUp() {
		return isLookUp;
	}

	public void setIsLookUp(ISLookUpService isLookUpService) {
		this.isLookUp = isLookUpService;
	}

	/**
	 * Fetches the raw context profiles (XQUERY) and caches them in contextRelationResult.
	 *
	 * @throws ISLookUpException when the IS lookup fails
	 */
	public void execContextRelationQuery() throws ISLookUpException {
		contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
	}

	/**
	 * Parses the cached context profiles and, for each context, extracts the entity ids found in
	 * the category matching {@code category}, prefixing them with {@code prefix}. Requires
	 * execContextRelationQuery() to have been called first.
	 *
	 * @param consumer receives one ContextInfo per context, with the datasource/project ids set
	 * @param category the profile category to scan (e.g. datasources, projects)
	 * @param prefix   the id prefix of the target entity type
	 */
	public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
		contextRelationResult.forEach(xml -> {
			ContextInfo cinfo = new ContextInfo();
			final Document doc;
			try {

				final SAXReader reader = new SAXReader();
				// disable DTD processing to prevent XXE on the profile XML
				reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
				doc = reader.read(new StringReader(xml));
				Element root = doc.getRootElement();
				cinfo.setId(root.attributeValue("id"));

				Iterator<Element> it = root.elementIterator();
				while (it.hasNext()) {
					Element el = it.next();
					if (el.getName().equals("category")) {
						// the category id carries a '::'-separated path: keep the last token
						String categoryId = el.attributeValue("id");
						categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
						if (categoryId.equals(category)) {
							cinfo.setDatasourceList(getCategoryList(el, prefix));
						}
					}

				}
				consumer.accept(cinfo);
			} catch (DocumentException | SAXException e) {
				e.printStackTrace();
			}

		});

	}

	/**
	 * Collects the OpenAIRE ids of all the concepts in the given category element, skipping those
	 * for which no id could be resolved.
	 */
	@NotNull
	private List<String> getCategoryList(Element el, String prefix) {
		List<String> datasourceList = new ArrayList<>();
		for (Object node : el.selectNodes(".//concept")) {
			String oid = getOpenaireId((Node) node, prefix);
			if (oid != null)
				datasourceList.add(oid);
		}

		return datasourceList;
	}

	/**
	 * Resolves the OpenAIRE id for a concept: uses the explicit 'openaireId' param when present,
	 * otherwise tries to build one from the funding parameters (projects only).
	 */
	private String getOpenaireId(Node el, String prefix) {
		for (Object node : el.selectNodes(".//param")) {
			Node n = (Node) node;
			if (n.valueOf("./@name").equals("openaireId")) {
				return prefix + "|" + n.getText();
			}
		}

		return makeOpenaireId(el, prefix);

	}

	/**
	 * Builds a project OpenAIRE id from the funder / funding / grant number params as
	 * prefix|namespace::md5(grantId). Returns null for non-project prefixes or when an EC
	 * project has no funding information. The funder namespace is padded to 12 characters.
	 */
	private String makeOpenaireId(Node el, String prefix) {
		if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
			return null;
		}
		String funder = "";
		String grantId = null;
		String funding = null;
		for (Object node : el.selectNodes(".//param")) {
			Node n = (Node) node;
			switch (n.valueOf("./@name")) {
				case "funding":
					funding = n.getText();
					break;
				case "funder":
					funder = n.getText();
					break;
				case "CD_PROJECT_NUMBER":
					grantId = n.getText();
					break;
				default:
					break;
			}
		}
		String nsp = null;
		switch (funder.toLowerCase()) {
			case "ec":
				if (funding == null) {
					return null;
				}
				if (funding.toLowerCase().contains("h2020")) {
					nsp = "corda__h2020::";
				} else {
					nsp = "corda_______::";
				}
				break;
			case "tubitak":
				nsp = "tubitakf____::";
				break;
			case "dfg":
				nsp = "dfgf________::";
				break;
			default:
				// generic funders: lowercase name right-padded with '_' up to 12 chars
				StringBuilder bld = new StringBuilder();
				bld.append(funder.toLowerCase());
				for (int i = funder.length(); i < 12; i++)
					bld.append("_");
				bld.append("::");
				nsp = bld.toString();
		}

		// NOTE(review): grantId may still be null here for non-EC funders — confirm DHPUtils.md5 tolerates it
		return prefix + "|" + nsp + DHPUtils.md5(grantId);
	}

}

View File

@ -1,122 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
/**
* Reads all the entities of the same type (Relation / Results) and saves them in the same folder
*/
public class SparkCollectAndSave implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkCollectAndSave.class);

	/**
	 * Parses the CLI arguments, cleans the result output directory and fires the collection of
	 * the dumped results and relations.
	 *
	 * @param args expects sourcePath, outputPath and the optional isSparkSessionManaged and
	 *             resultAggregation flags (both default to true)
	 */
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkCollectAndSave.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_collect_and_save.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		// when true, all result types are unioned into a single /result output
		final Boolean aggregateResult = Optional
			.ofNullable(parser.get("resultAggregation"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);

		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath + "/result");
				run(spark, inputPath, outputPath, aggregateResult);

			});

	}

	/**
	 * Collects the four result types either into a single /result folder (aggregate == true) or
	 * into one folder per type; relations from every source are always unioned under /relation.
	 */
	private static void run(SparkSession spark, String inputPath, String outputPath, boolean aggregate) {
		if (aggregate) {
			Utils
				.readPath(spark, inputPath + "/result/publication", GraphResult.class)
				.union(Utils.readPath(spark, inputPath + "/result/dataset", GraphResult.class))
				.union(Utils.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class))
				.union(Utils.readPath(spark, inputPath + "/result/software", GraphResult.class))
				.write()
				.option("compression", "gzip")
				.mode(SaveMode.Overwrite)
				.json(outputPath + "/result");
		} else {
			write(
				Utils
					.readPath(spark, inputPath + "/result/publication", GraphResult.class),
				outputPath + "/publication");
			write(
				Utils
					.readPath(spark, inputPath + "/result/dataset", GraphResult.class),
				outputPath + "/dataset");
			// NOTE(review): output folder name "otheresearchproduct" is missing an 'r'
			// (input reads "otherresearchproduct") — confirm downstream consumers expect this
			write(
				Utils
					.readPath(spark, inputPath + "/result/otherresearchproduct", GraphResult.class),
				outputPath + "/otheresearchproduct");
			write(
				Utils
					.readPath(spark, inputPath + "/result/software", GraphResult.class),
				outputPath + "/software");

		}

		// relations extracted from all the sources are merged into a single output
		Utils
			.readPath(spark, inputPath + "/relation/publication", Relation.class)
			.union(Utils.readPath(spark, inputPath + "/relation/dataset", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/orp", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/software", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/contextOrg", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/context", Relation.class))
			.union(Utils.readPath(spark, inputPath + "/relation/relation", Relation.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath + "/relation");

	}

	/**
	 * Writes the given dataset as gzipped JSON to the given path, overwriting previous content.
	 */
	private static void write(Dataset<GraphResult> dataSet, String outputPath) {
		dataSet
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
	}
}

View File

@ -1,54 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
* Spark Job that fires the dump for the entites
*/
/**
 * Spark driver dumping the graph entities: parses the CLI arguments, resolves the concrete
 * entity class by name and delegates the actual dump to {@link DumpGraphEntities}.
 */
public class SparkDumpEntitiesJob implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkDumpEntitiesJob.class);

	@SuppressWarnings("unchecked")
	public static void main(String[] args) throws Exception {
		// job parameter definitions are bundled with the jar
		final String configuration = IOUtils
			.toString(
				SparkDumpEntitiesJob.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(configuration);
		argumentParser.parseArgument(args);

		// the session is managed by the framework unless the flag states otherwise
		final String managed = argumentParser.get("isSparkSessionManaged");
		final Boolean isSparkSessionManaged = managed == null ? Boolean.TRUE : Boolean.valueOf(managed);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = argumentParser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = argumentParser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = argumentParser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String communityMapPath = argumentParser.get("communityMapPath");

		// resolve the entity type at runtime and fire the dump
		final Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class
			.forName(resultClassName);

		new DumpGraphEntities().run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
	}
}

View File

@ -1,135 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
* Dumps eu.dnetlib.dhp.schema.oaf.Relation in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation
*/
public class SparkDumpRelationJob implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkDumpRelationJob.class);

	/**
	 * Parses the CLI arguments and fires the dump of the relations.
	 *
	 * @param args expects sourcePath, outputPath, the optional isSparkSessionManaged flag and an
	 *             optional ';'-separated removeSet of relation classes to exclude from the dump
	 */
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpRelationJob.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		// relation classes listed here are skipped during the dump
		Optional<String> rs = Optional.ofNullable(parser.get("removeSet"));
		final Set<String> removeSet = new HashSet<>();
		if (rs.isPresent()) {
			Collections.addAll(removeSet, rs.get().split(";"));
		}

		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				dumpRelation(spark, inputPath, outputPath, removeSet);

			});

	}

	/**
	 * Maps each internal {@link Relation} (unless its relClass is in removeSet) onto the dump
	 * relation model: source/target nodes typed via their 2-char id prefix, relation type from
	 * relClass/subRelType, provenance when available, and validation info when the relation is
	 * validated. The result is written as gzipped JSON.
	 */
	private static void dumpRelation(SparkSession spark, String inputPath, String outputPath, Set<String> removeSet) {
		Dataset<Relation> relations = Utils.readPath(spark, inputPath, Relation.class);
		relations
			.filter((FilterFunction<Relation>) r -> !removeSet.contains(r.getRelClass()))
			.map((MapFunction<Relation, eu.dnetlib.dhp.oa.model.graph.Relation>) relation -> {
				eu.dnetlib.dhp.oa.model.graph.Relation relNew = new eu.dnetlib.dhp.oa.model.graph.Relation();
				relNew
					.setSource(
						Node
							.newInstance(
								relation.getSource(),
								ModelSupport.idPrefixEntity.get(relation.getSource().substring(0, 2))));

				relNew
					.setTarget(
						Node
							.newInstance(
								relation.getTarget(),
								ModelSupport.idPrefixEntity.get(relation.getTarget().substring(0, 2))));

				relNew
					.setReltype(
						RelType
							.newInstance(
								relation.getRelClass(),
								relation.getSubRelType()));

				// provenance is set only when both the action and its classname are present
				Optional<DataInfo> odInfo = Optional.ofNullable(relation.getDataInfo());
				if (odInfo.isPresent()) {
					DataInfo dInfo = odInfo.get();
					if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
						Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
						relNew
							.setProvenance(
								Provenance
									.newInstance(
										dInfo.getProvenanceaction().getClassname(),
										dInfo.getTrust()));
					}
				}
				// validation info is dumped only for validated relations
				if (Boolean.TRUE.equals(relation.getValidated())) {
					relNew.setValidated(relation.getValidated());
					relNew.setValidationDate(relation.getValidationDate());
				}

				return relNew;

			}, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);

	}

}

View File

@ -1,54 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
* Spark job that fires the extraction of relations from entities
*/
/**
 * Spark driver firing the extraction of the relations implied by the dumped entities: parses
 * the CLI arguments, resolves the concrete result class by name and delegates to
 * {@link Extractor}.
 */
public class SparkExtractRelationFromEntities implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkExtractRelationFromEntities.class);

	@SuppressWarnings("unchecked")
	public static void main(String[] args) throws Exception {
		// job parameter definitions are bundled with the jar
		final String configuration = IOUtils
			.toString(
				SparkExtractRelationFromEntities.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

		final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(configuration);
		argumentParser.parseArgument(args);

		// the session is managed by the framework unless the flag states otherwise
		final String managed = argumentParser.get("isSparkSessionManaged");
		final Boolean isSparkSessionManaged = managed == null ? Boolean.TRUE : Boolean.valueOf(managed);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = argumentParser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = argumentParser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = argumentParser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String communityMapPath = argumentParser.get("communityMapPath");

		// resolve the result type at runtime and fire the extraction
		final Class<? extends Result> inputClazz = (Class<? extends Result>) Class
			.forName(resultClassName);

		new Extractor().run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMapPath);
	}
}

View File

@ -1,179 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
* Create new Relations between Context Entities and Organizations whose products are associated to the context. It
* produces relation such as: organization <-> isRelatedTo <-> context
*/
public class SparkOrganizationRelation implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class);

	/**
	 * Parses the CLI arguments (including the organization-to-community map serialized as JSON)
	 * and fires the creation of the context &lt;-&gt; organization relations.
	 *
	 * @param args expects sourcePath, outputPath, organizationCommunityMap, communityMapPath and
	 *             the optional isSparkSessionManaged flag
	 */
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkOrganizationRelation.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final OrganizationMap organizationMap = new Gson()
			.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
		final String serializedOrganizationMap = new Gson().toJson(organizationMap);
		log.info("organization map : {}", serializedOrganizationMap);

		final String communityMapPath = parser.get("communityMapPath");
		log.info("communityMapPath: {}", communityMapPath);

		SparkConf conf = new SparkConf();

		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				extractRelation(spark, inputPath, organizationMap, outputPath, communityMapPath);

			});

	}

	/**
	 * Emits the bidirectional context &lt;-&gt; organization relations. For organizations merged
	 * into a representative, the relations are created towards the representative id (and the
	 * organization is removed from the map); the organizations left in the map afterwards get
	 * relations with their own id. Only communities present in the community map are considered.
	 */
	private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
		String outputPath, String communityMapPath) {

		CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);

		Dataset<Relation> relationDataset = Utils.readPath(spark, inputPath, Relation.class);

		relationDataset.createOrReplaceTempView("relation");

		List<eu.dnetlib.dhp.oa.model.graph.Relation> relList = new ArrayList<>();

		// pairs (organization, representative) from the dedup 'merges' relations
		// (organization ids are prefixed with '20')
		Dataset<MergedRels> mergedRelsDataset = spark
			.sql(
				"SELECT target organizationId, source representativeId " +
					"FROM relation " +
					"WHERE datainfo.deletedbyinference = false " +
					"AND relclass = 'merges' " +
					"AND substr(source, 1, 2) = '20'")
			.as(Encoders.bean(MergedRels.class));

		// keep only the merged organizations that appear in the organization map
		mergedRelsDataset.map((MapFunction<MergedRels, MergedRels>) mergedRels -> {
			if (organizationMap.containsKey(mergedRels.getOrganizationId())) {
				return mergedRels;
			}
			return null;
		}, Encoders.bean(MergedRels.class))
			.filter(Objects::nonNull)
			.collectAsList()
			.forEach(getMergedRelsConsumer(organizationMap, relList, communityMap));

		// the organizations still in the map were not merged: relate them with their own id
		organizationMap
			.keySet()
			.forEach(
				oId -> organizationMap
					.get(oId)
					.forEach(community -> {
						if (communityMap.containsKey(community)) {
							addRelations(relList, community, oId);
						}
					}));

		spark
			.createDataset(relList, Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);

	}

	/**
	 * Returns a consumer that, for a merged organization, adds the relations towards its
	 * representative id for every mapped community present in the community map, then removes the
	 * organization from the map (so it is not processed again as unmerged).
	 */
	@NotNull
	private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
		List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, CommunityMap communityMap) {
		return mergedRels -> {
			String oId = mergedRels.getOrganizationId();
			organizationMap
				.get(oId)
				.forEach(community -> {
					if (communityMap.containsKey(community)) {
						addRelations(relList, community, mergedRels.getRepresentativeId());
					}

				});
			organizationMap.remove(oId);
		};
	}

	/**
	 * Adds both directions of the isRelatedTo relation between the context of the given community
	 * and the given organization, with USER_CLAIM provenance and default trust.
	 */
	private static void addRelations(List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, String community,
		String organization) {

		String id = Utils.getContextId(community);
		log.info("create relation for organization: {}", organization);
		// context -> organization
		relList
			.add(
				eu.dnetlib.dhp.oa.model.graph.Relation
					.newInstance(
						Node.newInstance(id, Constants.CONTEXT_ENTITY),
						Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
						RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
						Provenance
							.newInstance(
								eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
								eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));

		// organization -> context
		relList
			.add(
				eu.dnetlib.dhp.oa.model.graph.Relation
					.newInstance(
						Node.newInstance(organization, ModelSupport.idPrefixEntity.get(organization.substring(0, 2))),
						Node.newInstance(id, Constants.CONTEXT_ENTITY),
						RelType.newInstance(ModelConstants.IS_RELATED_TO, ModelConstants.RELATIONSHIP),
						Provenance
							.newInstance(
								eu.dnetlib.dhp.oa.graph.dump.Constants.USER_CLAIM,
								eu.dnetlib.dhp.oa.graph.dump.Constants.DEFAULT_TRUST)));
	}

}

View File

@ -1,136 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.*;
/**
* It selects the valid relations among those present in the graph. One relation is valid if it is not deletedbyinference
* and if both the source and the target node are present in the graph and are not deleted by inference nor invisible.
* To check this I made a view of the ids of all the entities in the graph, and select the relations for which a join exists
* with this view for both the source and the target
*/
public class SparkSelectValidRelationsJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class);
/**
 * Parses the CLI arguments, cleans the output directory and fires the selection of the valid
 * relations (those whose source and target entities both exist in the graph and are neither
 * deleted by inference nor invisible).
 *
 * @param args expects sourcePath, outputPath and the optional isSparkSessionManaged flag
 */
public static void main(String[] args) throws Exception {
	String jsonConfiguration = IOUtils
		.toString(
			SparkSelectValidRelationsJob.class
				.getResourceAsStream(
					"/eu/dnetlib/dhp/oa/graph/dump/input_relationdump_parameters.json"));

	final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
	parser.parseArgument(args);

	// the session is managed by the framework unless the flag states otherwise
	Boolean isSparkSessionManaged = Optional
		.ofNullable(parser.get("isSparkSessionManaged"))
		.map(Boolean::valueOf)
		.orElse(Boolean.TRUE);
	log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

	final String inputPath = parser.get("sourcePath");
	log.info("inputPath: {}", inputPath);

	final String outputPath = parser.get("outputPath");
	log.info("outputPath: {}", outputPath);

	SparkConf conf = new SparkConf();

	runWithSparkSession(
		conf,
		isSparkSessionManaged,
		spark -> {
			Utils.removeOutputDir(spark, outputPath);
			selectValidRelation(spark, inputPath, outputPath);

		});

}
private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) {
Dataset<Relation> relation = Utils.readPath(spark, inputPath + "/relation", Relation.class);
Dataset<Publication> publication = Utils.readPath(spark, inputPath + "/publication", Publication.class);
Dataset<eu.dnetlib.dhp.schema.oaf.Dataset> dataset = Utils
.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
Dataset<Software> software = Utils.readPath(spark, inputPath + "/software", Software.class);
Dataset<OtherResearchProduct> other = Utils
.readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class);
Dataset<Organization> organization = Utils.readPath(spark, inputPath + "/organization", Organization.class);
Dataset<Project> project = Utils.readPath(spark, inputPath + "/project", Project.class);
Dataset<Datasource> datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class);
relation.createOrReplaceTempView("relation");
publication.createOrReplaceTempView("publication");
dataset.createOrReplaceTempView("dataset");
other.createOrReplaceTempView("other");
software.createOrReplaceTempView("software");
organization.createOrReplaceTempView("organization");
project.createOrReplaceTempView("project");
datasource.createOrReplaceTempView("datasource");
spark
.sql(
"SELECT id " +
"FROM publication " +
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
"UNION ALL " +
"SELECT id " +
"FROM dataset " +
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
"UNION ALL " +
"SELECT id " +
"FROM other " +
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
"UNION ALL " +
"SELECT id " +
"FROM software " +
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
"UNION ALL " +
"SELECT id " +
"FROM organization " +
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
"UNION ALL " +
"SELECT id " +
"FROM project " +
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
"UNION ALL " +
"SELECT id " +
"FROM datasource " +
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false ")
.createOrReplaceTempView("identifiers");
spark
.sql(
"SELECT relation.* " +
"FROM relation " +
"JOIN identifiers i1 " +
"ON source = i1.id " +
"JOIN identifiers i2 " +
"ON target = i2.id " +
"WHERE datainfo.deletedbyinference = false")
.as(Encoders.bean(Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath);
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import java.io.Serializable;
import java.util.HashMap;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import java.io.Serializable;

View File

@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
@ -20,13 +19,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.eosc.model.OrganizationPid;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
@ -74,8 +71,8 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable {
private static void addOrganizations(SparkSession spark, String inputPath, String outputPath,
String resultPath) {
Dataset<EoscResult> results = Utils
.readPath(spark, resultPath, EoscResult.class);
Dataset<Result> results = Utils
.readPath(spark, resultPath, Result.class);
Dataset<Relation> relations = Utils
.readPath(spark, inputPath + "/relation", Relation.class)
@ -126,14 +123,14 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable {
results
.joinWith(resultOrganization, results.col("id").equalTo(resultOrganization.col("resultId")), "left")
.groupByKey(
(MapFunction<Tuple2<EoscResult, ResultOrganizations>, String>) t2 -> t2._1().getId(), Encoders.STRING())
(MapFunction<Tuple2<Result, ResultOrganizations>, String>) t2 -> t2._1().getId(), Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Tuple2<EoscResult, ResultOrganizations>, EoscResult>) (s, it) -> {
Tuple2<EoscResult, ResultOrganizations> first = it.next();
(MapGroupsFunction<String, Tuple2<Result, ResultOrganizations>, Result>) (s, it) -> {
Tuple2<Result, ResultOrganizations> first = it.next();
if (first._2() == null) {
return first._1();
}
EoscResult ret = first._1();
Result ret = first._1();
List<eu.dnetlib.dhp.eosc.model.Organization> affiliation = new ArrayList<>();
Set<String> alreadyInsertedAffiliations = new HashSet<>();
affiliation.add(first._2().getAffiliation());
@ -147,7 +144,7 @@ public class ExtendEoscResultWithOrganizationStep2 implements Serializable {
});
ret.setAffiliation(affiliation);
return ret;
}, Encoders.bean(EoscResult.class))
}, Encoders.bean(Result.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@ -22,12 +22,10 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.eosc.model.Indicator;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.eosc.model.UsageCounts;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
@ -76,20 +74,20 @@ public class ExtendWithUsageCounts implements Serializable {
private static void addIndicators(SparkSession spark, String actionSetPath, String outputPath, String resultPath) {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
JavaRDD<Result> resultsWithIndicatorsRdd = sc
JavaRDD<eu.dnetlib.dhp.schema.oaf.Result> resultsWithIndicatorsRdd = sc
.sequenceFile(actionSetPath, Text.class, Text.class)
.map(value -> new ObjectMapper().readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Result) aa.getPayload()));
.map(aa -> ((eu.dnetlib.dhp.schema.oaf.Result) aa.getPayload()));
Dataset<Result> resultWithIndicators = spark
.createDataset(resultsWithIndicatorsRdd.rdd(), Encoders.bean(Result.class));
Dataset<eu.dnetlib.dhp.schema.oaf.Result> resultWithIndicators = spark
.createDataset(resultsWithIndicatorsRdd.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.oaf.Result.class));
Dataset<EoscResult> result = Utils
.readPath(spark, resultPath, EoscResult.class);
Dataset<Result> result = Utils
.readPath(spark, resultPath, Result.class);
result
.joinWith(resultWithIndicators, result.col("id").equalTo(resultWithIndicators.col("id")), "left")
.map((MapFunction<Tuple2<EoscResult, Result>, EoscResult>) t2 -> {
.map((MapFunction<Tuple2<Result, eu.dnetlib.dhp.schema.oaf.Result>, Result>) t2 -> {
if (Optional.ofNullable(t2._2()).isPresent()) {
Indicator indicator = new Indicator();
UsageCounts uc = new UsageCounts();
@ -105,7 +103,7 @@ public class ExtendWithUsageCounts implements Serializable {
}
return t2._1();
}, Encoders.bean(EoscResult.class))
}, Encoders.bean(Result.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import java.io.StringReader;
import java.util.List;
@ -10,7 +10,6 @@ import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

View File

@ -1,10 +1,10 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import java.io.Serializable;
import java.util.List;
import eu.dnetlib.dhp.oa.model.community.Project;
import eu.dnetlib.dhp.eosc.model.Project;
public class ResultProject implements Serializable {
private String resultId;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import java.io.BufferedWriter;
import java.io.IOException;

View File

@ -17,12 +17,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
* @author miriam.baglioni
@ -59,7 +55,8 @@ public class SelectEoscResultsJobStep1 implements Serializable {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
Class<? extends eu.dnetlib.dhp.schema.oaf.Result> inputClazz = (Class<? extends eu.dnetlib.dhp.schema.oaf.Result>) Class
.forName(resultClassName);
SparkConf conf = new SparkConf();
@ -72,7 +69,8 @@ public class SelectEoscResultsJobStep1 implements Serializable {
});
}
private static <R extends Result> void selectEoscResults(SparkSession spark, String inputPath, String outputPath,
private static <R extends eu.dnetlib.dhp.schema.oaf.Result> void selectEoscResults(SparkSession spark,
String inputPath, String outputPath,
Class<R> inputClazz, String communityMapPath) {
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
Utils
@ -80,18 +78,11 @@ public class SelectEoscResultsJobStep1 implements Serializable {
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible()
&& r.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
// ||
// Optional
// .ofNullable(r.getSubject())
// .map(
// s -> s
// .stream()
// .anyMatch(sbj -> sbj.getValue().equalsIgnoreCase("EOSC::RO-crate")))
// .orElse(false)))
.map(
(MapFunction<R, EoscResult>) r -> (EoscResult) ResultMapper
.map(r, communityMap, Constants.DUMPTYPE.EOSC.getType()),
Encoders.bean(EoscResult.class))
(MapFunction<R, Result>) r -> (Result) ResultMapper
.map(r, communityMap),
Encoders.bean(Result.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@ -4,14 +4,10 @@ package eu.dnetlib.dhp.oa.graph.dump.eosc;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@ -21,11 +17,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.graph.Node;
import eu.dnetlib.dhp.oa.model.graph.RelType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.eosc.model.Provenance;
import eu.dnetlib.dhp.eosc.model.RelType;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;

View File

@ -0,0 +1,96 @@
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Optional;
import javax.rmi.CORBA.Util;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.eosc.model.Relation;
import eu.dnetlib.dhp.eosc.model.Result;
import scala.Tuple2;
/**
 * Extends each dumped EOSC result with the set of relations having the result as source.
 * The results are left-joined with the relations on result.id = relation.source, grouped by the
 * result identifier, and every matching relation is appended to the result's relation list.
 *
 * @author miriam.baglioni
 * @Date 02/02/23
 */
public class SparkExtendResultWithRelation implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkExtendResultWithRelation.class);

	public static void main(String[] args) throws Exception {
		final String jsonConfiguration = IOUtils
			.toString(
				SparkExtendResultWithRelation.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_extendwithrelation_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		// a managed spark session is the default unless explicitly disabled
		final Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String resultPath = parser.get("resultPath");
		log.info("resultPath: {}", resultPath);

		final String relationPath = parser.get("relationPath");
		log.info("relationPath: {}", relationPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		runWithSparkSession(
			new SparkConf(),
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				extendResultWithRelation(spark, resultPath, relationPath, outputPath);
			});
	}

	/**
	 * Left-joins the results with the relations on the relation source, then groups by the result
	 * id and collects all the matched relations into the result's relation list.
	 */
	private static void extendResultWithRelation(SparkSession spark, String resultPath, String relationPath,
		String outputPath) {
		final Dataset<Result> results = Utils.readPath(spark, resultPath, Result.class);
		final Dataset<Relation> relations = Utils.readPath(spark, relationPath, Relation.class);

		results
			.joinWith(relations, results.col("id").equalTo(relations.col("source")), "left")
			.groupByKey((MapFunction<Tuple2<Result, Relation>, String>) pair -> pair._1().getId(), Encoders.STRING())
			.mapGroups((MapGroupsFunction<String, Tuple2<Result, Relation>, Result>) (id, pairs) -> {
				final Tuple2<Result, Relation> head = pairs.next();
				final Result extended = head._1();
				// an unmatched left join yields a null right side: nothing to add
				if (head._2() != null) {
					if (extended.getRelations() == null) {
						extended.setRelations(new ArrayList<>());
					}
					extended.getRelations().add(head._2());
					pairs.forEachRemaining(pair -> extended.getRelations().add(pair._2()));
				}
				return extended;
			}, Encoders.bean(Result.class))
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
	}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -26,11 +26,10 @@ import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.Provenance;
import eu.dnetlib.dhp.oa.model.community.Funder;
import eu.dnetlib.dhp.oa.model.community.Project;
import eu.dnetlib.dhp.oa.model.community.Validated;
import eu.dnetlib.dhp.eosc.model.Funder;
import eu.dnetlib.dhp.eosc.model.Project;
import eu.dnetlib.dhp.eosc.model.Provenance;
import eu.dnetlib.dhp.eosc.model.Validated;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;

View File

@ -9,9 +9,7 @@ import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
@ -20,9 +18,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.eosc.model.Provenance;
import eu.dnetlib.dhp.eosc.model.RelType;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
@ -81,24 +79,24 @@ public class SparkSelectRelation implements Serializable {
&& !removeSet.contains(r.getRelClass()));
Dataset<String> resultIds = Utils
.readPath(spark, outputPath + "/publication", EoscResult.class)
.readPath(spark, outputPath + "/publication", Result.class)
.map((MapFunction<EoscResult, String>) p -> p.getId(), Encoders.STRING())
.map((MapFunction<Result, String>) p -> p.getId(), Encoders.STRING())
.union(
Utils
.readPath(spark, outputPath + "/dataset", EoscResult.class)
.readPath(spark, outputPath + "/dataset", Result.class)
.map((MapFunction<EoscResult, String>) d -> d.getId(), Encoders.STRING()))
.map((MapFunction<Result, String>) d -> d.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, outputPath + "/software", EoscResult.class)
.readPath(spark, outputPath + "/software", Result.class)
.map((MapFunction<EoscResult, String>) s -> s.getId(), Encoders.STRING()))
.map((MapFunction<Result, String>) s -> s.getId(), Encoders.STRING()))
.union(
Utils
.readPath(spark, outputPath + "/otherresearchproduct", EoscResult.class)
.readPath(spark, outputPath + "/otherresearchproduct", Result.class)
.map((MapFunction<EoscResult, String>) o -> o.getId(), Encoders.STRING()));
.map((MapFunction<Result, String>) o -> o.getId(), Encoders.STRING()));
// select result -> result relations
Dataset<Relation> relResultResult = relation
@ -108,6 +106,42 @@ public class SparkSelectRelation implements Serializable {
relResultResult
.joinWith(resultIds, relResultResult.col("target").equalTo(resultIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
.map((MapFunction<Relation, eu.dnetlib.dhp.eosc.model.Relation>) rel -> {
eu.dnetlib.dhp.eosc.model.Relation relNew = new eu.dnetlib.dhp.eosc.model.Relation();
relNew
.setSource(
rel.getSource());
relNew
.setTarget(
rel.getTarget());
relNew
.setReltype(
RelType
.newInstance(
rel.getRelClass(),
rel.getSubRelType()));
Optional<DataInfo> odInfo = Optional.ofNullable(rel.getDataInfo());
if (odInfo.isPresent()) {
DataInfo dInfo = odInfo.get();
if (Optional.ofNullable(dInfo.getProvenanceaction()).isPresent() &&
Optional.ofNullable(dInfo.getProvenanceaction().getClassname()).isPresent()) {
relNew
.setProvenance(
Provenance
.newInstance(
dInfo.getProvenanceaction().getClassname(),
dInfo.getTrust()));
}
}
return relNew;
}, Encoders.bean(eu.dnetlib.dhp.eosc.model.Relation.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -19,12 +19,8 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import scala.Tuple2;
public class SparkUpdateProjectInfo implements Serializable {
@ -64,43 +60,33 @@ public class SparkUpdateProjectInfo implements Serializable {
SparkConf conf = new SparkConf();
Class<? extends CommunityResult> clazz;
if (Constants.DUMPTYPE.EOSC.getType().equals(dumpType)) {
clazz = (Class<? extends CommunityResult>) Class.forName("eu.dnetlib.dhp.eosc.model.EoscResult");
} else {
clazz = (Class<? extends CommunityResult>) Class
.forName("eu.dnetlib.dhp.oa.model.community.CommunityResult");
}
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extend(spark, inputPath, outputPath, preparedInfoPath, clazz);
extend(spark, inputPath, outputPath, preparedInfoPath);
});
}
private static <E extends CommunityResult> void extend(
private static void extend(
SparkSession spark,
String inputPath,
String outputPath,
String preparedInfoPath,
Class<E> clazz) {
String preparedInfoPath) {
Dataset<E> result = Utils.readPath(spark, inputPath, clazz);
Dataset<Result> result = Utils.readPath(spark, inputPath, Result.class);
Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
result
.joinWith(
resultProject, result.col("id").equalTo(resultProject.col("resultId")),
"left")
.map((MapFunction<Tuple2<E, ResultProject>, E>) value -> {
E r = value._1();
.map((MapFunction<Tuple2<Result, ResultProject>, Result>) value -> {
Result r = value._1();
Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList()));
return r;
}, Encoders.bean(clazz))
}, Encoders.bean(Result.class))
.write()
.option("compression", "gzip")
.mode(SaveMode.Append)

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.graph.dump;
package eu.dnetlib.dhp.oa.graph.dump.eosc;
import java.io.BufferedReader;
import java.io.IOException;
@ -16,8 +16,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

View File

@ -1,128 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.funderresults;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Funder;
import eu.dnetlib.dhp.oa.model.community.Project;
/**
 * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC
 * for the EC it specifies also the fundingStream (FP7 or H2020)
 */
public class SparkDumpFunderResults implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkDumpFunderResults.class);

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				SparkDumpFunderResults.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/funder_result_parameters.json"));
		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);
		// a managed spark session is the default unless explicitly disabled
		Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);
		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);
		SparkConf conf = new SparkConf();
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				writeResultProjectList(spark, inputPath, outputPath);
			});
	}

	/**
	 * Collects the dumped results of every type, computes the distinct set of funder names
	 * appearing in their projects and dumps, per funder, the results it funded.
	 */
	private static void writeResultProjectList(SparkSession spark, String inputPath, String outputPath) {
		Dataset<CommunityResult> result = Utils
			.readPath(spark, inputPath + "/publication", CommunityResult.class)
			.union(Utils.readPath(spark, inputPath + "/dataset", CommunityResult.class))
			.union(Utils.readPath(spark, inputPath + "/otherresearchproduct", CommunityResult.class))
			.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
		log.info("Number of result {}", result.count());
		// results without projects cannot be attributed to any funder: drop them here instead of
		// failing with a NullPointerException in the flatMap below (dumpResults already guards
		// against null projects, this pipeline did not)
		Dataset<String> tmp = result
			.filter(result.col("projects").isNotNull())
			.flatMap((FlatMapFunction<CommunityResult, String>) cr -> cr.getProjects().stream().map(p -> {
				return getFunderName(p);
			}).collect(Collectors.toList()).iterator(), Encoders.STRING())
			.distinct();
		List<String> funderList = tmp.collectAsList();
		funderList.forEach(funder -> {
			dumpResults(funder, result, outputPath);
		});
	}

	/**
	 * Derives the funder name for a project: the funder short name when available (with the
	 * funding stream appended for the EC), otherwise a name extracted from the project id with
	 * a few hard-coded normalizations.
	 */
	@NotNull
	private static String getFunderName(Project p) {
		Optional<Funder> ofunder = Optional.ofNullable(p.getFunder());
		if (ofunder.isPresent()) {
			// NOTE(review): assumes shortName is never null when a funder is set — TODO confirm
			String fName = ofunder.get().getShortName();
			if (fName.equalsIgnoreCase("ec")) {
				fName += "_" + ofunder.get().getFundingStream();
			}
			return fName;
		} else {
			// fall back to the funder encoded in the project id, e.g. "40|corda_____::..."
			// NOTE(review): assumes the id always contains a "_" after position 3 — TODO confirm
			String fName = p.getId().substring(3, p.getId().indexOf("_")).toUpperCase();
			if (fName.equalsIgnoreCase("ec")) {
				if (p.getId().contains("h2020")) {
					fName += "_H2020";
				} else {
					fName += "_FP7";
				}
			} else if (fName.equalsIgnoreCase("conicytf")) {
				fName = "CONICYT";
			} else if (fName.equalsIgnoreCase("dfgf")) {
				fName = "DFG";
			} else if (fName.equalsIgnoreCase("tubitakf")) {
				fName = "TUBITAK";
			} else if (fName.equalsIgnoreCase("euenvagency")) {
				fName = "EEA";
			}
			return fName;
		}
	}

	/**
	 * Writes under outputPath/funder the results having at least one project funded by the
	 * given funder; results without projects or funded by other funders are discarded.
	 */
	private static void dumpResults(String funder, Dataset<CommunityResult> results, String outputPath) {
		results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
			if (!Optional.ofNullable(r.getProjects()).isPresent()) {
				return null;
			}
			for (Project p : r.getProjects()) {
				String fName = getFunderName(p);
				if (fName.equalsIgnoreCase(funder)) {
					return r;
				}
			}
			return null;
		}, Encoders.bean(CommunityResult.class))
			.filter(Objects::nonNull)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath + "/" + funder);
	}
}

View File

@ -1,111 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.funderresults;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
 * Selects the results linked to projects. Only for these results the dump will be performed.
 * The code to perform the dump and to expend the dumped results with the information related to projects
 * is the one used for the dump of the community products
 */
public class SparkResultLinkedToProject implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(SparkResultLinkedToProject.class);

	public static void main(String[] args) throws Exception {
		final String jsonConfiguration = IOUtils
			.toString(
				SparkResultLinkedToProject.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/input_parameters_link_prj.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		// a managed spark session is the default unless explicitly disabled
		final Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String resultProjectsPath = parser.get("graphPath");
		log.info("graphPath: {}", resultProjectsPath);

		final String communityMapPath = parser.get("communityMapPath");

		@SuppressWarnings("unchecked")
		final Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);

		runWithSparkSession(
			new SparkConf(),
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				writeResultsLinkedToProjects(
					communityMapPath, spark, inputClazz, inputPath, outputPath, resultProjectsPath);
			});
	}

	/**
	 * Inner-joins the non-deleted, visible results with the result/project associations so that
	 * only results with at least one linked project are dumped; each dumped result carries the
	 * projects of its association record.
	 */
	private static <R extends Result> void writeResultsLinkedToProjects(String communityMapPath, SparkSession spark,
		Class<R> inputClazz,
		String inputPath, String outputPath, String resultProjectsPath) {

		// only results that are neither deleted by inference nor invisible are candidates
		final Dataset<R> candidates = Utils
			.readPath(spark, inputPath, inputClazz)
			.filter(
				(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
					!r.getDataInfo().getInvisible());

		final Dataset<ResultProject> resultProjects = Utils
			.readPath(spark, resultProjectsPath, ResultProject.class);

		final CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);

		candidates
			.joinWith(resultProjects, candidates.col("id").equalTo(resultProjects.col("resultId")))
			.map((MapFunction<Tuple2<R, ResultProject>, CommunityResult>) pair -> {
				final CommunityResult mapped = (CommunityResult) ResultMapper
					.map(pair._1(), communityMap, Constants.DUMPTYPE.FUNDER.getType());
				mapped.setProjects(pair._2().getProjectsList());
				return mapped;
			}, Encoders.bean(CommunityResult.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);
	}
}

View File

@ -1,82 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.Project;
import scala.Tuple2;
/**
 * Dumps only the projects whose identifier is not already present in the given project list and
 * then appends the identifiers of the newly dumped projects to that list, so that subsequent
 * runs skip them.
 */
public class ProjectsSubsetSparkJob implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(ProjectsSubsetSparkJob.class);

	public static void main(String[] args) throws Exception {
		final String jsonConfiguration = IOUtils
			.toString(
				ProjectsSubsetSparkJob.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/project_subset_parameters.json"));

		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
		parser.parseArgument(args);

		// a managed spark session is the default unless explicitly disabled
		final Boolean isSparkSessionManaged = Optional
			.ofNullable(parser.get("isSparkSessionManaged"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		final String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String projectListPath = parser.get("projectListPath");
		log.info("projectListPath: {}", projectListPath);

		runWithSparkSession(
			new SparkConf(),
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				getNewProjectList(spark, inputPath, outputPath, projectListPath);
			});
	}

	/**
	 * Writes to outputPath the projects not listed in projectListPath, then appends the ids of
	 * the projects just written to projectListPath.
	 */
	private static void getNewProjectList(SparkSession spark, String inputPath, String outputPath,
		String projectListPath) {
		final Dataset<String> knownIds = spark.read().textFile(projectListPath);
		final Dataset<Project> projects = Utils.readPath(spark, inputPath, Project.class);

		// left join against the known identifiers: a non-null right side means the project was
		// already dumped before and must be discarded
		projects
			.joinWith(knownIds, projects.col("id").equalTo(knownIds.col("value")), "left")
			.map(
				(MapFunction<Tuple2<Project, String>, Project>) pair -> pair._2() == null ? pair._1() : null,
				Encoders.bean(Project.class))
			.filter(Objects::nonNull)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);

		// remember the identifiers just dumped for the next run
		Utils
			.readPath(spark, outputPath, Project.class)
			.map((MapFunction<Project, String>) Project::getId, Encoders.STRING())
			.write()
			.mode(SaveMode.Append)
			.option("compression", "gzip")
			.text(projectListPath);
	}
}

View File

@ -1,4 +1,4 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="dump_graph_for_eosc" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
@ -92,7 +92,7 @@
<action name="save_community_map">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
<main-class>eu.dnetlib.dhp.oa.graph.dump.eosc.SaveCommunityMap</main-class>
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
@ -156,7 +156,7 @@
<arg>--resultPath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publicationextendedaffiliation</arg>
</spark>
<ok to="extend_publication_with_indicators"/>
<ok to="wait_eosc_dump"/>
<error to="Kill"/>
</action>
<action name="extend_publication_with_indicators">
@ -231,7 +231,7 @@
<arg>--resultPath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/datasetextendedaffiliation</arg>
</spark>
<ok to="extend_dataset_with_indicators"/>
<ok to="wait_eosc_dump"/>
<error to="Kill"/>
</action>
<action name="extend_dataset_with_indicators">
@ -306,7 +306,7 @@
<arg>--resultPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproductextendedaffiliation</arg>
</spark>
<ok to="extend_orp_with_indicators"/>
<ok to="wait_eosc_dump"/>
<error to="Kill"/>
</action>
<action name="extend_orp_with_indicators">
@ -381,7 +381,7 @@
<arg>--resultPath</arg><arg>${workingDir}/dump/software</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/softwareextendedaffiliation</arg>
</spark>
<ok to="extend_software_with_indicators"/>
<ok to="wait_eosc_dump"/>
<error to="Kill"/>
</action>
<action name="extend_software_with_indicators">
@ -409,7 +409,6 @@
<error to="Kill"/>
</action>
<join name="wait_eosc_dump" to="prepareResultProject"/>
<action name="prepareResultProject">
@ -417,7 +416,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkPrepareResultProject</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -448,7 +447,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -460,8 +459,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publicationextended</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/publication</arg>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publicationextendedaffiliation</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publicationextendedproject</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--dumpType</arg><arg>eosc</arg>
</spark>
@ -474,7 +473,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -486,8 +485,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/datasetextended</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/dataset</arg>
<arg>--sourcePath</arg><arg>${workingDir}/dump/datasetextendedaffiliation</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/datasetextendedproject</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--dumpType</arg><arg>eosc</arg>
</spark>
@ -500,7 +499,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -512,8 +511,8 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproductextended</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/otherresearchproduct</arg>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproductextendedaffiliation</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproductextendedproject</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--dumpType</arg><arg>eosc</arg>
</spark>
@ -526,7 +525,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -538,8 +537,9 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/softwareextended</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/software</arg>
<arg>--sourcePath</arg><arg>${workingDir}/dump/softwareextendedaffiliation
</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/softwareextendedproject</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
<arg>--dumpType</arg><arg>eosc</arg>
</spark>
@ -570,15 +570,23 @@
<arg>--outputPath</arg><arg>${workingDir}/dump</arg>
<arg>--removeSet</arg><arg>${removeSet}</arg>
</spark>
<ok to="dump_relation"/>
<ok to="fork_extendWithRelation"/>
<error to="Kill"/>
</action>
<action name="dump_relation">
<fork name="fork_extendWithRelation">
<path start="extend_publication_with_relation"/>
<path start="extend_dataset_with_relation"/>
<path start="extend_orp_with_relation"/>
<path start="extend_software_with_relation"/>
</fork>
<action name="extend_publication_with_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select the set of relations between the results in the selected set</name>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkDumpRelation</class>
<name>Extends the publication adding the relations of which the publication is the source node</name>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
@ -590,12 +598,91 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/relation</arg>
<arg>--relationPath</arg><arg>${workingDir}/dump/relation</arg>
<arg>--resultPath</arg><arg>${workingDir}/dump/publicationextendedproject</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/publication</arg>
</spark>
<ok to="make_archive"/>
<ok to="join_extend_relation"/>
<error to="Kill"/>
</action>
<action name="extend_dataset_with_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extends the dataset adding the relations of which the dataset is the source node</name>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--relationPath</arg><arg>${workingDir}/dump/relation</arg>
<arg>--resultPath</arg><arg>${workingDir}/dump/datasetextendedproject</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/dataset</arg>
</spark>
<ok to="join_extend_relation"/>
<error to="Kill"/>
</action>
<action name="extend_software_with_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extends the software adding the relations of which the software is the source node</name>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--relationPath</arg><arg>${workingDir}/dump/relation</arg>
<arg>--resultPath</arg><arg>${workingDir}/dump/softwareextendedproject</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/software</arg>
</spark>
<ok to="join_extend_relation"/>
<error to="Kill"/>
</action>
<action name="extend_orp_with_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extends the orp adding the relations of which the orp is the source node</name>
<class>eu.dnetlib.dhp.oa.graph.dump.eosc.SparkExtendResultWithRelation</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--relationPath</arg><arg>${workingDir}/dump/relation</arg>
<arg>--resultPath</arg><arg>${workingDir}/dump/otherresearchproductextendedproject</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/otherresearchproduct</arg>
</spark>
<ok to="join_extend_relation"/>
<error to="Kill"/>
</action>
<join name="join_extend_relation" to="make_archive"/>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>

View File

@ -0,0 +1,24 @@
[
{
"paramName":"rp",
"paramLongName":"resultPath",
"paramDescription": "the path where to find the result",
"paramRequired": true
},
{
"paramName": "relP",
"paramLongName": "relationPath",
"paramDescription": "the path where the relations are stored",
"paramRequired": true
},
{
"paramName": "op",
"paramLongName": "outputPath",
"paramDescription": "the path where to store the results",
"paramRequired": true
}
]

View File

@ -6,13 +6,11 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import org.apache.commons.collections.map.HashedMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
@ -25,16 +23,10 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.eosc.model.Subject;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.eosc.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.eosc.SelectEoscResultsJobStep1;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software;
@ -147,744 +139,6 @@ public class DumpJobTest {
System.out.println(new Gson().toJson(map));
}
@Test
/**
 * Dumps a single extended Publication with DUMPTYPE.COMPLETE and verifies every field of the
 * resulting {@code GraphResult} against the fixture
 * resultDump/publication_extendedinstance (authors, language, country, subjects, titles,
 * description, dates, container, identifiers and the single instance).
 */
public void testPublicationDump() {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();
	DumpProducts dump = new DumpProducts();
	dump
		.run(
			false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
			GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
	// read the dumped json back as GraphResult
	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	JavaRDD<GraphResult> tmp = sc
		.textFile(workingDir.toString() + "/result")
		.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
	org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
		.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
	Assertions.assertEquals(1, verificationDataset.count());
	GraphResult gr = verificationDataset.first();
	// ---- authors: six, with rank, optional ORCID pid and provenance ----
	Assertions.assertEquals(6, gr.getAuthor().size());
	Assertions
		.assertTrue(
			gr
				.getAuthor()
				.stream()
				.anyMatch(
					a -> a.getFullname().equals("Nikolaidou,Charitini") &&
						a.getName().equals("Charitini") && a.getSurname().equals("Nikolaidou")
						&& a.getRank() == 1 && a.getPid() == null));
	Assertions
		.assertTrue(
			gr
				.getAuthor()
				.stream()
				.anyMatch(
					a -> a.getFullname().equals("Votsi,Nefta") &&
						a.getName().equals("Nefta") && a.getSurname().equals("Votsi")
						&& a.getRank() == 2 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID)
						&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
						&& a.getPid().getProvenance() != null));
	Assertions
		.assertTrue(
			gr
				.getAuthor()
				.stream()
				.anyMatch(
					a -> a.getFullname().equals("Sgardelis,Steanos") &&
						a.getName().equals("Steanos") && a.getSurname().equals("Sgardelis")
						&& a.getRank() == 3 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING)
						&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
						&& a.getPid().getProvenance() != null));
	Assertions
		.assertTrue(
			gr
				.getAuthor()
				.stream()
				.anyMatch(
					a -> a.getFullname().equals("Halley,John") &&
						a.getName().equals("John") && a.getSurname().equals("Halley")
						&& a.getRank() == 4 && a.getPid() == null));
	Assertions
		.assertTrue(
			gr
				.getAuthor()
				.stream()
				.anyMatch(
					a -> a.getFullname().equals("Pantis,John") &&
						a.getName().equals("John") && a.getSurname().equals("Pantis")
						&& a.getRank() == 5 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID)
						&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
						&& a.getPid().getProvenance() != null));
	Assertions
		.assertTrue(
			gr
				.getAuthor()
				.stream()
				.anyMatch(
					a -> a.getFullname().equals("Tsiafouli,Maria") &&
						a.getName().equals("Maria") && a.getSurname().equals("Tsiafouli")
						&& a.getRank() == 6 && a.getPid().getId().getScheme().equals(ModelConstants.ORCID_PENDING)
						&& a.getPid().getId().getValue().equals("0000-0001-6651-1178")
						&& a.getPid().getProvenance() != null));
	// ---- classification, language, country, subjects ----
	Assertions.assertEquals("publication", gr.getType());
	Assertions.assertEquals("eng", gr.getLanguage().getCode());
	Assertions.assertEquals("English", gr.getLanguage().getLabel());
	Assertions.assertEquals(1, gr.getCountry().size());
	Assertions.assertEquals("IT", gr.getCountry().get(0).getCode());
	Assertions.assertEquals("Italy", gr.getCountry().get(0).getLabel());
	Assertions.assertTrue(gr.getCountry().get(0).getProvenance() == null);
	Assertions.assertEquals(12, gr.getSubjects().size());
	Assertions
		.assertTrue(
			gr
				.getSubjects()
				.stream()
				.anyMatch(
					s -> s.getSubject().getValue().equals("Ecosystem Services hotspots")
						&& s.getSubject().getScheme().equals("ACM") && s.getProvenance() != null &&
						s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository")));
	Assertions
		.assertTrue(
			gr
				.getSubjects()
				.stream()
				.anyMatch(
					s -> s.getSubject().getValue().equals("Natura 2000")
						&& s.getSubject().getScheme().equals("") && s.getProvenance() != null &&
						s.getProvenance().getProvenance().equals("sysimport:crosswalk:repository")));
	// ---- descriptive metadata ----
	Assertions
		.assertEquals(
			"Ecosystem Service capacity is higher in areas of multiple designation types",
			gr.getMaintitle());
	Assertions.assertEquals(null, gr.getSubtitle());
	Assertions.assertEquals(1, gr.getDescription().size());
	Assertions
		.assertTrue(
			gr
				.getDescription()
				.get(0)
				.startsWith("The implementation of the Ecosystem Service (ES) concept into practice"));
	Assertions
		.assertTrue(
			gr
				.getDescription()
				.get(0)
				.endsWith(
					"start complying with new standards and demands for nature conservation and environmental management."));
	Assertions.assertEquals("2017-01-01", gr.getPublicationdate());
	Assertions.assertEquals("Pensoft Publishers", gr.getPublisher());
	Assertions.assertEquals(null, gr.getEmbargoenddate());
	Assertions.assertEquals(1, gr.getSource().size());
	Assertions.assertEquals("One Ecosystem 2: e13718", gr.getSource().get(0));
	Assertions.assertEquals(1, gr.getFormat().size());
	Assertions.assertEquals("text/html", gr.getFormat().get(0));
	Assertions.assertEquals(0, gr.getContributor().size());
	Assertions.assertEquals(0, gr.getCoverage().size());
	// best access right is mapped through the COAR vocabulary
	Assertions.assertEquals(ModelConstants.ACCESS_RIGHT_OPEN, gr.getBestaccessright().getLabel());
	Assertions
		.assertEquals(
			Constants.ACCESS_RIGHTS_COAR_MAP.get(ModelConstants.ACCESS_RIGHT_OPEN),
			gr.getBestaccessright().getCode());
	Assertions.assertEquals("One Ecosystem", gr.getContainer().getName());
	Assertions.assertEquals("2367-8194", gr.getContainer().getIssnOnline());
	Assertions.assertEquals("", gr.getContainer().getIssnPrinted());
	Assertions.assertEquals("", gr.getContainer().getIssnLinking());
	// publication-type results must not carry software/ORP/dataset-only fields
	Assertions.assertTrue(null == gr.getDocumentationUrl() || gr.getDocumentationUrl().size() == 0);
	Assertions.assertTrue(null == gr.getCodeRepositoryUrl());
	Assertions.assertEquals(null, gr.getProgrammingLanguage());
	Assertions.assertTrue(null == gr.getContactperson() || gr.getContactperson().size() == 0);
	Assertions.assertTrue(null == gr.getContactgroup() || gr.getContactgroup().size() == 0);
	Assertions.assertTrue(null == gr.getTool() || gr.getTool().size() == 0);
	Assertions.assertEquals(null, gr.getSize());
	Assertions.assertEquals(null, gr.getVersion());
	Assertions.assertTrue(null == gr.getGeolocation() || gr.getGeolocation().size() == 0);
	// ---- identifiers ----
	Assertions.assertEquals("50|pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2", gr.getId());
	Assertions.assertEquals(1, gr.getOriginalId().size());
	Assertions
		.assertTrue(
			gr.getOriginalId().contains("10.3897/oneeco.2.e13718"));
	Assertions.assertEquals(1, gr.getPid().size());
	Assertions
		.assertTrue(
			gr.getPid().get(0).getScheme().equals("doi")
				&& gr.getPid().get(0).getValue().equals("10.1016/j.triboint.2014.05.004"));
	Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateofcollection());
	// ---- instance-level checks ----
	Assertions.assertEquals(1, gr.getInstance().size());
	Instance instance = gr.getInstance().get(0);
	Assertions.assertEquals(0, instance.getPid().size());
	Assertions.assertEquals(1, instance.getAlternateIdentifier().size());
	Assertions
		.assertTrue(
			instance.getAlternateIdentifier().get(0).getScheme().equals("doi")
				&& instance.getAlternateIdentifier().get(0).getValue().equals("10.3897/oneeco.2.e13718"));
	Assertions.assertEquals(null, instance.getLicense());
	Assertions
		.assertTrue(
			instance
				.getAccessright()
				.getCode()
				.equals(
					Constants.ACCESS_RIGHTS_COAR_MAP
						.get(ModelConstants.ACCESS_RIGHT_OPEN)));
	Assertions.assertTrue(instance.getAccessright().getLabel().equals(ModelConstants.ACCESS_RIGHT_OPEN));
	Assertions.assertTrue(instance.getAccessright().getOpenAccessRoute().equals(OpenAccessRoute.green));
	Assertions.assertTrue(instance.getType().equals("Article"));
	Assertions.assertEquals(2, instance.getUrl().size());
	Assertions
		.assertTrue(
			instance.getUrl().contains("https://doi.org/10.3897/oneeco.2.e13718")
				&& instance.getUrl().contains("https://oneecosystem.pensoft.net/article/13718/"));
	Assertions.assertEquals("2017-01-01", instance.getPublicationdate());
	Assertions.assertEquals(null, instance.getArticleprocessingcharge());
	Assertions.assertEquals("peerReviewed", instance.getRefereed());
}
@Test
/**
 * Dumps a single extended Dataset with DUMPTYPE.COMPLETE and verifies the dataset-specific
 * fields (geolocation, size, version) while the publication-only fields are expected null.
 * Fixture: resultDump/dataset_extendedinstance.
 */
public void testDatasetDump() {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();
	DumpProducts dump = new DumpProducts();
	dump
		.run(
			false, sourcePath, workingDir.toString() + "/result",
			communityMapPath, Dataset.class,
			GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	JavaRDD<GraphResult> tmp = sc
		.textFile(workingDir.toString() + "/result")
		.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
	org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
		.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
	Assertions.assertEquals(1, verificationDataset.count());
	Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count());
	// the common fields in the result have been already checked. Now checking only
	// dataset specific fields
	GraphResult gr = verificationDataset.first();
	// two geolocations: one with a place only, one with a point only, both with empty box
	Assertions.assertEquals(2, gr.getGeolocation().size());
	Assertions.assertEquals(2, gr.getGeolocation().stream().filter(gl -> gl.getBox().equals("")).count());
	Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("")).count());
	Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("")).count());
	Assertions
		.assertEquals(
			1,
			gr
				.getGeolocation()
				.stream()
				.filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada"))
				.count());
	Assertions
		.assertEquals(
			1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count());
	Assertions
		.assertEquals(
			1,
			gr
				.getGeolocation()
				.stream()
				.filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals(""))
				.count());
	Assertions
		.assertEquals(
			1,
			gr
				.getGeolocation()
				.stream()
				.filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals(""))
				.count());
	Assertions.assertEquals("1024Gb", gr.getSize());
	Assertions.assertEquals("1.01", gr.getVersion());
	// fields belonging to other result types must be null for a dataset
	Assertions.assertEquals(null, gr.getContainer());
	Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
	Assertions.assertEquals(null, gr.getProgrammingLanguage());
	Assertions.assertEquals(null, gr.getDocumentationUrl());
	Assertions.assertEquals(null, gr.getContactperson());
	Assertions.assertEquals(null, gr.getContactgroup());
	Assertions.assertEquals(null, gr.getTool());
}
@Test
/**
 * Dumps a single extended Software with DUMPTYPE.COMPLETE and verifies the software-specific
 * fields (documentation urls, code repository, programming language); all other type-specific
 * fields must be null. Fixture: resultDump/software_extendedinstance.
 */
public void testSoftwareDump() {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();
	DumpProducts dump = new DumpProducts();
	dump
		.run(
			false, sourcePath, workingDir.toString() + "/result",
			communityMapPath, Software.class,
			GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	JavaRDD<GraphResult> tmp = sc
		.textFile(workingDir.toString() + "/result")
		.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
	org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
		.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
	Assertions.assertEquals(1, verificationDataset.count());
	Assertions.assertEquals(1, verificationDataset.filter("type = 'software'").count());
	GraphResult gr = verificationDataset.first();
	// software specific fields
	Assertions.assertEquals(2, gr.getDocumentationUrl().size());
	Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_1"));
	Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_2"));
	Assertions.assertEquals("code_repo", gr.getCodeRepositoryUrl());
	Assertions.assertEquals("perl", gr.getProgrammingLanguage());
	// fields belonging to other result types must be null for software
	Assertions.assertEquals(null, gr.getContainer());
	Assertions.assertEquals(null, gr.getContactperson());
	Assertions.assertEquals(null, gr.getContactgroup());
	Assertions.assertEquals(null, gr.getTool());
	Assertions.assertEquals(null, gr.getGeolocation());
	Assertions.assertEquals(null, gr.getSize());
	Assertions.assertEquals(null, gr.getVersion());
}
@Test
/**
 * Dumps a single extended OtherResearchProduct with DUMPTYPE.COMPLETE and verifies the
 * ORP-specific fields (contact persons/groups, tools); all other type-specific fields must
 * be null. Fixture: resultDump/orp_extendedinstance.
 */
public void testOrpDump() {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();
	DumpProducts dump = new DumpProducts();
	dump
		.run(
			false, sourcePath, workingDir.toString() + "/result",
			communityMapPath, OtherResearchProduct.class,
			GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	JavaRDD<GraphResult> tmp = sc
		.textFile(workingDir.toString() + "/result")
		.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
	org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
		.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
	Assertions.assertEquals(1, verificationDataset.count());
	Assertions.assertEquals(1, verificationDataset.filter("type = 'other'").count());
	GraphResult gr = verificationDataset.first();
	// ORP specific fields
	Assertions.assertEquals(2, gr.getContactperson().size());
	Assertions.assertTrue(gr.getContactperson().contains(("contact_person1")));
	Assertions.assertTrue(gr.getContactperson().contains(("contact_person2")));
	Assertions.assertEquals(1, gr.getContactgroup().size());
	Assertions.assertTrue(gr.getContactgroup().contains(("contact_group")));
	Assertions.assertEquals(2, gr.getTool().size());
	Assertions.assertTrue(gr.getTool().contains("tool1"));
	Assertions.assertTrue(gr.getTool().contains("tool2"));
	// fields belonging to other result types must be null for an ORP
	Assertions.assertEquals(null, gr.getContainer());
	Assertions.assertEquals(null, gr.getDocumentationUrl());
	Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
	Assertions.assertEquals(null, gr.getProgrammingLanguage());
	Assertions.assertEquals(null, gr.getGeolocation());
	Assertions.assertEquals(null, gr.getSize());
	Assertions.assertEquals(null, gr.getVersion());
}
@Test
/**
 * Dumps the same extended Publication fixture with DUMPTYPE.COMMUNITY and verifies the
 * community-specific fields of {@code CommunityResult}: context (code/label/provenance),
 * collectedfrom and the instance's collectedfrom/hostedby key-value pairs.
 */
public void testPublicationDumpCommunity() throws JsonProcessingException {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();
	DumpProducts dump = new DumpProducts();
	dump
		.run(
			false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
			CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	JavaRDD<CommunityResult> tmp = sc
		.textFile(workingDir.toString() + "/result")
		.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
	org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
		.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
	Assertions.assertEquals(1, verificationDataset.count());
	Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count());
	// the common fields in the result have been already checked. Now checking only
	// community specific fields
	CommunityResult cr = verificationDataset.first();
	Assertions.assertEquals(1, cr.getContext().size());
	Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode());
	Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel());
	Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size());
	Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance());
	Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust());
	Assertions.assertEquals(1, cr.getCollectedfrom().size());
	Assertions
		.assertEquals("10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey());
	Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue());
	Assertions.assertEquals(1, cr.getInstance().size());
	Assertions
		.assertEquals(
			"10|openaire____::fdc7e0400d8c1634cdaf8051dbae23db",
			cr.getInstance().get(0).getCollectedfrom().getKey());
	Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue());
	Assertions
		.assertEquals(
			"10|openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey());
	Assertions.assertEquals("One Ecosystem", cr.getInstance().get(0).getHostedby().getValue());
}
@Test
/**
 * Dumps 90 datasets with DUMPTYPE.COMMUNITY and checks that each COAR best-access-right code
 * is paired with its expected label, that every record carries a context, and that all
 * records are of type 'dataset'. Fixture: resultDump/dataset.json.
 */
public void testDataset() {
	final String sourcePath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
		.getPath();
	final String communityMapPath = getClass()
		.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
		.getPath();
	DumpProducts dump = new DumpProducts();
	dump
		.run(
			false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
			CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
	final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
	JavaRDD<CommunityResult> tmp = sc
		.textFile(workingDir.toString() + "/result")
		.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
	org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
		.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
	Assertions.assertEquals(90, verificationDataset.count());
	// each COAR access-right code must always come with its matching label
	Assertions
		.assertTrue(
			verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset
				.filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'")
				.count());
	Assertions
		.assertTrue(
			verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset
				.filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'")
				.count());
	Assertions
		.assertTrue(
			verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset
				.filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'")
				.count());
	Assertions
		.assertTrue(
			verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset
				.filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'")
				.count());
	Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
	Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
}
@Test
public void testDataset2All() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(5, verificationDataset.count());
}
@Test
public void testDataset2Communities() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Dataset.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(0, verificationDataset.count());
}
@Test
public void testPublication() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(74, verificationDataset.count());
verificationDataset.show(false);
Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count());
}
@Test
public void testSoftware() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Software.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(6, verificationDataset.count());
Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
}
@Test
public void testORP() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
false, sourcePath, workingDir.toString() + "/result", communityMapPath, OtherResearchProduct.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(3, verificationDataset.count());
Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
}
@Test
public void testRecord() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
CommunityResult.class, Constants.DUMPTYPE.COMMUNITY.getType());
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(2, verificationDataset.count());
verificationDataset.show(false);
Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count());
}
@Test
public void testEOSCDump() throws Exception {
final String sourcePath = getClass()
@ -910,12 +164,12 @@ public class DumpJobTest {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<EoscResult> tmp = sc
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/working")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
org.apache.spark.sql.Dataset<EoscResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(EoscResult.class));
org.apache.spark.sql.Dataset<Result> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Result.class));
Assertions.assertEquals(1, verificationDataset.count());
@ -960,12 +214,11 @@ public class DumpJobTest {
workingDir.toString() + "/orp", getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath(),
OtherResearchProduct.class,
EoscResult.class, Constants.DUMPTYPE.EOSC.getType());
OtherResearchProduct.class);
JavaRDD<EoscResult> tmp = sc
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/orp")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
System.out.println(OBJECT_MAPPER.writeValueAsString(tmp.first()));
@ -977,12 +230,11 @@ public class DumpJobTest {
workingDir.toString() + "/soft", getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath(),
Software.class,
EoscResult.class, Constants.DUMPTYPE.EOSC.getType());
Software.class);
JavaRDD<EoscResult> tmp2 = sc
JavaRDD<Result> tmp2 = sc
.textFile(workingDir.toString() + "/soft")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
System.out.println(OBJECT_MAPPER.writeValueAsString(tmp2.first()));
@ -994,12 +246,11 @@ public class DumpJobTest {
workingDir.toString() + "/soft2", getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath(),
Software.class,
EoscResult.class, Constants.DUMPTYPE.EOSC.getType());
Software.class);
JavaRDD<EoscResult> tmp3 = sc
JavaRDD<Result> tmp3 = sc
.textFile(workingDir.toString() + "/soft2")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
System.out.println(OBJECT_MAPPER.writeValueAsString(tmp3.first()));
@ -1011,101 +262,38 @@ public class DumpJobTest {
workingDir.toString() + "/orp2", getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath(),
OtherResearchProduct.class,
EoscResult.class, Constants.DUMPTYPE.EOSC.getType());
OtherResearchProduct.class);
JavaRDD<EoscResult> tmp4 = sc
JavaRDD<Result> tmp4 = sc
.textFile(workingDir.toString() + "/orp2")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
System.out.println(OBJECT_MAPPER.writeValueAsString(tmp4.first()));
}
@Test
public void testArticlePCA() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
.getPath();
public void testEOSCDumpIndicators() throws Exception {
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
DumpProducts dump = new DumpProducts();
dump
.run(
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
false, getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/eosc/input/indicators/publication.json")
.getPath(),
workingDir.toString() + "/publication", getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath(),
Publication.class);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/publication")
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
System.out.println(OBJECT_MAPPER.writeValueAsString(tmp.first()));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(23, verificationDataset.count());
Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count());
verificationDataset.createOrReplaceTempView("check");
org.apache.spark.sql.Dataset<Row> temp = spark
.sql(
"select id " +
"from check " +
"lateral view explode (instance) i as inst " +
"where inst.articleprocessingcharge is not null");
Assertions.assertTrue(temp.count() == 2);
Assertions.assertTrue(temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").count() == 1);
Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1);
temp = spark
.sql(
"select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " +
"from check " +
"lateral view explode (instance) i as inst " +
"where inst.articleprocessingcharge is not null");
Assertions
.assertEquals(
"3131.64",
temp
.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'")
.collectAsList()
.get(0)
.getString(1));
Assertions
.assertEquals(
"EUR",
temp
.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'")
.collectAsList()
.get(0)
.getString(2));
Assertions
.assertEquals(
"2578.35",
temp
.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
.collectAsList()
.get(0)
.getString(1));
Assertions
.assertEquals(
"EUR",
temp
.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
.collectAsList()
.get(0)
.getString(2));
}
@Test

View File

@ -6,7 +6,7 @@ import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.JsonNode;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.eosc.model.Result;
//@Disabled
class GenerateJsonSchema {
@ -20,7 +20,7 @@ class GenerateJsonSchema {
configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(GraphResult.class);
JsonNode jsonSchema = generator.generateSchema(Result.class);
System.out.println(jsonSchema.toString());
}

View File

@ -23,8 +23,8 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.community.ResultProject;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject;
import eu.dnetlib.dhp.oa.graph.dump.eosc.ResultProject;
import eu.dnetlib.dhp.oa.graph.dump.eosc.SparkPrepareResultProject;
public class PrepareResultProjectJobTest {

View File

@ -16,6 +16,7 @@ import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.eosc.QueryInformationSystem;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

View File

@ -1,143 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunitySplit;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
public class SplitForCommunityTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(DumpJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(SplitForCommunityTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(SplitForCommunityTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(SplitForCommunityTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
void testCommunitySplit() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
CommunitySplit split = new CommunitySplit();
split.run(false, sourcePath, workingDir.toString() + "/split", communityMapPath);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/split/Digital_Humanities_and_Cultural_Heritage")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(19, verificationDataset.count());
Assertions
.assertEquals(
1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
tmp = sc
.textFile(workingDir.toString() + "/split/EGI_Federation")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions
.assertEquals(
1, verificationDataset.filter("id = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count());
tmp = sc
.textFile(workingDir.toString() + "/split/Neuroinformatics")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(5, verificationDataset.count());
Assertions
.assertEquals(
1, verificationDataset.filter("id = '50|datacite____::6b1e3a2fa60ed8c27317a66d6357f795'").count());
tmp = sc
.textFile(workingDir.toString() + "/split/Science_and_Innovation_Policy_Studies")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(4, verificationDataset.count());
Assertions
.assertEquals(
1, verificationDataset.filter("id = '50|dedup_wf_001::0347b1cd516fc59e41ba92e0d74e4e9f'").count());
Assertions
.assertEquals(
1, verificationDataset.filter("id = '50|dedup_wf_001::1432beb6171baa5da8a85a7f99545d69'").count());
Assertions
.assertEquals(
1, verificationDataset.filter("id = '50|dedup_wf_001::1c8bd19e633976e314b88ce5c3f92d69'").count());
Assertions
.assertEquals(
1, verificationDataset.filter("id = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count());
}
}

View File

@ -24,9 +24,9 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.community.Project;
import eu.dnetlib.dhp.eosc.model.Project;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.oa.graph.dump.eosc.SparkUpdateProjectInfo;
public class UpdateProjectInfoTest {
@ -85,12 +85,12 @@ public class UpdateProjectInfoTest {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
org.apache.spark.sql.Dataset<Result> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Result.class));
verificationDataset.show(false);
@ -152,12 +152,12 @@ public class UpdateProjectInfoTest {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
org.apache.spark.sql.Dataset<Result> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Result.class));
verificationDataset.show(false);
@ -196,7 +196,7 @@ public class UpdateProjectInfoTest {
Project project = verificationDataset
.map(
(MapFunction<CommunityResult, Project>) cr -> cr
(MapFunction<Result, Project>) cr -> cr
.getProjects()
.stream()
.filter(p -> p.getValidated() != null)
@ -213,7 +213,7 @@ public class UpdateProjectInfoTest {
project = verificationDataset
.map(
(MapFunction<CommunityResult, Project>) cr -> cr
(MapFunction<Result, Project>) cr -> cr
.getProjects()
.stream()
.filter(p -> p.getValidated() == null)

View File

@ -14,7 +14,8 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.eosc.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.eosc.Utils;
@Disabled
public class ZenodoUploadTest {

View File

@ -1,173 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static org.mockito.Mockito.lenient;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Consumer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
public class CreateEntityTest {
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
List<String> communityMap = Arrays
.asList(
"clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri",
"ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. "
+
"SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - "
+
"Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community",
"dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community",
"fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community",
"ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community",
"mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community",
"instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community",
"elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri",
"aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community",
"dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri",
"epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri",
"covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community");
@Mock
private ISLookUpService isLookUpService;
private QueryInformationSystem queryInformationSystem;
private static String workingDir;
@BeforeEach
public void setUp() throws ISLookUpException {
lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap);
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(isLookUpService);
}
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(CreateEntityTest.class.getSimpleName())
.toString();
}
@Test
void test1() throws ISLookUpException, IOException {
	// Collect the context profiles returned by the (mocked) information system.
	final List<ContextInfo> cInfoList = new ArrayList<>();
	final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
	queryInformationSystem.getContextInformation(consumer);

	// Map every context profile to its dump entity.
	final List<ResearchInitiative> riList = new ArrayList<>();
	cInfoList.forEach(cInfo -> riList.add(Process.getEntity(cInfo)));

	Assertions.assertEquals(12, riList.size());

	riList.forEach(c -> {
		// The entity id is deterministic: contextId|nsPrefix::md5(acronym).
		final String expectedId = String
			.format(
				"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
				DHPUtils.md5(c.getAcronym()));
		switch (c.getAcronym()) {
			case "mes":
				// "mes" is dumped as a research community carrying five subjects.
				// assertEquals over assertTrue(equals): failures report expected vs actual.
				Assertions
					.assertEquals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY, c.getType());
				final List<String> subjects = ((ResearchCommunity) c).getSubject();
				Assertions.assertEquals(5, subjects.size());
				Assertions.assertTrue(subjects.contains("marine"));
				Assertions.assertTrue(subjects.contains("ocean"));
				Assertions.assertTrue(subjects.contains("fish"));
				Assertions.assertTrue(subjects.contains("aqua"));
				Assertions.assertTrue(subjects.contains("sea"));
				Assertions.assertEquals(expectedId, c.getId());
				Assertions.assertEquals("https://zenodo.org/communities/oac_mes", c.getZenodo_community());
				Assertions.assertEquals("mes", c.getAcronym());
				break;
			case "clarin":
				// "clarin" is dumped as a research infrastructure.
				Assertions
					.assertEquals(
						eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_INFRASTRUCTURE, c.getType());
				Assertions.assertEquals(expectedId, c.getId());
				Assertions.assertEquals("https://zenodo.org/communities/oac_clarin", c.getZenodo_community());
				Assertions.assertEquals("clarin", c.getAcronym());
				break;
		}
		// TODO add check for all the others Entities
	});

	riList.forEach(c -> System.out.println(new Gson().toJson(c)));
}
@Test
@Disabled
void test2() throws IOException, ISLookUpException {
	// Writes the gzip-compressed dump of the context entities to the local file system.
	final LocalFileSystem fs = FileSystem.getLocal(new Configuration());
	final Path hdfsWritePath = new Path(workingDir + "/prova");

	// Append when the target already exists, otherwise create it.
	final FSDataOutputStream fsDataOutputStream = fs.exists(hdfsWritePath)
		? fs.append(hdfsWritePath)
		: fs.create(hdfsWritePath);

	final CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
	final CompressionCodec codec = factory.getCodecByClassName("org.apache.hadoop.io.compress.GzipCodec");

	final List<ContextInfo> cInfoList = new ArrayList<>();
	final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
	queryInformationSystem.getContextInformation(consumer);

	// try-with-resources: the original leaked the writer (and the wrapped output stream)
	// whenever an exception was thrown before writer.close() was reached.
	try (BufferedWriter writer = new BufferedWriter(
		new OutputStreamWriter(codec.createOutputStream(fsDataOutputStream), StandardCharsets.UTF_8))) {
		for (ContextInfo cInfo : cInfoList) {
			writer.write(new Gson().toJson(Process.getEntity(cInfo)));
		}
	}
}
}

View File

@ -1,723 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.utils.DHPUtils;
class CreateRelationTest {
List<String> communityContext = Arrays
.asList(
"<context id=\"clarin\" label=\"CLARIN\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">CLARIN</param>\n" +
" <param name=\"logourl\">https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg</param>\n"
+
" <param name=\"name\">Common Language Resources and Technology Infrastructure</param>\n" +
" <param name=\"manager\">maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">(Part of) the work reported here was made possible by using the CLARIN infrastructure.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding through &lt;CLARIN national consortium member, e.g. CLARIN.SI&gt;, &lt;XYZ&gt; project, grant no. &lt;XYZ&gt;.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding (through CLARIN ERIC) from the European Unions Horizon 2020 research and innovation programme under grant agreement No &lt;0-9&gt; for project &lt;XYZ&gt;.\n"
+
" (E.g. No 676529 for project CLARIN-PLUS.)</param>\n" +
" <param name=\"zenodoCommunity\">oac_clarin</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"true\" id=\"clarin::projects\" label=\"CLARIN Projects\">\n" +
" <concept claim=\"false\" id=\"clarin::projects::1\" label=\"CLARIN-PLUS\">\n" +
" <param name=\"projectfullname\">CLARIN-PLUS</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">676529</param>\n" +
" <param name=\"url\">http://www.clarin.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2015-1</param>\n" +
" <param name=\"acronym\">CLARIN+</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"clarin::projects::2\" label=\"CLARIN\">\n" +
" <param name=\"projectfullname\">Common Language Resources and Technology Infrastructure</param>\n"
+
" <param name=\"acronym\">CLARIN</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">212230</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda_______::ef782b2d85676aa3e5a907427feb18c4</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::contentproviders\" label=\"CLARIN Content providers\">" +
"<!--<concept claim=\"true\" id=\"clarin::contentproviders::1\" label=\"Zotero\">\n" +
" <param name=\"openaireId\">opendoar____::d96409bf894217686ba124d7356686c9</param>\n"
+
" <param name=\"name\">Public Knowledge Project EPrint Archive</param>\n" +
" <param name=\"officialname\">Public Knowledge Project EPrint Archive</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept> -->\n" +
" <concept claim=\"false\" id=\"clarin::contentproviders::2\" label=\"\">\n" +
" <param name=\"name\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"officialname\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"true\" id=\"clarin::subcommunity\" label=\"CLARIN communities\">\n" +
" <concept claim=\"true\" id=\"clarin::subcommunity::1\" label=\"CLARIN-D\">\n" +
" <param name=\"fullname\">CLARIN-D</param>\n" +
" <param name=\"homepageUrl\">https://www.clarin-d.de/en/</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"example\">http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf</param>\n"
+
" <param name=\"nation\">Germany</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::zenodocommunities\" label=\"CLARIN Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"clarin::organizations\" label=\"CLARIN Organizations\"/>\n" +
"</context>",
"<context id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.</param>\n"
+
" <param name=\"logourl\">http://sanmamante.org/DH_CH_logo.png</param>\n" +
" <param name=\"name\">Digital Humanities and Cultural Heritage</param>\n" +
" <param name=\"manager\">ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk</param>\n"
+
" <param name=\"subject\">modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"zenodoCommunity\">oac_dh-ch</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"dh-ch::projects\" label=\"DH-CH Projects\">\n" +
" <concept claim=\"false\" id=\"dh-ch::projects::1\" label=\"Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\">\n"
+
" <param name=\"projectfullname\">Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654119</param>\n" +
" <param name=\"url\">http://www.parthenos-project.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PARTHENOS</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"dh-ch::contentproviders\" label=\"DH-CH Content providers\">\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::2\" label=\"The UK's largest collection of digital research data in the social sciences and humanities\">\n"
+
" <param name=\"openaireId\">re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</param>\n" +
" <param name=\"name\">The UK's largest collection of digital research data in the social sciences and humanities</param>\n"
+
" <param name=\"officialname\">UK Data Archive</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::3\" label=\"Journal of Data Mining and Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::c6cd4b532e12868c1d760a8d7cda6815</param>\n" +
" <param name=\"name\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"officialname\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::6\" label=\"Frontiers in Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</param>\n" +
" <param name=\"name\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"officialname\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::7\" label=\"Il Capitale Culturale: Studies on the Value of Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</param>\n" +
" <param name=\"name\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"officialname\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::8\" label=\"Conservation Science in Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::0da84e9dfdc8419576169e027baa8028</param>\n" +
" <param name=\"name\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"officialname\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::9\" label=\"Electronic Archiving System\">\n"
+
" <param name=\"openaireId\">re3data_____::84e123776089ce3c7a33db98d9cd15a8</param>\n" +
" <param name=\"name\">Electronic Archiving System</param>\n" +
" <param name=\"officialname\">EASY</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::10\" label=\"DANS-KB Harvester\">\n" +
" <param name=\"openaireId\">openaire____::c5502a43e76feab55dd00cf50f519125</param>\n" +
" <param name=\"name\">DANS-KB Harvester</param>\n" +
" <param name=\"officialname\">Gemeenschappelijke Harvester DANS-KB</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::11\" label=\"ads\">\n" +
" <param name=\"openaireId\">re3data_____::a48f09c562b247a9919acfe195549b47</param>\n" +
" <param name=\"name\">ads</param>\n" +
" <param name=\"officialname\">Archaeology Data Service</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::12\" label=\"\">\n" +
" <param name=\"openaireId\">opendoar____::97275a23ca44226c9964043c8462be96</param>\n" +
" <param name=\"name\">KNAW Repository</param>\n" +
" <param name=\"officialname\">KNAW Repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::13\" label=\"Internet Archaeology\">\n"
+
" <param name=\"openaireId\">doajarticles::2899208a99aa7d142646e0a80bfeef05</param>\n" +
" <param name=\"name\">Internet Archaeology</param>\n" +
" <param name=\"officialname\">Internet Archaeology</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"ni\" label=\"Neuroinformatics\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.</param>\n"
+
" <param name=\"logourl\">https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png</param>\n"
+
" <param name=\"name\">Neuroinformatics</param>\n" +
" <param name=\"manager\">sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\">brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities</param>\n"
+
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oac_ni</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"ni::projects\" label=\"NI Content providers\"/>\n" +
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
" <param name=\"name\">Formerly:OpenFMRI</param>\n" +
" <param name=\"officialname\">OpenNeuro</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::2\" label=\"RIO\">\n" +
" <param name=\"openaireId\">doajarticles::c7d3de67dc77af72f6747157441252ec</param>\n" +
" <param name=\"name\">Research Ideas and Outcomes</param>\n" +
" <param name=\"officialname\">Research Ideas and Outcomes</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::3\" label=\"NITRC\">\n" +
" <param name=\"openaireId\">re3data_____::8515794670370f49c1d176c399c714f5</param>\n" +
" <param name=\"name\">Neuroimaging Informatics Tools and Resources Clearinghouse</param>\n"
+
" <param name=\"officialname\">NITRC</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::4\" label=\"FRONTIERSNI\">\n" +
" <param name=\"openaireId\">doajarticles::d640648c84b10d425f96f11c3de468f3</param>\n" +
" <param name=\"name\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"officialname\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::5\" label=\"NeuroImage: Clinical\">\n" +
" <param name=\"openaireId\">doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</param>\n" +
" <param name=\"name\">NeuroImage: Clinical</param>\n" +
" <param name=\"officialname\">NeuroImage: Clinical</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::6\" label=\"NeuroVault\">\n" +
" <param name=\"openaireId\">rest________::fb1a3d4523c95e63496e3bc7ba36244b</param>\n" +
" <param name=\"name\">NeuroVault</param>\n" +
" <param name=\"officialname\">NeuroVault</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"instruct\" label=\"Instruct-ERIC\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">Instruct-ERIC is the European Research Infrastructure for Structural Biology</param>\n"
+
" <param name=\"logourl\">https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png</param>\n"
+
" <param name=\"name\">Instruct-ERIC</param>\n" +
" <param name=\"manager\">claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct-ERIC.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project</param>\n"
+
" <param name=\"zenodoCommunity\">oac_instruct</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"instruct::projects\" label=\"Instruct-ERIC Projects\">\n" +
" <concept claim=\"false\" id=\"instruct::projects::1\" label=\"Authentication and Authorisation For Research and Collaboration\">\n"
+
" <param name=\"projectfullname\">Authentication and Authorisation For Research and Collaboration</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730941</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-EINFRA-2016-1</param>\n" +
" <param name=\"acronym\">AARC2</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::2\" label=\"Building data bridges between biological and medical infrastructures in Europe\">\n"
+
" <param name=\"projectfullname\">Building data bridges between biological and medical infrastructures in Europe</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">284209</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioMedBridges</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::3\" label=\"Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\">\n"
+
" <param name=\"projectfullname\">Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">283570</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioStruct-X</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::4\" label=\"Coordinated Research Infrastructures Building Enduring Life-science services\">\n"
+
" <param name=\"projectfullname\">Coordinated Research Infrastructures Building Enduring Life-science services</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654248</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2014-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">CORBEL</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::5\" label=\"Infrastructure for NMR, EM and X-rays for translational research\">\n"
+
" <param name=\"projectfullname\">Infrastructure for NMR, EM and X-rays for translational research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">653706</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2014-2015</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">iNEXT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::6\" label=\"Integrated Structural Biology Infrastructure\">\n"
+
" <param name=\"projectfullname\">Integrated Structural Biology Infrastructure</param>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">211252</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2007-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::7\" label=\"Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\">\n"
+
" <param name=\"projectfullname\">Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">731005</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT-ULTRA</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::8\" label=\"Opening Synchrotron Light for Experimental Science and Applications in the Middle East\">\n"
+
" <param name=\"projectfullname\">Opening Synchrotron Light for Experimental Science and Applications in the Middle East</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730943</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRASUPP-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">OPEN SESAME</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::9\" label=\"Infrastructure for Protein Production Platforms\">\n"
+
" <param name=\"projectfullname\">Infrastructure for Protein Production Platforms</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">227764</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2008-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PCUBE</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::10\" label=\"European Vaccine Research and Development Infrastructure\">\n"
+
" <param name=\"projectfullname\">European Vaccine Research and Development Infrastructure</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730964</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">TRAMSVAC2</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::11\" label=\"World-wide E-infrastructure for structural biology\">\n"
+
" <param name=\"projectfullname\">World-wide E-infrastructure for structural biology</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">EC | H2020 | RIA</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">West-Life</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::12\" label=\"RI-VIS\">\n" +
" <param name=\"projectfullname\">Expanding research infrastructure visibility to strengthen strategic partnerships</param>\n"
+
" <param name=\"acronym\">RI-VIS</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">824063</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda__h2020::af93b591b76991d8437993a8f6fc6538</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::contentproviders\" label=\"Instruct-ERIC Content providers\"/>\n"
+
" <category claim=\"false\" id=\"instruct::zenodocommunities\" label=\"Instruct-ERIC Zenodo Communities\">\n"
+
" <concept claim=\"false\" id=\"instruct::zenodocommunities::1\" label=\"Instruct\">\n" +
" <param name=\"zenodoid\">instruct</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::zenodocommunities::2\" label=\"West-Life Virtual Research Environment for Structural Biology\">\n"
+
" <param name=\"zenodoid\">west-life</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::organizations\" label=\"Instruct-ERIC Organizations\">\n"
+
" <concept claim=\"false\" id=\"instruct::organizations::1\" label=\"FRISBI\">\n" +
" <param name=\"name\">FRISBI</param>\n" +
" <param name=\"logourl\">aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==</param>\n"
+
" <param name=\"websiteurl\">aHR0cDovL2ZyaXNiaS5ldS8=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::2\" label=\"RI-VIS\">\n" +
" <param name=\"name\">RI-VIS</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly9yaS12aXMuZXU=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::3\" label=\"CIISB\">\n" +
" <param name=\"name\">CIISB</param>\n" +
" <param name=\"logourl\">aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=</param>\n" +
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuY2lpc2Iub3Jn</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"elixir-gr\" label=\"ELIXIR GR\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.</param>\n"
+
" <param name=\"logourl\">https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png</param>\n"
+
" <param name=\"name\">The Greek National Node of the ESFRI European RI ELIXIR</param>\n" +
" <param name=\"manager\">vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\">\n" +
" <concept claim=\"false\" id=\"ni::projects::12\" label=\"\">\n" +
" <param name=\"projectfullname\">BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1U24RR025736-01</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::13\" label=\"\">\n" +
" <param name=\"projectfullname\">COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">0223843</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::14\" label=\"\">\n" +
" <param name=\"projectfullname\">The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">5R01MH082795-05</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::15\" label=\"\">\n" +
" <param name=\"projectfullname\">Fragmented early life environmental and emotional / cognitive vulnerabilities</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1P50MH096889-01A1</param>\n" +
" <param name=\"funder\">NIH</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::16\" label=\"\">\n" +
" <param name=\"projectfullname\">Enhancement of the 1000 Functional Connectome Project</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1R03MH096321-01A1</param>\n" +
" <param name=\"funder\">TUBITAK</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::17\" label=\"\">\n" +
" <param name=\"projectfullname\">CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">1131441</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
" <param name=\"projectfullname\">Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">0121950</param>\n" +
" <param name=\"funder\">NSF</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
" <param name=\"projectfullname\">Transforming statistical methodology for neuroimaging meta-analysis.</param>\n"
+
" <param name=\"acronym\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">100309</param>\n" +
" <param name=\"funder\">WT</param>\n" +
" </concept>\n" +
" </category>" +
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
+
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
" <param name=\"openaireId\">rest________::b8e502674c3c3499d5374e9b2ea6d8d5</param>\n" +
" <param name=\"name\">bio.tools</param>\n" +
" <param name=\"officialname\">bio.tools</param>\n" +
" <param name=\"enabled\">false</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"elixir-gr::zenodocommunities\" label=\"Elixir-GR Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"elixir-gr::organizations\" label=\"Elixir-GR Organizations\">\n" +
" <concept claim=\"false\" id=\"elixir-gr::organizations::1\" label=\"ATHENA RC\">\n" +
" <param name=\"name\">ATHENA RC</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=</param>\n" +
" </concept>\n" +
" </category><!-- <category claim=\"false\" id=\"elixir-gr::resultorganizations\" label=\"Elixir-GR Results through organizations\"/> -->\n"
+
"</context>");
private QueryInformationSystem queryInformationSystem;
private Map<String, String> map;
/**
 * Creates a fresh {@link QueryInformationSystem} before each test and seeds
 * it with the mocked community-context XML defined in this class.
 */
@BeforeEach
public void setUp() {
	final QueryInformationSystem qis = new QueryInformationSystem();
	qis.setContextRelationResult(communityContext);
	queryInformationSystem = qis;
}
/**
 * Verifies the relations extracted between contexts and content-provider
 * datasources: 34 relations overall, the dh-ch context linked in both
 * directions (10 each way) to its ten expected datasources.
 */
@Test
void test1() {
	final List<ContextInfo> contextInfoList = new ArrayList<>();
	final Consumer<ContextInfo> consumer = contextInfoList::add;

	queryInformationSystem
		.getContextRelation(consumer, "contentproviders", ModelSupport.getIdPrefix(Datasource.class));

	contextInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));

	final List<Relation> relationList = new ArrayList<>();
	contextInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(relationList::add));

	Assertions.assertEquals(34, relationList.size());

	// identifier of the dh-ch context node, shared by all assertions below
	final String dhChId = String
		.format(
			"%s|%s::%s", Constants.CONTEXT_ID,
			Constants.CONTEXT_NS_PREFIX,
			DHPUtils.md5("dh-ch"));

	Assertions
		.assertTrue(
			relationList
				.stream()
				.map(r -> r.getSource().getId())
				.collect(Collectors.toSet())
				.contains(dhChId));

	Assertions
		.assertEquals(
			10,
			relationList
				.stream()
				.filter(r -> r.getSource().getId().equals(dhChId))
				.collect(Collectors.toList())
				.size());

	Assertions
		.assertEquals(
			10,
			relationList
				.stream()
				.filter(r -> r.getTarget().getId().equals(dhChId))
				.collect(Collectors.toList())
				.size());

	// all datasources reachable from the dh-ch context
	final Set<String> dhChTargets = relationList
		.stream()
		.filter(r -> r.getSource().getId().equals(dhChId))
		.map(r -> r.getTarget().getId())
		.collect(Collectors.toSet());

	Assertions
		.assertTrue(
			dhChTargets.contains("10|re3data_____::9ebe127e5f3a0bf401875690f3bb6b81") &&
				dhChTargets.contains("10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815") &&
				dhChTargets.contains("10|doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b") &&
				dhChTargets.contains("10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9") &&
				dhChTargets.contains("10|doajarticles::0da84e9dfdc8419576169e027baa8028") &&
				dhChTargets.contains("10|re3data_____::84e123776089ce3c7a33db98d9cd15a8") &&
				dhChTargets.contains("10|openaire____::c5502a43e76feab55dd00cf50f519125") &&
				dhChTargets.contains("10|re3data_____::a48f09c562b247a9919acfe195549b47") &&
				dhChTargets.contains("10|opendoar____::97275a23ca44226c9964043c8462be96") &&
				dhChTargets.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05"));
}
/**
 * Verifies the relations extracted between contexts and projects: 44 overall,
 * none for the dh-ch context, and the clarin context linked in both directions
 * to its two expected projects. Project identifiers must carry a 12-character
 * namespace prefix before the '::' separator.
 */
@Test
public void test2() {
	final List<ContextInfo> contextInfoList = new ArrayList<>();
	final Consumer<ContextInfo> consumer = contextInfoList::add;

	queryInformationSystem
		.getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class));

	contextInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));

	final List<Relation> relationList = new ArrayList<>();
	contextInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(relationList::add));

	Assertions.assertEquals(44, relationList.size());

	// context identifiers reused across the assertions below
	final String dhChId = String
		.format(
			"%s|%s::%s", Constants.CONTEXT_ID,
			Constants.CONTEXT_NS_PREFIX,
			DHPUtils.md5("dh-ch"));
	final String clarinId = String
		.format(
			"%s|%s::%s", Constants.CONTEXT_ID,
			Constants.CONTEXT_NS_PREFIX,
			DHPUtils.md5("clarin"));

	Assertions
		.assertFalse(
			relationList
				.stream()
				.map(r -> r.getSource().getId())
				.collect(Collectors.toSet())
				.contains(dhChId));

	Assertions
		.assertEquals(
			2,
			relationList
				.stream()
				.filter(r -> r.getSource().getId().equals(clarinId))
				.collect(Collectors.toList())
				.size());

	Assertions
		.assertEquals(
			2,
			relationList
				.stream()
				.filter(r -> r.getTarget().getId().equals(clarinId))
				.collect(Collectors.toList())
				.size());

	// projects reachable from the clarin context
	final Set<String> clarinTargets = relationList
		.stream()
		.filter(r -> r.getSource().getId().equals(clarinId))
		.map(r -> r.getTarget().getId())
		.collect(Collectors.toSet());

	Assertions
		.assertTrue(
			clarinTargets.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") &&
				clarinTargets.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4"));

	relationList.forEach(rel -> {
		if (rel.getSource().getId().startsWith("40|")) {
			final String proj = rel.getSource().getId().substring(3);
			Assertions.assertEquals(12, proj.substring(0, proj.indexOf("::")).length());
		}
	});
}
}

View File

@ -1,157 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
public class DumpOrganizationProjectDatasourceTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory
.getLogger(DumpOrganizationProjectDatasourceTest.class);
private static final HashMap<String, String> map = new HashMap<>();
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(DumpOrganizationProjectDatasourceTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(DumpOrganizationProjectDatasourceTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(DumpOrganizationProjectDatasourceTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void dumpOrganizationTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/organization")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
dg.run(false, sourcePath, workingDir.toString() + "/dump", Organization.class, null);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.oa.model.graph.Organization> tmp = sc
.textFile(workingDir.toString() + "/dump")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Organization.class));
org.apache.spark.sql.Dataset<eu.dnetlib.dhp.oa.model.graph.Organization> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Organization.class));
Assertions.assertEquals(15, verificationDataset.count());
verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Organization>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
}
@Test
public void dumpProjectTest() throws NoAvailableEntityTypeException {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/project")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
dg.run(false, sourcePath, workingDir.toString() + "/dump", Project.class, null);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.oa.model.graph.Project> tmp = sc
.textFile(workingDir.toString() + "/dump")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Project.class));
org.apache.spark.sql.Dataset<eu.dnetlib.dhp.oa.model.graph.Project> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Project.class));
Assertions.assertEquals(12, verificationDataset.count());
verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Project>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
}
@Test
public void dumpDatasourceTest() throws NoAvailableEntityTypeException {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/complete/datasource")
.getPath();
DumpGraphEntities dg = new DumpGraphEntities();
dg.run(false, sourcePath, workingDir.toString() + "/dump", Datasource.class, null);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.oa.model.graph.Datasource> tmp = sc
.textFile(workingDir.toString() + "/dump")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Datasource.class));
org.apache.spark.sql.Dataset<eu.dnetlib.dhp.oa.model.graph.Datasource> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Datasource.class));
Assertions.assertEquals(5, verificationDataset.count());
verificationDataset
.foreach(
(ForeachFunction<eu.dnetlib.dhp.oa.model.graph.Datasource>) o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
}
}

View File

@ -1,305 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.model.graph.Relation;
/**
 * Integration tests for {@link SparkDumpRelationJob}: the job dumps relations,
 * which are read back and checked via SQL over a temporary view. Tests 3 and 4
 * exercise the optional {@code removeSet} parameter that drops relations by
 * semantics.
 *
 * Changes w.r.t. the previous version: removed the unused private static
 * {@code map} field and factored the job-run/read-back/temp-view boilerplate,
 * duplicated across all four tests, into {@link #dumpAndQuery(String, String)}.
 */
public class DumpRelationTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory
		.getLogger(DumpRelationTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(DumpRelationTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpRelationTest.class.getSimpleName());
		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpRelationTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	/**
	 * Runs the dump job on the given source, loads the dumped relations, prints
	 * them for debugging and returns a flattened SQL projection of each relation
	 * (name, source/target id and type, provenance) used by the assertions.
	 *
	 * @param sourcePath path to the input relations
	 * @param removeSet  semicolon-separated relation semantics to drop, or null
	 * @return one row per dumped relation
	 */
	private Dataset<Row> dumpAndQuery(String sourcePath, String removeSet) throws Exception {
		final String outputPath = workingDir.toString() + "/relation";

		if (removeSet == null) {
			SparkDumpRelationJob.main(new String[] {
				"-isSparkSessionManaged", Boolean.FALSE.toString(),
				"-outputPath", outputPath,
				"-sourcePath", sourcePath
			});
		} else {
			SparkDumpRelationJob.main(new String[] {
				"-isSparkSessionManaged", Boolean.FALSE.toString(),
				"-outputPath", outputPath,
				"-sourcePath", sourcePath,
				"-removeSet", removeSet
			});
		}

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.textFile(outputPath)
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

		Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

		verificationDataset.createOrReplaceTempView("table");

		verificationDataset
			.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));

		return spark
			.sql(
				"SELECT reltype.name, source.id source, source.type stype, target.id target,target.type ttype, provenance.provenance "
					+
					"from table ");
	}

	@Test
	public void test1() throws Exception {
		final Dataset<Row> check = dumpAndQuery(
			getClass()
				.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation")
				.getPath(),
			null);

		// datasource -> organization provision relations
		Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				22, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " +
							"provenance = 'Harvested'")
					.count());

		// organization -> project participation relations
		Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count());
		Assertions
			.assertEquals(
				7, check
					.filter(
						"name = 'isParticipant' and stype = 'organization' and ttype = 'project' " +
							"and provenance = 'Harvested'")
					.count());

		// single inferred organization -> result affiliation
		Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
		Assertions
			.assertEquals(
				1, check
					.filter(
						"name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " +
							"and provenance = 'Inferred by OpenAIRE'")
					.count());
	}

	@Test
	public void test2() throws Exception {
		final Dataset<Row> check = dumpAndQuery(
			getClass()
				.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation_validated")
				.getPath(),
			null);

		Assertions.assertEquals(20, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				20, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " +
							"provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(7, check.filter("name = 'isParticipant'").count());
		Assertions
			.assertEquals(
				7, check
					.filter(
						"name = 'isParticipant' and stype = 'organization' and ttype = 'project' " +
							"and provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
		Assertions
			.assertEquals(
				1, check
					.filter(
						"name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " +
							"and provenance = 'Inferred by OpenAIRE'")
					.count());

		// validated project -> result relations carry the validation date
		Assertions.assertEquals(2, check.filter("name = 'isProducedBy'").count());
		Assertions
			.assertEquals(
				2, check
					.filter(
						"name = 'isProducedBy' and stype = 'project' and ttype = 'result' " +
							"and provenance = 'Harvested' and validated = true " +
							"and validationDate = '2021-08-06'")
					.count());
	}

	@Test
	public void test3() throws Exception {
		// removeSet drops all isParticipant relations
		final Dataset<Row> check = dumpAndQuery(
			getClass()
				.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation")
				.getPath(),
			"isParticipant");

		Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				22, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " +
							"provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count());

		Assertions.assertEquals(1, check.filter("name = 'isAuthorInstitutionOf'").count());
		Assertions
			.assertEquals(
				1, check
					.filter(
						"name = 'isAuthorInstitutionOf' and stype = 'organization' and ttype = 'result' " +
							"and provenance = 'Inferred by OpenAIRE'")
					.count());
	}

	@Test
	public void test4() throws Exception {
		// removeSet drops both isParticipant and isAuthorInstitutionOf relations
		final Dataset<Row> check = dumpAndQuery(
			getClass()
				.getResource("/eu/dnetlib/dhp/oa/graph/dump/relation/relation")
				.getPath(),
			"isParticipant;isAuthorInstitutionOf");

		Assertions.assertEquals(22, check.filter("name = 'isProvidedBy'").count());
		Assertions
			.assertEquals(
				22, check
					.filter(
						"name = 'isProvidedBy' and stype = 'datasource' and ttype = 'organization' and " +
							"provenance = 'Harvested'")
					.count());

		Assertions.assertEquals(0, check.filter("name = 'isParticipant'").count());
		Assertions.assertEquals(0, check.filter("name = 'isAuthorInstitutionOf'").count());
	}
}

View File

@ -1,175 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.model.graph.Relation;
/**
 * Tests for {@link Extractor}: relations are extracted from a single
 * publication record and checked for count and shape (context &lt;-&gt; result
 * relations with the expected semantics and node types).
 *
 * Changes w.r.t. the previous version: removed a commented-out duplicate
 * argument line and hoisted the two relations of interest once, instead of
 * re-filtering and re-collecting the dataset for every single assertion.
 */
public class ExtractRelationFromEntityTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory
		.getLogger(ExtractRelationFromEntityTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(ExtractRelationFromEntityTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(ExtractRelationFromEntityTest.class.getSimpleName());
		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(ExtractRelationFromEntityTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	void test1() {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
			.getPath();

		final String communityMapPath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
			.getPath();

		Extractor ex = new Extractor();
		ex
			.run(
				false, sourcePath, workingDir.toString() + "/relation",
				eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath);

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.textFile(workingDir.toString() + "/relation")
			.map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

		org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(Relation.class));

		// each of the two publication records yields 9 relations
		Assertions
			.assertEquals(
				9,
				verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daeab3685c3'").count());

		Assertions
			.assertEquals(
				9,
				verificationDataset.filter("source.id = '50|dedup_wf_001::15270b996fa8fd2fb5723daxab3685c3'").count());

		// hoist one relation per direction ('00' prefix marks a context node)
		// instead of re-collecting the dataset for each of the 8 assertions below
		final Relation fromContext = verificationDataset
			.filter((FilterFunction<Relation>) row -> row.getSource().getId().startsWith("00"))
			.collectAsList()
			.get(0);

		final Relation toContext = verificationDataset
			.filter((FilterFunction<Relation>) row -> row.getTarget().getId().startsWith("00"))
			.collectAsList()
			.get(0);

		// context -> result relation
		Assertions.assertEquals("IsRelatedTo", fromContext.getReltype().getName());
		Assertions.assertEquals("relationship", fromContext.getReltype().getType());
		Assertions.assertEquals("context", fromContext.getSource().getType());
		Assertions.assertEquals("result", fromContext.getTarget().getType());

		// result -> context relation
		Assertions.assertEquals("IsRelatedTo", toContext.getReltype().getName());
		Assertions.assertEquals("relationship", toContext.getReltype().getType());
		Assertions.assertEquals("context", toContext.getTarget().getType());
		Assertions.assertEquals("result", toContext.getSource().getType());
	}
}

View File

@ -1,70 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import org.dom4j.DocumentException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.model.graph.Funder;
/**
 * Tests for {@link DumpGraphEntities#getFunder(String)}: parses funding-tree
 * XML fragments with two and three nesting levels and checks the resulting
 * {@link Funder} fields, including the composed funding-stream description.
 */
class FunderParsingTest {

	@Test
	void testFunderTwoLevels() throws DocumentException {

		// NSF funding tree: funder plus two nested funding levels
		final String fundingTree = "<fundingtree><funder><id>nsf_________::NSF</id><shortname>NSF</shortname><name>National Science "
			+
			"Foundation</name><jurisdiction>US</jurisdiction></funder><funding_level_1><id>nsf_________::NSF::CISE/OAD::CISE/CCF</id><description>Division "
			+
			"of Computing and Communication Foundations</description><name>Division of Computing and Communication " +
			"Foundations</name><parent><funding_level_0><id>nsf_________::NSF::CISE/OAD</id><description>Directorate for "
			+
			"Computer &amp; Information Science &amp; Engineering</description><name>Directorate for Computer &amp; " +
			"Information Science &amp; Engineering</name><parent/><class>nsf:fundingStream</class></funding_level_0></parent></funding_level_1></fundingtree>";

		final Funder funder = DumpGraphEntities.getFunder(fundingTree);

		Assertions.assertEquals("NSF", funder.getShortName());
		Assertions.assertEquals("National Science Foundation", funder.getName());
		Assertions.assertEquals("US", funder.getJurisdiction());

		Assertions.assertEquals("NSF::CISE/OAD::CISE/CCF", funder.getFunding_stream().getId());
		// description is composed top-down across the funding levels
		Assertions
			.assertEquals(
				"Directorate for Computer & Information Science & Engineering - Division of Computing and Communication Foundations",
				funder.getFunding_stream().getDescription());
	}

	@Test
	void testFunderThreeeLevels() throws DocumentException, SAXException {

		// EC funding tree: funder plus three nested funding levels
		final String fundingTree = "<fundingtree><funder><id>ec__________::EC</id>" +
			"<shortname>EC</shortname>" +
			"<name>European Commission</name>" +
			"<jurisdiction>EU</jurisdiction>" +
			"</funder><funding_level_2>" +
			"<id>ec__________::EC::H2020::ERC::ERC-COG</id>" +
			"<description>Consolidator Grant</description>" +
			"<name>ERC-COG</name><class>ec:h2020toas</class>" +
			"<parent><funding_level_1><id>ec__________::EC::H2020::ERC</id>" +
			"<description>European Research Council</description>" +
			"<name>ERC</name><class>ec:h2020fundings</class><parent>" +
			"<funding_level_0><id>ec__________::EC::H2020</id><name>H2020</name>" +
			"<description>Horizon 2020 Framework Programme</description><parent/>" +
			"<class>ec:h2020fundings</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>";

		final Funder funder = DumpGraphEntities.getFunder(fundingTree);

		Assertions.assertEquals("EC", funder.getShortName());
		Assertions.assertEquals("European Commission", funder.getName());
		Assertions.assertEquals("EU", funder.getJurisdiction());

		Assertions.assertEquals("EC::H2020::ERC::ERC-COG", funder.getFunding_stream().getId());
		// description is composed top-down across the funding levels
		Assertions
			.assertEquals(
				"Horizon 2020 Framework Programme - European Research Council - Consolidator Grant",
				funder.getFunding_stream().getDescription());
	}
}

View File

@ -1,810 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import static org.mockito.Mockito.lenient;
import java.util.*;
import java.util.function.Consumer;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and $x//context/param[./@name = 'status']/text() = 'all' " +
" return " +
"$x//context";
// XQuery returning, for each community/RI profile with status 'all', one
// '@@'-separated record laid out as:
//   id@@name@@description@@subject@@zenodoCommunity@@type
// The expected records for this query are listed in communityMap below.
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
// Expected fixture: one '@@'-separated record per community/RI context
// (id@@name@@description@@subject@@zenodoCommunity@@type) — the output the
// entity query above is expected to produce for each known context profile.
List<String> communityMap = Arrays
.asList(
"clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin@@ri",
"ee@@Sustainable Development Solutions Network - Greece@@The UN Sustainable Development Solutions Network (SDSN) has been operating since 2012 under the auspices of the UN Secretary-General. "
+
"SDSN mobilizes global scientific and technological expertise to promote practical solutions for sustainable development, including the implementation of the Sustainable Development Goals (SDGs) and the Paris Climate Agreement. The Greek hub of SDSN has been included in the SDSN network in 2017 and is co-hosted by ICRE8: International Center for Research on the Environment and the Economy and the Political Economy of Sustainable Development Lab.@@SDG13 - Climate action,SDG8 - Decent work and economic growth,SDG15 - "
+
"Life on land,SDG2 - Zero hunger,SDG17 - Partnerships for the ´goals,SDG10 - Reduced inequalities,SDG5 - Gender equality,SDG12 - Responsible consumption and production,SDG14 - Life below water,SDG6 - Clean water and sanitation,SDG11 - Sustainable cities and communities,SDG1 - No poverty,SDG3 - Good health and well being,SDG7 - Affordable and clean energy,SDG4 - Quality education,SDG9 - Industry innovation and infrastructure,SDG16 - Peace justice and strong institutions@@oac_sdsn-greece@@community",
"dh-ch@@Digital Humanities and Cultural Heritage@@This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.@@modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels@@oac_dh-ch@@community",
"fam@@Fisheries and Aquaculture Management@@Conservation of marine resources for sustainable development. The Fisheries and Aquaculture community focus on resources (document, data, codes..) which have been produced in the framework of projects (H2020, FP7, ..) related to the domain of fisheries and aquaculture.@@Stock Assessment,pelagic,Acoustic,Fish farming,Fisheries,Fishermen,maximum sustainable yield,trawler,Fishing vessel,Fisherman,Fishing gear,mackerel,RFMO,Fish Aggregating Device,Bycatch,Fishery,common fisheries policy,Fishing fleet,Aquaculture@@fisheries@@community",
"ni@@Neuroinformatics@@The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.@@brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities@@oac_ni@@community",
"mes@@European Marine Science@@This community was initially defined to include a very broad range of topics, with the intention to generate a number of more focused and sustainable dashboards for research communities and initiatives. As outlined in the logo of this community, we intend to setup a community dashboard for EuroMarine (a consortium of 56 research and academic organisations) and monitoring dashboards for marine research initiatives, including infrastructures (e.g. EMBRC & EMSO), advisory boards (e.g. Marine Boards & ICES), and transnational funding bodies (e.g. JPI-Oceans and Tara Foundation).@@marine,ocean,fish,aqua,sea@@oac_mes@@community",
"instruct@@Instruct-ERIC@@Instruct-ERIC is the European Research Infrastructure for Structural Biology@@@@oac_instruct@@community",
"elixir-gr@@The Greek National Node of the ESFRI European RI ELIXIR@@ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.@@@@oaa_elixir-gr@@ri",
"aginfra@@Agricultural and Food Sciences@@The scope of this community is to provide access to publications, research data, projects and software that are related to agricultural and food sciences@@animal production and health,fisheries and aquaculture,food safety and human nutrition,information management,food technology,agri-food education and extension,natural resources and environment,food system,engineering technology and Research,agriculture,food safety risk assessment,food security,farming practices and systems,plant production and protection,agri-food economics and policy,Agri-food,food distribution,forestry@@oac_aginfra@@community",
"dariah@@DARIAH EU@@The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support digitally-enabled research and teaching across the arts and humanities. It develops, maintains and operates an infrastructure in support of ICT-based research practices and sustains researchers in using them to build, analyse and interpret digital resources. DARIAH was established as a European Research Infrastructure Consortium (ERIC) in August 2014. Currently, DARIAH has 18 Members and several cooperating partners in eight non-member countries. Here you will find a growing collection of DARIAH-affiliated research outputs and other documents. @@@@dariah@@ri",
"epos@@European Plate Observing System@@EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of data, data products, and facilities from distributed research infrastructures for solid Earth science in Europe.@@@@@@ri",
"covid-19@@Corona Virus Disease@@This portal provides access to publications, research data, projects and software that may be relevant to the Corona Virus Disease (COVID-19). The OpenAIRE COVID-19 Gateway aggregates COVID-19 related records, links them and provides a single access point for discovery and navigation. We tag content from the OpenAIRE Research Graph (10,000+ data sources) and additional sources. All COVID-19 related research results are linked to people, organizations and projects, providing a contextualized navigation.@@COVID19,SARS-CoV,HCoV-19,mesh:C000657245,MERS-CoV,Síndrome Respiratorio Agudo Severo,mesh:COVID-19,COVID2019,COVID-19,SARS-CoV-2,2019 novel coronavirus,severe acute respiratory syndrome coronavirus 2,Orthocoronavirinae,Coronaviridae,mesh:D045169,coronavirus,SARS,coronaviruses,coronavirus disease-19,sars cov 2,Middle East Respiratory Syndrome,Severe acute respiratory syndrome coronavirus 2,Severe Acute Respiratory Syndrome,coronavirus disease 2019,2019-nCoV@@covid-19@@community");
List<String> communityContext = Arrays
.asList(
"<context id=\"clarin\" label=\"CLARIN\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">CLARIN</param>\n" +
" <param name=\"logourl\">https://www.clarin.eu/sites/default/files/clarin-frontpage-logo.jpg</param>\n"
+
" <param name=\"name\">Common Language Resources and Technology Infrastructure</param>\n" +
" <param name=\"manager\">maria@clarin.eu,dieter@clarin.eu,f.m.g.dejong@uu.nl,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">(Part of) the work reported here was made possible by using the CLARIN infrastructure.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding through &lt;CLARIN national consortium member, e.g. CLARIN.SI&gt;, &lt;XYZ&gt; project, grant no. &lt;XYZ&gt;.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The work reported here has received funding (through CLARIN ERIC) from the European Unions Horizon 2020 research and innovation programme under grant agreement No &lt;0-9&gt; for project &lt;XYZ&gt;.\n"
+
" (E.g. No 676529 for project CLARIN-PLUS.)</param>\n" +
" <param name=\"zenodoCommunity\">oac_clarin</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"true\" id=\"clarin::projects\" label=\"CLARIN Projects\">\n" +
" <concept claim=\"false\" id=\"clarin::projects::1\" label=\"CLARIN-PLUS\">\n" +
" <param name=\"projectfullname\">CLARIN-PLUS</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">676529</param>\n" +
" <param name=\"url\">http://www.clarin.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2015-1</param>\n" +
" <param name=\"acronym\">CLARIN+</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"clarin::projects::2\" label=\"CLARIN\">\n" +
" <param name=\"projectfullname\">Common Language Resources and Technology Infrastructure</param>\n"
+
" <param name=\"acronym\">CLARIN</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">212230</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda_______::ef782b2d85676aa3e5a907427feb18c4</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::contentproviders\" label=\"CLARIN Content providers\">" +
"<!--<concept claim=\"true\" id=\"clarin::contentproviders::1\" label=\"Zotero\">\n" +
" <param name=\"openaireId\">opendoar____::d96409bf894217686ba124d7356686c9</param>\n"
+
" <param name=\"name\">Public Knowledge Project EPrint Archive</param>\n" +
" <param name=\"officialname\">Public Knowledge Project EPrint Archive</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept> -->\n" +
" <concept claim=\"false\" id=\"clarin::contentproviders::2\" label=\"\">\n" +
" <param name=\"name\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"officialname\">LINDAT/CLARIN repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"true\" id=\"clarin::subcommunity\" label=\"CLARIN communities\">\n" +
" <concept claim=\"true\" id=\"clarin::subcommunity::1\" label=\"CLARIN-D\">\n" +
" <param name=\"fullname\">CLARIN-D</param>\n" +
" <param name=\"homepageUrl\">https://www.clarin-d.de/en/</param>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"example\">http://www.lrec-conf.org/proceedings/lrec2018/pdf/504.pdf</param>\n"
+
" <param name=\"nation\">Germany</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"clarin::zenodocommunities\" label=\"CLARIN Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"clarin::organizations\" label=\"CLARIN Organizations\"/>\n" +
"</context>",
"<context id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">This community gathers research results, data, scientific publications and projects related to the domain of Digital Humanities. This broad definition includes Humanities, Cultural Heritage, History, Archaeology and related fields.</param>\n"
+
" <param name=\"logourl\">http://sanmamante.org/DH_CH_logo.png</param>\n" +
" <param name=\"name\">Digital Humanities and Cultural Heritage</param>\n" +
" <param name=\"manager\">ileniagalluccio87@gmail.com,achille.felicetti@gmail.com,paolo.manghi@isti.cnr.it,tim.evans@york.ac.uk</param>\n"
+
" <param name=\"subject\">modern art,monuments,europeana data model,sites,field walking,frescoes,LIDO metadata schema,art history,excavation,Arts and Humanities General,cities,coins,temples,numismatics,lithics,roads,environmental archaeology,digital cultural heritage,archaeological reports,history,CRMba,churches,cultural heritage,archaeological stratigraphy,religious art,buidings,digital humanities,survey,archaeological sites,linguistic studies,bioarchaeology,architectural orders,palaeoanthropology,fine arts,europeana,CIDOC CRM,decorations,classic art,stratigraphy,digital archaeology,intangible cultural heritage,walls,humanities,chapels,CRMtex,Language and Literature,paintings,archaeology,fair data,mosaics,burials,architecture,medieval art,castles,CARARE metadata schema,statues,natural language processing,inscriptions,CRMsci,vaults,contemporary art,Arts and Humanities,CRMarchaeo,pottery,site,architectural,vessels</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"zenodoCommunity\">oac_dh-ch</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"dh-ch::projects\" label=\"DH-CH Projects\">\n" +
" <concept claim=\"false\" id=\"dh-ch::projects::1\" label=\"Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies\">\n"
+
" <param name=\"projectfullname\">Pooling Activities, Resources and Tools for Heritage E-research Networking, Optimization and Synergies</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The present work has been partially supported by the PARTHENOS project, funded by the European Commission (Grant Agreement No. 654119) under the HORIZON 2020 - INFRADEV-4-2014/2015 call</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654119</param>\n" +
" <param name=\"url\">http://www.parthenos-project.eu</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PARTHENOS</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"dh-ch::contentproviders\" label=\"DH-CH Content providers\">\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::2\" label=\"The UK's largest collection of digital research data in the social sciences and humanities\">\n"
+
" <param name=\"openaireId\">re3data_____::9ebe127e5f3a0bf401875690f3bb6b81</param>\n" +
" <param name=\"name\">The UK's largest collection of digital research data in the social sciences and humanities</param>\n"
+
" <param name=\"officialname\">UK Data Archive</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::3\" label=\"Journal of Data Mining and Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::c6cd4b532e12868c1d760a8d7cda6815</param>\n" +
" <param name=\"name\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"officialname\">Journal of Data Mining and Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::6\" label=\"Frontiers in Digital Humanities\">\n"
+
" <param name=\"openaireId\">doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b</param>\n" +
" <param name=\"name\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"officialname\">Frontiers in Digital Humanities</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::7\" label=\"Il Capitale Culturale: Studies on the Value of Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::6eb31d13b12bc06bbac06aef63cf33c9</param>\n" +
" <param name=\"name\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"officialname\">Il Capitale Culturale: Studies on the Value of Cultural Heritage</param>\n"
+
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::8\" label=\"Conservation Science in Cultural Heritage\">\n"
+
" <param name=\"openaireId\">doajarticles::0da84e9dfdc8419576169e027baa8028</param>\n" +
" <param name=\"name\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"officialname\">Conservation Science in Cultural Heritage</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::9\" label=\"Electronic Archiving System\">\n"
+
" <param name=\"openaireId\">re3data_____::84e123776089ce3c7a33db98d9cd15a8</param>\n" +
" <param name=\"name\">Electronic Archiving System</param>\n" +
" <param name=\"officialname\">EASY</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::10\" label=\"DANS-KB Harvester\">\n" +
" <param name=\"openaireId\">openaire____::c5502a43e76feab55dd00cf50f519125</param>\n" +
" <param name=\"name\">DANS-KB Harvester</param>\n" +
" <param name=\"officialname\">Gemeenschappelijke Harvester DANS-KB</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::11\" label=\"ads\">\n" +
" <param name=\"openaireId\">re3data_____::a48f09c562b247a9919acfe195549b47</param>\n" +
" <param name=\"name\">ads</param>\n" +
" <param name=\"officialname\">Archaeology Data Service</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::12\" label=\"\">\n" +
" <param name=\"openaireId\">opendoar____::97275a23ca44226c9964043c8462be96</param>\n" +
" <param name=\"name\">KNAW Repository</param>\n" +
" <param name=\"officialname\">KNAW Repository</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"dh-ch::contentproviders::13\" label=\"Internet Archaeology\">\n"
+
" <param name=\"openaireId\">doajarticles::2899208a99aa7d142646e0a80bfeef05</param>\n" +
" <param name=\"name\">Internet Archaeology</param>\n" +
" <param name=\"officialname\">Internet Archaeology</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"ni\" label=\"Neuroinformatics\" type=\"community\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">The neuroinformatics dashboard gathers research outputs from the 'neuroinformatics' community at large including the fields of: neuroscience, neuroinformatics, brain imaging databases and standards, brain imaging techniques, neuroimaging methods including statistics and machine learning. The dashboard covers a wide range of imaging methods including (but not limited to): MRI, TEP, EEG, MEG, and studies involving human participants as well as animal studies.</param>\n"
+
" <param name=\"logourl\">https://docs.google.com/drawings/u/0/d/10e191xGoGf4uaRluMqbt_7cCj6LSCs2a29im4CmWjqU/export/png</param>\n"
+
" <param name=\"name\">Neuroinformatics</param>\n" +
" <param name=\"manager\">sorina.pop@creatis.insa-lyon.fr,camille.maumet@inria.fr,christian.barillot@irisa.fr,xavier.rolland@irisa.fr,axel.bonnet@creatis.insa-lyon.fr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\">brain mapping,brain imaging,electroencephalography,arterial spin labelling,brain fingerprinting,brain,neuroimaging,Multimodal Brain Image Analysis,fMRI,neuroinformatics,fetal brain,brain ultrasonic imaging,topographic brain mapping,diffusion tensor imaging,computerized knowledge assessment,connectome mapping,brain magnetic resonance imaging,brain abnormalities</param>\n"
+
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oac_ni</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
" <param name=\"name\">Formerly:OpenFMRI</param>\n" +
" <param name=\"officialname\">OpenNeuro</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::2\" label=\"RIO\">\n" +
" <param name=\"openaireId\">doajarticles::c7d3de67dc77af72f6747157441252ec</param>\n" +
" <param name=\"name\">Research Ideas and Outcomes</param>\n" +
" <param name=\"officialname\">Research Ideas and Outcomes</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::3\" label=\"NITRC\">\n" +
" <param name=\"openaireId\">re3data_____::8515794670370f49c1d176c399c714f5</param>\n" +
" <param name=\"name\">Neuroimaging Informatics Tools and Resources Clearinghouse</param>\n"
+
" <param name=\"officialname\">NITRC</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::4\" label=\"FRONTIERSNI\">\n" +
" <param name=\"openaireId\">doajarticles::d640648c84b10d425f96f11c3de468f3</param>\n" +
" <param name=\"name\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"officialname\">Frontiers in Neuroinformatics</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::5\" label=\"NeuroImage: Clinical\">\n" +
" <param name=\"openaireId\">doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a</param>\n" +
" <param name=\"name\">NeuroImage: Clinical</param>\n" +
" <param name=\"officialname\">NeuroImage: Clinical</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"ni::contentproviders::6\" label=\"NeuroVault\">\n" +
" <param name=\"openaireId\">rest________::fb1a3d4523c95e63496e3bc7ba36244b</param>\n" +
" <param name=\"name\">NeuroVault</param>\n" +
" <param name=\"officialname\">NeuroVault</param>\n" +
" <param name=\"enabled\">true</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"instruct\" label=\"Instruct-ERIC\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">Instruct-ERIC is the European Research Infrastructure for Structural Biology</param>\n"
+
" <param name=\"logourl\">https://instruct-eric.eu/templates/instructeric/images/logos/instruct-eric-logo-noline.png</param>\n"
+
" <param name=\"name\">Instruct-ERIC</param>\n" +
" <param name=\"manager\">claudia@instruct-eric.eu,carazo@cnb.csic.es,echrysina@eie.gr,susan@instruct-eric.eu,naomi@instruct-eric.eu,natalie@instruct-eric.eu,pmarie@igbmc.fr,darren.hart@ibs.fr,claudia@strubi.ox.ac.uk,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct-ERIC.</param>\n"
+
" <param name=\"suggestedAcknowledgement\">The authors acknowledge the support and the use of resources of Instruct (PID # or APPID #), a Landmark ESFRI project</param>\n"
+
" <param name=\"zenodoCommunity\">oac_instruct</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"instruct::projects\" label=\"Instruct-ERIC Projects\">\n" +
" <concept claim=\"false\" id=\"instruct::projects::1\" label=\"Authentication and Authorisation For Research and Collaboration\">\n"
+
" <param name=\"projectfullname\">Authentication and Authorisation For Research and Collaboration</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730941</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-EINFRA-2016-1</param>\n" +
" <param name=\"acronym\">AARC2</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::2\" label=\"Building data bridges between biological and medical infrastructures in Europe\">\n"
+
" <param name=\"projectfullname\">Building data bridges between biological and medical infrastructures in Europe</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">284209</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioMedBridges</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::3\" label=\"Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities\">\n"
+
" <param name=\"projectfullname\">Transnational access and enhancement of integrated Biological Structure determination at synchrotron X-ray radiation facilities</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">283570</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2011-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">BioStruct-X</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::4\" label=\"Coordinated Research Infrastructures Building Enduring Life-science services\">\n"
+
" <param name=\"projectfullname\">Coordinated Research Infrastructures Building Enduring Life-science services</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">654248</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-1-2014-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">CORBEL</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::5\" label=\"Infrastructure for NMR, EM and X-rays for translational research\">\n"
+
" <param name=\"projectfullname\">Infrastructure for NMR, EM and X-rays for translational research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">653706</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2014-2015</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">iNEXT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::6\" label=\"Integrated Structural Biology Infrastructure\">\n"
+
" <param name=\"projectfullname\">Integrated Structural Biology Infrastructure</param>\n" +
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">211252</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2007-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::7\" label=\"Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research\">\n"
+
" <param name=\"projectfullname\">Releasing the full potential of Instruct to expand and consolidate infrastructure services for integrated structural life science research</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">731005</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRADEV-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">INSTRUCT-ULTRA</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::8\" label=\"Opening Synchrotron Light for Experimental Science and Applications in the Middle East\">\n"
+
" <param name=\"projectfullname\">Opening Synchrotron Light for Experimental Science and Applications in the Middle East</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730943</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRASUPP-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">OPEN SESAME</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::9\" label=\"Infrastructure for Protein Production Platforms\">\n"
+
" <param name=\"projectfullname\">Infrastructure for Protein Production Platforms</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">227764</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">FP7-INFRASTRUCTURES-2008-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">PCUBE</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::10\" label=\"European Vaccine Research and Development Infrastructure\">\n"
+
" <param name=\"projectfullname\">European Vaccine Research and Development Infrastructure</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">730964</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-INFRAIA-2016-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">TRAMSVAC2</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::11\" label=\"World-wide E-infrastructure for structural biology\">\n"
+
" <param name=\"projectfullname\">World-wide E-infrastructure for structural biology</param>\n"
+
" <param name=\"rule\"/>\n" +
" <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
" <param name=\"url\"/>\n" +
" <param name=\"funding\">H2020-EINFRA-2015-1</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"acronym\">West-Life</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::projects::12\" label=\"RI-VIS\">\n" +
" <param name=\"projectfullname\">Expanding research infrastructure visibility to strengthen strategic partnerships</param>\n"
+
" <param name=\"acronym\">RI-VIS</param>\n" +
" <param name=\"CD_PROJECT_NUMBER\">824063</param>\n" +
" <param name=\"funder\">EC</param>\n" +
" <param name=\"openaireId\">corda__h2020::af93b591b76991d8437993a8f6fc6538</param>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::contentproviders\" label=\"Instruct-ERIC Content providers\"/>\n"
+
" <category claim=\"false\" id=\"instruct::zenodocommunities\" label=\"Instruct-ERIC Zenodo Communities\">\n"
+
" <concept claim=\"false\" id=\"instruct::zenodocommunities::1\" label=\"Instruct\">\n" +
" <param name=\"zenodoid\">instruct</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::zenodocommunities::2\" label=\"West-Life Virtual Research Environment for Structural Biology\">\n"
+
" <param name=\"zenodoid\">west-life</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"instruct::organizations\" label=\"Instruct-ERIC Organizations\">\n"
+
" <concept claim=\"false\" id=\"instruct::organizations::1\" label=\"FRISBI\">\n" +
" <param name=\"name\">FRISBI</param>\n" +
" <param name=\"logourl\">aHR0cDovL2ZyaXNiaS5ldS9zdGF0aWMvaW1hZ2VzL2xvZ29zL2xvZ28tZnJpc2JpLnBuZw==</param>\n"
+
" <param name=\"websiteurl\">aHR0cDovL2ZyaXNiaS5ldS8=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::2\" label=\"RI-VIS\">\n" +
" <param name=\"name\">RI-VIS</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9yaS12aXMuZXUvbmV0d29yay9yaXZpcy90ZW1wbGF0ZXMvcml2aXMvaW1hZ2VzL1JJLVZJU0xvZ29GaW5hbC0wNi5wbmc=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly9yaS12aXMuZXU=</param>\n" +
" </concept>\n" +
" <concept claim=\"false\" id=\"instruct::organizations::3\" label=\"CIISB\">\n" +
" <param name=\"name\">CIISB</param>\n" +
" <param name=\"logourl\">aHR0cDovL2JpYy5jZWl0ZWMuY3ovZmlsZXMvMjkyLzEyNS5KUEc=</param>\n" +
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuY2lpc2Iub3Jn</param>\n" +
" </concept>\n" +
" </category>\n" +
"</context>\n",
"<context id=\"elixir-gr\" label=\"ELIXIR GR\" type=\"ri\">\n" +
" <param name=\"status\">all</param>\n" +
" <param name=\"description\">ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open, easily accessible and state -of- the- art services to the Greek and the international academic community and other stakeholders, such as industry and the health sector. More importantly, by providing these services, the infrastructure facilitates discoveries in the field of the life-sciences, having strong spill over effects in promoting innovation in sectors such as discovery of new drug targets and development of novel therapeutic agents, development of innovative diagnostics, personalized medicine, and development of innovative biotechnological products and processes.</param>\n"
+
" <param name=\"logourl\">https://elixir-greece.org/sites/default/files/ELIXIR_GREECE_white_background.png</param>\n"
+
" <param name=\"name\">The Greek National Node of the ESFRI European RI ELIXIR</param>\n" +
" <param name=\"manager\">vergoulis@imis.athena-innovation.gr,schatz@imis.athena-innovation.gr,paolo.manghi@isti.cnr.it</param>\n"
+
" <param name=\"subject\"/>\n" +
" <param name=\"suggestedAcknowledgement\"/>\n" +
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\"/>\n" +
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
+
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
" <param name=\"openaireId\">rest________::b8e502674c3c3499d5374e9b2ea6d8d5</param>\n" +
" <param name=\"name\">bio.tools</param>\n" +
" <param name=\"officialname\">bio.tools</param>\n" +
" <param name=\"enabled\">false</param>\n" +
" <param name=\"selcriteria\"/>\n" +
" </concept>\n" +
" </category>\n" +
" <category claim=\"false\" id=\"elixir-gr::zenodocommunities\" label=\"Elixir-GR Zenodo Communities\"/>\n"
+
" <category claim=\"false\" id=\"elixir-gr::organizations\" label=\"Elixir-GR Organizations\">\n" +
" <concept claim=\"false\" id=\"elixir-gr::organizations::1\" label=\"ATHENA RC\">\n" +
" <param name=\"name\">ATHENA RC</param>\n" +
" <param name=\"logourl\">aHR0cHM6Ly9lbGl4aXItZ3JlZWNlLm9yZy9zaXRlcy9kZWZhdWx0L2ZpbGVzL3N0eWxlcy90aHVtYm5haWwvcHVibGljL3BhcnRuZXJfbG9nb3MvYXRoZW5hX2xvZ28uanBnP2l0b2s9VXdGWFNpZng=</param>\n"
+
" <param name=\"websiteurl\">aHR0cHM6Ly93d3cuYXRoZW5hLWlubm92YXRpb24uZ3IvZW4=</param>\n" +
" </concept>\n" +
" </category><!-- <category claim=\"false\" id=\"elixir-gr::resultorganizations\" label=\"Elixir-GR Results through organizations\"/> -->\n"
+
"</context>");
@Mock
private ISLookUpService isLookUpService; // mocked IS lookup; stubbed in setUp to return the canned XML fixtures

private QueryInformationSystem queryInformationSystem; // system under test, wired with the mock in setUp

private Map<String, String> map; // NOTE(review): not referenced by any visible test — confirm before removing
@BeforeEach
public void setUp() throws ISLookUpException {
    // Route both profile queries to the canned XML fixtures defined above.
    // `lenient()` avoids UnnecessaryStubbingException in tests that use only one of the two.
    lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityContext);
    lenient().when(isLookUpService.quickSearchProfile(XQUERY_ENTITY)).thenReturn(communityMap);

    final QueryInformationSystem qis = new QueryInformationSystem();
    qis.setIsLookUp(isLookUpService);
    queryInformationSystem = qis;
}
@Test
void testSizeEntity() throws ISLookUpException {
    // Collect every ContextInfo emitted while walking the mocked context profiles.
    final List<ContextInfo> collected = new ArrayList<>();
    queryInformationSystem.getContextInformation(collected::add);

    // The fixture declares exactly 12 community/RI contexts.
    Assertions.assertEquals(12, collected.size());
}
@Test
void testSizeRelation() throws ISLookUpException {
    // Collect the contexts that expose at least one relation towards a datasource.
    final List<ContextInfo> collected = new ArrayList<>();

    queryInformationSystem.execContextRelationQuery();
    queryInformationSystem
        .getContextRelation(collected::add, "contentproviders", ModelSupport.entityIdPrefix.get("datasource"));

    // Five contexts in the fixture declare a "contentproviders" category.
    Assertions.assertEquals(5, collected.size());
}
@Test
void testContentRelation() throws ISLookUpException {
    // For each community id, verifies which datasource identifiers the relation
    // extraction associates with the context, as declared in the mocked profiles.
    List<ContextInfo> cInfoList = new ArrayList<>();
    final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
    queryInformationSystem.execContextRelationQuery();
    queryInformationSystem
        .getContextRelation(consumer, "contentproviders", ModelSupport.entityIdPrefix.get("datasource"));
    cInfoList.forEach(contextInfo -> {
        switch (contextInfo.getId()) {
            case "elixir-gr":
                // bio.tools is the single content provider declared for elixir-gr
                Assertions.assertEquals(1, contextInfo.getDatasourceList().size());
                Assertions
                    .assertEquals(
                        "10|rest________::b8e502674c3c3499d5374e9b2ea6d8d5",
                        contextInfo.getDatasourceList().get(0));
                break;
            case "instruct":
                // the instruct contentproviders category is empty in the fixture
                Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
                break;
            case "ni":
                Assertions.assertEquals(6, contextInfo.getDatasourceList().size());
                Assertions
                    .assertTrue(
                        contextInfo
                            .getDatasourceList()
                            .contains("10|rest________::fb1a3d4523c95e63496e3bc7ba36244b"));
                break;
            case "dh-ch":
                Assertions.assertEquals(10, contextInfo.getDatasourceList().size());
                break;
            case "clarin":
                Assertions.assertEquals(0, contextInfo.getDatasourceList().size());
                break;
        }
    });
}
@Test
void testContentEntity() throws ISLookUpException {
    // Dumps every context defined in the mocked profiles and checks, per community id,
    // that name, description, subjects, Zenodo community and type match the fixture.
    // The recurring Optional.ofNullable(subject).map(value -> false).orElse(true)
    // expression evaluates to true exactly when the subject list is null (absent).
    List<ContextInfo> cInfoList = new ArrayList<>();
    final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
    queryInformationSystem.getContextInformation(consumer);
    cInfoList.forEach(context -> {
        switch (context.getId()) {
            case "clarin":// clarin@@Common Language Resources and Technology Infrastructure@@CLARIN@@@@oac_clarin",
                Assertions
                    .assertEquals("Common Language Resources and Technology Infrastructure", context.getName());
                Assertions.assertEquals("CLARIN", context.getDescription());
                // clarin has no subjects in the fixture
                Assertions
                    .assertTrue(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals("oac_clarin", context.getZenodocommunity());
                Assertions.assertEquals("ri", context.getType());
                break;
            case "ee":
                Assertions.assertEquals("Sustainable Development Solutions Network - Greece", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                // subjects are present (assertFalse on the "is absent" expression)
                Assertions
                    .assertFalse(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals(17, context.getSubject().size());
                Assertions.assertEquals("oac_sdsn-greece", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                break;
            case "dh-ch":
                Assertions.assertEquals("Digital Humanities and Cultural Heritage", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertFalse(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals(67, context.getSubject().size());
                Assertions.assertEquals("oac_dh-ch", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                break;
            case "fam":
                Assertions.assertEquals("Fisheries and Aquaculture Management", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith("Conservation of marine resources for sustainable development"));
                Assertions
                    .assertFalse(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals(19, context.getSubject().size());
                Assertions.assertEquals("fisheries", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                break;
            case "ni":
                Assertions.assertEquals("Neuroinformatics", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith("The neuroinformatics dashboard gathers research outputs from the"));
                Assertions
                    .assertFalse(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals(18, context.getSubject().size());
                Assertions.assertEquals("oac_ni", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                Assertions.assertTrue(context.getSubject().contains("brain"));
                break;
            case "mes":
                Assertions.assertEquals("European Marine Science", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith(
                                "This community was initially defined to include a very broad range of topics"));
                Assertions
                    .assertFalse(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals(5, context.getSubject().size());
                Assertions.assertEquals("oac_mes", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                // all five declared subjects are spot-checked individually
                Assertions.assertTrue(context.getSubject().contains("sea"));
                Assertions.assertTrue(context.getSubject().contains("fish"));
                Assertions.assertTrue(context.getSubject().contains("ocean"));
                Assertions.assertTrue(context.getSubject().contains("aqua"));
                Assertions.assertTrue(context.getSubject().contains("marine"));
                break;
            case "instruct":
                Assertions.assertEquals("Instruct-ERIC", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .equals(
                                "Instruct-ERIC is the European Research Infrastructure for Structural Biology"));
                Assertions
                    .assertTrue(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals("oac_instruct", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                break;
            case "elixir-gr":
                Assertions
                    .assertEquals("The Greek National Node of the ESFRI European RI ELIXIR", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith(
                                "ELIXIR-GR enhances the potential of the Greek bioinformatics community to offer open"));
                Assertions
                    .assertTrue(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                // note the "oaa_" (not "oac_") prefix, as written in the fixture
                Assertions.assertEquals("oaa_elixir-gr", context.getZenodocommunity());
                Assertions.assertEquals("ri", context.getType());
                break;
            case "aginfra":
                Assertions.assertEquals("Agricultural and Food Sciences", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith(
                                "The scope of this community is to provide access to publications, research data, projects and software"));
                Assertions
                    .assertFalse(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals(18, context.getSubject().size());
                Assertions.assertEquals("oac_aginfra", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                Assertions.assertTrue(context.getSubject().contains("food distribution"));
                break;
            case "dariah":
                Assertions.assertEquals("DARIAH EU", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith(
                                "The Digital Research Infrastructure for the Arts and Humanities (DARIAH) aims to enhance and support "));
                Assertions
                    .assertTrue(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals("dariah", context.getZenodocommunity());
                Assertions.assertEquals("ri", context.getType());
                break;
            case "epos":
                Assertions.assertEquals("European Plate Observing System", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith(
                                "EPOS, the European Plate Observing System, is a long-term plan to facilitate integrated use of "));
                Assertions
                    .assertTrue(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                // epos declares no Zenodo community: the value is the empty string
                Assertions.assertEquals("", context.getZenodocommunity());
                Assertions.assertEquals("ri", context.getType());
                break;
            case "covid-19":
                Assertions.assertEquals("Corona Virus Disease", context.getName());
                Assertions.assertTrue(context.getDescription().length() > 0);
                Assertions
                    .assertTrue(
                        context
                            .getDescription()
                            .startsWith(
                                "This portal provides access to publications, research data, projects and "));
                Assertions
                    .assertFalse(
                        Optional
                            .ofNullable(context.getSubject())
                            .map(value -> false)
                            .orElse(true));
                Assertions.assertEquals(25, context.getSubject().size());
                Assertions.assertEquals("covid-19", context.getZenodocommunity());
                Assertions.assertEquals("community", context.getType());
                Assertions.assertTrue(context.getSubject().contains("coronavirus disease 2019"));
                break;
        }
    });
}
}

View File

@ -1,121 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.model.graph.Relation;
/**
 * Verifies {@code SparkOrganizationRelation}: relations between results and communities
 * derived from an organization-to-community map. With the provided community map the job
 * is expected to produce no relations (see the assertion in {@link #test1()}).
 */
public class RelationFromOrganizationTest {
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
    private static SparkSession spark; // shared local Spark session, created once per class
    private static Path workingDir; // scratch directory, deleted in afterAll
    private static final Logger log = LoggerFactory
        .getLogger(RelationFromOrganizationTest.class);
    private static final HashMap<String, String> map = new HashMap<>(); // NOTE(review): unused — confirm before removing
    // Organization id -> community ids association, passed verbatim as a job argument.
    String organizationCommunityMap = "{\"20|grid________::afaa39865943381c51f76c08725ffa75\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8\":[\"mes\",\"euromarine\"], \"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb\":[\"mes\",\"euromarine\"],\"20|rcuk________::e054eea0a47665af8c3656b5785ccf76\":[\"mes\",\"euromarine\"],\"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151\":[\"mes\",\"euromarine\"],\"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27\":[\"mes\",\"euromarine\"],\"20|snsf________::8fa091f8f25a846779acb4ea97b50aef\":[\"mes\",\"euromarine\"],\"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|corda_______::81e020977211c2c40fae2e1a50bffd71\":[\"mes\",\"euromarine\"],\"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78\":[\"mes\",\"euromarine\"],\"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70\":[\"mes\",\"euromarine\"],\"20|rcuk________::e16010089551a1a9182a94604fc0ea59\":[\"mes\",\"euromarine\"],\"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b\":[\"mes\",\"euromarine\"],\"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7\":[\"mes\",\"euromarine\"],\"20|snsf________::74730ef1439d7f7636a8be58a6b471b8\":[\"mes\",\"euromarine\"],\"20|nsf_________::ad72e19043a5a467e35f9b444d11563e\":[\"mes\",\"euromarine\"],\"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3\":[\"mes\",\"euromarine\"],\"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea\":[\"mes\",\"euromarine\"],\"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860\":[\"mes\",\"euromarine\"],\"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317\":[\"mes\",\"euromarine\"], \"20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f\":[\"mes\",\"euromarine\"], \"20|corda__h2020::65531bd11be9935948c7f2f4db1c1832\":[\"mes\",\"euromarine\"], \"20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946\":[\"mes\",\"euromarine\"], \"20|snsf________::3eb43582ac27601459a8d8b3e195724b\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6\":[\"mes\",\"euromarine\"], \"20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929\":[\"mes\",\"euromarine\"], \"20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0\":[\"mes\",\"euromarine\"], \"20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0\":[\"beopen\"], "
        +
        "\"20|grid________::a867f78acdc5041b34acfe4f9a349157\":[\"beopen\"], \"20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff\":[\"beopen\"], \"20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad\":[\"beopen\"], \"20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda_______::8ba50792bc5f4d51d79fca47d860c602\":[\"beopen\"], \"20|corda__h2020::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::e70e9114979e963eef24666657b807c3\":[\"beopen\"], \"20|corda_______::15911e01e9744d57205825d77c218737\":[\"beopen\"], \"20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab\":[\"beopen\"], \"20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3\":[\"beopen\"], \"20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3\":[\"beopen\"], \"20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9\":[\"beopen\"], \"20|corda_______::3ff558e30c2e434d688539548300b050\":[\"beopen\"], \"20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39\":[\"beopen\"], \"20|corda__h2020::5187217e2e806a6df3579c46f82401bc\":[\"beopen\"], \"20|grid________::5fa7e2709bcd945e26bfa18689adeec1\":[\"beopen\"], \"20|corda_______::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], \"20|corda__h2020::d8696683c53027438031a96ad27c3c07\":[\"beopen\"], \"20|rcuk________::23a79ebdfa59790864e4a485881568c1\":[\"beopen\"], \"20|corda__h2020::b76cf8fe49590a966953c37e18608af9\":[\"beopen\"], \"20|grid________::d2f0204126ee709244a488a4cd3b91c2\":[\"beopen\"], \"20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6\":[\"beopen\"], \"20|grid________::802401579481dc32062bdee69f5e6a34\":[\"beopen\"], \"20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d\":[\"beopen\"]}";

    /**
     * Spins up a local Spark session whose SQL warehouse lives in the scratch directory.
     */
    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(RelationFromOrganizationTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);
        SparkConf conf = new SparkConf();
        conf.setAppName(RelationFromOrganizationTest.class.getSimpleName());
        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
        spark = SparkSession
            .builder()
            .appName(RelationFromOrganizationTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    /** Deletes the scratch directory and releases the Spark session. */
    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    /**
     * Runs the job on the relation fixture and expects an empty output:
     * with the current community map no organization relation survives.
     */
    @Test
    void test1() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation")
            .getPath();
        final String communityMapPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymapservices.json")
            .getPath();
        SparkOrganizationRelation.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/relation",
            "-sourcePath", sourcePath,
            "-organizationCommunityMap", organizationCommunityMap,
            "-communityMapPath", communityMapPath
        });
        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
        JavaRDD<Relation> tmp = sc
            .textFile(workingDir.toString() + "/relation")
            .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));
        org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
            .createDataset(tmp.rdd(), Encoders.bean(Relation.class));
        verificationDataset.createOrReplaceTempView("table");
        // The expected count was 170 before the community map changed; kept for reference.
        // Assertions.assertEquals(170, verificationDataset.count());
        Assertions.assertEquals(0, verificationDataset.count());
        // Dataset<Row> checkDs = spark
        // .sql(
        // "Select source.id, source.type " +
        // "from table ");
        //
        // Assertions.assertEquals(2, checkDs.filter("substr(id, 4, 5) = 'dedup' ").count());
        //
        // Assertions.assertEquals(0, checkDs.filter("id = '20|grid________::afaa39865943381c51f76c08725ffa75'").count());
        //
        // Assertions.assertEquals(25, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("beopen") + "'").count());
        //
        // Assertions
        // .assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("euromarine") + "'").count());
        //
        // Assertions.assertEquals(30, checkDs.filter("id = '00|context_____::" + DHPUtils.md5("mes") + "'").count());
    }
}

View File

@ -1,95 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
 * Verifies {@code SparkSelectValidRelationsJob}: from the fixture graph, only the
 * relations whose source and target entities are both present must be retained.
 */
public class SelectRelationTest {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static SparkSession spark; // shared local session, created once per class

    private static Path workingDir; // scratch directory, deleted in afterAll

    private static final Logger log = LoggerFactory
        .getLogger(SelectRelationTest.class);

    /**
     * Spins up a local Spark session whose SQL warehouse lives in the scratch directory.
     */
    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(SelectRelationTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
        conf.setAppName(SelectRelationTest.class.getSimpleName());
        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(SelectRelationTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    /** Deletes the scratch directory and releases the Spark session. */
    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    /**
     * Runs the selection job on the fixture and expects exactly 7 valid relations.
     */
    @Test
    public void test1() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/selectrelations")
            .getPath();

        SparkSelectValidRelationsJob.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/relation",
            "-sourcePath", sourcePath
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<Relation> tmp = sc
            .textFile(workingDir.toString() + "/relation")
            .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

        Dataset<Relation> verificationDataset = spark
            .createDataset(tmp.rdd(), Encoders.bean(Relation.class));

        // assertEquals reports expected vs actual on failure, unlike the
        // original assertTrue(count == 7) which only reported "false".
        Assertions.assertEquals(7, verificationDataset.count());
    }
}

View File

@ -6,7 +6,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
@ -15,8 +14,6 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
@ -27,15 +24,10 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.eosc.model.EoscResult;
import eu.dnetlib.dhp.eosc.model.Indicator;
import eu.dnetlib.dhp.eosc.model.Organization;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.complete.SelectRelationTest;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob;
import eu.dnetlib.dhp.eosc.model.Result;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import scala.Tuple2;
/**
@ -61,7 +53,7 @@ public class SelectEoscResultTest {
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(SelectRelationTest.class.getSimpleName());
conf.setAppName(SelectEoscResultTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
@ -72,7 +64,7 @@ public class SelectEoscResultTest {
spark = SparkSession
.builder()
.appName(SelectRelationTest.class.getSimpleName())
.appName(SelectEoscResultTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@ -104,9 +96,9 @@ public class SelectEoscResultTest {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<EoscResult> tmp = sc
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/publication")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
Assertions.assertEquals(3, tmp.count());
@ -155,9 +147,9 @@ public class SelectEoscResultTest {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<EoscResult> tmp = sc
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/publication")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
Assertions.assertEquals(3, tmp.count());
@ -276,7 +268,7 @@ public class SelectEoscResultTest {
.getPath();
Utils
.readPath(spark, actionSetPath, Result.class)
.readPath(spark, actionSetPath, eu.dnetlib.dhp.schema.oaf.Result.class)
.toJavaRDD()
.map(p -> new AtomicAction(p.getClass(), p))
.mapToPair(
@ -294,9 +286,9 @@ public class SelectEoscResultTest {
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<EoscResult> tmp = sc
JavaRDD<Result> tmp = sc
.textFile(workingDir.toString() + "/publication")
.map(item -> OBJECT_MAPPER.readValue(item, EoscResult.class));
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
Assertions
.assertEquals(

View File

@ -1,139 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.funderresult;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkResultLinkedToProject;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
/**
 * Verifies {@code SparkResultLinkedToProject}: only results linked to at least one
 * project must be dumped. The two tests share the same job invocation and differ
 * only in the papers fixture and the expected record count, so the common logic is
 * factored into {@link #runJobAndCount(String)}.
 */
public class ResultLinkedToProjectTest {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static SparkSession spark; // shared local session, created once per class

    private static Path workingDir; // scratch directory, deleted in afterAll

    private static final Logger log = LoggerFactory
        .getLogger(ResultLinkedToProjectTest.class);

    /**
     * Spins up a local Spark session whose SQL warehouse lives in the scratch directory.
     */
    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(
                ResultLinkedToProjectTest.class.getSimpleName());
        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();
        conf.setAppName(ResultLinkedToProjectTest.class.getSimpleName());
        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(ResultLinkedToProjectTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    /** Deletes the scratch directory and releases the Spark session. */
    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());
        spark.stop();
    }

    /**
     * Runs the job on the given papers fixture (classpath resource) and returns
     * the number of {@code CommunityResult} records written to the output path.
     *
     * @param papersResourcePath classpath path of the papers.json fixture
     * @return number of dumped records
     * @throws Exception if the Spark job fails
     */
    private long runJobAndCount(String papersResourcePath) throws Exception {
        final String sourcePath = getClass()
            .getResource(papersResourcePath)
            .getPath();

        final String graphPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/preparedInfo")
            .getPath();

        final String communityMapPath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/communityMapPath")
            .getPath();

        SparkResultLinkedToProject.main(new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-outputPath", workingDir.toString() + "/preparedInfo",
            "-sourcePath", sourcePath,
            "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
            "-graphPath", graphPath,
            "-communityMapPath", communityMapPath
        });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        return sc
            .textFile(workingDir.toString() + "/preparedInfo")
            .map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class))
            .count();
    }

    /** Papers with no project link must produce an empty dump. */
    @Test
    void testNoMatch() throws Exception {
        Assertions
            .assertEquals(
                0, runJobAndCount("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json"));
    }

    /** Exactly one paper in the fixture is linked to a project. */
    @Test
    void testMatchOne() throws Exception {
        Assertions
            .assertEquals(
                1, runJobAndCount("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json"));
    }
}

View File

@ -1,145 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.funderresult;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.funderresults.SparkDumpFunderResults;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
/**
 * Verifies that {@link SparkDumpFunderResults} splits the dumped results into
 * one directory per funder, with the expected record count in each split.
 */
public class SplitPerFunderTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static final Logger log = LoggerFactory.getLogger(SplitPerFunderTest.class);

	private static SparkSession spark;

	private static Path workingDir;

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(SplitPerFunderTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		// Local, UI-less Spark session writing its warehouse under the temp dir.
		SparkConf conf = new SparkConf();
		conf.setAppName(SplitPerFunderTest.class.getSimpleName());
		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(SplitPerFunderTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	/** Reads the split produced for one funder back as an RDD of {@link CommunityResult}. */
	private JavaRDD<CommunityResult> readFunderSplit(JavaSparkContext sc, String funderShortName) {
		return sc
			.textFile(workingDir.toString() + "/split/" + funderShortName)
			.map(json -> OBJECT_MAPPER.readValue(json, CommunityResult.class));
	}

	@Test
	void test1() throws Exception {
		String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/ext")
			.getPath();

		SparkDumpFunderResults.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/split",
			"-sourcePath", sourcePath
		});

		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		// EC_FP7 split: 3 results, one of them a known dedup record.
		Dataset<CommunityResult> fp7 = spark
			.createDataset(readFunderSplit(sc, "EC_FP7").rdd(), Encoders.bean(CommunityResult.class));
		Assertions.assertEquals(3, fp7.count());
		Assertions
			.assertEquals(
				1, fp7.filter("id = '50|dedup_wf_001::0d16b1714ab3077df73893a8ea57d776'").count());

		// Expected record count per remaining funder split.
		Assertions.assertEquals(2, readFunderSplit(sc, "CIHR").count());
		Assertions.assertEquals(1, readFunderSplit(sc, "NWO").count());
		Assertions.assertEquals(2, readFunderSplit(sc, "NIH").count());
		Assertions.assertEquals(1, readFunderSplit(sc, "NSF").count());
		Assertions.assertEquals(1, readFunderSplit(sc, "SNSF").count());
		Assertions.assertEquals(1, readFunderSplit(sc, "NHMRC").count());
		Assertions.assertEquals(3, readFunderSplit(sc, "EC_H2020").count());
		Assertions.assertEquals(1, readFunderSplit(sc, "MZOS").count());
	}
}

View File

@ -1,124 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.projectssubset;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.model.graph.Project;
/**
 * Verifies that {@code ProjectsSubsetSparkJob} dumps only the projects not yet
 * present in the supplied project-id list, and that the list is extended with
 * the newly dumped ids.
 */
public class ProjectSubsetTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static final Logger log = LoggerFactory
		.getLogger(ProjectSubsetTest.class);

	private static SparkSession spark;

	private static Path workingDir;

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(
				ProjectSubsetTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		// Local, UI-less Spark session writing its warehouse under the temp dir.
		SparkConf conf = new SparkConf();
		conf.setAppName(ProjectSubsetTest.class.getSimpleName());
		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(ProjectSubsetTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	/** Copies the given project-id list into the working dir and runs the subset job against it. */
	private void runSubsetJob(String projectListPath, String sourcePath) throws Exception {
		spark
			.read()
			.textFile(projectListPath)
			.write()
			.mode(SaveMode.Overwrite)
			.text(workingDir.toString() + "/projectIds");

		ProjectsSubsetSparkJob.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/projects",
			"-sourcePath", sourcePath,
			"-projectListPath", workingDir.toString() + "/projectIds"
		});
	}

	/** Reads the dumped projects back as an RDD of {@link Project}. */
	private JavaRDD<Project> readDumpedProjects(JavaSparkContext sc) {
		return sc
			.textFile(workingDir.toString() + "/projects")
			.map(json -> OBJECT_MAPPER.readValue(json, Project.class));
	}

	/** Counts the dumped projects whose id carries the given datasource prefix (chars 3-15). */
	private static long countByPrefix(JavaRDD<Project> projects, String prefix) {
		return projects.filter(p -> p.getId().substring(3, 15).equals(prefix)).count();
	}

	@Test
	void testAllNew() throws Exception {
		String projectListPath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
			.getPath();
		String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/allnew/projects")
			.getPath();

		runSubsetJob(projectListPath, sourcePath);

		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
		JavaRDD<Project> dumped = readDumpedProjects(sc);

		// None of the 12 source projects is in the id list, so all are dumped.
		Assertions.assertEquals(12, dumped.count());
		Assertions.assertEquals(2, countByPrefix(dumped, "aka_________"));
		Assertions.assertEquals(2, countByPrefix(dumped, "anr_________"));
		Assertions.assertEquals(4, countByPrefix(dumped, "arc_________"));
		Assertions.assertEquals(3, countByPrefix(dumped, "conicytf____"));
		Assertions.assertEquals(1, countByPrefix(dumped, "corda_______"));
		// The id list grows from 28 to 40 with the newly dumped project ids.
		Assertions.assertEquals(40, sc.textFile(workingDir.toString() + "/projectIds").count());
	}

	@Test
	void testMatchOne() throws Exception {
		String projectListPath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/projectId")
			.getPath();
		String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/projectsubset/matchOne/projects")
			.getPath();

		runSubsetJob(projectListPath, sourcePath);

		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
		JavaRDD<Project> dumped = readDumpedProjects(sc);

		// One source project already appears in the id list, so 11 of 12 are dumped.
		Assertions.assertEquals(11, dumped.count());
		Assertions.assertEquals(2, countByPrefix(dumped, "aka_________"));
		Assertions.assertEquals(2, countByPrefix(dumped, "anr_________"));
		Assertions.assertEquals(4, countByPrefix(dumped, "arc_________"));
		Assertions.assertEquals(3, countByPrefix(dumped, "conicytf____"));
		Assertions.assertEquals(0, countByPrefix(dumped, "corda__h2020"));
		Assertions.assertEquals(39, sc.textFile(workingDir.toString() + "/projectIds").count());
	}
}

Some files were not shown because too many files have changed in this diff Show More