[SKG-IF] added first implementation for denormalization
This commit is contained in:
parent
7b715b2bb8
commit
a6a6922f11
|
@ -11,17 +11,17 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
* @Date 01/09/23
|
* @Date 01/09/23
|
||||||
*/
|
*/
|
||||||
public class Contribution implements Serializable {
|
public class Contribution implements Serializable {
|
||||||
private String person;
|
private MinPerson person;
|
||||||
@JsonProperty("declared_affiliations")
|
@JsonProperty("declared_affiliations")
|
||||||
private List<String> declared_affiliation;
|
private List<String> declared_affiliation;
|
||||||
private List<String> roles;
|
private List<String> roles;
|
||||||
private Integer rank;
|
private Integer rank;
|
||||||
|
|
||||||
public String getPerson() {
|
public MinPerson getPerson() {
|
||||||
return person;
|
return person;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPerson(String person) {
|
public void setPerson(MinPerson person) {
|
||||||
this.person = person;
|
this.person = person;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,9 +8,33 @@ import java.io.Serializable;
|
||||||
* @Date 22/02/24
|
* @Date 22/02/24
|
||||||
*/
|
*/
|
||||||
public class Contributor implements Serializable {
|
public class Contributor implements Serializable {
|
||||||
private String person; // I would not map it because we have only information regarding the person (if any)
|
private MinPerson person; // I would not map it because we have only information regarding the person (if any)
|
||||||
// associated to the leading organization
|
// associated to the leading organization
|
||||||
private String organization; // contributors.person
|
private String organization; // contributors.person
|
||||||
|
|
||||||
private String role;// private
|
private String role;// private
|
||||||
|
|
||||||
|
public MinPerson getPerson() {
|
||||||
|
return person;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPerson(MinPerson person) {
|
||||||
|
this.person = person;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOrganization() {
|
||||||
|
return organization;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOrganization(String organization) {
|
||||||
|
this.organization = organization;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRole() {
|
||||||
|
return role;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRole(String role) {
|
||||||
|
this.role = role;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,15 @@ public class Datasource implements Serializable {
|
||||||
// research_product_metadata_license.url not mappable
|
// research_product_metadata_license.url not mappable
|
||||||
private List<String> research_product_metadata_access_policy;// researchproductmetadataccesspolicies list with the
|
private List<String> research_product_metadata_access_policy;// researchproductmetadataccesspolicies list with the
|
||||||
// same mapping of research_product_access_policy
|
// same mapping of research_product_access_policy
|
||||||
|
private List<MinOrganization> organization;
|
||||||
|
|
||||||
|
public List<MinOrganization> getOrganization() {
|
||||||
|
return organization;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setOrganization(List<MinOrganization> organization) {
|
||||||
|
this.organization = organization;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLocal_identifier() {
|
public String getLocal_identifier() {
|
||||||
return local_identifier;
|
return local_identifier;
|
||||||
|
|
|
@ -28,7 +28,7 @@ public class Grant implements Serializable {
|
||||||
private String start_date;// startdate.value
|
private String start_date;// startdate.value
|
||||||
private String end_date;// enddate.value
|
private String end_date;// enddate.value
|
||||||
private String website;// websiteurl.value
|
private String website;// websiteurl.value
|
||||||
private List<String> beneficiaries;// organization.id for the organizations in the relation with semantic class
|
private List<MinOrganization> beneficiaries;// organization.id for the organizations in the relation with semantic class
|
||||||
// isParticipant produces the list of organization internal identifiers
|
// isParticipant produces the list of organization internal identifiers
|
||||||
private List<Contributor> contributors;//
|
private List<Contributor> contributors;//
|
||||||
|
|
||||||
|
@ -136,11 +136,11 @@ public class Grant implements Serializable {
|
||||||
this.website = website;
|
this.website = website;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getBeneficiaries() {
|
public List<MinOrganization> getBeneficiaries() {
|
||||||
return beneficiaries;
|
return beneficiaries;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setBeneficiaries(List<String> beneficiaries) {
|
public void setBeneficiaries(List<MinOrganization> beneficiaries) {
|
||||||
this.beneficiaries = beneficiaries;
|
this.beneficiaries = beneficiaries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,9 +28,9 @@ public class Manifestation implements Serializable {
|
||||||
@JsonProperty("licance_schema")
|
@JsonProperty("licance_schema")
|
||||||
private String licence_schema;
|
private String licence_schema;
|
||||||
private Biblio biblio;
|
private Biblio biblio;
|
||||||
private String venue;
|
private MinVenue venue;
|
||||||
@JsonProperty("hosting_datasource")
|
@JsonProperty("hosting_datasource")
|
||||||
private String hosting_datasource;
|
private MinVenue hosting_datasource;
|
||||||
|
|
||||||
public String getProduct_local_type() {
|
public String getProduct_local_type() {
|
||||||
return product_local_type;
|
return product_local_type;
|
||||||
|
@ -120,19 +120,19 @@ public class Manifestation implements Serializable {
|
||||||
this.biblio = biblio;
|
this.biblio = biblio;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getVenue() {
|
public MinVenue getVenue() {
|
||||||
return venue;
|
return venue;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setVenue(String venue) {
|
public void setVenue(MinVenue venue) {
|
||||||
this.venue = venue;
|
this.venue = venue;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getHosting_datasource() {
|
public MinVenue getHosting_datasource() {
|
||||||
return hosting_datasource;
|
return hosting_datasource;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setHosting_datasource(String hosting_datasource) {
|
public void setHosting_datasource(MinVenue hosting_datasource) {
|
||||||
this.hosting_datasource = hosting_datasource;
|
this.hosting_datasource = hosting_datasource;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
package eu.dnetlib.dhp.skgif.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Minimal, denormalized view of a grant: its local identifier, funder and code.
 * Plain serializable bean; every property is a nullable {@code String} and no
 * validation is performed by the accessors.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinGrant implements Serializable {
	// Snake-case names are kept on purpose: the accessor names derive from them and
	// are the bean properties seen by callers.
	private String local_identifier;
	private String funder;
	private String code;

	/** @return the local identifier of the grant, or {@code null} if never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier of the grant */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the funder of the grant, or {@code null} if never set */
	public String getFunder() {
		return funder;
	}

	/** @param funder the funder of the grant */
	public void setFunder(String funder) {
		this.funder = funder;
	}

	/** @return the grant code, or {@code null} if never set */
	public String getCode() {
		return code;
	}

	/** @param code the grant code */
	public void setCode(String code) {
		this.code = code;
	}
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
package eu.dnetlib.dhp.skgif.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Minimal, denormalized view of an organization: local identifier, name and the
 * {@code ror} / {@code isni} identifiers. Plain serializable bean; every property
 * is a nullable {@code String} and no validation is performed by the accessors.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinOrganization implements Serializable {
	// Snake-case names are kept on purpose: the accessor names derive from them and
	// are the bean properties seen by callers.
	private String local_identifier;
	private String name;
	// NOTE(review): presumably the ROR and ISNI persistent identifiers, judging by
	// the field names only; the expected format is not visible here - confirm.
	private String ror;
	private String isni;

	/** @return the local identifier of the organization, or {@code null} if never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier of the organization */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the organization name, or {@code null} if never set */
	public String getName() {
		return name;
	}

	/** @param name the organization name */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the {@code ror} value, or {@code null} if never set */
	public String getRor() {
		return ror;
	}

	/** @param ror the {@code ror} value */
	public void setRor(String ror) {
		this.ror = ror;
	}

	/** @return the {@code isni} value, or {@code null} if never set */
	public String getIsni() {
		return isni;
	}

	/** @param isni the {@code isni} value */
	public void setIsni(String isni) {
		this.isni = isni;
	}
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
package eu.dnetlib.dhp.skgif.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Minimal, denormalized view of a person: local identifier, full name and
 * {@code orcid}. Plain serializable bean; every property is a nullable
 * {@code String} and no validation is performed by the accessors.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinPerson implements Serializable {
	// Snake-case names are kept on purpose: the accessor names derive from them and
	// are the bean properties seen by callers.
	private String local_identifier;
	private String full_name;
	// NOTE(review): presumably the ORCID iD of the person, judging by the field name
	// only; the expected format is not visible here - confirm.
	private String orcid;

	/** @return the local identifier of the person, or {@code null} if never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier of the person */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the full name of the person, or {@code null} if never set */
	public String getFull_name() {
		return full_name;
	}

	/** @param full_name the full name of the person */
	public void setFull_name(String full_name) {
		this.full_name = full_name;
	}

	/** @return the {@code orcid} value, or {@code null} if never set */
	public String getOrcid() {
		return orcid;
	}

	/** @param orcid the {@code orcid} value */
	public void setOrcid(String orcid) {
		this.orcid = orcid;
	}
}
|
|
@ -0,0 +1,55 @@
|
||||||
|
package eu.dnetlib.dhp.skgif.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Minimal, denormalized view of a research product: local identifier, title and
 * the {@code doi} / {@code pmcid} / {@code arxivid} identifiers. Plain serializable
 * bean; every property is a nullable {@code String} and no validation is performed
 * by the accessors.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinProduct implements Serializable {
	// Snake-case names are kept on purpose: the accessor names derive from them and
	// are the bean properties seen by callers.
	private String local_identifier;
	private String title;
	// NOTE(review): the three fields below presumably hold one persistent identifier
	// per scheme (DOI, PubMed Central, arXiv), judging by their names only - confirm.
	private String doi;
	private String pmcid;
	private String arxivid;

	/** @return the local identifier of the product, or {@code null} if never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier of the product */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the product title, or {@code null} if never set */
	public String getTitle() {
		return title;
	}

	/** @param title the product title */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the {@code doi} value, or {@code null} if never set */
	public String getDoi() {
		return doi;
	}

	/** @param doi the {@code doi} value */
	public void setDoi(String doi) {
		this.doi = doi;
	}

	/** @return the {@code pmcid} value, or {@code null} if never set */
	public String getPmcid() {
		return pmcid;
	}

	/** @param pmcid the {@code pmcid} value */
	public void setPmcid(String pmcid) {
		this.pmcid = pmcid;
	}

	/** @return the {@code arxivid} value, or {@code null} if never set */
	public String getArxivid() {
		return arxivid;
	}

	/** @param arxivid the {@code arxivid} value */
	public void setArxivid(String arxivid) {
		this.arxivid = arxivid;
	}
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
package eu.dnetlib.dhp.skgif.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Minimal, denormalized view of a topic: its local identifier and its value.
 * Plain serializable bean; both properties are nullable {@code String}s and no
 * validation is performed by the accessors.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinTopic implements Serializable {
	// Snake-case names are kept on purpose: the accessor names derive from them and
	// are the bean properties seen by callers.
	private String local_identifier;
	private String value;

	/** @return the local identifier of the topic, or {@code null} if never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier of the topic */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the topic value, or {@code null} if never set */
	public String getValue() {
		return value;
	}

	/** @param value the topic value */
	public void setValue(String value) {
		this.value = value;
	}
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
package eu.dnetlib.dhp.skgif.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Minimal, denormalized view of a venue: its local identifier and its name.
 * Plain serializable bean; both properties are nullable {@code String}s and no
 * validation is performed by the accessors or by the factory.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinVenue implements Serializable {
	// Snake-case names are kept on purpose: the accessor names derive from them and
	// are the bean properties seen by callers.
	private String local_identifier;
	private String name;

	/** @return the local identifier of the venue, or {@code null} if never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier of the venue */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the venue name, or {@code null} if never set */
	public String getName() {
		return name;
	}

	/** @param name the venue name */
	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Creates a venue with both properties already populated.
	 *
	 * @param local_identifier the local identifier of the venue (stored as given, may be {@code null})
	 * @param name the venue name (stored as given, may be {@code null})
	 * @return a new {@code MinVenue} instance holding the two values
	 */
	public static MinVenue newInstance(String local_identifier, String name) {
		MinVenue minVenue = new MinVenue();
		minVenue.local_identifier = local_identifier;
		minVenue.name = name;
		return minVenue;
	}
}
|
|
@ -8,10 +8,18 @@ import java.io.Serializable;
|
||||||
* @Date 05/09/23
|
* @Date 05/09/23
|
||||||
*/
|
*/
|
||||||
public enum RelationType implements Serializable {
|
public enum RelationType implements Serializable {
|
||||||
RESULT_OUTCOME_FUNDING("isProducedBy"), RESULT_AFFILIATIED_TO_ORGANIZATION(
|
RESULT_OUTCOME_FUNDING("isProducedBy"),
|
||||||
"hasAuthorInstitution"), ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"), SUPPLEMENT(
|
RESULT_AFFILIATIED_TO_ORGANIZATION(
|
||||||
"IsSupplementedBy"), DOCUMENTS(
|
"hasAuthorInstitution"),
|
||||||
"IsDocumentedBy"), PART("IsPartOf"), VERSION("IsNewVersionOf"), CITATION("Cites");
|
DATASOURCE_PROVIDED_BY_ORGANIZATION ("isProvidedBy"),
|
||||||
|
ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"),
|
||||||
|
SUPPLEMENT(
|
||||||
|
"IsSupplementedBy"),
|
||||||
|
DOCUMENTS(
|
||||||
|
"IsDocumentedBy"),
|
||||||
|
PART("IsPartOf"),
|
||||||
|
VERSION("IsNewVersionOf"),
|
||||||
|
CITATION("Cites");
|
||||||
|
|
||||||
public final String label;
|
public final String label;
|
||||||
|
|
||||||
|
|
|
@ -14,9 +14,9 @@ public class Relations implements Serializable {
|
||||||
@JsonProperty("relation_type")
|
@JsonProperty("relation_type")
|
||||||
private String relation_type;
|
private String relation_type;
|
||||||
@JsonProperty("product_list")
|
@JsonProperty("product_list")
|
||||||
private List<String> product_list;
|
private List<MinProduct> product_list;
|
||||||
|
|
||||||
public static Relations newInstance(String relClass, List<String> target) {
|
public static Relations newInstance(String relClass, List<MinProduct> target) {
|
||||||
Relations r = new Relations();
|
Relations r = new Relations();
|
||||||
r.relation_type = relClass;
|
r.relation_type = relClass;
|
||||||
r.product_list = target;
|
r.product_list = target;
|
||||||
|
@ -31,11 +31,11 @@ public class Relations implements Serializable {
|
||||||
this.relation_type = relation_type;
|
this.relation_type = relation_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getProduct_list() {
|
public List<MinProduct> getProduct_list() {
|
||||||
return product_list;
|
return product_list;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setProduct_list(List<String> product_list) {
|
public void setProduct_list(List<MinProduct> product_list) {
|
||||||
this.product_list = product_list;
|
this.product_list = product_list;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,11 +23,12 @@ public class ResearchProduct implements Serializable {
|
||||||
private List<Contribution> contributions;
|
private List<Contribution> contributions;
|
||||||
private List<Manifestation> manifestations;
|
private List<Manifestation> manifestations;
|
||||||
@JsonProperty("relevant_organizations")
|
@JsonProperty("relevant_organizations")
|
||||||
private List<String> relevant_organizations;
|
private List<MinOrganization> relevant_organizations;
|
||||||
private List<String> funding;
|
private List<MinGrant> funding;
|
||||||
@JsonProperty("related_products")
|
@JsonProperty("related_products")
|
||||||
private List<Relations> related_products;
|
private List<Relations> related_products;
|
||||||
|
|
||||||
|
|
||||||
public String getLocal_identifier() {
|
public String getLocal_identifier() {
|
||||||
return local_identifier;
|
return local_identifier;
|
||||||
}
|
}
|
||||||
|
@ -92,19 +93,19 @@ public class ResearchProduct implements Serializable {
|
||||||
this.manifestations = manifestations;
|
this.manifestations = manifestations;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getRelevant_organizations() {
|
public List<MinOrganization> getRelevant_organizations() {
|
||||||
return relevant_organizations;
|
return relevant_organizations;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setRelevant_organizations(List<String> relevant_organizations) {
|
public void setRelevant_organizations(List<MinOrganization> relevant_organizations) {
|
||||||
this.relevant_organizations = relevant_organizations;
|
this.relevant_organizations = relevant_organizations;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getFunding() {
|
public List<MinGrant> getFunding() {
|
||||||
return funding;
|
return funding;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setFunding(List<String> funding) {
|
public void setFunding(List<MinGrant> funding) {
|
||||||
this.funding = funding;
|
this.funding = funding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,14 +8,14 @@ import java.io.Serializable;
|
||||||
* @Date 16/02/24
|
* @Date 16/02/24
|
||||||
*/
|
*/
|
||||||
public class ResultTopic implements Serializable {
|
public class ResultTopic implements Serializable {
|
||||||
private String topic;
|
private MinTopic topic;
|
||||||
private Provenance provenance;
|
private Provenance provenance;
|
||||||
|
|
||||||
public String getTopic() {
|
public MinTopic getTopic() {
|
||||||
return topic;
|
return topic;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTopic(String topic) {
|
public void setTopic(MinTopic topic) {
|
||||||
this.topic = topic;
|
this.topic = topic;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,10 +7,16 @@ import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.RelationType;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
import org.apache.spark.sql.SaveMode;
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
@ -22,6 +28,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
import eu.dnetlib.dhp.skgif.model.Identifier;
|
import eu.dnetlib.dhp.skgif.model.Identifier;
|
||||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
|
@ -64,60 +71,82 @@ public class DumpDatasource implements Serializable {
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, outputPath + "Datasources");
|
Utils.removeOutputDir(spark, outputPath + "Datasources");
|
||||||
|
|
||||||
mapDatasource(spark, inputPath, outputPath);
|
mapDatasource(spark, inputPath, outputPath, workingDir);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void mapDatasource(SparkSession spark, String inputPath, String outputPath) {
|
private static void mapDatasource(SparkSession spark, String inputPath, String outputPath, String workingDir) {
|
||||||
Utils
|
Dataset<Relation> relation = Utils.readPath(spark, inputPath + "relation", Relation.class)
|
||||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference())
|
||||||
.filter(
|
.filter((FilterFunction<Relation>) r -> r.getRelClass().equalsIgnoreCase(RelationType.DATASOURCE_PROVIDED_BY_ORGANIZATION.label));
|
||||||
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
|
|
||||||
&& !d.getDataInfo().getDeletedbyinference())
|
|
||||||
.map((MapFunction<Datasource, eu.dnetlib.dhp.skgif.model.Datasource>) d -> {
|
|
||||||
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
|
|
||||||
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
|
|
||||||
datasource
|
|
||||||
.setIdentifiers(
|
|
||||||
d
|
|
||||||
.getPid()
|
|
||||||
.stream()
|
|
||||||
.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
|
|
||||||
datasource.setName(d.getOfficialname().getValue());
|
Dataset<EncloseMinElement> eme = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||||
datasource.setSubmission_policy_url(d.getSubmissionpolicyurl());
|
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||||
datasource
|
|
||||||
.setJurisdiction(
|
Dataset<Datasource> datasourceDataset = Utils
|
||||||
Optional
|
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||||
.ofNullable(d.getJurisdiction())
|
.filter(
|
||||||
.map(v -> v.getClassid())
|
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
|
||||||
.orElse(new String()));
|
&& !d.getDataInfo().getDeletedbyinference());
|
||||||
datasource.setPreservation_policy_url(d.getPreservationpolicyurl());
|
Dataset<Tuple2<String, EncloseMinElement>> datasourceOrganization = relation.joinWith(eme, relation.col("target").equalTo(eme.col("enclosedEntityId")))
|
||||||
datasource.setVersion_control(d.getVersioncontrol());
|
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(t2._1().getSource(), t2._2()), Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||||
|
|
||||||
|
datasourceDataset.joinWith(datasourceOrganization, datasourceDataset.col("id").equalTo(datasourceOrganization.col("_1")), "left")
|
||||||
|
.groupByKey((MapFunction<Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(), Encoders.STRING() )
|
||||||
|
.mapGroups((MapGroupsFunction<String, Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, eu.dnetlib.dhp.skgif.model.Datasource>) (k,vs) -> {
|
||||||
|
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
|
||||||
|
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
|
||||||
|
Datasource d = first._1();
|
||||||
|
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
|
||||||
|
datasource
|
||||||
|
.setIdentifiers(
|
||||||
|
d
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
|
datasource.setName(d.getOfficialname().getValue());
|
||||||
|
datasource.setSubmission_policy_url(d.getSubmissionpolicyurl());
|
||||||
|
datasource
|
||||||
|
.setJurisdiction(
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getJurisdiction())
|
||||||
|
.map(v -> v.getClassid())
|
||||||
|
.orElse(new String()));
|
||||||
|
datasource.setPreservation_policy_url(d.getPreservationpolicyurl());
|
||||||
|
datasource.setVersion_control(d.getVersioncontrol());
|
||||||
|
|
||||||
|
datasource
|
||||||
|
.setData_source_classification(
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getEoscdatasourcetype())
|
||||||
|
.map(v -> v.getClassname())
|
||||||
|
.orElse(new String()));
|
||||||
|
datasource.setResearch_product_type(getEoscProductType(d.getResearchentitytypes()));
|
||||||
|
datasource.setThematic(d.getThematic());
|
||||||
|
datasource
|
||||||
|
.setResearch_product_access_policy(
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getDatabaseaccesstype())
|
||||||
|
.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
|
||||||
|
.orElse(new ArrayList<>()));
|
||||||
|
datasource
|
||||||
|
.setResearch_product_metadata_access_policy(
|
||||||
|
Optional
|
||||||
|
.ofNullable(d.getResearchproductmetadataaccesspolicies())
|
||||||
|
.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
|
||||||
|
.orElse(new ArrayList<>()));
|
||||||
|
if(Optional.ofNullable(first._2()).isPresent()){
|
||||||
|
List<MinOrganization> organizations = new ArrayList<>();
|
||||||
|
organizations.add(first._2()._2().getMinOrganization());
|
||||||
|
vs.forEachRemaining(org -> organizations.add(org._2()._2().getMinOrganization()));
|
||||||
|
datasource.setOrganization(organizations);
|
||||||
|
}
|
||||||
|
return datasource;
|
||||||
|
|
||||||
|
}, Encoders.bean( eu.dnetlib.dhp.skgif.model.Datasource.class))
|
||||||
|
|
||||||
datasource
|
|
||||||
.setData_source_classification(
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getEoscdatasourcetype())
|
|
||||||
.map(v -> v.getClassname())
|
|
||||||
.orElse(new String()));
|
|
||||||
datasource.setResearch_product_type(getEoscProductType(d.getResearchentitytypes()));
|
|
||||||
datasource.setThematic(d.getThematic());
|
|
||||||
datasource
|
|
||||||
.setResearch_product_access_policy(
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getDatabaseaccesstype())
|
|
||||||
.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
|
|
||||||
.orElse(new ArrayList<>()));
|
|
||||||
datasource
|
|
||||||
.setResearch_product_metadata_access_policy(
|
|
||||||
Optional
|
|
||||||
.ofNullable(d.getResearchproductmetadataaccesspolicies())
|
|
||||||
.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
|
|
||||||
.orElse(new ArrayList<>()));
|
|
||||||
return datasource;
|
|
||||||
}, Encoders.bean(eu.dnetlib.dhp.skgif.model.Datasource.class))
|
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
|
|
@ -10,6 +10,8 @@ import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.*;
|
||||||
import org.apache.avro.generic.GenericData;
|
import org.apache.avro.generic.GenericData;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -29,10 +31,6 @@ import org.slf4j.LoggerFactory;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.skgif.model.Grant;
|
|
||||||
import eu.dnetlib.dhp.skgif.model.Identifier;
|
|
||||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
|
||||||
import eu.dnetlib.dhp.skgif.model.RelationType;
|
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -76,11 +74,11 @@ public class DumpGrant implements Serializable {
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, outputPath + "Grant");
|
Utils.removeOutputDir(spark, outputPath + "Grant");
|
||||||
|
|
||||||
mapGrants(spark, inputPath, outputPath);
|
mapGrants(spark, inputPath, outputPath, workingDir);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void mapGrants(SparkSession spark, String inputPath, String outputPath) {
|
private static void mapGrants(SparkSession spark, String inputPath, String outputPath, String workingDir) {
|
||||||
Dataset<Project> projects = Utils
|
Dataset<Project> projects = Utils
|
||||||
.readPath(spark, inputPath + "project", Project.class)
|
.readPath(spark, inputPath + "project", Project.class)
|
||||||
.filter(
|
.filter(
|
||||||
|
@ -92,78 +90,84 @@ public class DumpGrant implements Serializable {
|
||||||
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||||
!r.getDataInfo().getInvisible() &&
|
!r.getDataInfo().getInvisible() &&
|
||||||
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
|
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
|
||||||
|
Dataset<EncloseMinElement> eme = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||||
|
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||||
|
|
||||||
|
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
|
||||||
|
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(t2._1().getTarget(), t2._2()), Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||||
|
|
||||||
projects
|
projects
|
||||||
.joinWith(relations, projects.col("id").equalTo(relations.col("target")), "left")
|
.joinWith(partecipantOrganization, projects.col("id").equalTo(partecipantOrganization.col("_1")), "left")
|
||||||
.groupByKey((MapFunction<Tuple2<Project, Relation>, String>) t2 -> t2._1().getId(), Encoders.STRING())
|
.groupByKey((MapFunction<Tuple2<Project, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(), Encoders.STRING() )
|
||||||
.mapGroups((MapGroupsFunction<String, Tuple2<Project, Relation>, Grant>) (k, v) -> {
|
.mapGroups((MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k,v) -> {
|
||||||
Grant g = new Grant();
|
Grant g = new Grant();
|
||||||
Tuple2<Project, Relation> first = v.next();
|
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
|
||||||
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
|
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
|
||||||
g.setIdentifiers(getProjectIdentifier(first._1()));
|
g.setIdentifiers(getProjectIdentifier(first._1()));
|
||||||
g.setTitle(first._1().getTitle().getValue());
|
g.setTitle(first._1().getTitle().getValue());
|
||||||
g
|
g
|
||||||
.setSummary(
|
.setSummary(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(first._1().getSummary())
|
.ofNullable(first._1().getSummary())
|
||||||
.map(value -> value.getValue())
|
.map(value -> value.getValue())
|
||||||
.orElse(new String()));
|
.orElse(new String()));
|
||||||
g
|
g
|
||||||
.setAcronym(
|
.setAcronym(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(first._1().getAcronym())
|
.ofNullable(first._1().getAcronym())
|
||||||
.map(value -> value.getValue())
|
.map(value -> value.getValue())
|
||||||
.orElse(new String()));
|
.orElse(new String()));
|
||||||
g.setFunder(getFunderName(first._1().getFundingtree().get(0).getValue()));
|
g.setFunder(Utils.getFunderName(first._1().getFundingtree().get(0).getValue()));
|
||||||
// * private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
|
// * private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
|
||||||
g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
|
g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
|
||||||
g
|
g
|
||||||
.setCurrency(
|
.setCurrency(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(first._1().getCurrency())
|
.ofNullable(first._1().getCurrency())
|
||||||
.map(value -> value.getValue())
|
.map(value -> value.getValue())
|
||||||
.orElse(new String()));
|
.orElse(new String()));
|
||||||
g
|
g
|
||||||
.setFunded_amount(
|
.setFunded_amount(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(first._1().getFundedamount())
|
.ofNullable(first._1().getFundedamount())
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
g
|
g
|
||||||
.setKeywords(
|
.setKeywords(
|
||||||
first
|
first
|
||||||
._1()
|
._1()
|
||||||
.getSubjects()
|
.getSubjects()
|
||||||
.stream()
|
.stream()
|
||||||
.map(s -> s.getValue())
|
.map(s -> s.getValue())
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
g
|
g
|
||||||
.setStart_date(
|
.setStart_date(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(first._1().getStartdate())
|
.ofNullable(first._1().getStartdate())
|
||||||
.map(value -> value.getValue())
|
.map(value -> value.getValue())
|
||||||
.orElse(new String()));
|
.orElse(new String()));
|
||||||
g
|
g
|
||||||
.setEnd_date(
|
.setEnd_date(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(first._1().getEnddate())
|
.ofNullable(first._1().getEnddate())
|
||||||
.map(value -> value.getValue())
|
.map(value -> value.getValue())
|
||||||
.orElse(new String()));
|
.orElse(new String()));
|
||||||
g
|
g
|
||||||
.setWebsite(
|
.setWebsite(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(first._1().getWebsiteurl())
|
.ofNullable(first._1().getWebsiteurl())
|
||||||
.map(value -> value.getValue())
|
.map(value -> value.getValue())
|
||||||
.orElse(new String()));
|
.orElse(new String()));
|
||||||
if (Optional.ofNullable(first._2()).isPresent()) {
|
if (Optional.ofNullable(first._2()).isPresent()) {
|
||||||
List<String> relevantOrganizatios = new ArrayList<>();
|
List<MinOrganization> relevantOrganizatios = new ArrayList<>();
|
||||||
relevantOrganizatios.add(Utils.getIdentifier(Prefixes.ORGANIZATION, first._2().getSource()));
|
relevantOrganizatios.add(first._2()._2().getMinOrganization());
|
||||||
v
|
v
|
||||||
.forEachRemaining(
|
.forEachRemaining(
|
||||||
t2 -> relevantOrganizatios
|
t2 -> relevantOrganizatios
|
||||||
.add(Utils.getIdentifier(Prefixes.ORGANIZATION, t2._2().getSource())));
|
.add(t2._2()._2().getMinOrganization()));
|
||||||
g.setBeneficiaries(relevantOrganizatios);
|
g.setBeneficiaries(relevantOrganizatios);
|
||||||
}
|
}
|
||||||
return g;
|
return g;
|
||||||
}, Encoders.bean(Grant.class))
|
}, Encoders.bean(Grant.class) )
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
|
@ -181,15 +185,7 @@ public class DumpGrant implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String getFunderName(String fundingtree) throws DocumentException {
|
|
||||||
final Document doc;
|
|
||||||
|
|
||||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
|
||||||
// f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
|
||||||
return ((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText();
|
|
||||||
// f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<Identifier> getProjectIdentifier(Project project) throws DocumentException {
|
private static List<Identifier> getProjectIdentifier(Project project) throws DocumentException {
|
||||||
List<Identifier> identifiers = new ArrayList<>();
|
List<Identifier> identifiers = new ArrayList<>();
|
||||||
|
@ -202,7 +198,7 @@ public class DumpGrant implements Serializable {
|
||||||
.add(
|
.add(
|
||||||
Identifier
|
Identifier
|
||||||
.newInstance(
|
.newInstance(
|
||||||
getFunderName(project.getFundingtree().get(0).getValue()), project.getCode().getValue()));
|
Utils.getFunderName(project.getFundingtree().get(0).getValue()), project.getCode().getValue()));
|
||||||
return identifiers;
|
return identifiers;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.*;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
@ -13,17 +15,16 @@ import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||||
import org.apache.spark.sql.*;
|
import org.apache.spark.sql.*;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.PartialResearchProduct;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct;
|
|
||||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
import eu.dnetlib.dhp.skgif.model.*;
|
import eu.dnetlib.dhp.skgif.model.*;
|
||||||
import eu.dnetlib.dhp.skgif.model.AccessRight;
|
import eu.dnetlib.dhp.skgif.model.AccessRight;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
@ -79,6 +80,9 @@ public class DumpResult implements Serializable {
|
||||||
public static <R extends Result> void mapResult(SparkSession spark, String inputPath,
|
public static <R extends Result> void mapResult(SparkSession spark, String inputPath,
|
||||||
String workingDir, String outputPath) {
|
String workingDir, String outputPath) {
|
||||||
|
|
||||||
|
// emit the snippet of the entities to be included in other entities for the denormalization
|
||||||
|
// emitMinEntities(spark, inputPath, workingDir);
|
||||||
|
|
||||||
// selection of the relevant relations from result type to other entity. Only the semantically relevant ones are
|
// selection of the relevant relations from result type to other entity. Only the semantically relevant ones are
|
||||||
// considered
|
// considered
|
||||||
selectRelations(spark, inputPath, workingDir);
|
selectRelations(spark, inputPath, workingDir);
|
||||||
|
@ -91,10 +95,50 @@ public class DumpResult implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// private static void emitMinEntities(SparkSession spark, String inputPath, String workingDir) {
|
||||||
|
//
|
||||||
|
// Utils.readPath(spark, inputPath + "organization", Organization.class)
|
||||||
|
// .filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
|
||||||
|
// .map((MapFunction<Organization, EncloseMinElement>) o -> {
|
||||||
|
// EncloseMinElement eme = new EncloseMinElement();
|
||||||
|
// eme.setEnclosedEntityId(o.getId());
|
||||||
|
// eme.setMinOrganization(Utils.getMinOrganization(o));
|
||||||
|
// return eme;
|
||||||
|
// }, Encoders.bean(EncloseMinElement.class) )
|
||||||
|
// .write()
|
||||||
|
// .mode(SaveMode.Overwrite)
|
||||||
|
// .option("compression","gzip")
|
||||||
|
// .json(workingDir + "encloseMinEntity");
|
||||||
|
//
|
||||||
|
// Utils.readPath(spark, inputPath + "project", Project.class)
|
||||||
|
// .filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
|
||||||
|
// .map((MapFunction<Project,EncloseMinElement>) p -> {
|
||||||
|
// EncloseMinElement eme = new EncloseMinElement();
|
||||||
|
// eme.setEnclosedEntityId(p.getId());
|
||||||
|
// eme.setMinGrant(Utils.getMinGrant(p));
|
||||||
|
// return eme;
|
||||||
|
// }, Encoders.bean(EncloseMinElement.class))
|
||||||
|
// .write()
|
||||||
|
// .mode(SaveMode.Append)
|
||||||
|
// .option("compression","gzip")
|
||||||
|
// .json(workingDir + "encloseMinEntity");
|
||||||
|
//
|
||||||
|
// getMinProduct(spark, inputPath + "publication" , Publication.class)
|
||||||
|
// .union(getMinProduct(spark, inputPath + "dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class))
|
||||||
|
// .union(getMinProduct(spark, inputPath + "software", Software.class))
|
||||||
|
// .union(getMinProduct(spark, inputPath + "otherresearchproduct", OtherResearchProduct.class))
|
||||||
|
// .write()
|
||||||
|
// .mode(SaveMode.Append)
|
||||||
|
// .option("compression","gzip")
|
||||||
|
// .json(workingDir + "encloseMinEntity");
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
|
||||||
private static void getRelationAndManifestation(SparkSession spark, String workingDir, String inputPath) {
|
private static void getRelationAndManifestation(SparkSession spark, String workingDir, String inputPath) {
|
||||||
Dataset<RelationPerProduct> aggRelations = Utils
|
Dataset<RelationPerProduct> aggRelations = Utils
|
||||||
.readPath(spark, workingDir + "aggrelation", RelationPerProduct.class);
|
.readPath(spark, workingDir + "aggrelation", RelationPerProduct.class);
|
||||||
aggRelations.count();
|
|
||||||
ModelSupport.entityTypes
|
ModelSupport.entityTypes
|
||||||
.keySet()
|
.keySet()
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -135,7 +179,8 @@ public class DumpResult implements Serializable {
|
||||||
(MapFunction<Tuple2<PartialResearchProduct, RelationPerProduct>, PartialResearchProduct>) t2 -> {
|
(MapFunction<Tuple2<PartialResearchProduct, RelationPerProduct>, PartialResearchProduct>) t2 -> {
|
||||||
PartialResearchProduct prp = t2._1();
|
PartialResearchProduct prp = t2._1();
|
||||||
if (Optional.ofNullable(t2._2()).isPresent()) {
|
if (Optional.ofNullable(t2._2()).isPresent()) {
|
||||||
prp.setRelated_products(t2._2().getRelatedProduct());
|
prp.setRelated_products(t2._2().getRelatedProduct().keySet()
|
||||||
|
.stream().map(key -> Relations.newInstance(key, t2._2().getRelatedProduct().get(key))).collect(Collectors.toList()));
|
||||||
prp.setRelevant_organizations(t2._2().getOrganizations());
|
prp.setRelevant_organizations(t2._2().getOrganizations());
|
||||||
prp.setFunding(t2._2().getFunding());
|
prp.setFunding(t2._2().getFunding());
|
||||||
}
|
}
|
||||||
|
@ -218,12 +263,12 @@ public class DumpResult implements Serializable {
|
||||||
if (Optional.ofNullable(t2._2()).isPresent()) {
|
if (Optional.ofNullable(t2._2()).isPresent()) {
|
||||||
manifestation.setBiblio(getBiblio(epm));
|
manifestation.setBiblio(getBiblio(epm));
|
||||||
if (Optional.ofNullable(t2._2().getJournal().getIssnPrinted()).isPresent())
|
if (Optional.ofNullable(t2._2().getJournal().getIssnPrinted()).isPresent())
|
||||||
manifestation.setVenue(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnPrinted()));
|
manifestation.setVenue(MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnPrinted()),t2._1().getJournal().getName()));
|
||||||
else if (Optional.ofNullable(t2._2().getJournal().getIssnOnline()).isPresent())
|
else if (Optional.ofNullable(t2._2().getJournal().getIssnOnline()).isPresent())
|
||||||
manifestation.setVenue(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()));
|
manifestation.setVenue(MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()),t2._1().getJournal().getName()));
|
||||||
}
|
}
|
||||||
manifestation
|
manifestation
|
||||||
.setHosting_datasource(Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()));
|
.setHosting_datasource(MinVenue.newInstance(Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()), epm.getInstance().getHostedby().getValue()));
|
||||||
|
|
||||||
return manifestation;
|
return manifestation;
|
||||||
}
|
}
|
||||||
|
@ -306,40 +351,56 @@ public class DumpResult implements Serializable {
|
||||||
r.getRelClass().equalsIgnoreCase(RelationType.PART.label) ||
|
r.getRelClass().equalsIgnoreCase(RelationType.PART.label) ||
|
||||||
r.getRelClass().equalsIgnoreCase(RelationType.VERSION.label) ||
|
r.getRelClass().equalsIgnoreCase(RelationType.VERSION.label) ||
|
||||||
r.getRelClass().equalsIgnoreCase(RelationType.CITATION.label));
|
r.getRelClass().equalsIgnoreCase(RelationType.CITATION.label));
|
||||||
|
Dataset<EncloseMinElement> encloseMinEntity = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class);
|
||||||
|
|
||||||
relation
|
relation.joinWith(encloseMinEntity, relation.col("target").equalTo(encloseMinEntity.col("enclosedEntityId")))
|
||||||
.groupByKey((MapFunction<Relation, String>) r -> r.getSource(), Encoders.STRING())
|
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, EncloseMinElement>) t2 ->
|
||||||
.mapGroups((MapGroupsFunction<String, Relation, RelationPerProduct>) (k, v) -> {
|
{
|
||||||
RelationPerProduct rpp = new RelationPerProduct();
|
EncloseMinElement eme = t2._2();
|
||||||
rpp.setResultId(k);
|
eme.setResultId(t2._1().getSource());
|
||||||
Map<String, List<String>> remainignRelations = new HashMap<>();
|
eme.setSemantics(t2._1().getRelClass());
|
||||||
while (v.hasNext()) {
|
return eme;
|
||||||
Relation rel = v.next();
|
}, Encoders.bean(EncloseMinElement.class))
|
||||||
String target = rel.getTarget();
|
.groupByKey((MapFunction<EncloseMinElement, String>) eme -> eme.getResultId(), Encoders.STRING())
|
||||||
String relClass = rel.getRelClass();
|
.mapGroups((MapGroupsFunction<String, EncloseMinElement, RelationPerProduct>) (k,v) ->
|
||||||
switch (rel.getRelClass().toLowerCase()) {
|
{
|
||||||
case "hasauthorinstitution":
|
RelationPerProduct rpp = new RelationPerProduct();
|
||||||
rpp.getOrganizations().add(Utils.getIdentifier(Prefixes.ORGANIZATION, target));
|
rpp.setResultId(k);
|
||||||
break;
|
insertEnclosedElement(rpp,v.next());
|
||||||
case "isproducedby":
|
v.forEachRemaining(e -> insertEnclosedElement(rpp,e));
|
||||||
rpp.getFunding().add(Utils.getIdentifier(Prefixes.GRANT, target));
|
return rpp;
|
||||||
break;
|
}, Encoders.bean(RelationPerProduct.class))
|
||||||
default:
|
|
||||||
if (!remainignRelations.keySet().contains(relClass))
|
|
||||||
remainignRelations.put(relClass, new ArrayList<>());
|
|
||||||
remainignRelations
|
|
||||||
.get(relClass)
|
|
||||||
.add(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, target));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (String key : remainignRelations.keySet())
|
|
||||||
rpp.getRelatedProduct().add(Relations.newInstance(key, remainignRelations.get(key)));
|
|
||||||
return rpp;
|
|
||||||
}, Encoders.bean(RelationPerProduct.class))
|
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(workingDir + "/aggrelation");
|
.json(workingDir + "/aggrelation");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void insertEnclosedElement(RelationPerProduct rpp, EncloseMinElement element) {
|
||||||
|
if(Optional.ofNullable(element.getMinOrganization()).isPresent())
|
||||||
|
rpp.getOrganizations().add(element.getMinOrganization());
|
||||||
|
if(Optional.ofNullable(element.getMinGrant()).isPresent())
|
||||||
|
rpp.getFunding().add(element.getMinGrant());
|
||||||
|
if(Optional.ofNullable(element.getMinProduct()).isPresent()){
|
||||||
|
String sem = element.getSemantics();
|
||||||
|
if(!rpp.getRelatedProduct().containsKey(sem))
|
||||||
|
rpp.getRelatedProduct().put(sem, new ArrayList<>());
|
||||||
|
rpp.getRelatedProduct().get(sem).add(element.getMinProduct());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static <R extends Result> Dataset<EncloseMinElement> getMinProduct(SparkSession spark, String inputPath, Class<R> clazz) {
|
||||||
|
return Utils.readPath(spark, inputPath , clazz)
|
||||||
|
.filter((FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||||
|
!r.getDataInfo().getInvisible())
|
||||||
|
.map((MapFunction<R, EncloseMinElement>) r -> {
|
||||||
|
EncloseMinElement eme = new EncloseMinElement();
|
||||||
|
eme.setEnclosedEntityId(r.getId());
|
||||||
|
eme.setMinProduct(Utils.getMinProduct(r));
|
||||||
|
return eme;
|
||||||
|
}, Encoders.bean(EncloseMinElement.class));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,8 @@ import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
|
@ -27,21 +29,20 @@ import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
import eu.dnetlib.dhp.skgif.model.*;
|
import eu.dnetlib.dhp.skgif.model.*;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
* @Date 06/02/24
|
* @Date 06/02/24
|
||||||
*/
|
*/
|
||||||
public class EmitFromResults implements Serializable {
|
public class EmitFromEntities implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(EmitFromResults.class);
|
private static final Logger log = LoggerFactory.getLogger(EmitFromEntities.class);
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
EmitFromResults.class
|
EmitFromEntities.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/oa/graph/dump/emit_biblio_parameters.json"));
|
"/eu/dnetlib/dhp/oa/graph/dump/emit_biblio_parameters.json"));
|
||||||
|
|
||||||
|
@ -71,18 +72,108 @@ public class EmitFromResults implements Serializable {
|
||||||
spark -> {
|
spark -> {
|
||||||
Utils.removeOutputDir(spark, outputPath);
|
Utils.removeOutputDir(spark, outputPath);
|
||||||
emitFromResult(spark, inputPath, outputPath, workingDir);
|
emitFromResult(spark, inputPath, outputPath, workingDir);
|
||||||
|
emitFromDatasource(spark, inputPath, workingDir);
|
||||||
|
emitFromOrganization(spark, inputPath, workingDir);
|
||||||
|
emitFromProject(spark, inputPath, workingDir);
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// for each result emit the id + the journal (if any) + the instance + the hosted-by of the instance
|
private static void emitFromProject(SparkSession spark, String inputPath, String workingDir) {
|
||||||
|
Utils.readPath(spark, inputPath + "project" , Project.class)
|
||||||
|
.filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
|
||||||
|
.map((MapFunction<Project, EncloseMinElement>) p->{
|
||||||
|
EncloseMinElement eme = new EncloseMinElement();
|
||||||
|
eme.setEnclosedEntityId(p.getId());
|
||||||
|
eme.setMinGrant(Utils.getMinGrant(p));
|
||||||
|
return eme;}, Encoders.bean(EncloseMinElement.class) )
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression","gzip")
|
||||||
|
.json(workingDir + "/minEntity");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void emitFromOrganization(SparkSession spark, String inputPath, String workingDir) {
|
||||||
|
Utils.readPath(spark, inputPath + "organization", Organization.class)
|
||||||
|
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
|
||||||
|
.map((MapFunction<Organization, EncloseMinElement>) o -> {
|
||||||
|
EncloseMinElement eme = new EncloseMinElement();
|
||||||
|
eme.setMinOrganization(Utils.getMinOrganization(o));
|
||||||
|
eme.setEnclosedEntityId(o.getId());
|
||||||
|
return eme;},
|
||||||
|
Encoders.bean(EncloseMinElement.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression","gzip")
|
||||||
|
.json(workingDir + "/minEntity");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void emitFromDatasource(SparkSession spark, String inputPath, String workingDir) {
|
||||||
|
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||||
|
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||||
|
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
|
||||||
|
EncloseMinElement eme = new EncloseMinElement();
|
||||||
|
eme.setMinDatsource(MinVenue.newInstance(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
|
||||||
|
eme.setEnclosedEntityId(d.getId());
|
||||||
|
return eme;
|
||||||
|
}
|
||||||
|
, Encoders.bean(EncloseMinElement.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression","gzip")
|
||||||
|
.json(workingDir + "/minEntity");
|
||||||
|
|
||||||
|
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||||
|
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||||
|
.filter((FilterFunction<Datasource>) d-> d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"))
|
||||||
|
.map((MapFunction<Datasource, EncloseMinElement>) d-> {
|
||||||
|
EncloseMinElement eme = new EncloseMinElement();
|
||||||
|
eme.setEnclosedEntityId(d.getId());
|
||||||
|
if(Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
|
||||||
|
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()), d.getOfficialname().getValue()));
|
||||||
|
if(Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
|
||||||
|
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()), d.getOfficialname().getValue()));
|
||||||
|
return null;
|
||||||
|
},Encoders.bean(EncloseMinElement.class) )
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression","gzip")
|
||||||
|
.json(workingDir + "/minEntity");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// for each result emit the id + the journal (if any) + the instance + the hosted-by of the instance
|
||||||
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
|
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
|
||||||
String workingDir) {
|
String workingDir) {
|
||||||
emitManifestation(spark, inputPath, workingDir);
|
emitManifestation(spark, inputPath, workingDir);
|
||||||
emitPerson(spark, inputPath, outputPath, workingDir);
|
emitPerson(spark, inputPath, outputPath, workingDir);
|
||||||
emitTopic(spark, inputPath, outputPath, workingDir);
|
emitTopic(spark, inputPath, outputPath, workingDir);
|
||||||
|
emitMinProduct(spark, inputPath, workingDir);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static <R extends Result> void emitMinProduct(SparkSession spark, String inputPath, String workingDir) {
|
||||||
|
Utils.removeOutputDir(spark, workingDir + "minEntity");
|
||||||
|
ModelSupport.entityTypes.keySet().forEach(e -> {
|
||||||
|
if (ModelSupport.isResult(e)) {
|
||||||
|
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||||
|
|
||||||
|
Utils
|
||||||
|
.readPath(spark, inputPath + e.name(), resultClazz)
|
||||||
|
.map((MapFunction<R, MinProduct>) p -> Utils.getMinProduct(p), Encoders.bean(MinProduct.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Append)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(workingDir + "/minEntity");
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
private static <R extends Result> void emitTopic(SparkSession spark, String inputPath, String outputPath,
|
private static <R extends Result> void emitTopic(SparkSession spark, String inputPath, String outputPath,
|
||||||
String workingDir) {
|
String workingDir) {
|
||||||
ModelSupport.entityTypes.keySet().forEach(e -> {
|
ModelSupport.entityTypes.keySet().forEach(e -> {
|
||||||
|
@ -213,16 +304,10 @@ public class EmitFromResults implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <R extends Result> void emitManifestation(SparkSession spark, String inputPath, String workingDir) {
|
private static <R extends Result> void emitManifestation(SparkSession spark, String inputPath, String workingDir) {
|
||||||
Dataset<Datasource> datasource = Utils
|
|
||||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
|
||||||
.filter(
|
|
||||||
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
|
|
||||||
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
|
|
||||||
|
|
||||||
ModelSupport.entityTypes.keySet().forEach(e -> {
|
ModelSupport.entityTypes.keySet().forEach(e -> {
|
||||||
if (ModelSupport.isResult(e)) {
|
if (ModelSupport.isResult(e)) {
|
||||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||||
// Dataset<EmitPerManifestation> emitformanifestation =
|
|
||||||
Utils
|
Utils
|
||||||
.readPath(spark, inputPath + e.name(), resultClazz)
|
.readPath(spark, inputPath + e.name(), resultClazz)
|
||||||
.flatMap((FlatMapFunction<R, EmitPerManifestation>) p -> p.getInstance().stream().map(i -> {
|
.flatMap((FlatMapFunction<R, EmitPerManifestation>) p -> p.getInstance().stream().map(i -> {
|
||||||
|
@ -245,7 +330,7 @@ public class EmitFromResults implements Serializable {
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(workingDir + e.name() + "/manifestation");
|
.json(workingDir + e.name() + "/manifestation");
|
||||||
;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,14 +54,20 @@ public class ResultMapper implements Serializable {
|
||||||
count += 1;
|
count += 1;
|
||||||
Contribution contribution = new Contribution();
|
Contribution contribution = new Contribution();
|
||||||
Tuple2<String, Boolean> orcid = Utils.getOrcid(a.getPid());
|
Tuple2<String, Boolean> orcid = Utils.getOrcid(a.getPid());
|
||||||
|
MinPerson minPerson = new MinPerson();
|
||||||
|
minPerson.setFull_name(a.getFullname());
|
||||||
if (orcid != null) {
|
if (orcid != null) {
|
||||||
contribution.setPerson(Utils.getIdentifier(Prefixes.PERSON, orcid._1() + orcid._2()));
|
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.PERSON, orcid._1() + orcid._2()));
|
||||||
|
minPerson.setOrcid(orcid._1());
|
||||||
|
contribution.setPerson(minPerson);
|
||||||
} else {
|
} else {
|
||||||
if (Optional.ofNullable(a.getRank()).isPresent()) {
|
if (Optional.ofNullable(a.getRank()).isPresent()) {
|
||||||
|
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
|
||||||
contribution
|
contribution
|
||||||
.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
|
.setPerson(minPerson);
|
||||||
} else {
|
} else {
|
||||||
contribution.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
||||||
|
contribution.setPerson(minPerson);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -88,9 +94,12 @@ public class ResultMapper implements Serializable {
|
||||||
s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
||||||
.map(s -> {
|
.map(s -> {
|
||||||
ResultTopic topic = new ResultTopic();
|
ResultTopic topic = new ResultTopic();
|
||||||
|
MinTopic minTopic = new MinTopic();
|
||||||
|
minTopic.setLocal_identifier(Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
|
||||||
|
minTopic.setValue(s.getValue());
|
||||||
topic
|
topic
|
||||||
.setTopic(
|
.setTopic(minTopic
|
||||||
Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
|
);
|
||||||
if (Optional.ofNullable(s.getDataInfo()).isPresent()) {
|
if (Optional.ofNullable(s.getDataInfo()).isPresent()) {
|
||||||
Provenance provenance = new Provenance();
|
Provenance provenance = new Provenance();
|
||||||
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
|
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
|
||||||
|
|
|
@ -2,9 +2,16 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.io.StringReader;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
@ -17,6 +24,9 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -63,4 +73,64 @@ public class Utils implements Serializable {
|
||||||
return entity.label + DHPUtils.md5(id);
|
return entity.label + DHPUtils.md5(id);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String getFunderName(String fundingtree) throws DocumentException {
|
||||||
|
final Document doc;
|
||||||
|
|
||||||
|
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||||
|
// f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||||
|
return ((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText();
|
||||||
|
// f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MinOrganization getMinOrganization(Organization o) {
|
||||||
|
MinOrganization mo = new MinOrganization();
|
||||||
|
mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
||||||
|
mo.setName(o.getLegalname().getValue());
|
||||||
|
for(StructuredProperty pid : o.getPid()){
|
||||||
|
switch (pid.getQualifier().getClassid().toLowerCase()){
|
||||||
|
case "ror":
|
||||||
|
mo.setRor(pid.getValue());
|
||||||
|
break;
|
||||||
|
case "isni":
|
||||||
|
mo.setIsni(pid.getValue());
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return mo;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MinGrant getMinGrant(Project p) throws DocumentException {
|
||||||
|
MinGrant mg = new MinGrant();
|
||||||
|
mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
|
||||||
|
mg.setCode(p.getCode().getValue());
|
||||||
|
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
|
||||||
|
return mg;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <R extends Result> MinProduct getMinProduct(R r) {
|
||||||
|
MinProduct mp = new MinProduct();
|
||||||
|
mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
|
||||||
|
for (StructuredProperty title : r.getTitle()) {
|
||||||
|
if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) {
|
||||||
|
mp.setTitle(title.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (StructuredProperty pid : r.getPid()) {
|
||||||
|
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
||||||
|
case "doi":
|
||||||
|
mp.setDoi(pid.getValue());
|
||||||
|
break;
|
||||||
|
case "pmcid":
|
||||||
|
mp.setPmcid(pid.getValue());
|
||||||
|
break;
|
||||||
|
case "arxiv":
|
||||||
|
mp.setArxivid(pid.getValue());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return mp;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
 * Serializable bean holding a pair of string identifiers: an {@code originalIdentifier} and a
 * {@code localIdentifier}.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class Couple implements Serializable {
	private String originalIdentifier;
	private String localIdentifier;

	/**
	 * Creates a new pair with both identifiers already set.
	 *
	 * @param originalIdentifier the value for the original identifier
	 * @param localIdentifier    the value for the local identifier
	 * @return a new {@link Couple} carrying the two given values
	 */
	public static Couple newInstance(String originalIdentifier, String localIdentifier) {
		final Couple instance = new Couple();
		instance.setOriginalIdentifier(originalIdentifier);
		instance.setLocalIdentifier(localIdentifier);
		return instance;
	}

	/** @return the original identifier, or {@code null} if it was never set */
	public String getOriginalIdentifier() {
		return this.originalIdentifier;
	}

	/** @param originalIdentifier the original identifier to store */
	public void setOriginalIdentifier(String originalIdentifier) {
		this.originalIdentifier = originalIdentifier;
	}

	/** @return the local identifier, or {@code null} if it was never set */
	public String getLocalIdentifier() {
		return this.localIdentifier;
	}

	/** @param localIdentifier the local identifier to store */
	public void setLocalIdentifier(String localIdentifier) {
		this.localIdentifier = localIdentifier;
	}
}
|
|
@ -0,0 +1,87 @@
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinVenue;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 04/03/24
|
||||||
|
*/
|
||||||
|
public class EncloseMinElement implements Serializable {
|
||||||
|
private String resultId;
|
||||||
|
private String enclosedEntityId;
|
||||||
|
private MinOrganization minOrganization;
|
||||||
|
private MinVenue minVenue;
|
||||||
|
private MinVenue minDatsource;
|
||||||
|
private MinGrant minGrant;
|
||||||
|
private MinProduct minProduct;
|
||||||
|
private String semantics;
|
||||||
|
|
||||||
|
public MinVenue getMinVenue() {
|
||||||
|
return minVenue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMinVenue(MinVenue minVenue) {
|
||||||
|
this.minVenue = minVenue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MinVenue getMinDatsource() {
|
||||||
|
return minDatsource;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMinDatsource(MinVenue minDatsource) {
|
||||||
|
this.minDatsource = minDatsource;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSemantics() {
|
||||||
|
return semantics;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSemantics(String semantics) {
|
||||||
|
this.semantics = semantics;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getResultId() {
|
||||||
|
return resultId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setResultId(String resultId) {
|
||||||
|
this.resultId = resultId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEnclosedEntityId() {
|
||||||
|
return enclosedEntityId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEnclosedEntityId(String enclosedEntityId) {
|
||||||
|
this.enclosedEntityId = enclosedEntityId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MinOrganization getMinOrganization() {
|
||||||
|
return minOrganization;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMinOrganization(MinOrganization minOrganization) {
|
||||||
|
this.minOrganization = minOrganization;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MinGrant getMinGrant() {
|
||||||
|
return minGrant;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMinGrant(MinGrant minGrant) {
|
||||||
|
this.minGrant = minGrant;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MinProduct getMinProduct() {
|
||||||
|
return minProduct;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMinProduct(MinProduct minProduct) {
|
||||||
|
this.minProduct = minProduct;
|
||||||
|
}
|
||||||
|
}
|
|
@ -3,9 +3,13 @@ package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.skgif.model.Relations;
|
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||||
|
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
|
@ -14,14 +18,14 @@ import eu.dnetlib.dhp.skgif.model.Relations;
|
||||||
public class RelationPerProduct implements Serializable {
|
public class RelationPerProduct implements Serializable {
|
||||||
|
|
||||||
private String resultId;
|
private String resultId;
|
||||||
private List<String> organizations;
|
private List<MinOrganization> organizations;
|
||||||
private List<String> funding;
|
private List<MinGrant> funding;
|
||||||
private List<Relations> relatedProduct;
|
private Map<String,List<MinProduct>> relatedProduct;
|
||||||
|
|
||||||
public RelationPerProduct() {
|
public RelationPerProduct() {
|
||||||
organizations = new ArrayList<>();
|
organizations = new ArrayList<>();
|
||||||
funding = new ArrayList<>();
|
funding = new ArrayList<>();
|
||||||
relatedProduct = new ArrayList<>();
|
relatedProduct = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getResultId() {
|
public String getResultId() {
|
||||||
|
@ -32,27 +36,27 @@ public class RelationPerProduct implements Serializable {
|
||||||
this.resultId = resultId;
|
this.resultId = resultId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getOrganizations() {
|
public List<MinOrganization> getOrganizations() {
|
||||||
return organizations;
|
return organizations;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setOrganizations(List<String> organizations) {
|
public void setOrganizations(List<MinOrganization> organizations) {
|
||||||
this.organizations = organizations;
|
this.organizations = organizations;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getFunding() {
|
public List<MinGrant> getFunding() {
|
||||||
return funding;
|
return funding;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setFunding(List<String> funding) {
|
public void setFunding(List<MinGrant> funding) {
|
||||||
this.funding = funding;
|
this.funding = funding;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Relations> getRelatedProduct() {
|
public Map<String, List<MinProduct>> getRelatedProduct() {
|
||||||
return relatedProduct;
|
return relatedProduct;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setRelatedProduct(List<Relations> relatedProduct) {
|
public void setRelatedProduct(Map<String, List<MinProduct>> relatedProduct) {
|
||||||
this.relatedProduct = relatedProduct;
|
this.relatedProduct = relatedProduct;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,7 +71,7 @@
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Extraction</name>
|
<name>Extraction</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromResults</class>
|
<class>eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromEntities</class>
|
||||||
<jar>dump-${projectVersion}.jar</jar>
|
<jar>dump-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-memory=${sparkExecutorMemory}
|
--executor-memory=${sparkExecutorMemory}
|
||||||
|
|
|
@ -267,14 +267,15 @@ public class DumpResultTest {
|
||||||
.anyMatch(
|
.anyMatch(
|
||||||
t -> t
|
t -> t
|
||||||
.getTopic()
|
.getTopic()
|
||||||
|
.getValue()
|
||||||
.equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
|
.equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
|
||||||
|
|
||||||
// check contributions
|
// check contributions
|
||||||
Assertions.assertEquals(4, rp.getContributions().size());
|
Assertions.assertEquals(4, rp.getContributions().size());
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(3, rp.getContributions().stream().filter(c -> c.getPerson().startsWith("person")).count());
|
.assertEquals(3, rp.getContributions().stream().filter(c -> c.getPerson().getLocal_identifier().startsWith("person")).count());
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(1, rp.getContributions().stream().filter(c -> c.getPerson().startsWith("temp")).count());
|
.assertEquals(1, rp.getContributions().stream().filter(c -> c.getPerson().getLocal_identifier().startsWith("temp")).count());
|
||||||
rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
|
rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
|
|
@ -72,7 +72,7 @@ public class EmitFromResultJobTest {
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
EmitFromResults
|
EmitFromEntities
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
@ -171,7 +171,7 @@ public class EmitFromResultJobTest {
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph_complete_entities/")
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph_complete_entities/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
EmitFromResults
|
EmitFromEntities
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
|
Loading…
Reference in New Issue