[SKG-IF] added first implementation for denormalization
This commit is contained in:
parent
7b715b2bb8
commit
a6a6922f11
|
@ -11,17 +11,17 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
* @Date 01/09/23
|
||||
*/
|
||||
public class Contribution implements Serializable {
|
||||
private String person;
|
||||
private MinPerson person;
|
||||
@JsonProperty("declared_affiliations")
|
||||
private List<String> declared_affiliation;
|
||||
private List<String> roles;
|
||||
private Integer rank;
|
||||
|
||||
public String getPerson() {
|
||||
public MinPerson getPerson() {
|
||||
return person;
|
||||
}
|
||||
|
||||
public void setPerson(String person) {
|
||||
public void setPerson(MinPerson person) {
|
||||
this.person = person;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,9 +8,33 @@ import java.io.Serializable;
|
|||
* @Date 22/02/24
|
||||
*/
|
||||
public class Contributor implements Serializable {
|
||||
private String person; // I would not map it because we have only information regarding the person (if any)
|
||||
private MinPerson person; // I would not map it because we have only information regarding the person (if any)
|
||||
// associated to the leading organization
|
||||
private String organization; // contributors.person
|
||||
|
||||
private String role;// private
|
||||
|
||||
public MinPerson getPerson() {
|
||||
return person;
|
||||
}
|
||||
|
||||
public void setPerson(MinPerson person) {
|
||||
this.person = person;
|
||||
}
|
||||
|
||||
public String getOrganization() {
|
||||
return organization;
|
||||
}
|
||||
|
||||
public void setOrganization(String organization) {
|
||||
this.organization = organization;
|
||||
}
|
||||
|
||||
public String getRole() {
|
||||
return role;
|
||||
}
|
||||
|
||||
public void setRole(String role) {
|
||||
this.role = role;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,15 @@ public class Datasource implements Serializable {
|
|||
// research_product_metadata_license.url not mappable
|
||||
private List<String> research_product_metadata_access_policy;// researchproductmetadataccesspolicies list with the
|
||||
// same mapping of research_product_access_policy
|
||||
private List<MinOrganization> organization;
|
||||
|
||||
public List<MinOrganization> getOrganization() {
|
||||
return organization;
|
||||
}
|
||||
|
||||
public void setOrganization(List<MinOrganization> organization) {
|
||||
this.organization = organization;
|
||||
}
|
||||
|
||||
public String getLocal_identifier() {
|
||||
return local_identifier;
|
||||
|
|
|
@ -28,7 +28,7 @@ public class Grant implements Serializable {
|
|||
private String start_date;// startdate.value
|
||||
private String end_date;// enddate.value
|
||||
private String website;// websiteurl.value
|
||||
private List<String> beneficiaries;// organization.id for the organizations in the relation with semantic class
|
||||
private List<MinOrganization> beneficiaries;// organization.id for the organizations in the relation with semantic class
|
||||
// isParticipant produces the list of organization internal identifiers
|
||||
private List<Contributor> contributors;//
|
||||
|
||||
|
@ -136,11 +136,11 @@ public class Grant implements Serializable {
|
|||
this.website = website;
|
||||
}
|
||||
|
||||
public List<String> getBeneficiaries() {
|
||||
public List<MinOrganization> getBeneficiaries() {
|
||||
return beneficiaries;
|
||||
}
|
||||
|
||||
public void setBeneficiaries(List<String> beneficiaries) {
|
||||
public void setBeneficiaries(List<MinOrganization> beneficiaries) {
|
||||
this.beneficiaries = beneficiaries;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,9 +28,9 @@ public class Manifestation implements Serializable {
|
|||
@JsonProperty("licance_schema")
|
||||
private String licence_schema;
|
||||
private Biblio biblio;
|
||||
private String venue;
|
||||
private MinVenue venue;
|
||||
@JsonProperty("hosting_datasource")
|
||||
private String hosting_datasource;
|
||||
private MinVenue hosting_datasource;
|
||||
|
||||
public String getProduct_local_type() {
|
||||
return product_local_type;
|
||||
|
@ -120,19 +120,19 @@ public class Manifestation implements Serializable {
|
|||
this.biblio = biblio;
|
||||
}
|
||||
|
||||
public String getVenue() {
|
||||
public MinVenue getVenue() {
|
||||
return venue;
|
||||
}
|
||||
|
||||
public void setVenue(String venue) {
|
||||
public void setVenue(MinVenue venue) {
|
||||
this.venue = venue;
|
||||
}
|
||||
|
||||
public String getHosting_datasource() {
|
||||
public MinVenue getHosting_datasource() {
|
||||
return hosting_datasource;
|
||||
}
|
||||
|
||||
public void setHosting_datasource(String hosting_datasource) {
|
||||
public void setHosting_datasource(MinVenue hosting_datasource) {
|
||||
this.hosting_datasource = hosting_datasource;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Minimal (denormalized) representation of a Grant, embedded inside other
 * SKG-IF entities (e.g. ResearchProduct.funding) instead of a plain identifier.
 * Field names are snake_case on purpose: they are serialized verbatim to the
 * SKG-IF JSON output, so they must not be renamed.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinGrant implements Serializable {
	// Pin the serialized form: Serializable classes without an explicit
	// serialVersionUID break deserialization across recompilations.
	private static final long serialVersionUID = 1L;

	private String local_identifier; // SKG-IF internal identifier of the grant
	private String funder; // funder name
	private String code; // grant code assigned by the funder

	public String getLocal_identifier() {
		return local_identifier;
	}

	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	public String getFunder() {
		return funder;
	}

	public void setFunder(String funder) {
		this.funder = funder;
	}

	public String getCode() {
		return code;
	}

	public void setCode(String code) {
		this.code = code;
	}
}
|
|
@ -0,0 +1,48 @@
|
|||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Minimal (denormalized) representation of an Organization, embedded inside
 * other SKG-IF entities (e.g. Grant.beneficiaries, Datasource.organization)
 * instead of a plain identifier. Field names are snake_case on purpose: they
 * are serialized verbatim to the SKG-IF JSON output, so they must not be renamed.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinOrganization implements Serializable {
	// Pin the serialized form: Serializable classes without an explicit
	// serialVersionUID break deserialization across recompilations.
	private static final long serialVersionUID = 1L;

	private String local_identifier; // SKG-IF internal identifier of the organization
	private String name; // organization (legal) name
	private String ror; // ROR identifier, if any
	private String isni; // ISNI identifier, if any

	public String getLocal_identifier() {
		return local_identifier;
	}

	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getRor() {
		return ror;
	}

	public void setRor(String ror) {
		this.ror = ror;
	}

	public String getIsni() {
		return isni;
	}

	public void setIsni(String isni) {
		this.isni = isni;
	}
}
|
|
@ -0,0 +1,37 @@
|
|||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Minimal (denormalized) representation of a Person, embedded inside other
 * SKG-IF entities (e.g. Contribution.person) instead of a plain identifier.
 * Field names are snake_case on purpose: they are serialized verbatim to the
 * SKG-IF JSON output, so they must not be renamed.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinPerson implements Serializable {
	// Pin the serialized form: Serializable classes without an explicit
	// serialVersionUID break deserialization across recompilations.
	private static final long serialVersionUID = 1L;

	private String local_identifier; // SKG-IF internal identifier of the person
	private String full_name; // person's full name
	private String orcid; // ORCID identifier, if any

	public String getLocal_identifier() {
		return local_identifier;
	}

	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	public String getFull_name() {
		return full_name;
	}

	public void setFull_name(String full_name) {
		this.full_name = full_name;
	}

	public String getOrcid() {
		return orcid;
	}

	public void setOrcid(String orcid) {
		this.orcid = orcid;
	}
}
|
|
@ -0,0 +1,55 @@
|
|||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Minimal (denormalized) representation of a ResearchProduct, embedded inside
 * other SKG-IF entities (e.g. Relations.product_list) instead of a plain
 * identifier. Field names are snake_case on purpose: they are serialized
 * verbatim to the SKG-IF JSON output, so they must not be renamed.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinProduct implements Serializable {
	// Pin the serialized form: Serializable classes without an explicit
	// serialVersionUID break deserialization across recompilations.
	private static final long serialVersionUID = 1L;

	private String local_identifier; // SKG-IF internal identifier of the product
	private String title; // product title
	private String doi; // DOI, if any
	private String pmcid; // PubMed Central identifier, if any
	private String arxivid; // arXiv identifier, if any

	public String getLocal_identifier() {
		return local_identifier;
	}

	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	public String getTitle() {
		return title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getDoi() {
		return doi;
	}

	public void setDoi(String doi) {
		this.doi = doi;
	}

	public String getPmcid() {
		return pmcid;
	}

	public void setPmcid(String pmcid) {
		this.pmcid = pmcid;
	}

	public String getArxivid() {
		return arxivid;
	}

	public void setArxivid(String arxivid) {
		this.arxivid = arxivid;
	}
}
|
|
@ -0,0 +1,28 @@
|
|||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Minimal (denormalized) representation of a Topic, embedded inside other
 * SKG-IF entities (e.g. ResultTopic.topic) instead of a plain identifier.
 * Field names are snake_case on purpose: they are serialized verbatim to the
 * SKG-IF JSON output, so they must not be renamed.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinTopic implements Serializable {
	// Pin the serialized form: Serializable classes without an explicit
	// serialVersionUID break deserialization across recompilations.
	private static final long serialVersionUID = 1L;

	private String local_identifier; // SKG-IF internal identifier of the topic
	private String value; // topic label

	public String getLocal_identifier() {
		return local_identifier;
	}

	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	public String getValue() {
		return value;
	}

	public void setValue(String value) {
		this.value = value;
	}
}
|
|
@ -0,0 +1,35 @@
|
|||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Minimal (denormalized) representation of a Venue, embedded inside other
 * SKG-IF entities (e.g. Manifestation.venue / hosting_datasource) instead of a
 * plain identifier. Field names are snake_case on purpose: they are serialized
 * verbatim to the SKG-IF JSON output, so they must not be renamed.
 *
 * @author miriam.baglioni
 * @Date 04/03/24
 */
public class MinVenue implements Serializable {
	// Pin the serialized form: Serializable classes without an explicit
	// serialVersionUID break deserialization across recompilations.
	private static final long serialVersionUID = 1L;

	private String local_identifier; // SKG-IF internal identifier of the venue
	private String name; // venue name

	public String getLocal_identifier() {
		return local_identifier;
	}

	// BUGFIX: parameter was misspelled "loval_identifier"
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Static factory building a fully-initialized instance in one call.
	 *
	 * @param local_identifier SKG-IF internal identifier of the venue
	 * @param name             venue name
	 * @return a new {@link MinVenue}
	 */
	public static MinVenue newInstance(String local_identifier, String name) {
		MinVenue minVenue = new MinVenue();
		minVenue.local_identifier = local_identifier;
		minVenue.name = name;
		return minVenue;
	}
}
|
|
@ -8,10 +8,18 @@ import java.io.Serializable;
|
|||
* @Date 05/09/23
|
||||
*/
|
||||
public enum RelationType implements Serializable {
|
||||
RESULT_OUTCOME_FUNDING("isProducedBy"), RESULT_AFFILIATIED_TO_ORGANIZATION(
|
||||
"hasAuthorInstitution"), ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"), SUPPLEMENT(
|
||||
"IsSupplementedBy"), DOCUMENTS(
|
||||
"IsDocumentedBy"), PART("IsPartOf"), VERSION("IsNewVersionOf"), CITATION("Cites");
|
||||
RESULT_OUTCOME_FUNDING("isProducedBy"),
|
||||
RESULT_AFFILIATIED_TO_ORGANIZATION(
|
||||
"hasAuthorInstitution"),
|
||||
DATASOURCE_PROVIDED_BY_ORGANIZATION ("isProvidedBy"),
|
||||
ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"),
|
||||
SUPPLEMENT(
|
||||
"IsSupplementedBy"),
|
||||
DOCUMENTS(
|
||||
"IsDocumentedBy"),
|
||||
PART("IsPartOf"),
|
||||
VERSION("IsNewVersionOf"),
|
||||
CITATION("Cites");
|
||||
|
||||
public final String label;
|
||||
|
||||
|
|
|
@ -14,9 +14,9 @@ public class Relations implements Serializable {
|
|||
@JsonProperty("relation_type")
|
||||
private String relation_type;
|
||||
@JsonProperty("product_list")
|
||||
private List<String> product_list;
|
||||
private List<MinProduct> product_list;
|
||||
|
||||
public static Relations newInstance(String relClass, List<String> target) {
|
||||
public static Relations newInstance(String relClass, List<MinProduct> target) {
|
||||
Relations r = new Relations();
|
||||
r.relation_type = relClass;
|
||||
r.product_list = target;
|
||||
|
@ -31,11 +31,11 @@ public class Relations implements Serializable {
|
|||
this.relation_type = relation_type;
|
||||
}
|
||||
|
||||
public List<String> getProduct_list() {
|
||||
public List<MinProduct> getProduct_list() {
|
||||
return product_list;
|
||||
}
|
||||
|
||||
public void setProduct_list(List<String> product_list) {
|
||||
public void setProduct_list(List<MinProduct> product_list) {
|
||||
this.product_list = product_list;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,11 +23,12 @@ public class ResearchProduct implements Serializable {
|
|||
private List<Contribution> contributions;
|
||||
private List<Manifestation> manifestations;
|
||||
@JsonProperty("relevant_organizations")
|
||||
private List<String> relevant_organizations;
|
||||
private List<String> funding;
|
||||
private List<MinOrganization> relevant_organizations;
|
||||
private List<MinGrant> funding;
|
||||
@JsonProperty("related_products")
|
||||
private List<Relations> related_products;
|
||||
|
||||
|
||||
public String getLocal_identifier() {
|
||||
return local_identifier;
|
||||
}
|
||||
|
@ -92,19 +93,19 @@ public class ResearchProduct implements Serializable {
|
|||
this.manifestations = manifestations;
|
||||
}
|
||||
|
||||
public List<String> getRelevant_organizations() {
|
||||
public List<MinOrganization> getRelevant_organizations() {
|
||||
return relevant_organizations;
|
||||
}
|
||||
|
||||
public void setRelevant_organizations(List<String> relevant_organizations) {
|
||||
public void setRelevant_organizations(List<MinOrganization> relevant_organizations) {
|
||||
this.relevant_organizations = relevant_organizations;
|
||||
}
|
||||
|
||||
public List<String> getFunding() {
|
||||
public List<MinGrant> getFunding() {
|
||||
return funding;
|
||||
}
|
||||
|
||||
public void setFunding(List<String> funding) {
|
||||
public void setFunding(List<MinGrant> funding) {
|
||||
this.funding = funding;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,14 +8,14 @@ import java.io.Serializable;
|
|||
* @Date 16/02/24
|
||||
*/
|
||||
public class ResultTopic implements Serializable {
|
||||
private String topic;
|
||||
private MinTopic topic;
|
||||
private Provenance provenance;
|
||||
|
||||
public String getTopic() {
|
||||
public MinTopic getTopic() {
|
||||
return topic;
|
||||
}
|
||||
|
||||
public void setTopic(String topic) {
|
||||
public void setTopic(MinTopic topic) {
|
||||
this.topic = topic;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,10 +7,16 @@ import java.io.Serializable;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.RelationType;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
|
@ -22,6 +28,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.skgif.model.Identifier;
|
||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
|
@ -64,60 +71,82 @@ public class DumpDatasource implements Serializable {
|
|||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath + "Datasources");
|
||||
|
||||
mapDatasource(spark, inputPath, outputPath);
|
||||
mapDatasource(spark, inputPath, outputPath, workingDir);
|
||||
});
|
||||
}
|
||||
|
||||
private static void mapDatasource(SparkSession spark, String inputPath, String outputPath) {
|
||||
Utils
|
||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter(
|
||||
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
|
||||
&& !d.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Datasource, eu.dnetlib.dhp.skgif.model.Datasource>) d -> {
|
||||
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
|
||||
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
|
||||
datasource
|
||||
.setIdentifiers(
|
||||
d
|
||||
.getPid()
|
||||
.stream()
|
||||
.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||
.collect(Collectors.toList()));
|
||||
private static void mapDatasource(SparkSession spark, String inputPath, String outputPath, String workingDir) {
|
||||
Dataset<Relation> relation = Utils.readPath(spark, inputPath + "relation", Relation.class)
|
||||
.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference())
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelClass().equalsIgnoreCase(RelationType.DATASOURCE_PROVIDED_BY_ORGANIZATION.label));
|
||||
|
||||
datasource.setName(d.getOfficialname().getValue());
|
||||
datasource.setSubmission_policy_url(d.getSubmissionpolicyurl());
|
||||
datasource
|
||||
.setJurisdiction(
|
||||
Optional
|
||||
.ofNullable(d.getJurisdiction())
|
||||
.map(v -> v.getClassid())
|
||||
.orElse(new String()));
|
||||
datasource.setPreservation_policy_url(d.getPreservationpolicyurl());
|
||||
datasource.setVersion_control(d.getVersioncontrol());
|
||||
Dataset<EncloseMinElement> eme = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||
|
||||
Dataset<Datasource> datasourceDataset = Utils
|
||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter(
|
||||
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
|
||||
&& !d.getDataInfo().getDeletedbyinference());
|
||||
Dataset<Tuple2<String, EncloseMinElement>> datasourceOrganization = relation.joinWith(eme, relation.col("target").equalTo(eme.col("enclosedEntityId")))
|
||||
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(t2._1().getSource(), t2._2()), Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||
|
||||
datasourceDataset.joinWith(datasourceOrganization, datasourceDataset.col("id").equalTo(datasourceOrganization.col("_1")), "left")
|
||||
.groupByKey((MapFunction<Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(), Encoders.STRING() )
|
||||
.mapGroups((MapGroupsFunction<String, Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, eu.dnetlib.dhp.skgif.model.Datasource>) (k,vs) -> {
|
||||
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
|
||||
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
|
||||
Datasource d = first._1();
|
||||
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
|
||||
datasource
|
||||
.setIdentifiers(
|
||||
d
|
||||
.getPid()
|
||||
.stream()
|
||||
.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
datasource.setName(d.getOfficialname().getValue());
|
||||
datasource.setSubmission_policy_url(d.getSubmissionpolicyurl());
|
||||
datasource
|
||||
.setJurisdiction(
|
||||
Optional
|
||||
.ofNullable(d.getJurisdiction())
|
||||
.map(v -> v.getClassid())
|
||||
.orElse(new String()));
|
||||
datasource.setPreservation_policy_url(d.getPreservationpolicyurl());
|
||||
datasource.setVersion_control(d.getVersioncontrol());
|
||||
|
||||
datasource
|
||||
.setData_source_classification(
|
||||
Optional
|
||||
.ofNullable(d.getEoscdatasourcetype())
|
||||
.map(v -> v.getClassname())
|
||||
.orElse(new String()));
|
||||
datasource.setResearch_product_type(getEoscProductType(d.getResearchentitytypes()));
|
||||
datasource.setThematic(d.getThematic());
|
||||
datasource
|
||||
.setResearch_product_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getDatabaseaccesstype())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
|
||||
.orElse(new ArrayList<>()));
|
||||
datasource
|
||||
.setResearch_product_metadata_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getResearchproductmetadataaccesspolicies())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
|
||||
.orElse(new ArrayList<>()));
|
||||
if(Optional.ofNullable(first._2()).isPresent()){
|
||||
List<MinOrganization> organizations = new ArrayList<>();
|
||||
organizations.add(first._2()._2().getMinOrganization());
|
||||
vs.forEachRemaining(org -> organizations.add(org._2()._2().getMinOrganization()));
|
||||
datasource.setOrganization(organizations);
|
||||
}
|
||||
return datasource;
|
||||
|
||||
}, Encoders.bean( eu.dnetlib.dhp.skgif.model.Datasource.class))
|
||||
|
||||
datasource
|
||||
.setData_source_classification(
|
||||
Optional
|
||||
.ofNullable(d.getEoscdatasourcetype())
|
||||
.map(v -> v.getClassname())
|
||||
.orElse(new String()));
|
||||
datasource.setResearch_product_type(getEoscProductType(d.getResearchentitytypes()));
|
||||
datasource.setThematic(d.getThematic());
|
||||
datasource
|
||||
.setResearch_product_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getDatabaseaccesstype())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
|
||||
.orElse(new ArrayList<>()));
|
||||
datasource
|
||||
.setResearch_product_metadata_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getResearchproductmetadataaccesspolicies())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
|
||||
.orElse(new ArrayList<>()));
|
||||
return datasource;
|
||||
}, Encoders.bean(eu.dnetlib.dhp.skgif.model.Datasource.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
|
|
@ -10,6 +10,8 @@ import java.util.List;
|
|||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.skgif.model.*;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -29,10 +31,6 @@ import org.slf4j.LoggerFactory;
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.skgif.model.Grant;
|
||||
import eu.dnetlib.dhp.skgif.model.Identifier;
|
||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
||||
import eu.dnetlib.dhp.skgif.model.RelationType;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
|
@ -76,11 +74,11 @@ public class DumpGrant implements Serializable {
|
|||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath + "Grant");
|
||||
|
||||
mapGrants(spark, inputPath, outputPath);
|
||||
mapGrants(spark, inputPath, outputPath, workingDir);
|
||||
});
|
||||
}
|
||||
|
||||
private static void mapGrants(SparkSession spark, String inputPath, String outputPath) {
|
||||
private static void mapGrants(SparkSession spark, String inputPath, String outputPath, String workingDir) {
|
||||
Dataset<Project> projects = Utils
|
||||
.readPath(spark, inputPath + "project", Project.class)
|
||||
.filter(
|
||||
|
@ -92,78 +90,84 @@ public class DumpGrant implements Serializable {
|
|||
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||
!r.getDataInfo().getInvisible() &&
|
||||
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
|
||||
Dataset<EncloseMinElement> eme = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||
|
||||
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
|
||||
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(t2._1().getTarget(), t2._2()), Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||
|
||||
projects
|
||||
.joinWith(relations, projects.col("id").equalTo(relations.col("target")), "left")
|
||||
.groupByKey((MapFunction<Tuple2<Project, Relation>, String>) t2 -> t2._1().getId(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, Tuple2<Project, Relation>, Grant>) (k, v) -> {
|
||||
Grant g = new Grant();
|
||||
Tuple2<Project, Relation> first = v.next();
|
||||
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
|
||||
g.setIdentifiers(getProjectIdentifier(first._1()));
|
||||
g.setTitle(first._1().getTitle().getValue());
|
||||
g
|
||||
.setSummary(
|
||||
Optional
|
||||
.ofNullable(first._1().getSummary())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setAcronym(
|
||||
Optional
|
||||
.ofNullable(first._1().getAcronym())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g.setFunder(getFunderName(first._1().getFundingtree().get(0).getValue()));
|
||||
// * private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
|
||||
g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
|
||||
g
|
||||
.setCurrency(
|
||||
Optional
|
||||
.ofNullable(first._1().getCurrency())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setFunded_amount(
|
||||
Optional
|
||||
.ofNullable(first._1().getFundedamount())
|
||||
.orElse(null));
|
||||
g
|
||||
.setKeywords(
|
||||
first
|
||||
._1()
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.map(s -> s.getValue())
|
||||
.collect(Collectors.toList()));
|
||||
g
|
||||
.setStart_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getStartdate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setEnd_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getEnddate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setWebsite(
|
||||
Optional
|
||||
.ofNullable(first._1().getWebsiteurl())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
if (Optional.ofNullable(first._2()).isPresent()) {
|
||||
List<String> relevantOrganizatios = new ArrayList<>();
|
||||
relevantOrganizatios.add(Utils.getIdentifier(Prefixes.ORGANIZATION, first._2().getSource()));
|
||||
v
|
||||
.forEachRemaining(
|
||||
t2 -> relevantOrganizatios
|
||||
.add(Utils.getIdentifier(Prefixes.ORGANIZATION, t2._2().getSource())));
|
||||
g.setBeneficiaries(relevantOrganizatios);
|
||||
}
|
||||
return g;
|
||||
}, Encoders.bean(Grant.class))
|
||||
.joinWith(partecipantOrganization, projects.col("id").equalTo(partecipantOrganization.col("_1")), "left")
|
||||
.groupByKey((MapFunction<Tuple2<Project, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(), Encoders.STRING() )
|
||||
.mapGroups((MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k,v) -> {
|
||||
Grant g = new Grant();
|
||||
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
|
||||
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
|
||||
g.setIdentifiers(getProjectIdentifier(first._1()));
|
||||
g.setTitle(first._1().getTitle().getValue());
|
||||
g
|
||||
.setSummary(
|
||||
Optional
|
||||
.ofNullable(first._1().getSummary())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setAcronym(
|
||||
Optional
|
||||
.ofNullable(first._1().getAcronym())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g.setFunder(Utils.getFunderName(first._1().getFundingtree().get(0).getValue()));
|
||||
// * private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
|
||||
g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
|
||||
g
|
||||
.setCurrency(
|
||||
Optional
|
||||
.ofNullable(first._1().getCurrency())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setFunded_amount(
|
||||
Optional
|
||||
.ofNullable(first._1().getFundedamount())
|
||||
.orElse(null));
|
||||
g
|
||||
.setKeywords(
|
||||
first
|
||||
._1()
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.map(s -> s.getValue())
|
||||
.collect(Collectors.toList()));
|
||||
g
|
||||
.setStart_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getStartdate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setEnd_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getEnddate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setWebsite(
|
||||
Optional
|
||||
.ofNullable(first._1().getWebsiteurl())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
if (Optional.ofNullable(first._2()).isPresent()) {
|
||||
List<MinOrganization> relevantOrganizatios = new ArrayList<>();
|
||||
relevantOrganizatios.add(first._2()._2().getMinOrganization());
|
||||
v
|
||||
.forEachRemaining(
|
||||
t2 -> relevantOrganizatios
|
||||
.add(t2._2()._2().getMinOrganization()));
|
||||
g.setBeneficiaries(relevantOrganizatios);
|
||||
}
|
||||
return g;
|
||||
}, Encoders.bean(Grant.class) )
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
@ -181,15 +185,7 @@ public class DumpGrant implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
private static String getFunderName(String fundingtree) throws DocumentException {
|
||||
final Document doc;
|
||||
|
||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||
// f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||
return ((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText();
|
||||
// f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||
|
||||
}
|
||||
|
||||
private static List<Identifier> getProjectIdentifier(Project project) throws DocumentException {
|
||||
List<Identifier> identifiers = new ArrayList<>();
|
||||
|
@ -202,7 +198,7 @@ public class DumpGrant implements Serializable {
|
|||
.add(
|
||||
Identifier
|
||||
.newInstance(
|
||||
getFunderName(project.getFundingtree().get(0).getValue()), project.getCode().getValue()));
|
||||
Utils.getFunderName(project.getFundingtree().get(0).getValue()), project.getCode().getValue()));
|
||||
return identifiers;
|
||||
|
||||
}
|
||||
|
|
|
@ -5,7 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.*;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
|
@ -13,17 +15,16 @@ import org.apache.spark.api.java.function.MapFunction;
|
|||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.PartialResearchProduct;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct;
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.skgif.model.*;
|
||||
import eu.dnetlib.dhp.skgif.model.AccessRight;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
|
@ -79,6 +80,9 @@ public class DumpResult implements Serializable {
|
|||
public static <R extends Result> void mapResult(SparkSession spark, String inputPath,
|
||||
String workingDir, String outputPath) {
|
||||
|
||||
//emit the snippet of the entities to be included in other entities for the dematerialization
|
||||
// emitMinEntities(spark, inputPath, workingDir);
|
||||
|
||||
// selection of the relevant relations from result type to other entity. Only teh semantic relevant ones are
|
||||
// considered
|
||||
selectRelations(spark, inputPath, workingDir);
|
||||
|
@ -91,10 +95,50 @@ public class DumpResult implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
// private static void emitMinEntities(SparkSession spark, String inputPath, String workingDir) {
|
||||
//
|
||||
// Utils.readPath(spark, inputPath + "organization", Organization.class)
|
||||
// .filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
|
||||
// .map((MapFunction<Organization, EncloseMinElement>) o -> {
|
||||
// EncloseMinElement eme = new EncloseMinElement();
|
||||
// eme.setEnclosedEntityId(o.getId());
|
||||
// eme.setMinOrganization(Utils.getMinOrganization(o));
|
||||
// return eme;
|
||||
// }, Encoders.bean(EncloseMinElement.class) )
|
||||
// .write()
|
||||
// .mode(SaveMode.Overwrite)
|
||||
// .option("compression","gzip")
|
||||
// .json(workingDir + "encloseMinEntity");
|
||||
//
|
||||
// Utils.readPath(spark, inputPath + "project", Project.class)
|
||||
// .filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
|
||||
// .map((MapFunction<Project,EncloseMinElement>) p -> {
|
||||
// EncloseMinElement eme = new EncloseMinElement();
|
||||
// eme.setEnclosedEntityId(p.getId());
|
||||
// eme.setMinGrant(Utils.getMinGrant(p));
|
||||
// return eme;
|
||||
// }, Encoders.bean(EncloseMinElement.class))
|
||||
// .write()
|
||||
// .mode(SaveMode.Append)
|
||||
// .option("compression","gzip")
|
||||
// .json(workingDir + "encloseMinEntity");
|
||||
//
|
||||
// getMinProduct(spark, inputPath + "publication" , Publication.class)
|
||||
// .union(getMinProduct(spark, inputPath + "dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class))
|
||||
// .union(getMinProduct(spark, inputPath + "software", Software.class))
|
||||
// .union(getMinProduct(spark, inputPath + "otherresearchproduct", OtherResearchProduct.class))
|
||||
// .write()
|
||||
// .mode(SaveMode.Append)
|
||||
// .option("compression","gzip")
|
||||
// .json(workingDir + "encloseMinEntity");
|
||||
//
|
||||
//
|
||||
// }
|
||||
|
||||
private static void getRelationAndManifestation(SparkSession spark, String workingDir, String inputPath) {
|
||||
Dataset<RelationPerProduct> aggRelations = Utils
|
||||
.readPath(spark, workingDir + "aggrelation", RelationPerProduct.class);
|
||||
aggRelations.count();
|
||||
|
||||
ModelSupport.entityTypes
|
||||
.keySet()
|
||||
.stream()
|
||||
|
@ -135,7 +179,8 @@ public class DumpResult implements Serializable {
|
|||
(MapFunction<Tuple2<PartialResearchProduct, RelationPerProduct>, PartialResearchProduct>) t2 -> {
|
||||
PartialResearchProduct prp = t2._1();
|
||||
if (Optional.ofNullable(t2._2()).isPresent()) {
|
||||
prp.setRelated_products(t2._2().getRelatedProduct());
|
||||
prp.setRelated_products(t2._2().getRelatedProduct().keySet()
|
||||
.stream().map(key -> Relations.newInstance(key, t2._2().getRelatedProduct().get(key))).collect(Collectors.toList()));
|
||||
prp.setRelevant_organizations(t2._2().getOrganizations());
|
||||
prp.setFunding(t2._2().getFunding());
|
||||
}
|
||||
|
@ -218,12 +263,12 @@ public class DumpResult implements Serializable {
|
|||
if (Optional.ofNullable(t2._2()).isPresent()) {
|
||||
manifestation.setBiblio(getBiblio(epm));
|
||||
if (Optional.ofNullable(t2._2().getJournal().getIssnPrinted()).isPresent())
|
||||
manifestation.setVenue(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnPrinted()));
|
||||
manifestation.setVenue(MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnPrinted()),t2._1().getJournal().getName()));
|
||||
else if (Optional.ofNullable(t2._2().getJournal().getIssnOnline()).isPresent())
|
||||
manifestation.setVenue(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()));
|
||||
manifestation.setVenue(MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()),t2._1().getJournal().getName()));
|
||||
}
|
||||
manifestation
|
||||
.setHosting_datasource(Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()));
|
||||
.setHosting_datasource(MinVenue.newInstance(Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()), epm.getInstance().getHostedby().getValue()));
|
||||
|
||||
return manifestation;
|
||||
}
|
||||
|
@ -306,40 +351,56 @@ public class DumpResult implements Serializable {
|
|||
r.getRelClass().equalsIgnoreCase(RelationType.PART.label) ||
|
||||
r.getRelClass().equalsIgnoreCase(RelationType.VERSION.label) ||
|
||||
r.getRelClass().equalsIgnoreCase(RelationType.CITATION.label));
|
||||
Dataset<EncloseMinElement> encloseMinEntity = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class);
|
||||
|
||||
relation
|
||||
.groupByKey((MapFunction<Relation, String>) r -> r.getSource(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, Relation, RelationPerProduct>) (k, v) -> {
|
||||
RelationPerProduct rpp = new RelationPerProduct();
|
||||
rpp.setResultId(k);
|
||||
Map<String, List<String>> remainignRelations = new HashMap<>();
|
||||
while (v.hasNext()) {
|
||||
Relation rel = v.next();
|
||||
String target = rel.getTarget();
|
||||
String relClass = rel.getRelClass();
|
||||
switch (rel.getRelClass().toLowerCase()) {
|
||||
case "hasauthorinstitution":
|
||||
rpp.getOrganizations().add(Utils.getIdentifier(Prefixes.ORGANIZATION, target));
|
||||
break;
|
||||
case "isproducedby":
|
||||
rpp.getFunding().add(Utils.getIdentifier(Prefixes.GRANT, target));
|
||||
break;
|
||||
default:
|
||||
if (!remainignRelations.keySet().contains(relClass))
|
||||
remainignRelations.put(relClass, new ArrayList<>());
|
||||
remainignRelations
|
||||
.get(relClass)
|
||||
.add(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, target));
|
||||
}
|
||||
}
|
||||
for (String key : remainignRelations.keySet())
|
||||
rpp.getRelatedProduct().add(Relations.newInstance(key, remainignRelations.get(key)));
|
||||
return rpp;
|
||||
}, Encoders.bean(RelationPerProduct.class))
|
||||
relation.joinWith(encloseMinEntity, relation.col("target").equalTo(encloseMinEntity.col("enclosedEntityId")))
|
||||
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, EncloseMinElement>) t2 ->
|
||||
{
|
||||
EncloseMinElement eme = t2._2();
|
||||
eme.setResultId(t2._1().getSource());
|
||||
eme.setSemantics(t2._1().getRelClass());
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class))
|
||||
.groupByKey((MapFunction<EncloseMinElement, String>) eme -> eme.getResultId(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, EncloseMinElement, RelationPerProduct>) (k,v) ->
|
||||
{
|
||||
RelationPerProduct rpp = new RelationPerProduct();
|
||||
rpp.setResultId(k);
|
||||
insertEnclosedElement(rpp,v.next());
|
||||
v.forEachRemaining(e -> insertEnclosedElement(rpp,e));
|
||||
return rpp;
|
||||
}, Encoders.bean(RelationPerProduct.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/aggrelation");
|
||||
}
|
||||
|
||||
private static void insertEnclosedElement(RelationPerProduct rpp, EncloseMinElement element) {
|
||||
if(Optional.ofNullable(element.getMinOrganization()).isPresent())
|
||||
rpp.getOrganizations().add(element.getMinOrganization());
|
||||
if(Optional.ofNullable(element.getMinGrant()).isPresent())
|
||||
rpp.getFunding().add(element.getMinGrant());
|
||||
if(Optional.ofNullable(element.getMinProduct()).isPresent()){
|
||||
String sem = element.getSemantics();
|
||||
if(!rpp.getRelatedProduct().containsKey(sem))
|
||||
rpp.getRelatedProduct().put(sem, new ArrayList<>());
|
||||
rpp.getRelatedProduct().get(sem).add(element.getMinProduct());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static <R extends Result> Dataset<EncloseMinElement> getMinProduct(SparkSession spark, String inputPath, Class<R> clazz) {
|
||||
return Utils.readPath(spark, inputPath , clazz)
|
||||
.filter((FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||
!r.getDataInfo().getInvisible())
|
||||
.map((MapFunction<R, EncloseMinElement>) r -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(r.getId());
|
||||
eme.setMinProduct(Utils.getMinProduct(r));
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@ import java.io.Serializable;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
|
@ -27,21 +29,20 @@ import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.skgif.model.*;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 06/02/24
|
||||
*/
|
||||
public class EmitFromResults implements Serializable {
|
||||
public class EmitFromEntities implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(EmitFromResults.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(EmitFromEntities.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
EmitFromResults.class
|
||||
EmitFromEntities.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/emit_biblio_parameters.json"));
|
||||
|
||||
|
@ -71,18 +72,108 @@ public class EmitFromResults implements Serializable {
|
|||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
emitFromResult(spark, inputPath, outputPath, workingDir);
|
||||
emitFromDatasource(spark, inputPath, workingDir);
|
||||
emitFromOrganization(spark, inputPath, workingDir);
|
||||
emitFromProject(spark, inputPath, workingDir);
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
//per ogni result emetto id + journal se esiste + istanza + hosted by dell'istanza
|
||||
private static void emitFromProject(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.readPath(spark, inputPath + "project" , Project.class)
|
||||
.filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Project, EncloseMinElement>) p->{
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(p.getId());
|
||||
eme.setMinGrant(Utils.getMinGrant(p));
|
||||
return eme;}, Encoders.bean(EncloseMinElement.class) )
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
}
|
||||
|
||||
private static void emitFromOrganization(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.readPath(spark, inputPath + "organization", Organization.class)
|
||||
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Organization, EncloseMinElement>) o -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setMinOrganization(Utils.getMinOrganization(o));
|
||||
eme.setEnclosedEntityId(o.getId());
|
||||
return eme;},
|
||||
Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
}
|
||||
|
||||
private static void emitFromDatasource(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setMinDatsource(MinVenue.newInstance(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
|
||||
eme.setEnclosedEntityId(d.getId());
|
||||
return eme;
|
||||
}
|
||||
, Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||
.filter((FilterFunction<Datasource>) d-> d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"))
|
||||
.map((MapFunction<Datasource, EncloseMinElement>) d-> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(d.getId());
|
||||
if(Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
|
||||
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()), d.getOfficialname().getValue()));
|
||||
if(Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
|
||||
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()), d.getOfficialname().getValue()));
|
||||
return null;
|
||||
},Encoders.bean(EncloseMinElement.class) )
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
}
|
||||
|
||||
//per ogni result emetto id + journal se esiste + istanza + hosted by dell'istanza
|
||||
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
|
||||
String workingDir) {
|
||||
emitManifestation(spark, inputPath, workingDir);
|
||||
emitPerson(spark, inputPath, outputPath, workingDir);
|
||||
emitTopic(spark, inputPath, outputPath, workingDir);
|
||||
emitMinProduct(spark, inputPath, workingDir);
|
||||
|
||||
}
|
||||
|
||||
private static <R extends Result> void emitMinProduct(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.removeOutputDir(spark, workingDir + "minEntity");
|
||||
ModelSupport.entityTypes.keySet().forEach(e -> {
|
||||
if (ModelSupport.isResult(e)) {
|
||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||
|
||||
Utils
|
||||
.readPath(spark, inputPath + e.name(), resultClazz)
|
||||
.map((MapFunction<R, MinProduct>) p -> Utils.getMinProduct(p), Encoders.bean(MinProduct.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
private static <R extends Result> void emitTopic(SparkSession spark, String inputPath, String outputPath,
|
||||
String workingDir) {
|
||||
ModelSupport.entityTypes.keySet().forEach(e -> {
|
||||
|
@ -213,16 +304,10 @@ public class EmitFromResults implements Serializable {
|
|||
}
|
||||
|
||||
private static <R extends Result> void emitManifestation(SparkSession spark, String inputPath, String workingDir) {
|
||||
Dataset<Datasource> datasource = Utils
|
||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter(
|
||||
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
|
||||
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
|
||||
|
||||
ModelSupport.entityTypes.keySet().forEach(e -> {
|
||||
if (ModelSupport.isResult(e)) {
|
||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||
// Dataset<EmitPerManifestation> emitformanifestation =
|
||||
|
||||
Utils
|
||||
.readPath(spark, inputPath + e.name(), resultClazz)
|
||||
.flatMap((FlatMapFunction<R, EmitPerManifestation>) p -> p.getInstance().stream().map(i -> {
|
||||
|
@ -245,7 +330,7 @@ public class EmitFromResults implements Serializable {
|
|||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + e.name() + "/manifestation");
|
||||
;
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -54,14 +54,20 @@ public class ResultMapper implements Serializable {
|
|||
count += 1;
|
||||
Contribution contribution = new Contribution();
|
||||
Tuple2<String, Boolean> orcid = Utils.getOrcid(a.getPid());
|
||||
MinPerson minPerson = new MinPerson();
|
||||
minPerson.setFull_name(a.getFullname());
|
||||
if (orcid != null) {
|
||||
contribution.setPerson(Utils.getIdentifier(Prefixes.PERSON, orcid._1() + orcid._2()));
|
||||
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.PERSON, orcid._1() + orcid._2()));
|
||||
minPerson.setOrcid(orcid._1());
|
||||
contribution.setPerson(minPerson);
|
||||
} else {
|
||||
if (Optional.ofNullable(a.getRank()).isPresent()) {
|
||||
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
|
||||
contribution
|
||||
.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
|
||||
.setPerson(minPerson);
|
||||
} else {
|
||||
contribution.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
||||
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
||||
contribution.setPerson(minPerson);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -88,9 +94,12 @@ public class ResultMapper implements Serializable {
|
|||
s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
||||
.map(s -> {
|
||||
ResultTopic topic = new ResultTopic();
|
||||
MinTopic minTopic = new MinTopic();
|
||||
minTopic.setLocal_identifier(Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
|
||||
minTopic.setValue(s.getValue());
|
||||
topic
|
||||
.setTopic(
|
||||
Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
|
||||
.setTopic(minTopic
|
||||
);
|
||||
if (Optional.ofNullable(s.getDataInfo()).isPresent()) {
|
||||
Provenance provenance = new Provenance();
|
||||
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
|
||||
|
|
|
@ -2,9 +2,16 @@
|
|||
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.io.StringReader;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
|
@ -17,6 +24,9 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
|
@ -63,4 +73,64 @@ public class Utils implements Serializable {
|
|||
return entity.label + DHPUtils.md5(id);
|
||||
|
||||
}
|
||||
|
||||
public static String getFunderName(String fundingtree) throws DocumentException {
|
||||
final Document doc;
|
||||
|
||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||
// f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||
return ((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText();
|
||||
// f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||
|
||||
}
|
||||
|
||||
public static MinOrganization getMinOrganization(Organization o) {
|
||||
MinOrganization mo = new MinOrganization();
|
||||
mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
||||
mo.setName(o.getLegalname().getValue());
|
||||
for(StructuredProperty pid : o.getPid()){
|
||||
switch (pid.getQualifier().getClassid().toLowerCase()){
|
||||
case "ror":
|
||||
mo.setRor(pid.getValue());
|
||||
break;
|
||||
case "isni":
|
||||
mo.setIsni(pid.getValue());
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
return mo;
|
||||
}
|
||||
|
||||
public static MinGrant getMinGrant(Project p) throws DocumentException {
|
||||
MinGrant mg = new MinGrant();
|
||||
mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
|
||||
mg.setCode(p.getCode().getValue());
|
||||
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
|
||||
return mg;
|
||||
}
|
||||
|
||||
public static <R extends Result> MinProduct getMinProduct(R r) {
|
||||
MinProduct mp = new MinProduct();
|
||||
mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
|
||||
for (StructuredProperty title : r.getTitle()) {
|
||||
if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) {
|
||||
mp.setTitle(title.getValue());
|
||||
}
|
||||
}
|
||||
for (StructuredProperty pid : r.getPid()) {
|
||||
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
||||
case "doi":
|
||||
mp.setDoi(pid.getValue());
|
||||
break;
|
||||
case "pmcid":
|
||||
mp.setPmcid(pid.getValue());
|
||||
break;
|
||||
case "arxiv":
|
||||
mp.setArxivid(pid.getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
return mp;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 04/03/24
|
||||
*/
|
||||
public class Couple implements Serializable {
|
||||
private String originalIdentifier;
|
||||
private String localIdentifier;
|
||||
|
||||
public String getOriginalIdentifier() {
|
||||
return originalIdentifier;
|
||||
}
|
||||
|
||||
public void setOriginalIdentifier(String originalIdentifier) {
|
||||
this.originalIdentifier = originalIdentifier;
|
||||
}
|
||||
|
||||
public String getLocalIdentifier() {
|
||||
return localIdentifier;
|
||||
}
|
||||
|
||||
public void setLocalIdentifier(String localIdentifier) {
|
||||
this.localIdentifier = localIdentifier;
|
||||
}
|
||||
|
||||
public static Couple newInstance(String originalIdentifier, String localIdentifier){
|
||||
Couple couple = new Couple();
|
||||
couple.originalIdentifier = originalIdentifier;
|
||||
couple.localIdentifier = localIdentifier;
|
||||
return couple;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
||||
|
||||
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||
import eu.dnetlib.dhp.skgif.model.MinVenue;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 04/03/24
|
||||
*/
|
||||
public class EncloseMinElement implements Serializable {
|
||||
private String resultId;
|
||||
private String enclosedEntityId;
|
||||
private MinOrganization minOrganization;
|
||||
private MinVenue minVenue;
|
||||
private MinVenue minDatsource;
|
||||
private MinGrant minGrant;
|
||||
private MinProduct minProduct;
|
||||
private String semantics;
|
||||
|
||||
public MinVenue getMinVenue() {
|
||||
return minVenue;
|
||||
}
|
||||
|
||||
public void setMinVenue(MinVenue minVenue) {
|
||||
this.minVenue = minVenue;
|
||||
}
|
||||
|
||||
public MinVenue getMinDatsource() {
|
||||
return minDatsource;
|
||||
}
|
||||
|
||||
public void setMinDatsource(MinVenue minDatsource) {
|
||||
this.minDatsource = minDatsource;
|
||||
}
|
||||
|
||||
public String getSemantics() {
|
||||
return semantics;
|
||||
}
|
||||
|
||||
public void setSemantics(String semantics) {
|
||||
this.semantics = semantics;
|
||||
}
|
||||
|
||||
public String getResultId() {
|
||||
return resultId;
|
||||
}
|
||||
|
||||
public void setResultId(String resultId) {
|
||||
this.resultId = resultId;
|
||||
}
|
||||
|
||||
public String getEnclosedEntityId() {
|
||||
return enclosedEntityId;
|
||||
}
|
||||
|
||||
public void setEnclosedEntityId(String enclosedEntityId) {
|
||||
this.enclosedEntityId = enclosedEntityId;
|
||||
}
|
||||
|
||||
public MinOrganization getMinOrganization() {
|
||||
return minOrganization;
|
||||
}
|
||||
|
||||
public void setMinOrganization(MinOrganization minOrganization) {
|
||||
this.minOrganization = minOrganization;
|
||||
}
|
||||
|
||||
public MinGrant getMinGrant() {
|
||||
return minGrant;
|
||||
}
|
||||
|
||||
public void setMinGrant(MinGrant minGrant) {
|
||||
this.minGrant = minGrant;
|
||||
}
|
||||
|
||||
public MinProduct getMinProduct() {
|
||||
return minProduct;
|
||||
}
|
||||
|
||||
public void setMinProduct(MinProduct minProduct) {
|
||||
this.minProduct = minProduct;
|
||||
}
|
||||
}
|
|
@ -3,9 +3,13 @@ package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
|||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import eu.dnetlib.dhp.skgif.model.Relations;
|
||||
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
|
@ -14,14 +18,14 @@ import eu.dnetlib.dhp.skgif.model.Relations;
|
|||
public class RelationPerProduct implements Serializable {
|
||||
|
||||
private String resultId;
|
||||
private List<String> organizations;
|
||||
private List<String> funding;
|
||||
private List<Relations> relatedProduct;
|
||||
private List<MinOrganization> organizations;
|
||||
private List<MinGrant> funding;
|
||||
private Map<String,List<MinProduct>> relatedProduct;
|
||||
|
||||
public RelationPerProduct() {
|
||||
organizations = new ArrayList<>();
|
||||
funding = new ArrayList<>();
|
||||
relatedProduct = new ArrayList<>();
|
||||
relatedProduct = new HashMap<>();
|
||||
}
|
||||
|
||||
public String getResultId() {
|
||||
|
@ -32,27 +36,27 @@ public class RelationPerProduct implements Serializable {
|
|||
this.resultId = resultId;
|
||||
}
|
||||
|
||||
public List<String> getOrganizations() {
|
||||
public List<MinOrganization> getOrganizations() {
|
||||
return organizations;
|
||||
}
|
||||
|
||||
public void setOrganizations(List<String> organizations) {
|
||||
public void setOrganizations(List<MinOrganization> organizations) {
|
||||
this.organizations = organizations;
|
||||
}
|
||||
|
||||
public List<String> getFunding() {
|
||||
public List<MinGrant> getFunding() {
|
||||
return funding;
|
||||
}
|
||||
|
||||
public void setFunding(List<String> funding) {
|
||||
public void setFunding(List<MinGrant> funding) {
|
||||
this.funding = funding;
|
||||
}
|
||||
|
||||
public List<Relations> getRelatedProduct() {
|
||||
public Map<String, List<MinProduct>> getRelatedProduct() {
|
||||
return relatedProduct;
|
||||
}
|
||||
|
||||
public void setRelatedProduct(List<Relations> relatedProduct) {
|
||||
public void setRelatedProduct(Map<String, List<MinProduct>> relatedProduct) {
|
||||
this.relatedProduct = relatedProduct;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,7 +71,7 @@
|
|||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Extraction</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromResults</class>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromEntities</class>
|
||||
<jar>dump-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
|
|
|
@ -267,14 +267,15 @@ public class DumpResultTest {
|
|||
.anyMatch(
|
||||
t -> t
|
||||
.getTopic()
|
||||
.getValue()
|
||||
.equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
|
||||
|
||||
// check contributions
|
||||
Assertions.assertEquals(4, rp.getContributions().size());
|
||||
Assertions
|
||||
.assertEquals(3, rp.getContributions().stream().filter(c -> c.getPerson().startsWith("person")).count());
|
||||
.assertEquals(3, rp.getContributions().stream().filter(c -> c.getPerson().getLocal_identifier().startsWith("person")).count());
|
||||
Assertions
|
||||
.assertEquals(1, rp.getContributions().stream().filter(c -> c.getPerson().startsWith("temp")).count());
|
||||
.assertEquals(1, rp.getContributions().stream().filter(c -> c.getPerson().getLocal_identifier().startsWith("temp")).count());
|
||||
rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
|
|
@ -72,7 +72,7 @@ public class EmitFromResultJobTest {
|
|||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
|
||||
.getPath();
|
||||
|
||||
EmitFromResults
|
||||
EmitFromEntities
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
|
@ -171,7 +171,7 @@ public class EmitFromResultJobTest {
|
|||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph_complete_entities/")
|
||||
.getPath();
|
||||
|
||||
EmitFromResults
|
||||
EmitFromEntities
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
|
|
Loading…
Reference in New Issue