[SKG-IF] fix denormalization issue and add a new field to MinGrant

Miriam Baglioni 2024-03-11 09:56:40 +01:00
parent a6a6922f11
commit 0f40ed6b11
447 changed files with 1559 additions and 522 deletions

View File: Contributor.java

@ -9,7 +9,7 @@ import java.io.Serializable;
*/
public class Contributor implements Serializable {
private MinPerson person; // I would not map it because we only have information about the person (if any)
// associated with the leading organization
private String organization; // contributors.person
private String role;// private

View File: Grant.java

@ -28,8 +28,9 @@ public class Grant implements Serializable {
private String start_date;// startdate.value
private String end_date;// enddate.value
private String website;// websiteurl.value
private List<MinOrganization> beneficiaries;// organization.id for the organizations in the relation with semantic
// class
// isParticipant produces the list of organization internal identifiers
private List<Contributor> contributors;//
public String getLocal_identifier() {

View File: MinGrant.java

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -7,31 +8,40 @@ import java.io.Serializable;
* @Date 04/03/24
*/
public class MinGrant implements Serializable {
private String local_identifier;
private String funder;
private String code;
private String title;
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getLocal_identifier() {
return local_identifier;
}
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public String getFunder() {
return funder;
}
public void setFunder(String funder) {
this.funder = funder;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
}
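For reference, a minimal usage sketch of the new title field added to MinGrant by this commit; all values below are made up for illustration.

import eu.dnetlib.dhp.skgif.model.MinGrant;

public class MinGrantTitleSketch {
    public static void main(String[] args) {
        MinGrant mg = new MinGrant();
        mg.setLocal_identifier("grant_______::abc123"); // hypothetical identifier
        mg.setFunder("EC"); // hypothetical funder short name
        mg.setCode("101017452"); // hypothetical grant code
        mg.setTitle("Example project title"); // the field added by this commit
        System.out.println(mg.getFunder() + " " + mg.getCode() + ": " + mg.getTitle());
    }
}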

View File: MinOrganization.java

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -7,42 +8,66 @@ import java.io.Serializable;
* @Date 04/03/24
*/
public class MinOrganization implements Serializable {
private String local_identifier;
private String name;
private String ror;
private String isni;
private String fundRef;
private String rinGold;
private String wikidata;
public String getWikidata() {
return wikidata;
}
public void setWikidata(String wikidata) {
this.wikidata = wikidata;
}
public String getFundRef() {
return fundRef;
}
public String getRinGold() {
return rinGold;
}
public void setRinGold(String rinGold) {
this.rinGold = rinGold;
}
public String getLocal_identifier() {
return local_identifier;
}
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getRor() {
return ror;
}
public void setRor(String ror) {
this.ror = ror;
}
public String getIsni() {
return isni;
}
public void setIsni(String isni) {
this.isni = isni;
}
public void setFundRef(String value) {
this.fundRef = value;
}
}
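The new fundRef, rinGold and wikidata fields are filled from organization PIDs; a self-contained sketch mirroring the classid switch of Utils.getMinOrganization shown further down (the PID values are made up):

import eu.dnetlib.dhp.skgif.model.MinOrganization;

public class MinOrganizationPidSketch {
    public static void main(String[] args) {
        MinOrganization mo = new MinOrganization();
        setPid(mo, "ROR", "https://ror.org/04zaypm56"); // hypothetical PID
        setPid(mo, "Wikidata", "Q217102"); // hypothetical PID
        System.out.println(mo.getRor() + " / " + mo.getWikidata());
    }

    // same dispatch as Utils.getMinOrganization: one field per supported PID type
    static void setPid(MinOrganization mo, String classid, String value) {
        switch (classid.toLowerCase()) {
            case "ror":
                mo.setRor(value);
                break;
            case "isni":
                mo.setIsni(value);
                break;
            case "fundref":
                mo.setFundRef(value);
                break;
            case "ringgold":
                mo.setRinGold(value);
                break;
            case "wikidata":
                mo.setWikidata(value);
                break;
        }
    }
}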

View File: MinPerson.java

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -7,31 +8,31 @@ import java.io.Serializable;
* @Date 04/03/24
*/
public class MinPerson implements Serializable {
private String local_identifier;
private String full_name;
private String orcid;
public String getLocal_identifier() {
return local_identifier;
}
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public String getFull_name() {
return full_name;
}
public void setFull_name(String full_name) {
this.full_name = full_name;
}
public String getOrcid() {
return orcid;
}
public void setOrcid(String orcid) {
this.orcid = orcid;
}
}

View File: MinProduct.java

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -7,49 +8,49 @@ import java.io.Serializable;
* @Date 04/03/24
*/
public class MinProduct implements Serializable {
private String local_identifier;
private String title;
private String doi;
private String pmcid;
private String arxivid;
public String getLocal_identifier() {
return local_identifier;
}
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getDoi() {
return doi;
}
public void setDoi(String doi) {
this.doi = doi;
}
public String getPmcid() {
return pmcid;
}
public void setPmcid(String pmcid) {
this.pmcid = pmcid;
}
public String getArxivid() {
return arxivid;
}
public void setArxivid(String arxivid) {
this.arxivid = arxivid;
}
}

View File: MinTopic.java

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -7,22 +8,22 @@ import java.io.Serializable;
* @Date 04/03/24
*/
public class MinTopic implements Serializable {
private String local_identifier;
private String value;
public String getLocal_identifier() {
return local_identifier;
}
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
}

View File: MinVenue.java

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@ -7,29 +8,29 @@ import java.io.Serializable;
* @Date 04/03/24
*/
public class MinVenue implements Serializable {
private String local_identifier;
private String name;
public String getLocal_identifier() {
return local_identifier;
}
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public static MinVenue newInstance(String local_identifier, String name) {
MinVenue minVenue = new MinVenue();
minVenue.local_identifier = local_identifier;
minVenue.name = name;
return minVenue;
}
}

View File: RelationType.java

@ -8,18 +8,11 @@ import java.io.Serializable;
* @Date 05/09/23
*/
public enum RelationType implements Serializable {
RESULT_OUTCOME_FUNDING("isProducedBy"),
RESULT_AFFILIATIED_TO_ORGANIZATION(
"hasAuthorInstitution"),
DATASOURCE_PROVIDED_BY_ORGANIZATION ("isProvidedBy"),
ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"),
SUPPLEMENT(
"IsSupplementedBy"),
DOCUMENTS(
"IsDocumentedBy"),
PART("IsPartOf"),
VERSION("IsNewVersionOf"),
CITATION("Cites");
RESULT_OUTCOME_FUNDING("isProducedBy"), RESULT_AFFILIATIED_TO_ORGANIZATION(
"hasAuthorInstitution"), DATASOURCE_PROVIDED_BY_ORGANIZATION(
"isProvidedBy"), ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"), SUPPLEMENT(
"IsSupplementedBy"), DOCUMENTS(
"IsDocumentedBy"), PART("IsPartOf"), VERSION("IsNewVersionOf"), CITATION("Cites");
public final String label;
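The labels are compared case-insensitively against a relation's relClass throughout the dump jobs (see DumpResult below); a minimal sketch with a made-up relation class:

import eu.dnetlib.dhp.skgif.model.RelationType;

public class RelationLabelSketch {
    public static void main(String[] args) {
        String relClass = "issupplementedby"; // hypothetical value read from a graph Relation
        System.out.println(relClass.equalsIgnoreCase(RelationType.SUPPLEMENT.label)); // true
    }
}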

View File: ResearchProduct.java

@ -28,7 +28,6 @@ public class ResearchProduct implements Serializable {
@JsonProperty("related_products")
private List<Relations> related_products;
public String getLocal_identifier() {
return local_identifier;
}

View File: DumpDatasource.java

@ -7,11 +7,6 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
@ -25,9 +20,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.skgif.model.Identifier;
import eu.dnetlib.dhp.skgif.model.MinOrganization;
import eu.dnetlib.dhp.skgif.model.Prefixes;
import eu.dnetlib.dhp.skgif.model.RelationType;
import scala.Tuple2;
/**
@ -76,68 +76,83 @@ public class DumpDatasource implements Serializable {
}
private static void mapDatasource(SparkSession spark, String inputPath, String outputPath, String workingDir) {
Dataset<Relation> relation = Utils
.readPath(spark, inputPath + "relation", Relation.class)
.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference())
.filter(
(FilterFunction<Relation>) r -> r
.getRelClass()
.equalsIgnoreCase(RelationType.DATASOURCE_PROVIDED_BY_ORGANIZATION.label));
Dataset<EncloseMinElement> eme = Utils
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
Dataset<Datasource> datasourceDataset = Utils
.readPath(spark, inputPath + "datasource", Datasource.class)
.filter(
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
&& !d.getDataInfo().getDeletedbyinference());
Dataset<Tuple2<String, EncloseMinElement>> datasourceOrganization = relation.joinWith(eme, relation.col("target").equalTo(eme.col("enclosedEntityId")))
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(t2._1().getSource(), t2._2()), Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
.readPath(spark, inputPath + "datasource", Datasource.class)
.filter(
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
&& !d.getDataInfo().getDeletedbyinference());
Dataset<Tuple2<String, EncloseMinElement>> datasourceOrganization = relation
.joinWith(eme, relation.col("target").equalTo(eme.col("enclosedEntityId")))
.map(
(MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(
t2._1().getSource(), t2._2()),
Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
datasourceDataset
.joinWith(
datasourceOrganization, datasourceDataset.col("id").equalTo(datasourceOrganization.col("_1")), "left")
.groupByKey(
(MapFunction<Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(),
Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, eu.dnetlib.dhp.skgif.model.Datasource>) (
k, vs) -> {
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
Datasource d = first._1();
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
datasource
.setIdentifiers(
d
.getPid()
.stream()
.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
.collect(Collectors.toList()));
datasource.setName(d.getOfficialname().getValue());
datasource.setSubmission_policy_url(d.getSubmissionpolicyurl());
datasource
.setJurisdiction(
Optional
.ofNullable(d.getJurisdiction())
.map(v -> v.getClassid())
.orElse(new String()));
datasource.setPreservation_policy_url(d.getPreservationpolicyurl());
datasource.setVersion_control(d.getVersioncontrol());
datasource
.setData_source_classification(
Optional
.ofNullable(d.getEoscdatasourcetype())
.map(v -> v.getClassname())
.orElse(new String()));
datasource.setResearch_product_type(getEoscProductType(d.getResearchentitytypes()));
datasource.setThematic(d.getThematic());
datasource
.setResearch_product_access_policy(
Optional
.ofNullable(d.getDatabaseaccesstype())
.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
.orElse(new ArrayList<>()));
datasource
.setResearch_product_metadata_access_policy(
Optional
.ofNullable(d.getResearchproductmetadataaccesspolicies())
.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
.orElse(new ArrayList<>()));
if (Optional.ofNullable(first._2()).isPresent()) {
List<MinOrganization> organizations = new ArrayList<>();
organizations.add(first._2()._2().getMinOrganization());
vs.forEachRemaining(org -> organizations.add(org._2()._2().getMinOrganization()));
@ -145,7 +160,7 @@ public class DumpDatasource implements Serializable {
}
return datasource;
}, Encoders.bean(eu.dnetlib.dhp.skgif.model.Datasource.class))
.write()
.mode(SaveMode.Overwrite)

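The denormalization pattern used here, and again in DumpGrant below, is: left-join the entity with its pre-computed minimal related entities, group by the entity identifier, and fold each group into one dump record. A self-contained, local-mode sketch of the groupByKey/mapGroups step with a made-up bean (not the project's classes):

import java.io.Serializable;
import java.util.Arrays;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class DenormalizationSketch {
    public static class Pair implements Serializable {
        private String id;
        private String org;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getOrg() {
            return org;
        }

        public void setOrg(String org) {
            this.org = org;
        }
    }

    static Pair pair(String id, String org) {
        Pair p = new Pair();
        p.setId(id);
        p.setOrg(org);
        return p;
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate();
        Dataset<Pair> related = spark
            .createDataset(
                Arrays.asList(pair("ds1", "orgA"), pair("ds1", "orgB"), pair("ds2", "orgC")),
                Encoders.bean(Pair.class));
        related
            .groupByKey((MapFunction<Pair, String>) Pair::getId, Encoders.STRING())
            .mapGroups((MapGroupsFunction<String, Pair, String>) (k, vs) -> {
                StringBuilder sb = new StringBuilder(k).append(" -> ");
                vs.forEachRemaining(p -> sb.append(p.getOrg()).append(' '));
                return sb.toString(); // one denormalized record per id
            }, Encoders.STRING())
            .show(false);
        spark.stop();
    }
}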
View File: DumpGrant.java

@ -10,8 +10,6 @@ import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.avro.generic.GenericData;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
@ -29,8 +27,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.skgif.model.*;
import scala.Tuple2;
/**
@ -90,84 +90,92 @@ public class DumpGrant implements Serializable {
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
!r.getDataInfo().getInvisible() &&
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
Dataset<EncloseMinElement> eme = Utils
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations
.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
.map(
(MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(
t2._1().getTarget(), t2._2()),
Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
projects
.joinWith(partecipantOrganization, projects.col("id").equalTo(partecipantOrganization.col("_1")), "left")
.groupByKey(
(MapFunction<Tuple2<Project, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(),
Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k, v) -> {
Grant g = new Grant();
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
g.setIdentifiers(getProjectIdentifier(first._1()));
g.setTitle(first._1().getTitle().getValue());
g
.setSummary(
Optional
.ofNullable(first._1().getSummary())
.map(value -> value.getValue())
.orElse(new String()));
g
.setAcronym(
Optional
.ofNullable(first._1().getAcronym())
.map(value -> value.getValue())
.orElse(new String()));
g.setFunder(Utils.getFunderName(first._1().getFundingtree().get(0).getValue()));
// funding_stream: extracted from fundingtree using the XPath //funding_level_[n]
g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
g
.setCurrency(
Optional
.ofNullable(first._1().getCurrency())
.map(value -> value.getValue())
.orElse(new String()));
g
.setFunded_amount(
Optional
.ofNullable(first._1().getFundedamount())
.orElse(null));
g
.setKeywords(
first
._1()
.getSubjects()
.stream()
.map(s -> s.getValue())
.collect(Collectors.toList()));
g
.setStart_date(
Optional
.ofNullable(first._1().getStartdate())
.map(value -> value.getValue())
.orElse(new String()));
g
.setEnd_date(
Optional
.ofNullable(first._1().getEnddate())
.map(value -> value.getValue())
.orElse(new String()));
g
.setWebsite(
Optional
.ofNullable(first._1().getWebsiteurl())
.map(value -> value.getValue())
.orElse(new String()));
if (Optional.ofNullable(first._2()).isPresent()) {
List<MinOrganization> relevantOrganizatios = new ArrayList<>();
relevantOrganizatios.add(first._2()._2().getMinOrganization());
v
.forEachRemaining(
t2 -> relevantOrganizatios
.add(t2._2()._2().getMinOrganization()));
g.setBeneficiaries(relevantOrganizatios);
}
return g;
}, Encoders.bean(Grant.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
@ -185,8 +193,6 @@ public class DumpGrant implements Serializable {
}
private static List<Identifier> getProjectIdentifier(Project project) throws DocumentException {
List<Identifier> identifiers = new ArrayList<>();
if (project.getPid().size() > 0)

View File: DumpResult.java

@ -7,7 +7,6 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
@ -20,6 +19,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.*;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
@ -80,8 +80,8 @@ public class DumpResult implements Serializable {
public static <R extends Result> void mapResult(SparkSession spark, String inputPath,
String workingDir, String outputPath) {
// emit the snippet of the entities to be included in other entities for the dematerialization
// emitMinEntities(spark, inputPath, workingDir);
// selection of the relevant relations from result type to other entity. Only teh semantic relevant ones are
// considered
@ -179,8 +179,16 @@ public class DumpResult implements Serializable {
(MapFunction<Tuple2<PartialResearchProduct, RelationPerProduct>, PartialResearchProduct>) t2 -> {
PartialResearchProduct prp = t2._1();
if (Optional.ofNullable(t2._2()).isPresent()) {
prp
.setRelated_products(
t2
._2()
.getRelatedProduct()
.keySet()
.stream()
.map(
key -> Relations.newInstance(key, t2._2().getRelatedProduct().get(key)))
.collect(Collectors.toList()));
prp.setRelevant_organizations(t2._2().getOrganizations());
prp.setFunding(t2._2().getFunding());
}
@ -251,7 +259,7 @@ public class DumpResult implements Serializable {
.ofNullable(epm.getInstance().getLicense())
.map(value -> value.getValue())
.orElse(null));
if (Optional.ofNullable(epm.getInstance().getUrl()).isPresent() && epm.getInstance().getUrl().size() > 0)
manifestation
.setUrl(epm.getInstance().getUrl().get(0));
else
@ -263,12 +271,26 @@ public class DumpResult implements Serializable {
if (Optional.ofNullable(t2._2()).isPresent()) {
manifestation.setBiblio(getBiblio(epm));
if (Optional.ofNullable(t2._2().getJournal().getIssnPrinted()).isPresent())
manifestation
.setVenue(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.VENUE, t2._2().getJournal().getIssnPrinted()),
t2._1().getJournal().getName()));
else if (Optional.ofNullable(t2._2().getJournal().getIssnOnline()).isPresent())
manifestation
.setVenue(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()),
t2._1().getJournal().getName()));
}
manifestation
.setHosting_datasource(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()),
epm.getInstance().getHostedby().getValue()));
return manifestation;
}
@ -351,25 +373,26 @@ public class DumpResult implements Serializable {
r.getRelClass().equalsIgnoreCase(RelationType.PART.label) ||
r.getRelClass().equalsIgnoreCase(RelationType.VERSION.label) ||
r.getRelClass().equalsIgnoreCase(RelationType.CITATION.label));
Dataset<EncloseMinElement> encloseMinEntity = Utils
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class);
relation
.joinWith(encloseMinEntity, relation.col("target").equalTo(encloseMinEntity.col("enclosedEntityId")))
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, EncloseMinElement>) t2 -> {
EncloseMinElement eme = t2._2();
eme.setResultId(t2._1().getSource());
eme.setSemantics(t2._1().getRelClass());
return eme;
}, Encoders.bean(EncloseMinElement.class))
.groupByKey((MapFunction<EncloseMinElement, String>) eme -> eme.getResultId(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, EncloseMinElement, RelationPerProduct>) (k, v) -> {
RelationPerProduct rpp = new RelationPerProduct();
rpp.setResultId(k);
insertEnclosedElement(rpp, v.next());
v.forEachRemaining(e -> insertEnclosedElement(rpp, e));
return rpp;
}, Encoders.bean(RelationPerProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
@ -377,30 +400,32 @@ public class DumpResult implements Serializable {
}
private static void insertEnclosedElement(RelationPerProduct rpp, EncloseMinElement element) {
if (Optional.ofNullable(element.getMinOrganization()).isPresent())
rpp.getOrganizations().add(element.getMinOrganization());
if (Optional.ofNullable(element.getMinGrant()).isPresent())
rpp.getFunding().add(element.getMinGrant());
if (Optional.ofNullable(element.getMinProduct()).isPresent()) {
String sem = element.getSemantics();
if (!rpp.getRelatedProduct().containsKey(sem))
rpp.getRelatedProduct().put(sem, new ArrayList<>());
rpp.getRelatedProduct().get(sem).add(element.getMinProduct());
}
}
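insertEnclosedElement buckets related MinProducts by relation semantics; the map bookkeeping is equivalent to this small, self-contained sketch (the product object is an empty placeholder):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import eu.dnetlib.dhp.skgif.model.MinProduct;
import eu.dnetlib.dhp.skgif.model.RelationType;

public class RelatedProductSketch {
    public static void main(String[] args) {
        Map<String, List<MinProduct>> relatedProduct = new HashMap<>();
        String sem = RelationType.SUPPLEMENT.label;
        // computeIfAbsent is the idiomatic form of the containsKey/put pair above
        relatedProduct.computeIfAbsent(sem, s -> new ArrayList<>()).add(new MinProduct());
        System.out.println(relatedProduct.get(sem).size()); // 1
    }
}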
private static <R extends Result> Dataset<EncloseMinElement> getMinProduct(SparkSession spark, String inputPath,
Class<R> clazz) {
return Utils
.readPath(spark, inputPath, clazz)
.filter(
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
!r.getDataInfo().getInvisible())
.map((MapFunction<R, EncloseMinElement>) r -> {
EncloseMinElement eme = new EncloseMinElement();
eme.setEnclosedEntityId(r.getId());
eme.setMinProduct(Utils.getMinProduct(r));
return eme;
}, Encoders.bean(EncloseMinElement.class));
}
}

View File: DumpVenue.java

@ -65,7 +65,7 @@ public class DumpVenue implements Serializable {
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath + "Venue");
Utils.removeOutputDir(spark, outputPath + "Venues");
mapVenue(spark, inputPath, outputPath, workingDir);
});
@ -82,14 +82,14 @@ public class DumpVenue implements Serializable {
&& d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"));
datasourceDataset
.joinWith(
manifestationDataset, datasourceDataset.col("id").equalTo(manifestationDataset.col("hostedby")),
"left")
.map((MapFunction<Tuple2<Datasource, EmitPerManifestation>, Venue>) t2 -> {
Venue venue = new Venue();
Datasource d = t2._1();
if (Optional.ofNullable(d.getJournal()).isPresent() && Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
venue.setLocal_identifier(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()));
else if (Optional.ofNullable(d.getJournal()).isPresent() && Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
venue.setLocal_identifier(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()));
venue.setIdentifiers(getVenueIdentifier(d.getJournal()));
venue.setName(d.getOfficialname().getValue());
@ -103,7 +103,6 @@ public class DumpVenue implements Serializable {
venue.setContributions(null);
return venue;
}, Encoders.bean(Venue.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

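The added Optional.ofNullable(d.getJournal()) checks guard against datasources without journal metadata; an equivalent illustration with stub types (not the project's oaf beans):

import java.util.Optional;

public class JournalGuardSketch {
    static class Journal {
        String issnPrinted;

        String getIssnPrinted() {
            return issnPrinted;
        }
    }

    static class Datasource {
        Journal journal;

        Journal getJournal() {
            return journal;
        }
    }

    public static void main(String[] args) {
        Datasource d = new Datasource(); // journal left null, like the records that used to fail
        String issn = Optional.ofNullable(d.getJournal()).map(Journal::getIssnPrinted).orElse(null);
        System.out.println(issn); // prints null instead of throwing NullPointerException
    }
}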
View File: EmitFromEntities.java

@ -7,8 +7,6 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.skgif.model.*;
import scala.Tuple2;
@ -80,94 +80,127 @@ public class EmitFromEntities implements Serializable {
}
private static void emitFromProject(SparkSession spark, String inputPath, String workingDir) {
Utils.readPath(spark, inputPath + "project" , Project.class)
.filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
.map((MapFunction<Project, EncloseMinElement>) p->{
EncloseMinElement eme = new EncloseMinElement();
eme.setEnclosedEntityId(p.getId());
eme.setMinGrant(Utils.getMinGrant(p));
return eme;}, Encoders.bean(EncloseMinElement.class) )
.write()
.mode(SaveMode.Append)
.option("compression","gzip")
.json(workingDir + "/minEntity");
Utils
.readPath(spark, inputPath + "project", Project.class)
.filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
.map((MapFunction<Project, EncloseMinElement>) p -> {
EncloseMinElement eme = new EncloseMinElement();
eme.setEnclosedEntityId(p.getId());
eme.setMinGrant(Utils.getMinGrant(p));
return eme;
}, Encoders.bean(EncloseMinElement.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(workingDir + "/minEntity");
}
private static void emitFromOrganization(SparkSession spark, String inputPath, String workingDir) {
Utils.readPath(spark, inputPath + "organization", Organization.class)
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
.map((MapFunction<Organization, EncloseMinElement>) o -> {
EncloseMinElement eme = new EncloseMinElement();
eme.setMinOrganization(Utils.getMinOrganization(o));
eme.setEnclosedEntityId(o.getId());
return eme;},
Encoders.bean(EncloseMinElement.class))
.write()
.mode(SaveMode.Append)
.option("compression","gzip")
.json(workingDir + "/minEntity");
Utils
.readPath(spark, inputPath + "organization", Organization.class)
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
.map((MapFunction<Organization, EncloseMinElement>) o -> {
EncloseMinElement eme = new EncloseMinElement();
eme.setMinOrganization(Utils.getMinOrganization(o));
eme.setEnclosedEntityId(o.getId());
return eme;
},
Encoders.bean(EncloseMinElement.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(workingDir + "/minEntity");
}
private static void emitFromDatasource(SparkSession spark, String inputPath, String workingDir) {
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
EncloseMinElement eme = new EncloseMinElement();
eme.setMinDatsource(MinVenue.newInstance(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
eme.setEnclosedEntityId(d.getId());
return eme;
}
, Encoders.bean(EncloseMinElement.class))
.write()
.mode(SaveMode.Append)
.option("compression","gzip")
.json(workingDir + "/minEntity");
Utils
.readPath(spark, inputPath + "datasource", Datasource.class)
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
EncloseMinElement eme = new EncloseMinElement();
eme
.setMinDatsource(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
eme.setEnclosedEntityId(d.getId());
return eme;
}, Encoders.bean(EncloseMinElement.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(workingDir + "/minEntity");
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
.filter((FilterFunction<Datasource>) d-> d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"))
.map((MapFunction<Datasource, EncloseMinElement>) d-> {
EncloseMinElement eme = new EncloseMinElement();
eme.setEnclosedEntityId(d.getId());
if(Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()), d.getOfficialname().getValue()));
if(Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()), d.getOfficialname().getValue()));
return null;
},Encoders.bean(EncloseMinElement.class) )
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Append)
.option("compression","gzip")
.json(workingDir + "/minEntity");
Utils
.readPath(spark, inputPath + "datasource", Datasource.class)
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
.filter(
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEoscdatasourcetype()).isPresent() && d
.getEoscdatasourcetype()
.getClassid()
.equalsIgnoreCase("Journal archive"))
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
EncloseMinElement eme = new EncloseMinElement();
eme.setEnclosedEntityId(d.getId());
if (Optional.ofNullable(d.getJournal()).isPresent() &&
Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent()) {
eme
.setMinVenue(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()),
d.getOfficialname().getValue()));
return eme;
}
if (Optional.ofNullable(d.getJournal()).isPresent() &&
Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent()) {
eme
.setMinVenue(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()),
d.getOfficialname().getValue()));
return eme;
}
return null;
}, Encoders.bean(EncloseMinElement.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(workingDir + "/minEntity");
}
// for each result, emit its id + the journal (if present) + the instance + the instance's hostedby
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
emitManifestation(spark, inputPath, workingDir);
emitPerson(spark, inputPath, outputPath, workingDir);
emitTopic(spark, inputPath, outputPath, workingDir);
emitMinProduct(spark, inputPath, workingDir);
}
private static <R extends Result> void emitMinProduct(SparkSession spark, String inputPath, String workingDir) {
Utils.removeOutputDir(spark, workingDir + "minEntity");
ModelSupport.entityTypes.keySet().forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Utils
.readPath(spark, inputPath + e.name(), resultClazz)
.map((MapFunction<R, EncloseMinElement>) p -> {
EncloseMinElement eme = new EncloseMinElement();
eme.setMinProduct(Utils.getMinProduct(p));
eme.setEnclosedEntityId(p.getId());
return eme;
}, Encoders.bean(EncloseMinElement.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(workingDir + "/minEntity");
}
@ -279,7 +312,7 @@ public class EmitFromEntities implements Serializable {
}
return authors.iterator();
}, Encoders.bean(Persons.class))
.filter((FilterFunction<Persons>) p -> p != null)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
@ -331,7 +364,6 @@ public class EmitFromEntities implements Serializable {
.option("compression", "gzip")
.json(workingDir + e.name() + "/manifestation");
}
});

View File: ResultMapper.java

@ -62,11 +62,14 @@ public class ResultMapper implements Serializable {
contribution.setPerson(minPerson);
} else {
if (Optional.ofNullable(a.getRank()).isPresent()) {
minPerson
.setLocal_identifier(
Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
contribution
.setPerson(minPerson);
} else {
minPerson
.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
contribution.setPerson(minPerson);
}
@ -95,14 +98,19 @@ public class ResultMapper implements Serializable {
.map(s -> {
ResultTopic topic = new ResultTopic();
MinTopic minTopic = new MinTopic();
minTopic
.setLocal_identifier(
Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
minTopic.setValue(s.getValue());
topic
.setTopic(minTopic);
if (Optional.ofNullable(s.getDataInfo()).isPresent()) {
Provenance provenance = new Provenance();
try {
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
} catch (NumberFormatException nfe) {
// non-numeric or empty trust values are skipped
}
provenance.setType(s.getDataInfo().getInferenceprovenance());
topic.setProvenance(provenance);
}

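The try/catch around Double.valueOf is needed because the trust value coming from the graph can be empty or non-numeric; a quick, self-contained demonstration:

public class TrustParseSketch {
    public static void main(String[] args) {
        System.out.println(Double.valueOf("0.9")); // well-formed trust parses fine
        try {
            Double.valueOf(""); // empty trust: throws NumberFormatException
        } catch (NumberFormatException nfe) {
            System.out.println("invalid trust skipped, as ResultMapper now does");
        }
    }
}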
View File: Utils.java

@ -6,27 +6,28 @@ import java.io.StringReader;
import java.util.List;
import java.util.Optional;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.skgif.model.MinGrant;
import eu.dnetlib.dhp.skgif.model.MinOrganization;
import eu.dnetlib.dhp.skgif.model.MinProduct;
import eu.dnetlib.dhp.skgif.model.Prefixes;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
/**
@ -88,14 +89,23 @@ public class Utils implements Serializable {
MinOrganization mo = new MinOrganization();
mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
mo.setName(o.getLegalname().getValue());
for (StructuredProperty pid : o.getPid()) {
switch (pid.getQualifier().getClassid().toLowerCase()) {
case "ror":
mo.setRor(pid.getValue());
break;
case "isni":
mo.setIsni(pid.getValue());
break;
case "fundref":
mo.setFundRef(pid.getValue());
break;
case "ringgold":
mo.setRinGold(pid.getValue());
break;
case "wikidata":
mo.setWikidata(pid.getValue());
break;
}
}
@ -107,10 +117,11 @@ public class Utils implements Serializable {
mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
mg.setCode(p.getCode().getValue());
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
mg.setTitle(p.getTitle().getValue());
return mg;
}
public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException {
MinProduct mp = new MinProduct();
mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
for (StructuredProperty title : r.getTitle()) {
@ -118,19 +129,21 @@ public class Utils implements Serializable {
mp.setTitle(title.getValue());
}
}
if (r.getPid() != null)
for (StructuredProperty pid : r.getPid()) {
switch (pid.getQualifier().getClassid().toLowerCase()) {
case "doi":
mp.setDoi(pid.getValue());
break;
case "pmcid":
mp.setPmcid(pid.getValue());
break;
case "arxiv":
mp.setArxivid(pid.getValue());
break;
}
}
return mp;
}
}

View File: Couple.java

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
import java.io.Serializable;
@ -7,29 +8,29 @@ import java.io.Serializable;
* @Date 04/03/24
*/
public class Couple implements Serializable {
private String originalIdentifier;
private String localIdentifier;
public String getOriginalIdentifier() {
return originalIdentifier;
}
public void setOriginalIdentifier(String originalIdentifier) {
this.originalIdentifier = originalIdentifier;
}
public String getLocalIdentifier() {
return localIdentifier;
}
public void setLocalIdentifier(String localIdentifier) {
this.localIdentifier = localIdentifier;
}
public static Couple newInstance(String originalIdentifier, String localIdentifier) {
Couple couple = new Couple();
couple.originalIdentifier = originalIdentifier;
couple.localIdentifier = localIdentifier;
return couple;
}
}

View File: EncloseMinElement.java

@ -1,87 +1,88 @@
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
import java.io.Serializable;
import eu.dnetlib.dhp.skgif.model.MinGrant;
import eu.dnetlib.dhp.skgif.model.MinOrganization;
import eu.dnetlib.dhp.skgif.model.MinProduct;
import eu.dnetlib.dhp.skgif.model.MinVenue;
/**
* @author miriam.baglioni
* @Date 04/03/24
*/
public class EncloseMinElement implements Serializable {
private String resultId;
private String enclosedEntityId;
private MinOrganization minOrganization;
private MinVenue minVenue;
private MinVenue minDatsource;
private MinGrant minGrant;
private MinProduct minProduct;
private String semantics;
public MinVenue getMinVenue() {
return minVenue;
}
public void setMinVenue(MinVenue minVenue) {
this.minVenue = minVenue;
}
public MinVenue getMinDatsource() {
return minDatsource;
}
public void setMinDatsource(MinVenue minDatsource) {
this.minDatsource = minDatsource;
}
public String getSemantics() {
return semantics;
}
public void setSemantics(String semantics) {
this.semantics = semantics;
}
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
public String getEnclosedEntityId() {
return enclosedEntityId;
}
public void setEnclosedEntityId(String enclosedEntityId) {
this.enclosedEntityId = enclosedEntityId;
}
public MinOrganization getMinOrganization() {
return minOrganization;
}
public void setMinOrganization(MinOrganization minOrganization) {
this.minOrganization = minOrganization;
}
public MinGrant getMinGrant() {
return minGrant;
}
public void setMinGrant(MinGrant minGrant) {
this.minGrant = minGrant;
}
public MinProduct getMinProduct() {
return minProduct;
}
public void setMinProduct(MinProduct minProduct) {
this.minProduct = minProduct;
}
}

View File: RelationPerProduct.java

@ -20,7 +20,7 @@ public class RelationPerProduct implements Serializable {
private String resultId;
private List<MinOrganization> organizations;
private List<MinGrant> funding;
private Map<String, List<MinProduct>> relatedProduct;
public RelationPerProduct() {
organizations = new ArrayList<>();

View File: ZenodoUploadTest.java

@ -7,16 +7,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromEntitiesJobTest;
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
@ -31,7 +27,7 @@ public class ZenodoUploadTest {
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(EmitFromEntitiesJobTest.class.getSimpleName())
.toString();
}

View File: DumpDatasourceTest.java

@ -173,4 +173,37 @@ public class DumpDatasourceTest implements Serializable {
//
}
@Test
public void testDumpDatasourceApi() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
.getPath();
final String workingDir = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
.getPath();
DumpDatasource
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir,
"-workingDir", workingDir
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Datasource> datasource = sc
.textFile(workingDir.toString() + "Datasource")
.map(item -> OBJECT_MAPPER.readValue(item, Datasource.class));
Dataset<Datasource> datasourceDataset = spark.createDataset(datasource.rdd(), Encoders.bean(Datasource.class));
datasourceDataset
.foreach((ForeachFunction<Datasource>) d -> System.out.println(OBJECT_MAPPER.writeValueAsString(d)));
}
}

View File: DumpGrantTest.java

@ -118,4 +118,34 @@ public class DumpGrantTest implements Serializable {
//
}
@Test
public void testDumpGrantApi() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
.getPath();
final String workingDir = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
.getPath();
DumpGrant
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir,
"-workingDir", workingDir
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Grant> grant = sc
.textFile(workingDir + "Grant")
.map(item -> OBJECT_MAPPER.readValue(item, Grant.class));
grant.foreach(g -> System.out.println(OBJECT_MAPPER.writeValueAsString(g)));
}
}

View File: DumpOrganizationTest.java

@ -122,4 +122,33 @@ public class DumpOrganizationTest implements Serializable {
//
}
@Test
public void testDumpOrganizationAPI() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
.getPath();
final String workingDir = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
.getPath();
DumpOrganization
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Organization> organization = sc
.textFile(workingDir.toString() + "/Organization")
.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
organization.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
}
}

View File: DumpResultTest.java

@ -118,17 +118,15 @@ public class DumpResultTest {
1,
temp
.getRelatedProduct()
.get(RelationType.SUPPLEMENT.label)
.size());
Assertions
.assertEquals(
1,
temp
.getRelatedProduct()
.get(RelationType.DOCUMENTS.label)
.size());
JavaRDD<ResearchProduct> researchProduct = sc
.textFile(workingDir.toString() + "/publication/researchproduct")
@ -166,6 +164,8 @@ public class DumpResultTest {
Assertions.assertEquals(2, product.getManifestations().size());
researchProductDataset.show(false);
researchProductDataset
.foreach((ForeachFunction<ResearchProduct>) rp -> System.out.println(OBJECT_MAPPER.writeValueAsString(rp)));
}
@ -267,15 +267,27 @@ public class DumpResultTest {
.anyMatch(
t -> t
.getTopic()
.getValue()
.equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
// check contributions
Assertions.assertEquals(4, rp.getContributions().size());
Assertions
.assertEquals(
3,
rp
.getContributions()
.stream()
.filter(c -> c.getPerson().getLocal_identifier().startsWith("person"))
.count());
Assertions
.assertEquals(
1,
rp
.getContributions()
.stream()
.filter(c -> c.getPerson().getLocal_identifier().startsWith("temp"))
.count());
rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
Assertions
.assertEquals(
@ -477,4 +489,326 @@ public class DumpResultTest {
}
@Test
public void testEmitFromApiDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
.getPath();
final String workingDir = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
.getPath();
DumpResult
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-workingDir", workingDir,
"-outputPath", workingDir
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<ResearchProduct> researchProduct = sc
.textFile(workingDir + "ResearchProduct")
.map(item -> OBJECT_MAPPER.readValue(item, ResearchProduct.class));
researchProduct.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
// org.apache.spark.sql.Dataset<ResearchProduct> researchProductDataset = spark
// .createDataset(researchProduct.rdd(), Encoders.bean(ResearchProduct.class));
// Assertions.assertEquals(1, researchProductDataset.count());
//
// ResearchProduct rp = researchProductDataset.first();
//
// // check the local identifier
// Assertions.assertEquals("product_____::e22a152ab43b9215d14ece613f76ec84", rp.getLocal_identifier());
//
// // check the pids of the result
// Assertions.assertEquals(3, rp.getIdentifiers().size());
// Assertions
// .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("doi")).count());
// Assertions
// .assertEquals(
// "10.1007/s40199-021-00403-x",
// rp
// .getIdentifiers()
// .stream()
// .filter(p -> p.getScheme().equalsIgnoreCase("doi"))
// .collect(Collectors.toList())
// .get(0)
// .getValue());
// Assertions
// .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("pmid")).count());
// Assertions
// .assertEquals(
// "34327650",
// rp
// .getIdentifiers()
// .stream()
// .filter(p -> p.getScheme().equalsIgnoreCase("pmid"))
// .collect(Collectors.toList())
// .get(0)
// .getValue());
// Assertions
// .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("pmc")).count());
// Assertions
// .assertEquals(
// "PMC8602609",
// rp
// .getIdentifiers()
// .stream()
// .filter(p -> p.getScheme().equalsIgnoreCase("pmc"))
// .collect(Collectors.toList())
// .get(0)
// .getValue());
//
// // check the title
// Assertions.assertEquals(1, rp.getTitles().keySet().size());
// Assertions.assertTrue(rp.getTitles().keySet().contains("none"));
// Assertions.assertEquals(1, rp.getTitles().get("none").size());
//
// // check abstract
// Assertions.assertEquals(1, rp.getAbstracts().keySet().size());
// Assertions.assertTrue(rp.getAbstracts().keySet().contains("none"));
// Assertions.assertEquals(1, rp.getAbstracts().get("none").size());
//
// // check type
// Assertions.assertEquals("literature", rp.getProduct_type());
//
// // check topics
// Assertions.assertEquals(3, rp.getTopics().size());
// Assertions
// .assertTrue(
// rp
// .getTopics()
// .stream()
// .anyMatch(
// t -> t
// .getTopic()
// .getValue()
// .equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
//
// // check contributions
// Assertions.assertEquals(4, rp.getContributions().size());
// Assertions
// .assertEquals(
// 3,
// rp
// .getContributions()
// .stream()
// .filter(c -> c.getPerson().getLocal_identifier().startsWith("person"))
// .count());
// Assertions
// .assertEquals(
// 1,
// rp
// .getContributions()
// .stream()
// .filter(c -> c.getPerson().getLocal_identifier().startsWith("temp"))
// .count());
// rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
// Assertions
// .assertEquals(
// 1,
// rp
// .getContributions()
// .stream()
// .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0001-8284-6269true")))
// .collect(Collectors.toList())
// .get(0)
// .getRank());
// Assertions
// .assertEquals(
// 2,
// rp
// .getContributions()
// .stream()
// .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0002-0940-893xtrue")))
// .collect(Collectors.toList())
// .get(0)
// .getRank());
// Assertions
// .assertEquals(
// 3,
// rp
// .getContributions()
// .stream()
// .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0001-5291-577xtrue")))
// .collect(Collectors.toList())
// .get(0)
// .getRank());
// Assertions
// .assertEquals(
// 4,
// rp
// .getContributions()
// .stream()
// .filter(
// c -> c
// .getPerson()
// .equals(
// Utils
// .getIdentifier(
// Prefixes.TEMPORARY_PERSON,
// "50|doi_dedup___::0000661be7c602727bae9690778b16514")))
// .collect(Collectors.toList())
// .get(0)
// .getRank());
// researchProductDataset.show(10, 100, true);
//
// // check manifestation 1
// Assertions.assertEquals(3, rp.getManifestations().size());
// Manifestation manifestation = rp
// .getManifestations()
// .stream()
// .filter(
// m -> m
// .getHosting_datasource()
// .equals(
// Utils.getIdentifier(Prefixes.DATASOURCE, "10|doajarticles::6107489403b31fc7cf37cb7fda35f7f1")))
// .collect(Collectors.toList())
// .get(0);
// Assertions.assertEquals("Article", manifestation.getProduct_local_type());
// Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
// Assertions.assertEquals(1, manifestation.getDates().size());
// Assertions.assertEquals("2021-07-29", manifestation.getDates().get(0).getValue());
// Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
// Assertions.assertEquals(PeerReview.PEER_REVIEWED.label, manifestation.getPeer_review());
// Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
// Assertions.assertEquals(AccessRight.CLOSED.label, manifestation.getAccess_right());
// Assertions.assertEquals("Springer Nature TDM", manifestation.getLicence());
// Assertions.assertEquals("https://doi.org/10.1007/s40199-021-00403-x", manifestation.getUrl());
// Assertions.assertEquals("10.1007/s40199-021-00403-x", manifestation.getPid());
// Assertions.assertTrue(manifestation.getBiblio() != null);
// Biblio biblio = manifestation.getBiblio();
// Assertions.assertTrue(biblio.getEdition() == null);
// Assertions.assertTrue(biblio.getIssue() == null);
// Assertions.assertEquals("Springer Science and Business Media LLC", biblio.getPublisher());
// Assertions.assertEquals("29", biblio.getVolume());
// Assertions.assertEquals("415", biblio.getStart_page());
// Assertions.assertEquals("438", biblio.getEnd_page());
//
// // check manifestation 2
// manifestation = rp
// .getManifestations()
// .stream()
// .filter(
// m -> m
// .getHosting_datasource()
// .equals(
// Utils.getIdentifier(Prefixes.DATASOURCE, "10|openaire____::55045bd2a65019fd8e6741a755395c8c")))
// .collect(Collectors.toList())
// .get(0);
// Assertions.assertEquals("Article", manifestation.getProduct_local_type());
// Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
// Assertions.assertEquals(1, manifestation.getDates().size());
// Assertions.assertEquals("2020-01-03", manifestation.getDates().get(0).getValue());
// Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
// Assertions.assertEquals(PeerReview.NON_PEER_REVIEWED.label, manifestation.getPeer_review());
// Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
// Assertions.assertEquals(AccessRight.UNAVAILABLE.label, manifestation.getAccess_right());
// Assertions.assertEquals(null, manifestation.getLicence());
// Assertions.assertEquals("https://pubmed.ncbi.nlm.nih.gov/34327650", manifestation.getUrl());
// Assertions.assertEquals("34327650", manifestation.getPid());
// Assertions.assertTrue(manifestation.getBiblio() == null);
//
// // check manifestation 3
// manifestation = rp
// .getManifestations()
// .stream()
// .filter(
// m -> m
// .getHosting_datasource()
// .equals(
// Utils.getIdentifier(Prefixes.DATASOURCE, "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c")))
// .collect(Collectors.toList())
// .get(0);
// Assertions.assertEquals("Other literature type", manifestation.getProduct_local_type());
// Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
// Assertions.assertEquals(1, manifestation.getDates().size());
// Assertions.assertEquals("2021-07-29", manifestation.getDates().get(0).getValue());
// Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
// Assertions.assertEquals(PeerReview.NON_PEER_REVIEWED.label, manifestation.getPeer_review());
// Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
// Assertions.assertEquals(AccessRight.OPEN.label, manifestation.getAccess_right());
// Assertions.assertEquals(null, manifestation.getLicence());
// Assertions.assertEquals("https://europepmc.org/articles/PMC8602609/", manifestation.getUrl());
// Assertions.assertEquals("PMC8602609", manifestation.getPid());
// Assertions.assertTrue(manifestation.getBiblio() == null);
//
// // check relevant organization
// Assertions.assertEquals(1, rp.getRelevant_organizations().size());
// Assertions
// .assertEquals(
// Prefixes.ORGANIZATION.label + "601e510b1fda7cc6cb03329531502171",
// rp.getRelevant_organizations().get(0));
//
// // check funding
// Assertions.assertEquals(1, rp.getFunding().size());
// Assertions.assertEquals(Prefixes.GRANT.label + "a7795022763d413f5de59036ebbd0c52", rp.getFunding().get(0));
//
// // check related products
// Assertions.assertEquals(5, rp.getRelated_products().size());
// Assertions
// .assertEquals(
// 4,
// rp
// .getRelated_products()
// .stream()
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.CITATION.label))
// .collect(Collectors.toList())
// .get(0)
// .getProduct_list()
// .size());
// Assertions
// .assertEquals(
// 1,
// rp
// .getRelated_products()
// .stream()
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.DOCUMENTS.label))
// .collect(Collectors.toList())
// .get(0)
// .getProduct_list()
// .size());
// Assertions
// .assertEquals(
// 1,
// rp
// .getRelated_products()
// .stream()
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.PART.label))
// .collect(Collectors.toList())
// .get(0)
// .getProduct_list()
// .size());
// Assertions
// .assertEquals(
// 1,
// rp
// .getRelated_products()
// .stream()
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.SUPPLEMENT.label))
// .collect(Collectors.toList())
// .get(0)
// .getProduct_list()
// .size());
// Assertions
// .assertEquals(
// 1,
// rp
// .getRelated_products()
// .stream()
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.VERSION.label))
// .collect(Collectors.toList())
// .get(0)
// .getProduct_list()
// .size());
}
}

View File

@ -0,0 +1,102 @@
package eu.dnetlib.dhp.oa.graph.dump.skgif;
import java.io.IOException;
import java.io.Serializable;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.skgif.model.Venue;
/**
* @author miriam.baglioni
* @Date 22/02/24
*/
public class DumpVenueTest implements Serializable {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(DumpVenueTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(DumpVenueTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(DumpVenueTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(DumpVenueTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testDumpVenueApi() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
.getPath();
final String workingDir = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
.getPath();
DumpVenue
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", "/tmp/",
"-workingDir", workingDir
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Venue> venues = sc
.textFile(workingDir + "Venues")
.map(item -> OBJECT_MAPPER.readValue(item, Venue.class));
Dataset<Venue> venueDataset = spark.createDataset(venues.rdd(), Encoders.bean(Venue.class));
venueDataset
.foreach((ForeachFunction<Venue>) d -> System.out.println(OBJECT_MAPPER.writeValueAsString(d)));
}
}

View File

@ -28,7 +28,7 @@ import eu.dnetlib.dhp.skgif.model.Persons;
import eu.dnetlib.dhp.skgif.model.Topic;
//@Disabled
public class EmitFromResultJobTest {
public class EmitFromEntitiesJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -36,15 +36,15 @@ public class EmitFromResultJobTest {
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(EmitFromResultJobTest.class);
private static final Logger log = LoggerFactory.getLogger(EmitFromEntitiesJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(EmitFromResultJobTest.class.getSimpleName());
workingDir = Files.createTempDirectory(EmitFromEntitiesJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(EmitFromResultJobTest.class.getSimpleName());
conf.setAppName(EmitFromEntitiesJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
@ -55,7 +55,7 @@ public class EmitFromResultJobTest {
spark = SparkSession
.builder()
.appName(EmitFromResultJobTest.class.getSimpleName())
.appName(EmitFromEntitiesJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@ -104,7 +104,7 @@ public class EmitFromResultJobTest {
.filter(
(FilterFunction<Persons>) p -> p
.getLocal_identifier()
.equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
.equalsIgnoreCase("temp_person_::2c1eea261f7d9a97ab7ca8c4200781db"))
.first();
Assertions
@ -124,7 +124,7 @@ public class EmitFromResultJobTest {
&& p.getFamily_name().equalsIgnoreCase("borer")
&& !p
.getLocal_identifier()
.equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
.equalsIgnoreCase("temp_person_::2c1eea261f7d9a97ab7ca8c4200781db"))
.count());
Assertions.assertEquals("claudia", claudiaBorer.getGiven_name().toLowerCase());
Assertions.assertEquals("borer", claudiaBorer.getFamily_name().toLowerCase());
@ -157,7 +157,7 @@ public class EmitFromResultJobTest {
Dataset<EmitPerManifestation> manifestationDataset = spark
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
manifestationDataset.show(false);
Assertions.assertEquals(4, manifestationDataset.count());
Assertions.assertEquals(5, manifestationDataset.count());
Dataset<Topic> topicDataset = spark
.createDataset(topics.rdd(), Encoders.bean(Topic.class));
@ -208,26 +208,50 @@ public class EmitFromResultJobTest {
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
manifestationDataset.show(false);
// Persons claudiaBorer = personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
// .first();
//
// Assertions.assertEquals(2, personsDataset.filter((FilterFunction<Persons>) p -> p.getGiven_name().equalsIgnoreCase("claudia") && p.getFamily_name().equalsIgnoreCase("borer")).count());
// Assertions.assertEquals(1, personsDataset.filter((FilterFunction<Persons>) p -> p.getGiven_name().equalsIgnoreCase("claudia") && p.getFamily_name().equalsIgnoreCase("borer") && !p.getLocal_identifier().equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db")).count());
// Assertions.assertEquals("claudia", claudiaBorer.getGiven_name().toLowerCase());
// Assertions.assertEquals("borer", claudiaBorer.getFamily_name().toLowerCase());
//
// Assertions.assertEquals(2, personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person")).count());
// Assertions.assertEquals(1, personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person") && p.getIdentifiers().get(0).getValue().equals("0000-0002-5597-4916")).count());
// Persons orcidPerson = personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person") && p.getIdentifiers().get(0).getValue().equals("0000-0002-5597-4916")).first();
// Assertions.assertEquals("M.", orcidPerson.getGiven_name());
// Assertions.assertEquals("Kooi", orcidPerson.getFamily_name());
// Assertions.assertEquals(1, orcidPerson.getIdentifiers().size());
// Assertions.assertEquals("orcid", orcidPerson.getIdentifiers().get(0).getScheme());
// Assertions.assertEquals("0000-0002-5597-4916", orcidPerson.getIdentifiers().get(0).getValue());
//
}
// Assertions.assertEquals(4, manifestationDataset.count());
//
@Test
public void testEmitFromResultApiSubset() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
.getPath();
EmitFromEntities
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir.toString() + "/result/",
"-workingDir", workingDir.toString() + "/"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Persons> persons = sc
.textFile(workingDir.toString() + "/result/Persons")
.map(item -> OBJECT_MAPPER.readValue(item, Persons.class));
org.apache.spark.sql.Dataset<Persons> personsDataset = spark
.createDataset(persons.rdd(), Encoders.bean(Persons.class));
personsDataset.foreach((ForeachFunction<Persons>) p -> System.out.println(OBJECT_MAPPER.writeValueAsString(p)));
JavaRDD<Topic> topics = sc
.textFile(workingDir.toString() + "/result/Topic")
.map(item -> OBJECT_MAPPER.readValue(item, Topic.class));
Dataset<Topic> topicDataset = spark
.createDataset(topics.rdd(), Encoders.bean(Topic.class));
// Assertions.assertEquals(3, topicDataset.count());
topicDataset.foreach((ForeachFunction<Topic>) t -> System.out.println(OBJECT_MAPPER.writeValueAsString(t)));
JavaRDD<EmitPerManifestation> manifestation = sc
.textFile(workingDir.toString() + "/publication/manifestation")
.map(item -> OBJECT_MAPPER.readValue(item, EmitPerManifestation.class));
Dataset<EmitPerManifestation> manifestationDataset = spark
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
manifestationDataset.show(false);
}
}
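For orientation only (not part of the commit): the assertions above rely on local identifiers of the form <prefix>::<md5(key)>, e.g. temp_person_::2c1eea261f7d9a97ab7ca8c4200781db, with Prefixes.*.label supplying the prefix and DHPUtils.md5 the hash. A minimal sketch of that construction follows, assuming DHPUtils.md5 is a plain hex-encoded MD5; the key used below is taken from the commented assertions in DumpResultTest and serves only as an example.

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class LocalIdentifierSketch {

	// hex-encoded MD5, assumed to match DHPUtils.md5
	static String md5(String s) throws Exception {
		MessageDigest md = MessageDigest.getInstance("MD5");
		StringBuilder sb = new StringBuilder();
		for (byte b : md.digest(s.getBytes(StandardCharsets.UTF_8))) {
			sb.append(String.format("%02x", b));
		}
		return sb.toString();
	}

	public static void main(String[] args) throws Exception {
		// e.g. a temporary person identifier derived from a result identifier
		System.out.println("temp_person_::" + md5("50|doi_dedup___::0000661be7c602727bae9690778b16514"));
	}
}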

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,23 @@
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::4f7dd940bce9e9209645e6816dc4e081", "lastupdatetimestamp": 1706531034696, "relType": "resultResult", "source": "50|doi_________::479f67164e2f1d21baafbfbc3b12851d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::233bd897c6654f274059272b8740b47f", "lastupdatetimestamp": 0, "relType": "resultResult", "source": "50|doi_________::479f67164e2f1d21baafbfbc3b12851d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_________::1647515a356946e4857c817afc7bfa38", "lastupdatetimestamp": 1705539801728, "relType": "resultResult", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::7a40bb2e9ad2e96d6de26787ab1c310b", "lastupdatetimestamp": 1702883945826, "relType": "resultResult", "source": "50|doi_________::1f56d1b5a2e7c9223c3479476c9c3491", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::893d0a5c08409822dbb82b8973a42909", "lastupdatetimestamp": 1698123701963, "relType": "resultResult", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::8b0484d2afbd9b09116ef60c18044964", "lastupdatetimestamp": 1698123701963, "relType": "resultResult", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::6481edac8ee74c1f1bfa9c0fdd3e6f11", "lastupdatetimestamp": 1702883945826, "relType": "resultResult", "source": "50|doi_________::1f56d1b5a2e7c9223c3479476c9c3491", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::5ec916a2cec1ed474d7eb3619c3899d1", "lastupdatetimestamp": 1698123701963, "relType": "resultResult", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_________::1034996a07e1496dde6048008e2187de", "lastupdatetimestamp": 1706725621178, "relType": "resultResult", "source": "50|doi_________::0941e6d7170c6bbfb40721aa9ecff245", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_________::81a64d0689f4a2f93995b81574d2fcbe", "lastupdatetimestamp": 0, "relType": "resultResult", "source": "50|doi_________::86d7aecc7029c70836d268b4ea9b974d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
{"subRelType": "outcome", "relClass": "isProducedBy", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "40|wt__________::1e5e62235d094afd01cd56e65112fc63", "lastupdatetimestamp": 1704362206417, "relType": "resultProject", "source": "50|doi_________::a83b2a6d96883a5f2d360fb5fb7db8ae", "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
{"subRelType": "outcome", "relClass": "isProducedBy", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "40|sshrc_______::1e5e62235d094afd01cd56e65112fc63", "lastupdatetimestamp": 1706725621178, "relType": "resultProject", "source": "50|doi_________::0941e6d7170c6bbfb40721aa9ecff245", "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
{"subRelType": "outcome", "relClass": "isProducedBy", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "40|nsf_________::7a5b4fbb6ed8545679194fe45a984ec9", "lastupdatetimestamp": 1705539801728, "relType": "resultProject", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::0ff89de99d4a8f4b04cb162bcb5740cf", "relType": "resultOrganization", "source": "50|doi_________::7078f75053e8c1bff562adbce2d81fe2", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::45327df9862294107f84a90daa1e0cbc", "lastupdatetimestamp": 0, "relType": "resultOrganization", "source": "50|doi_________::86d7aecc7029c70836d268b4ea9b974d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "result:organization:semrel", "classname": "Inferred by OpenAIRE", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "propagation", "invisible": false, "trust": "0.85"}, "target": "20|openorgs____::33f2ec599c4e086e750c126f19552f67", "relType": "resultOrganization", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::b8b8ca674452579f3f593d9f5e557483", "relType": "resultOrganization", "source": "50|doi_________::0363a8e9be16f62cdfd58b3def423a57", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::6a7b1b4c40a067a1f209de6867fe094d", "lastupdatetimestamp": 1708723553432, "relType": "resultOrganization", "source": "50|doi_________::1f56d1b5a2e7c9223c3479476c9c3491", "collectedfrom": [{"value": "OpenAPC Global Initiative", "key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "Pubmed", "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357"}, {"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": [{"value": "4729.73", "key": "apc_amount"}, {"value": "EUR", "key": "apc_currency"}]}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::013584bf907fdcb40bcec00b5a78fc12", "lastupdatetimestamp": 1709018933662, "relType": "resultOrganization", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::0ae431b820e4c33db8967fbb2b919150", "lastupdatetimestamp": 0, "relType": "resultOrganization", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Pubmed", "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357"}], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "result:organization:semrel", "classname": "Inferred by OpenAIRE", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "propagation", "invisible": false, "trust": "0.85"}, "target": "20|openorgs____::4dee7ea0d1fd1b48c199ee744e21dc13", "relType": "resultOrganization", "source": "50|doi_________::7078f75053e8c1bff562adbce2d81fe2", "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::9d3eb607509fcacae1c5a7f25e36e3e5", "relType": "resultOrganization", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::c9f6cad9752276cb556e88a5111e86aa", "lastupdatetimestamp": 0, "relType": "resultOrganization", "source": "50|doi_________::7078f75053e8c1bff562adbce2d81fe2", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff