[SKG-IF] Fix denormalization issue and add new field to MinGrant
This commit is contained in:
parent
a6a6922f11
commit
0f40ed6b11
|
@ -9,7 +9,7 @@ import java.io.Serializable;
|
|||
*/
|
||||
public class Contributor implements Serializable {
|
||||
private MinPerson person; // I would not map it because we have only information regarding the person (if any)
|
||||
// associated to the leading organization
|
||||
// associated to the leading organization
|
||||
private String organization; // contributors.person
|
||||
|
||||
private String role;// private
|
||||
|
|
|
@ -28,8 +28,9 @@ public class Grant implements Serializable {
|
|||
private String start_date;// startdate.value
|
||||
private String end_date;// enddate.value
|
||||
private String website;// websiteurl.value
|
||||
private List<MinOrganization> beneficiaries;// organization.id for the organizations in the relation with semantic class
|
||||
// isParticipant produces the list of organization internal identifiers
|
||||
private List<MinOrganization> beneficiaries;// organization.id for the organizations in the relation with semantic
|
||||
// class
|
||||
// isParticipant produces the list of organization internal identifiers
|
||||
private List<Contributor> contributors;//
|
||||
|
||||
public String getLocal_identifier() {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -7,31 +8,40 @@ import java.io.Serializable;
|
|||
* @Date 04/03/24
|
||||
*/
|
||||
public class MinGrant implements Serializable {
	// Serializable bean with four String properties: local_identifier, funder, code and title.
	// The snake_case property names are kept as-is: accessor names derive from them and
	// bean-based (de)serialization elsewhere in the project presumably relies on them.
	private String local_identifier;
	private String funder;
	private String code;
	private String title;

	/** @return the grant title, or {@code null} if it was never set */
	public String getTitle() {
		return title;
	}

	/** @param title the grant title to store */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the local identifier, or {@code null} if it was never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier to store */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the funder, or {@code null} if it was never set */
	public String getFunder() {
		return funder;
	}

	/** @param funder the funder to store */
	public void setFunder(String funder) {
		this.funder = funder;
	}

	/** @return the grant code, or {@code null} if it was never set */
	public String getCode() {
		return code;
	}

	/** @param code the grant code to store */
	public void setCode(String code) {
		this.code = code;
	}
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -7,42 +8,66 @@ import java.io.Serializable;
|
|||
* @Date 04/03/24
|
||||
*/
|
||||
public class MinOrganization implements Serializable {
	// Serializable bean with String properties for an organization: local_identifier, name
	// and a set of identifier values (ror, isni, fundRef, rinGold, wikidata).
	// NOTE(review): the identifier fields presumably hold the PID value of the scheme they
	// are named after - confirm against the code that populates them.
	private String local_identifier;
	private String name;
	private String ror;
	private String isni;
	private String fundRef;
	private String rinGold;
	private String wikidata;

	/** @return the wikidata value, or {@code null} if it was never set */
	public String getWikidata() {
		return wikidata;
	}

	/** @param wikidata the wikidata value to store */
	public void setWikidata(String wikidata) {
		this.wikidata = wikidata;
	}

	/** @return the fundRef value, or {@code null} if it was never set */
	public String getFundRef() {
		return fundRef;
	}

	/**
	 * Stores the fundRef value.
	 * The body used to be empty, so the argument was silently discarded and
	 * {@link #getFundRef()} always returned {@code null}.
	 *
	 * @param value the fundRef value to store
	 */
	public void setFundRef(String value) {
		this.fundRef = value;
	}

	/** @return the rinGold value, or {@code null} if it was never set */
	public String getRinGold() {
		return rinGold;
	}

	/** @param rinGold the rinGold value to store */
	public void setRinGold(String rinGold) {
		this.rinGold = rinGold;
	}

	/** @return the local identifier, or {@code null} if it was never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier to store */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the organization name, or {@code null} if it was never set */
	public String getName() {
		return name;
	}

	/** @param name the organization name to store */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the ror value, or {@code null} if it was never set */
	public String getRor() {
		return ror;
	}

	/** @param ror the ror value to store */
	public void setRor(String ror) {
		this.ror = ror;
	}

	/** @return the isni value, or {@code null} if it was never set */
	public String getIsni() {
		return isni;
	}

	/** @param isni the isni value to store */
	public void setIsni(String isni) {
		this.isni = isni;
	}
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -7,31 +8,31 @@ import java.io.Serializable;
|
|||
* @Date 04/03/24
|
||||
*/
|
||||
public class MinPerson implements Serializable {
	// Serializable bean with three String properties for a person:
	// local_identifier, full_name and orcid. Property names are kept in snake_case
	// because the accessor names (and anything binding to them) derive from them.
	private String local_identifier;
	private String full_name;
	private String orcid;

	/** @return the local identifier, or {@code null} if it was never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier to store */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the full name, or {@code null} if it was never set */
	public String getFull_name() {
		return full_name;
	}

	/** @param full_name the full name to store */
	public void setFull_name(String full_name) {
		this.full_name = full_name;
	}

	/** @return the orcid value, or {@code null} if it was never set */
	public String getOrcid() {
		return orcid;
	}

	/** @param orcid the orcid value to store */
	public void setOrcid(String orcid) {
		this.orcid = orcid;
	}
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -7,49 +8,49 @@ import java.io.Serializable;
|
|||
* @Date 04/03/24
|
||||
*/
|
||||
public class MinProduct implements Serializable {
	// Serializable bean with five String properties for a product:
	// local_identifier, title, doi, pmcid and arxivid.
	private String local_identifier;
	private String title;
	private String doi;
	private String pmcid;
	private String arxivid;

	/** @return the local identifier, or {@code null} if it was never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier to store */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the title, or {@code null} if it was never set */
	public String getTitle() {
		return title;
	}

	/** @param title the title to store */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the doi value, or {@code null} if it was never set */
	public String getDoi() {
		return doi;
	}

	/** @param doi the doi value to store */
	public void setDoi(String doi) {
		this.doi = doi;
	}

	/** @return the pmcid value, or {@code null} if it was never set */
	public String getPmcid() {
		return pmcid;
	}

	/** @param pmcid the pmcid value to store */
	public void setPmcid(String pmcid) {
		this.pmcid = pmcid;
	}

	/** @return the arxivid value, or {@code null} if it was never set */
	public String getArxivid() {
		return arxivid;
	}

	/** @param arxivid the arxivid value to store */
	public void setArxivid(String arxivid) {
		this.arxivid = arxivid;
	}
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -7,22 +8,22 @@ import java.io.Serializable;
|
|||
* @Date 04/03/24
|
||||
*/
|
||||
public class MinTopic implements Serializable {
	// Serializable bean with two String properties for a topic: local_identifier and value.
	private String local_identifier;
	private String value;

	/** @return the local identifier, or {@code null} if it was never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param local_identifier the local identifier to store */
	public void setLocal_identifier(String local_identifier) {
		this.local_identifier = local_identifier;
	}

	/** @return the topic value, or {@code null} if it was never set */
	public String getValue() {
		return value;
	}

	/** @param value the topic value to store */
	public void setValue(String value) {
		this.value = value;
	}
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.skgif.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -7,29 +8,29 @@ import java.io.Serializable;
|
|||
* @Date 04/03/24
|
||||
*/
|
||||
public class MinVenue implements Serializable {
	// Serializable bean with two String properties (local_identifier, name) and a
	// static factory that populates both in one call.
	private String local_identifier;
	private String name;

	/** @return the local identifier, or {@code null} if it was never set */
	public String getLocal_identifier() {
		return local_identifier;
	}

	/** @param loval_identifier the local identifier to store (parameter name kept as in the original) */
	public void setLocal_identifier(String loval_identifier) {
		this.local_identifier = loval_identifier;
	}

	/** @return the name, or {@code null} if it was never set */
	public String getName() {
		return name;
	}

	/** @param name the name to store */
	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Creates a new instance with both properties set.
	 *
	 * @param local_identifier value for the local identifier (stored as given, may be {@code null})
	 * @param name value for the name (stored as given, may be {@code null})
	 * @return a freshly allocated {@code MinVenue}
	 */
	public static MinVenue newInstance(String local_identifier, String name) {
		MinVenue minVenue = new MinVenue();
		minVenue.local_identifier = local_identifier;
		minVenue.name = name;
		return minVenue;
	}
}
|
||||
|
|
|
@ -8,18 +8,11 @@ import java.io.Serializable;
|
|||
* @Date 05/09/23
|
||||
*/
|
||||
public enum RelationType implements Serializable {
|
||||
RESULT_OUTCOME_FUNDING("isProducedBy"),
|
||||
RESULT_AFFILIATIED_TO_ORGANIZATION(
|
||||
"hasAuthorInstitution"),
|
||||
DATASOURCE_PROVIDED_BY_ORGANIZATION ("isProvidedBy"),
|
||||
ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"),
|
||||
SUPPLEMENT(
|
||||
"IsSupplementedBy"),
|
||||
DOCUMENTS(
|
||||
"IsDocumentedBy"),
|
||||
PART("IsPartOf"),
|
||||
VERSION("IsNewVersionOf"),
|
||||
CITATION("Cites");
|
||||
RESULT_OUTCOME_FUNDING("isProducedBy"), RESULT_AFFILIATIED_TO_ORGANIZATION(
|
||||
"hasAuthorInstitution"), DATASOURCE_PROVIDED_BY_ORGANIZATION(
|
||||
"isProvidedBy"), ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"), SUPPLEMENT(
|
||||
"IsSupplementedBy"), DOCUMENTS(
|
||||
"IsDocumentedBy"), PART("IsPartOf"), VERSION("IsNewVersionOf"), CITATION("Cites");
|
||||
|
||||
public final String label;
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@ public class ResearchProduct implements Serializable {
|
|||
@JsonProperty("related_products")
|
||||
private List<Relations> related_products;
|
||||
|
||||
|
||||
public String getLocal_identifier() {
|
||||
return local_identifier;
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -7,11 +7,6 @@ import java.io.Serializable;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.RelationType;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
|
@ -25,9 +20,14 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.skgif.model.Identifier;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
||||
import eu.dnetlib.dhp.skgif.model.RelationType;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
|
@ -76,68 +76,83 @@ public class DumpDatasource implements Serializable {
|
|||
}
|
||||
|
||||
private static void mapDatasource(SparkSession spark, String inputPath, String outputPath, String workingDir) {
|
||||
Dataset<Relation> relation = Utils.readPath(spark, inputPath + "relation", Relation.class)
|
||||
.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference())
|
||||
.filter((FilterFunction<Relation>) r -> r.getRelClass().equalsIgnoreCase(RelationType.DATASOURCE_PROVIDED_BY_ORGANIZATION.label));
|
||||
Dataset<Relation> relation = Utils
|
||||
.readPath(spark, inputPath + "relation", Relation.class)
|
||||
.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference())
|
||||
.filter(
|
||||
(FilterFunction<Relation>) r -> r
|
||||
.getRelClass()
|
||||
.equalsIgnoreCase(RelationType.DATASOURCE_PROVIDED_BY_ORGANIZATION.label));
|
||||
|
||||
Dataset<EncloseMinElement> eme = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||
Dataset<EncloseMinElement> eme = Utils
|
||||
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||
|
||||
Dataset<Datasource> datasourceDataset = Utils
|
||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter(
|
||||
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
|
||||
&& !d.getDataInfo().getDeletedbyinference());
|
||||
Dataset<Tuple2<String, EncloseMinElement>> datasourceOrganization = relation.joinWith(eme, relation.col("target").equalTo(eme.col("enclosedEntityId")))
|
||||
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(t2._1().getSource(), t2._2()), Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter(
|
||||
(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
|
||||
&& !d.getDataInfo().getDeletedbyinference());
|
||||
Dataset<Tuple2<String, EncloseMinElement>> datasourceOrganization = relation
|
||||
.joinWith(eme, relation.col("target").equalTo(eme.col("enclosedEntityId")))
|
||||
.map(
|
||||
(MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(
|
||||
t2._1().getSource(), t2._2()),
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||
|
||||
datasourceDataset.joinWith(datasourceOrganization, datasourceDataset.col("id").equalTo(datasourceOrganization.col("_1")), "left")
|
||||
.groupByKey((MapFunction<Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(), Encoders.STRING() )
|
||||
.mapGroups((MapGroupsFunction<String, Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, eu.dnetlib.dhp.skgif.model.Datasource>) (k,vs) -> {
|
||||
datasourceDataset
|
||||
.joinWith(
|
||||
datasourceOrganization, datasourceDataset.col("id").equalTo(datasourceOrganization.col("_1")), "left")
|
||||
.groupByKey(
|
||||
(MapFunction<Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(),
|
||||
Encoders.STRING())
|
||||
.mapGroups(
|
||||
(MapGroupsFunction<String, Tuple2<Datasource, Tuple2<String, EncloseMinElement>>, eu.dnetlib.dhp.skgif.model.Datasource>) (
|
||||
k, vs) -> {
|
||||
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
|
||||
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
|
||||
Datasource d = first._1();
|
||||
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
|
||||
datasource
|
||||
.setIdentifiers(
|
||||
d
|
||||
.getPid()
|
||||
.stream()
|
||||
.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||
.collect(Collectors.toList()));
|
||||
.setIdentifiers(
|
||||
d
|
||||
.getPid()
|
||||
.stream()
|
||||
.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
datasource.setName(d.getOfficialname().getValue());
|
||||
datasource.setSubmission_policy_url(d.getSubmissionpolicyurl());
|
||||
datasource
|
||||
.setJurisdiction(
|
||||
Optional
|
||||
.ofNullable(d.getJurisdiction())
|
||||
.map(v -> v.getClassid())
|
||||
.orElse(new String()));
|
||||
.setJurisdiction(
|
||||
Optional
|
||||
.ofNullable(d.getJurisdiction())
|
||||
.map(v -> v.getClassid())
|
||||
.orElse(new String()));
|
||||
datasource.setPreservation_policy_url(d.getPreservationpolicyurl());
|
||||
datasource.setVersion_control(d.getVersioncontrol());
|
||||
|
||||
datasource
|
||||
.setData_source_classification(
|
||||
Optional
|
||||
.ofNullable(d.getEoscdatasourcetype())
|
||||
.map(v -> v.getClassname())
|
||||
.orElse(new String()));
|
||||
.setData_source_classification(
|
||||
Optional
|
||||
.ofNullable(d.getEoscdatasourcetype())
|
||||
.map(v -> v.getClassname())
|
||||
.orElse(new String()));
|
||||
datasource.setResearch_product_type(getEoscProductType(d.getResearchentitytypes()));
|
||||
datasource.setThematic(d.getThematic());
|
||||
datasource
|
||||
.setResearch_product_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getDatabaseaccesstype())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
|
||||
.orElse(new ArrayList<>()));
|
||||
.setResearch_product_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getDatabaseaccesstype())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
|
||||
.orElse(new ArrayList<>()));
|
||||
datasource
|
||||
.setResearch_product_metadata_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getResearchproductmetadataaccesspolicies())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
|
||||
.orElse(new ArrayList<>()));
|
||||
if(Optional.ofNullable(first._2()).isPresent()){
|
||||
.setResearch_product_metadata_access_policy(
|
||||
Optional
|
||||
.ofNullable(d.getResearchproductmetadataaccesspolicies())
|
||||
.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
|
||||
.orElse(new ArrayList<>()));
|
||||
if (Optional.ofNullable(first._2()).isPresent()) {
|
||||
List<MinOrganization> organizations = new ArrayList<>();
|
||||
organizations.add(first._2()._2().getMinOrganization());
|
||||
vs.forEachRemaining(org -> organizations.add(org._2()._2().getMinOrganization()));
|
||||
|
@ -145,7 +160,7 @@ public class DumpDatasource implements Serializable {
|
|||
}
|
||||
return datasource;
|
||||
|
||||
}, Encoders.bean( eu.dnetlib.dhp.skgif.model.Datasource.class))
|
||||
}, Encoders.bean(eu.dnetlib.dhp.skgif.model.Datasource.class))
|
||||
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
|
|
|
@ -10,8 +10,6 @@ import java.util.List;
|
|||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.skgif.model.*;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -29,8 +27,10 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.skgif.model.*;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
|
@ -90,84 +90,92 @@ public class DumpGrant implements Serializable {
|
|||
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||
!r.getDataInfo().getInvisible() &&
|
||||
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
|
||||
Dataset<EncloseMinElement> eme = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||
Dataset<EncloseMinElement> eme = Utils
|
||||
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
||||
|
||||
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
|
||||
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(t2._1().getTarget(), t2._2()), Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations
|
||||
.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
|
||||
.map(
|
||||
(MapFunction<Tuple2<Relation, EncloseMinElement>, Tuple2<String, EncloseMinElement>>) t2 -> new Tuple2<>(
|
||||
t2._1().getTarget(), t2._2()),
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.bean(EncloseMinElement.class)));
|
||||
|
||||
projects
|
||||
.joinWith(partecipantOrganization, projects.col("id").equalTo(partecipantOrganization.col("_1")), "left")
|
||||
.groupByKey((MapFunction<Tuple2<Project, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(), Encoders.STRING() )
|
||||
.mapGroups((MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k,v) -> {
|
||||
.groupByKey(
|
||||
(MapFunction<Tuple2<Project, Tuple2<String, EncloseMinElement>>, String>) t2 -> t2._1().getId(),
|
||||
Encoders.STRING())
|
||||
.mapGroups(
|
||||
(MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k, v) -> {
|
||||
Grant g = new Grant();
|
||||
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
|
||||
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
|
||||
g.setIdentifiers(getProjectIdentifier(first._1()));
|
||||
g.setTitle(first._1().getTitle().getValue());
|
||||
g
|
||||
.setSummary(
|
||||
Optional
|
||||
.ofNullable(first._1().getSummary())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
.setSummary(
|
||||
Optional
|
||||
.ofNullable(first._1().getSummary())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setAcronym(
|
||||
Optional
|
||||
.ofNullable(first._1().getAcronym())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
.setAcronym(
|
||||
Optional
|
||||
.ofNullable(first._1().getAcronym())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g.setFunder(Utils.getFunderName(first._1().getFundingtree().get(0).getValue()));
|
||||
// * private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
|
||||
g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
|
||||
g
|
||||
.setCurrency(
|
||||
Optional
|
||||
.ofNullable(first._1().getCurrency())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
.setCurrency(
|
||||
Optional
|
||||
.ofNullable(first._1().getCurrency())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setFunded_amount(
|
||||
Optional
|
||||
.ofNullable(first._1().getFundedamount())
|
||||
.orElse(null));
|
||||
.setFunded_amount(
|
||||
Optional
|
||||
.ofNullable(first._1().getFundedamount())
|
||||
.orElse(null));
|
||||
g
|
||||
.setKeywords(
|
||||
first
|
||||
._1()
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.map(s -> s.getValue())
|
||||
.collect(Collectors.toList()));
|
||||
.setKeywords(
|
||||
first
|
||||
._1()
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.map(s -> s.getValue())
|
||||
.collect(Collectors.toList()));
|
||||
g
|
||||
.setStart_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getStartdate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
.setStart_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getStartdate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setEnd_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getEnddate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
.setEnd_date(
|
||||
Optional
|
||||
.ofNullable(first._1().getEnddate())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
g
|
||||
.setWebsite(
|
||||
Optional
|
||||
.ofNullable(first._1().getWebsiteurl())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
.setWebsite(
|
||||
Optional
|
||||
.ofNullable(first._1().getWebsiteurl())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(new String()));
|
||||
if (Optional.ofNullable(first._2()).isPresent()) {
|
||||
List<MinOrganization> relevantOrganizatios = new ArrayList<>();
|
||||
relevantOrganizatios.add(first._2()._2().getMinOrganization());
|
||||
v
|
||||
.forEachRemaining(
|
||||
t2 -> relevantOrganizatios
|
||||
.add(t2._2()._2().getMinOrganization()));
|
||||
.forEachRemaining(
|
||||
t2 -> relevantOrganizatios
|
||||
.add(t2._2()._2().getMinOrganization()));
|
||||
g.setBeneficiaries(relevantOrganizatios);
|
||||
}
|
||||
return g;
|
||||
}, Encoders.bean(Grant.class) )
|
||||
}, Encoders.bean(Grant.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
@ -185,8 +193,6 @@ public class DumpGrant implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static List<Identifier> getProjectIdentifier(Project project) throws DocumentException {
|
||||
List<Identifier> identifiers = new ArrayList<>();
|
||||
if (project.getPid().size() > 0)
|
||||
|
|
|
@ -7,7 +7,6 @@ import java.io.Serializable;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.*;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
|
@ -20,6 +19,7 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.*;
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
@ -80,8 +80,8 @@ public class DumpResult implements Serializable {
|
|||
public static <R extends Result> void mapResult(SparkSession spark, String inputPath,
|
||||
String workingDir, String outputPath) {
|
||||
|
||||
//emit the snippet of the entities to be included in other entities for the dematerialization
|
||||
// emitMinEntities(spark, inputPath, workingDir);
|
||||
// emit the snippet of the entities to be included in other entities for the dematerialization
|
||||
// emitMinEntities(spark, inputPath, workingDir);
|
||||
|
||||
// selection of the relevant relations from result type to other entity. Only the semantically relevant ones are
|
||||
// considered
|
||||
|
@ -179,8 +179,16 @@ public class DumpResult implements Serializable {
|
|||
(MapFunction<Tuple2<PartialResearchProduct, RelationPerProduct>, PartialResearchProduct>) t2 -> {
|
||||
PartialResearchProduct prp = t2._1();
|
||||
if (Optional.ofNullable(t2._2()).isPresent()) {
|
||||
prp.setRelated_products(t2._2().getRelatedProduct().keySet()
|
||||
.stream().map(key -> Relations.newInstance(key, t2._2().getRelatedProduct().get(key))).collect(Collectors.toList()));
|
||||
prp
|
||||
.setRelated_products(
|
||||
t2
|
||||
._2()
|
||||
.getRelatedProduct()
|
||||
.keySet()
|
||||
.stream()
|
||||
.map(
|
||||
key -> Relations.newInstance(key, t2._2().getRelatedProduct().get(key)))
|
||||
.collect(Collectors.toList()));
|
||||
prp.setRelevant_organizations(t2._2().getOrganizations());
|
||||
prp.setFunding(t2._2().getFunding());
|
||||
}
|
||||
|
@ -251,7 +259,7 @@ public class DumpResult implements Serializable {
|
|||
.ofNullable(epm.getInstance().getLicense())
|
||||
.map(value -> value.getValue())
|
||||
.orElse(null));
|
||||
if(Optional.ofNullable(epm.getInstance().getUrl()).isPresent() && epm.getInstance().getUrl().size() > 0)
|
||||
if (Optional.ofNullable(epm.getInstance().getUrl()).isPresent() && epm.getInstance().getUrl().size() > 0)
|
||||
manifestation
|
||||
.setUrl(epm.getInstance().getUrl().get(0));
|
||||
else
|
||||
|
@ -263,12 +271,26 @@ public class DumpResult implements Serializable {
|
|||
if (Optional.ofNullable(t2._2()).isPresent()) {
|
||||
manifestation.setBiblio(getBiblio(epm));
|
||||
if (Optional.ofNullable(t2._2().getJournal().getIssnPrinted()).isPresent())
|
||||
manifestation.setVenue(MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnPrinted()),t2._1().getJournal().getName()));
|
||||
manifestation
|
||||
.setVenue(
|
||||
MinVenue
|
||||
.newInstance(
|
||||
Utils.getIdentifier(Prefixes.VENUE, t2._2().getJournal().getIssnPrinted()),
|
||||
t2._1().getJournal().getName()));
|
||||
else if (Optional.ofNullable(t2._2().getJournal().getIssnOnline()).isPresent())
|
||||
manifestation.setVenue(MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()),t2._1().getJournal().getName()));
|
||||
manifestation
|
||||
.setVenue(
|
||||
MinVenue
|
||||
.newInstance(
|
||||
Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()),
|
||||
t2._1().getJournal().getName()));
|
||||
}
|
||||
manifestation
|
||||
.setHosting_datasource(MinVenue.newInstance(Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()), epm.getInstance().getHostedby().getValue()));
|
||||
.setHosting_datasource(
|
||||
MinVenue
|
||||
.newInstance(
|
||||
Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()),
|
||||
epm.getInstance().getHostedby().getValue()));
|
||||
|
||||
return manifestation;
|
||||
}
|
||||
|
@ -351,25 +373,26 @@ public class DumpResult implements Serializable {
|
|||
r.getRelClass().equalsIgnoreCase(RelationType.PART.label) ||
|
||||
r.getRelClass().equalsIgnoreCase(RelationType.VERSION.label) ||
|
||||
r.getRelClass().equalsIgnoreCase(RelationType.CITATION.label));
|
||||
Dataset<EncloseMinElement> encloseMinEntity = Utils.readPath(spark, workingDir + "minEntity", EncloseMinElement.class);
|
||||
|
||||
relation.joinWith(encloseMinEntity, relation.col("target").equalTo(encloseMinEntity.col("enclosedEntityId")))
|
||||
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, EncloseMinElement>) t2 ->
|
||||
{
|
||||
EncloseMinElement eme = t2._2();
|
||||
eme.setResultId(t2._1().getSource());
|
||||
eme.setSemantics(t2._1().getRelClass());
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class))
|
||||
.groupByKey((MapFunction<EncloseMinElement, String>) eme -> eme.getResultId(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, EncloseMinElement, RelationPerProduct>) (k,v) ->
|
||||
{
|
||||
RelationPerProduct rpp = new RelationPerProduct();
|
||||
rpp.setResultId(k);
|
||||
insertEnclosedElement(rpp,v.next());
|
||||
v.forEachRemaining(e -> insertEnclosedElement(rpp,e));
|
||||
return rpp;
|
||||
}, Encoders.bean(RelationPerProduct.class))
|
||||
Dataset<EncloseMinElement> encloseMinEntity = Utils
|
||||
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class);
|
||||
|
||||
relation
|
||||
.joinWith(encloseMinEntity, relation.col("target").equalTo(encloseMinEntity.col("enclosedEntityId")))
|
||||
.map((MapFunction<Tuple2<Relation, EncloseMinElement>, EncloseMinElement>) t2 -> {
|
||||
EncloseMinElement eme = t2._2();
|
||||
eme.setResultId(t2._1().getSource());
|
||||
eme.setSemantics(t2._1().getRelClass());
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class))
|
||||
.groupByKey((MapFunction<EncloseMinElement, String>) eme -> eme.getResultId(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, EncloseMinElement, RelationPerProduct>) (k, v) -> {
|
||||
RelationPerProduct rpp = new RelationPerProduct();
|
||||
rpp.setResultId(k);
|
||||
insertEnclosedElement(rpp, v.next());
|
||||
v.forEachRemaining(e -> insertEnclosedElement(rpp, e));
|
||||
return rpp;
|
||||
}, Encoders.bean(RelationPerProduct.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
@ -377,30 +400,32 @@ public class DumpResult implements Serializable {
|
|||
}
|
||||
|
||||
private static void insertEnclosedElement(RelationPerProduct rpp, EncloseMinElement element) {
|
||||
if(Optional.ofNullable(element.getMinOrganization()).isPresent())
|
||||
if (Optional.ofNullable(element.getMinOrganization()).isPresent())
|
||||
rpp.getOrganizations().add(element.getMinOrganization());
|
||||
if(Optional.ofNullable(element.getMinGrant()).isPresent())
|
||||
if (Optional.ofNullable(element.getMinGrant()).isPresent())
|
||||
rpp.getFunding().add(element.getMinGrant());
|
||||
if(Optional.ofNullable(element.getMinProduct()).isPresent()){
|
||||
if (Optional.ofNullable(element.getMinProduct()).isPresent()) {
|
||||
String sem = element.getSemantics();
|
||||
if(!rpp.getRelatedProduct().containsKey(sem))
|
||||
if (!rpp.getRelatedProduct().containsKey(sem))
|
||||
rpp.getRelatedProduct().put(sem, new ArrayList<>());
|
||||
rpp.getRelatedProduct().get(sem).add(element.getMinProduct());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static <R extends Result> Dataset<EncloseMinElement> getMinProduct(SparkSession spark, String inputPath, Class<R> clazz) {
|
||||
return Utils.readPath(spark, inputPath , clazz)
|
||||
.filter((FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||
!r.getDataInfo().getInvisible())
|
||||
.map((MapFunction<R, EncloseMinElement>) r -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(r.getId());
|
||||
eme.setMinProduct(Utils.getMinProduct(r));
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class));
|
||||
private static <R extends Result> Dataset<EncloseMinElement> getMinProduct(SparkSession spark, String inputPath,
|
||||
Class<R> clazz) {
|
||||
return Utils
|
||||
.readPath(spark, inputPath, clazz)
|
||||
.filter(
|
||||
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||
!r.getDataInfo().getInvisible())
|
||||
.map((MapFunction<R, EncloseMinElement>) r -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(r.getId());
|
||||
eme.setMinProduct(Utils.getMinProduct(r));
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -65,7 +65,7 @@ public class DumpVenue implements Serializable {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath + "Venue");
|
||||
Utils.removeOutputDir(spark, outputPath + "Venues");
|
||||
|
||||
mapVenue(spark, inputPath, outputPath, workingDir);
|
||||
});
|
||||
|
@ -82,14 +82,14 @@ public class DumpVenue implements Serializable {
|
|||
&& d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"));
|
||||
datasourceDataset
|
||||
.joinWith(
|
||||
manifestationDataset, datasourceDataset.col("id").equalTo(manifestationDataset.col("hostedby.key")),
|
||||
manifestationDataset, datasourceDataset.col("id").equalTo(manifestationDataset.col("hostedby")),
|
||||
"left")
|
||||
.map((MapFunction<Tuple2<Datasource, EmitPerManifestation>, Venue>) t2 -> {
|
||||
Venue venue = new Venue();
|
||||
Datasource d = t2._1();
|
||||
if (Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
|
||||
if (Optional.ofNullable(d.getJournal()).isPresent() && Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
|
||||
venue.setLocal_identifier(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()));
|
||||
else if (Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
|
||||
else if (Optional.ofNullable(d.getJournal()).isPresent() && Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
|
||||
venue.setLocal_identifier(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()));
|
||||
venue.setIdentifiers(getVenueIdentifier(d.getJournal()));
|
||||
venue.setName(d.getOfficialname().getValue());
|
||||
|
@ -103,7 +103,6 @@ public class DumpVenue implements Serializable {
|
|||
venue.setContributions(null);
|
||||
return venue;
|
||||
}, Encoders.bean(Venue.class))
|
||||
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
|
|
@ -7,8 +7,6 @@ import java.io.Serializable;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
|
@ -24,10 +22,12 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EncloseMinElement;
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.skgif.model.*;
|
||||
import scala.Tuple2;
|
||||
|
||||
|
@ -80,94 +80,127 @@ public class EmitFromEntities implements Serializable {
|
|||
}
|
||||
|
||||
private static void emitFromProject(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.readPath(spark, inputPath + "project" , Project.class)
|
||||
.filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Project, EncloseMinElement>) p->{
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(p.getId());
|
||||
eme.setMinGrant(Utils.getMinGrant(p));
|
||||
return eme;}, Encoders.bean(EncloseMinElement.class) )
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
Utils
|
||||
.readPath(spark, inputPath + "project", Project.class)
|
||||
.filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Project, EncloseMinElement>) p -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(p.getId());
|
||||
eme.setMinGrant(Utils.getMinGrant(p));
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
}
|
||||
|
||||
private static void emitFromOrganization(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.readPath(spark, inputPath + "organization", Organization.class)
|
||||
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Organization, EncloseMinElement>) o -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setMinOrganization(Utils.getMinOrganization(o));
|
||||
eme.setEnclosedEntityId(o.getId());
|
||||
return eme;},
|
||||
Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
Utils
|
||||
.readPath(spark, inputPath + "organization", Organization.class)
|
||||
.filter((FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Organization, EncloseMinElement>) o -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setMinOrganization(Utils.getMinOrganization(o));
|
||||
eme.setEnclosedEntityId(o.getId());
|
||||
return eme;
|
||||
},
|
||||
Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
}
|
||||
|
||||
private static void emitFromDatasource(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setMinDatsource(MinVenue.newInstance(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
|
||||
eme.setEnclosedEntityId(d.getId());
|
||||
return eme;
|
||||
}
|
||||
, Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
Utils
|
||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme
|
||||
.setMinDatsource(
|
||||
MinVenue
|
||||
.newInstance(
|
||||
Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
|
||||
eme.setEnclosedEntityId(d.getId());
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
Utils.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||
.filter((FilterFunction<Datasource>) d-> d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"))
|
||||
.map((MapFunction<Datasource, EncloseMinElement>) d-> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(d.getId());
|
||||
if(Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
|
||||
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()), d.getOfficialname().getValue()));
|
||||
if(Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
|
||||
eme.setMinVenue( MinVenue.newInstance(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()), d.getOfficialname().getValue()));
|
||||
return null;
|
||||
},Encoders.bean(EncloseMinElement.class) )
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression","gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
Utils
|
||||
.readPath(spark, inputPath + "datasource", Datasource.class)
|
||||
.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getDeletedbyinference())
|
||||
.filter(
|
||||
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEoscdatasourcetype()).isPresent() && d
|
||||
.getEoscdatasourcetype()
|
||||
.getClassid()
|
||||
.equalsIgnoreCase("Journal archive"))
|
||||
.map((MapFunction<Datasource, EncloseMinElement>) d -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setEnclosedEntityId(d.getId());
|
||||
if (Optional.ofNullable(d.getJournal()).isPresent() &&
|
||||
Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent()) {
|
||||
eme
|
||||
.setMinVenue(
|
||||
MinVenue
|
||||
.newInstance(
|
||||
Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()),
|
||||
d.getOfficialname().getValue()));
|
||||
return eme;
|
||||
}
|
||||
if (Optional.ofNullable(d.getJournal()).isPresent() &&
|
||||
Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent()) {
|
||||
eme
|
||||
.setMinVenue(
|
||||
MinVenue
|
||||
.newInstance(
|
||||
Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()),
|
||||
d.getOfficialname().getValue()));
|
||||
return eme;
|
||||
}
|
||||
return null;
|
||||
}, Encoders.bean(EncloseMinElement.class))
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
}
|
||||
|
||||
//per ogni result emetto id + journal se esiste + istanza + hosted by dell'istanza
|
||||
// per ogni result emetto id + journal se esiste + istanza + hosted by dell'istanza
|
||||
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
|
||||
String workingDir) {
|
||||
emitManifestation(spark, inputPath, workingDir);
|
||||
emitPerson(spark, inputPath, outputPath, workingDir);
|
||||
emitTopic(spark, inputPath, outputPath, workingDir);
|
||||
emitMinProduct(spark, inputPath, workingDir);
|
||||
emitMinProduct(spark, inputPath, workingDir);
|
||||
|
||||
}
|
||||
|
||||
private static <R extends Result> void emitMinProduct(SparkSession spark, String inputPath, String workingDir) {
|
||||
private static <R extends Result> void emitMinProduct(SparkSession spark, String inputPath, String workingDir) {
|
||||
Utils.removeOutputDir(spark, workingDir + "minEntity");
|
||||
ModelSupport.entityTypes.keySet().forEach(e -> {
|
||||
if (ModelSupport.isResult(e)) {
|
||||
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
|
||||
|
||||
Utils
|
||||
.readPath(spark, inputPath + e.name(), resultClazz)
|
||||
.map((MapFunction<R, MinProduct>) p -> Utils.getMinProduct(p), Encoders.bean(MinProduct.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
.readPath(spark, inputPath + e.name(), resultClazz)
|
||||
.map((MapFunction<R, EncloseMinElement>) p -> {
|
||||
EncloseMinElement eme = new EncloseMinElement();
|
||||
eme.setMinProduct(Utils.getMinProduct(p));
|
||||
eme.setEnclosedEntityId(p.getId());
|
||||
return eme;
|
||||
}, Encoders.bean(EncloseMinElement.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir + "/minEntity");
|
||||
|
||||
}
|
||||
|
||||
|
@ -279,7 +312,7 @@ public class EmitFromEntities implements Serializable {
|
|||
}
|
||||
return authors.iterator();
|
||||
}, Encoders.bean(Persons.class))
|
||||
.filter(Objects::nonNull)
|
||||
.filter((FilterFunction<Persons>) p -> p != null)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
@ -331,7 +364,6 @@ public class EmitFromEntities implements Serializable {
|
|||
.option("compression", "gzip")
|
||||
.json(workingDir + e.name() + "/manifestation");
|
||||
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
|
|
|
@ -62,11 +62,14 @@ public class ResultMapper implements Serializable {
|
|||
contribution.setPerson(minPerson);
|
||||
} else {
|
||||
if (Optional.ofNullable(a.getRank()).isPresent()) {
|
||||
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
|
||||
minPerson
|
||||
.setLocal_identifier(
|
||||
Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
|
||||
contribution
|
||||
.setPerson(minPerson);
|
||||
} else {
|
||||
minPerson.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
||||
minPerson
|
||||
.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
||||
contribution.setPerson(minPerson);
|
||||
}
|
||||
|
||||
|
@ -95,14 +98,19 @@ public class ResultMapper implements Serializable {
|
|||
.map(s -> {
|
||||
ResultTopic topic = new ResultTopic();
|
||||
MinTopic minTopic = new MinTopic();
|
||||
minTopic.setLocal_identifier(Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
|
||||
minTopic
|
||||
.setLocal_identifier(
|
||||
Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
|
||||
minTopic.setValue(s.getValue());
|
||||
topic
|
||||
.setTopic(minTopic
|
||||
);
|
||||
.setTopic(minTopic);
|
||||
if (Optional.ofNullable(s.getDataInfo()).isPresent()) {
|
||||
Provenance provenance = new Provenance();
|
||||
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
|
||||
try {
|
||||
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
|
||||
} catch (NumberFormatException nfe) {
|
||||
|
||||
}
|
||||
provenance.setType(s.getDataInfo().getInferenceprovenance());
|
||||
topic.setProvenance(provenance);
|
||||
}
|
||||
|
|
|
@ -6,27 +6,28 @@ import java.io.StringReader;
|
|||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.io.SAXReader;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||
import eu.dnetlib.dhp.skgif.model.Prefixes;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
|
@ -88,14 +89,23 @@ public class Utils implements Serializable {
|
|||
MinOrganization mo = new MinOrganization();
|
||||
mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
||||
mo.setName(o.getLegalname().getValue());
|
||||
for(StructuredProperty pid : o.getPid()){
|
||||
switch (pid.getQualifier().getClassid().toLowerCase()){
|
||||
for (StructuredProperty pid : o.getPid()) {
|
||||
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
||||
case "ror":
|
||||
mo.setRor(pid.getValue());
|
||||
break;
|
||||
case "isni":
|
||||
mo.setIsni(pid.getValue());
|
||||
break;
|
||||
case "fundref":
|
||||
mo.setFundRef(pid.getValue());
|
||||
break;
|
||||
case "ringgold":
|
||||
mo.setRinGold(pid.getValue());
|
||||
break;
|
||||
case "wikidata":
|
||||
mo.setWikidata(pid.getValue());
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -107,10 +117,11 @@ public class Utils implements Serializable {
|
|||
mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
|
||||
mg.setCode(p.getCode().getValue());
|
||||
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
|
||||
mg.setTitle(p.getTitle().getValue());
|
||||
return mg;
|
||||
}
|
||||
|
||||
public static <R extends Result> MinProduct getMinProduct(R r) {
|
||||
public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException {
|
||||
MinProduct mp = new MinProduct();
|
||||
mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
|
||||
for (StructuredProperty title : r.getTitle()) {
|
||||
|
@ -118,19 +129,21 @@ public class Utils implements Serializable {
|
|||
mp.setTitle(title.getValue());
|
||||
}
|
||||
}
|
||||
for (StructuredProperty pid : r.getPid()) {
|
||||
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
||||
case "doi":
|
||||
mp.setDoi(pid.getValue());
|
||||
break;
|
||||
case "pmcid":
|
||||
mp.setPmcid(pid.getValue());
|
||||
break;
|
||||
case "arxiv":
|
||||
mp.setArxivid(pid.getValue());
|
||||
break;
|
||||
if (r.getPid() != null)
|
||||
for (StructuredProperty pid : r.getPid()) {
|
||||
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
||||
case "doi":
|
||||
mp.setDoi(pid.getValue());
|
||||
break;
|
||||
case "pmcid":
|
||||
mp.setPmcid(pid.getValue());
|
||||
break;
|
||||
case "arxiv":
|
||||
mp.setArxivid(pid.getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mp;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -7,29 +8,29 @@ import java.io.Serializable;
|
|||
* @Date 04/03/24
|
||||
*/
|
||||
public class Couple implements Serializable {
	// identifier the entity had in its original form
	private String originalIdentifier;
	// identifier associated to the same entity locally
	private String localIdentifier;

	/**
	 * @return the original identifier, possibly {@code null} if never set
	 */
	public String getOriginalIdentifier() {
		return originalIdentifier;
	}

	/**
	 * @param originalIdentifier the original identifier to store
	 */
	public void setOriginalIdentifier(String originalIdentifier) {
		this.originalIdentifier = originalIdentifier;
	}

	/**
	 * @return the local identifier, possibly {@code null} if never set
	 */
	public String getLocalIdentifier() {
		return localIdentifier;
	}

	/**
	 * @param localIdentifier the local identifier to store
	 */
	public void setLocalIdentifier(String localIdentifier) {
		this.localIdentifier = localIdentifier;
	}

	/**
	 * Builds a couple holding the two given identifiers.
	 *
	 * @param originalIdentifier the original identifier
	 * @param localIdentifier the local identifier
	 * @return a new instance with both identifiers set
	 */
	public static Couple newInstance(String originalIdentifier, String localIdentifier) {
		Couple couple = new Couple();
		couple.originalIdentifier = originalIdentifier;
		couple.localIdentifier = localIdentifier;
		return couple;
	}
}
|
||||
|
|
|
@ -1,87 +1,88 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.skgif.beans;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import eu.dnetlib.dhp.skgif.model.MinGrant;
|
||||
import eu.dnetlib.dhp.skgif.model.MinOrganization;
|
||||
import eu.dnetlib.dhp.skgif.model.MinProduct;
|
||||
import eu.dnetlib.dhp.skgif.model.MinVenue;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 04/03/24
|
||||
*/
|
||||
public class EncloseMinElement implements Serializable {
|
||||
private String resultId;
|
||||
private String enclosedEntityId;
|
||||
private MinOrganization minOrganization;
|
||||
private MinVenue minVenue;
|
||||
private MinVenue minDatsource;
|
||||
private MinGrant minGrant;
|
||||
private MinProduct minProduct;
|
||||
private String semantics;
|
||||
private String resultId;
|
||||
private String enclosedEntityId;
|
||||
private MinOrganization minOrganization;
|
||||
private MinVenue minVenue;
|
||||
private MinVenue minDatsource;
|
||||
private MinGrant minGrant;
|
||||
private MinProduct minProduct;
|
||||
private String semantics;
|
||||
|
||||
public MinVenue getMinVenue() {
|
||||
return minVenue;
|
||||
}
|
||||
public MinVenue getMinVenue() {
|
||||
return minVenue;
|
||||
}
|
||||
|
||||
public void setMinVenue(MinVenue minVenue) {
|
||||
this.minVenue = minVenue;
|
||||
}
|
||||
public void setMinVenue(MinVenue minVenue) {
|
||||
this.minVenue = minVenue;
|
||||
}
|
||||
|
||||
public MinVenue getMinDatsource() {
|
||||
return minDatsource;
|
||||
}
|
||||
public MinVenue getMinDatsource() {
|
||||
return minDatsource;
|
||||
}
|
||||
|
||||
public void setMinDatsource(MinVenue minDatsource) {
|
||||
this.minDatsource = minDatsource;
|
||||
}
|
||||
public void setMinDatsource(MinVenue minDatsource) {
|
||||
this.minDatsource = minDatsource;
|
||||
}
|
||||
|
||||
public String getSemantics() {
|
||||
return semantics;
|
||||
}
|
||||
public String getSemantics() {
|
||||
return semantics;
|
||||
}
|
||||
|
||||
public void setSemantics(String semantics) {
|
||||
this.semantics = semantics;
|
||||
}
|
||||
public void setSemantics(String semantics) {
|
||||
this.semantics = semantics;
|
||||
}
|
||||
|
||||
public String getResultId() {
|
||||
return resultId;
|
||||
}
|
||||
public String getResultId() {
|
||||
return resultId;
|
||||
}
|
||||
|
||||
public void setResultId(String resultId) {
|
||||
this.resultId = resultId;
|
||||
}
|
||||
public void setResultId(String resultId) {
|
||||
this.resultId = resultId;
|
||||
}
|
||||
|
||||
public String getEnclosedEntityId() {
|
||||
return enclosedEntityId;
|
||||
}
|
||||
public String getEnclosedEntityId() {
|
||||
return enclosedEntityId;
|
||||
}
|
||||
|
||||
public void setEnclosedEntityId(String enclosedEntityId) {
|
||||
this.enclosedEntityId = enclosedEntityId;
|
||||
}
|
||||
public void setEnclosedEntityId(String enclosedEntityId) {
|
||||
this.enclosedEntityId = enclosedEntityId;
|
||||
}
|
||||
|
||||
public MinOrganization getMinOrganization() {
|
||||
return minOrganization;
|
||||
}
|
||||
public MinOrganization getMinOrganization() {
|
||||
return minOrganization;
|
||||
}
|
||||
|
||||
public void setMinOrganization(MinOrganization minOrganization) {
|
||||
this.minOrganization = minOrganization;
|
||||
}
|
||||
public void setMinOrganization(MinOrganization minOrganization) {
|
||||
this.minOrganization = minOrganization;
|
||||
}
|
||||
|
||||
public MinGrant getMinGrant() {
|
||||
return minGrant;
|
||||
}
|
||||
public MinGrant getMinGrant() {
|
||||
return minGrant;
|
||||
}
|
||||
|
||||
public void setMinGrant(MinGrant minGrant) {
|
||||
this.minGrant = minGrant;
|
||||
}
|
||||
public void setMinGrant(MinGrant minGrant) {
|
||||
this.minGrant = minGrant;
|
||||
}
|
||||
|
||||
public MinProduct getMinProduct() {
|
||||
return minProduct;
|
||||
}
|
||||
public MinProduct getMinProduct() {
|
||||
return minProduct;
|
||||
}
|
||||
|
||||
public void setMinProduct(MinProduct minProduct) {
|
||||
this.minProduct = minProduct;
|
||||
}
|
||||
public void setMinProduct(MinProduct minProduct) {
|
||||
this.minProduct = minProduct;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ public class RelationPerProduct implements Serializable {
|
|||
private String resultId;
|
||||
private List<MinOrganization> organizations;
|
||||
private List<MinGrant> funding;
|
||||
private Map<String,List<MinProduct>> relatedProduct;
|
||||
private Map<String, List<MinProduct>> relatedProduct;
|
||||
|
||||
public RelationPerProduct() {
|
||||
organizations = new ArrayList<>();
|
||||
|
|
|
@ -7,16 +7,12 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromResultJobTest;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromEntitiesJobTest;
|
||||
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
|
||||
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
|
||||
|
||||
|
@ -31,7 +27,7 @@ public class ZenodoUploadTest {
|
|||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(EmitFromResultJobTest.class.getSimpleName())
|
||||
.createTempDirectory(EmitFromEntitiesJobTest.class.getSimpleName())
|
||||
.toString();
|
||||
}
|
||||
|
||||
|
|
|
@ -173,4 +173,37 @@ public class DumpDatasourceTest implements Serializable {
|
|||
//
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDumpDatasourceApi() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
|
||||
.getPath();
|
||||
|
||||
final String workingDir = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
|
||||
.getPath();
|
||||
|
||||
DumpDatasource
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-outputPath", workingDir,
|
||||
"-workingDir", workingDir
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Datasource> datasource = sc
|
||||
.textFile(workingDir.toString() + "Datasource")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Datasource.class));
|
||||
|
||||
Dataset<Datasource> datasourceDataset = spark.createDataset(datasource.rdd(), Encoders.bean(Datasource.class));
|
||||
|
||||
datasourceDataset
|
||||
.foreach((ForeachFunction<Datasource>) d -> System.out.println(OBJECT_MAPPER.writeValueAsString(d)));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -118,4 +118,34 @@ public class DumpGrantTest implements Serializable {
|
|||
//
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDumpGrantApi() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
|
||||
.getPath();
|
||||
|
||||
final String workingDir = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
|
||||
.getPath();
|
||||
|
||||
DumpGrant
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-outputPath", workingDir,
|
||||
"-workingDir", workingDir
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Grant> grant = sc
|
||||
.textFile(workingDir + "Grant")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Grant.class));
|
||||
|
||||
grant.foreach(g -> System.out.println(OBJECT_MAPPER.writeValueAsString(g)));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,4 +122,33 @@ public class DumpOrganizationTest implements Serializable {
|
|||
//
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDumpOrganizationAPI() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
|
||||
.getPath();
|
||||
|
||||
final String workingDir = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
|
||||
.getPath();
|
||||
|
||||
DumpOrganization
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-outputPath", workingDir
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Organization> organization = sc
|
||||
.textFile(workingDir.toString() + "/Organization")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
|
||||
|
||||
organization.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -118,17 +118,15 @@ public class DumpResultTest {
|
|||
1,
|
||||
temp
|
||||
.getRelatedProduct()
|
||||
.stream()
|
||||
.filter(rp -> rp.getRelation_type().equalsIgnoreCase("issupplementedby"))
|
||||
.count());
|
||||
.get(RelationType.SUPPLEMENT.label)
|
||||
.size());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
temp
|
||||
.getRelatedProduct()
|
||||
.stream()
|
||||
.filter(rp -> rp.getRelation_type().equalsIgnoreCase("isdocumentedby"))
|
||||
.count());
|
||||
.get(RelationType.DOCUMENTS.label)
|
||||
.size());
|
||||
|
||||
JavaRDD<ResearchProduct> researchProduct = sc
|
||||
.textFile(workingDir.toString() + "/publication/researchproduct")
|
||||
|
@ -166,6 +164,8 @@ public class DumpResultTest {
|
|||
Assertions.assertEquals(2, product.getManifestations().size());
|
||||
|
||||
researchProductDataset.show(false);
|
||||
researchProductDataset
|
||||
.foreach((ForeachFunction<ResearchProduct>) rp -> System.out.println(OBJECT_MAPPER.writeValueAsString(rp)));
|
||||
|
||||
}
|
||||
|
||||
|
@ -267,15 +267,27 @@ public class DumpResultTest {
|
|||
.anyMatch(
|
||||
t -> t
|
||||
.getTopic()
|
||||
.getValue()
|
||||
.getValue()
|
||||
.equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
|
||||
|
||||
// check contributions
|
||||
Assertions.assertEquals(4, rp.getContributions().size());
|
||||
Assertions
|
||||
.assertEquals(3, rp.getContributions().stream().filter(c -> c.getPerson().getLocal_identifier().startsWith("person")).count());
|
||||
.assertEquals(
|
||||
3,
|
||||
rp
|
||||
.getContributions()
|
||||
.stream()
|
||||
.filter(c -> c.getPerson().getLocal_identifier().startsWith("person"))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(1, rp.getContributions().stream().filter(c -> c.getPerson().getLocal_identifier().startsWith("temp")).count());
|
||||
.assertEquals(
|
||||
1,
|
||||
rp
|
||||
.getContributions()
|
||||
.stream()
|
||||
.filter(c -> c.getPerson().getLocal_identifier().startsWith("temp"))
|
||||
.count());
|
||||
rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -477,4 +489,326 @@ public class DumpResultTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmitFromApiDump() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
|
||||
.getPath();
|
||||
|
||||
final String workingDir = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
|
||||
.getPath();
|
||||
|
||||
DumpResult
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-workingDir", workingDir,
|
||||
"-outputPath", workingDir
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<ResearchProduct> researchProduct = sc
|
||||
.textFile(workingDir.toString() + "ResearchProduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, ResearchProduct.class));
|
||||
|
||||
researchProduct.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
||||
|
||||
// org.apache.spark.sql.Dataset<ResearchProduct> researchProductDataset = spark
|
||||
// .createDataset(researchProduct.rdd(), Encoders.bean(ResearchProduct.class));
|
||||
|
||||
// Assertions.assertEquals(1, researchProductDataset.count());
|
||||
//
|
||||
// ResearchProduct rp = researchProductDataset.first();
|
||||
//
|
||||
// // check the local identifier
|
||||
// Assertions.assertEquals("product_____::e22a152ab43b9215d14ece613f76ec84", rp.getLocal_identifier());
|
||||
//
|
||||
// // check the pids of the result
|
||||
// Assertions.assertEquals(3, rp.getIdentifiers().size());
|
||||
// Assertions
|
||||
// .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("doi")).count());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// "10.1007/s40199-021-00403-x",
|
||||
// rp
|
||||
// .getIdentifiers()
|
||||
// .stream()
|
||||
// .filter(p -> p.getScheme().equalsIgnoreCase("doi"))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getValue());
|
||||
// Assertions
|
||||
// .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("pmid")).count());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// "34327650",
|
||||
// rp
|
||||
// .getIdentifiers()
|
||||
// .stream()
|
||||
// .filter(p -> p.getScheme().equalsIgnoreCase("pmid"))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getValue());
|
||||
// Assertions
|
||||
// .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("pmc")).count());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// "PMC8602609",
|
||||
// rp
|
||||
// .getIdentifiers()
|
||||
// .stream()
|
||||
// .filter(p -> p.getScheme().equalsIgnoreCase("pmc"))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getValue());
|
||||
//
|
||||
// // check the title
|
||||
// Assertions.assertEquals(1, rp.getTitles().keySet().size());
|
||||
// Assertions.assertTrue(rp.getTitles().keySet().contains("none"));
|
||||
// Assertions.assertEquals(1, rp.getTitles().get("none").size());
|
||||
//
|
||||
// // check abstract
|
||||
// Assertions.assertEquals(1, rp.getAbstracts().keySet().size());
|
||||
// Assertions.assertTrue(rp.getAbstracts().keySet().contains("none"));
|
||||
// Assertions.assertEquals(1, rp.getAbstracts().get("none").size());
|
||||
//
|
||||
// // check type
|
||||
// Assertions.assertEquals("literature", rp.getProduct_type());
|
||||
//
|
||||
// // check topics
|
||||
// Assertions.assertEquals(3, rp.getTopics().size());
|
||||
// Assertions
|
||||
// .assertTrue(
|
||||
// rp
|
||||
// .getTopics()
|
||||
// .stream()
|
||||
// .anyMatch(
|
||||
// t -> t
|
||||
// .getTopic()
|
||||
// .getValue()
|
||||
// .equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
|
||||
//
|
||||
// // check contributions
|
||||
// Assertions.assertEquals(4, rp.getContributions().size());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 3,
|
||||
// rp
|
||||
// .getContributions()
|
||||
// .stream()
|
||||
// .filter(c -> c.getPerson().getLocal_identifier().startsWith("person"))
|
||||
// .count());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1,
|
||||
// rp
|
||||
// .getContributions()
|
||||
// .stream()
|
||||
// .filter(c -> c.getPerson().getLocal_identifier().startsWith("temp"))
|
||||
// .count());
|
||||
// rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1,
|
||||
// rp
|
||||
// .getContributions()
|
||||
// .stream()
|
||||
// .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0001-8284-6269true")))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getRank());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 2,
|
||||
// rp
|
||||
// .getContributions()
|
||||
// .stream()
|
||||
// .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0002-0940-893xtrue")))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getRank());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 3,
|
||||
// rp
|
||||
// .getContributions()
|
||||
// .stream()
|
||||
// .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0001-5291-577xtrue")))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getRank());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 4,
|
||||
// rp
|
||||
// .getContributions()
|
||||
// .stream()
|
||||
// .filter(
|
||||
// c -> c
|
||||
// .getPerson()
|
||||
// .equals(
|
||||
// Utils
|
||||
// .getIdentifier(
|
||||
// Prefixes.TEMPORARY_PERSON,
|
||||
// "50|doi_dedup___::0000661be7c602727bae9690778b16514")))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getRank());
|
||||
// researchProductDataset.show(10, 100, true);
|
||||
//
|
||||
// // check manifestation 1
|
||||
// Assertions.assertEquals(3, rp.getManifestations().size());
|
||||
// Manifestation manifestation = rp
|
||||
// .getManifestations()
|
||||
// .stream()
|
||||
// .filter(
|
||||
// m -> m
|
||||
// .getHosting_datasource()
|
||||
// .equals(
|
||||
// Utils.getIdentifier(Prefixes.DATASOURCE, "10|doajarticles::6107489403b31fc7cf37cb7fda35f7f1")))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0);
|
||||
// Assertions.assertEquals("Article", manifestation.getProduct_local_type());
|
||||
// Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
|
||||
// Assertions.assertEquals(1, manifestation.getDates().size());
|
||||
// Assertions.assertEquals("2021-07-29", manifestation.getDates().get(0).getValue());
|
||||
// Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
|
||||
// Assertions.assertEquals(PeerReview.PEER_REVIEWED.label, manifestation.getPeer_review());
|
||||
// Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
|
||||
// Assertions.assertEquals(AccessRight.CLOSED.label, manifestation.getAccess_right());
|
||||
// Assertions.assertEquals("Springer Nature TDM", manifestation.getLicence());
|
||||
// Assertions.assertEquals("https://doi.org/10.1007/s40199-021-00403-x", manifestation.getUrl());
|
||||
// Assertions.assertEquals("10.1007/s40199-021-00403-x", manifestation.getPid());
|
||||
// Assertions.assertTrue(manifestation.getBiblio() != null);
|
||||
// Biblio biblio = manifestation.getBiblio();
|
||||
// Assertions.assertTrue(biblio.getEdition() == null);
|
||||
// Assertions.assertTrue(biblio.getIssue() == null);
|
||||
// Assertions.assertEquals("Springer Science and Business Media LLC", biblio.getPublisher());
|
||||
// Assertions.assertEquals("29", biblio.getVolume());
|
||||
// Assertions.assertEquals("415", biblio.getStart_page());
|
||||
// Assertions.assertEquals("438", biblio.getEnd_page());
|
||||
//
|
||||
// // check manifestation 2
|
||||
// manifestation = rp
|
||||
// .getManifestations()
|
||||
// .stream()
|
||||
// .filter(
|
||||
// m -> m
|
||||
// .getHosting_datasource()
|
||||
// .equals(
|
||||
// Utils.getIdentifier(Prefixes.DATASOURCE, "10|openaire____::55045bd2a65019fd8e6741a755395c8c")))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0);
|
||||
// Assertions.assertEquals("Article", manifestation.getProduct_local_type());
|
||||
// Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
|
||||
// Assertions.assertEquals(1, manifestation.getDates().size());
|
||||
// Assertions.assertEquals("2020-01-03", manifestation.getDates().get(0).getValue());
|
||||
// Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
|
||||
// Assertions.assertEquals(PeerReview.NON_PEER_REVIEWED.label, manifestation.getPeer_review());
|
||||
// Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
|
||||
// Assertions.assertEquals(AccessRight.UNAVAILABLE.label, manifestation.getAccess_right());
|
||||
// Assertions.assertEquals(null, manifestation.getLicence());
|
||||
// Assertions.assertEquals("https://pubmed.ncbi.nlm.nih.gov/34327650", manifestation.getUrl());
|
||||
// Assertions.assertEquals("34327650", manifestation.getPid());
|
||||
// Assertions.assertTrue(manifestation.getBiblio() == null);
|
||||
//
|
||||
// // check manifestation 3
|
||||
// manifestation = rp
|
||||
// .getManifestations()
|
||||
// .stream()
|
||||
// .filter(
|
||||
// m -> m
|
||||
// .getHosting_datasource()
|
||||
// .equals(
|
||||
// Utils.getIdentifier(Prefixes.DATASOURCE, "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c")))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0);
|
||||
// Assertions.assertEquals("Other literature type", manifestation.getProduct_local_type());
|
||||
// Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
|
||||
// Assertions.assertEquals(1, manifestation.getDates().size());
|
||||
// Assertions.assertEquals("2021-07-29", manifestation.getDates().get(0).getValue());
|
||||
// Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
|
||||
// Assertions.assertEquals(PeerReview.NON_PEER_REVIEWED.label, manifestation.getPeer_review());
|
||||
// Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
|
||||
// Assertions.assertEquals(AccessRight.OPEN.label, manifestation.getAccess_right());
|
||||
// Assertions.assertEquals(null, manifestation.getLicence());
|
||||
// Assertions.assertEquals("https://europepmc.org/articles/PMC8602609/", manifestation.getUrl());
|
||||
// Assertions.assertEquals("PMC8602609", manifestation.getPid());
|
||||
// Assertions.assertTrue(manifestation.getBiblio() == null);
|
||||
//
|
||||
// // check relevant organization
|
||||
// Assertions.assertEquals(1, rp.getRelevant_organizations().size());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// Prefixes.ORGANIZATION.label + "601e510b1fda7cc6cb03329531502171",
|
||||
// rp.getRelevant_organizations().get(0));
|
||||
//
|
||||
// // check funding
|
||||
// Assertions.assertEquals(1, rp.getFunding().size());
|
||||
// Assertions.assertEquals(Prefixes.GRANT.label + "a7795022763d413f5de59036ebbd0c52", rp.getFunding().get(0));
|
||||
//
|
||||
// // check related products
|
||||
// Assertions.assertEquals(5, rp.getRelated_products().size());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 4,
|
||||
// rp
|
||||
// .getRelated_products()
|
||||
// .stream()
|
||||
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.CITATION.label))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getProduct_list()
|
||||
// .size());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1,
|
||||
// rp
|
||||
// .getRelated_products()
|
||||
// .stream()
|
||||
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.DOCUMENTS.label))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getProduct_list()
|
||||
// .size());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1,
|
||||
// rp
|
||||
// .getRelated_products()
|
||||
// .stream()
|
||||
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.PART.label))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getProduct_list()
|
||||
// .size());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1,
|
||||
// rp
|
||||
// .getRelated_products()
|
||||
// .stream()
|
||||
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.SUPPLEMENT.label))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getProduct_list()
|
||||
// .size());
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1,
|
||||
// rp
|
||||
// .getRelated_products()
|
||||
// .stream()
|
||||
// .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.VERSION.label))
|
||||
// .collect(Collectors.toList())
|
||||
// .get(0)
|
||||
// .getProduct_list()
|
||||
// .size());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.ForeachFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.skgif.model.Datasource;
|
||||
import eu.dnetlib.dhp.skgif.model.Venue;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 22/02/24
|
||||
*/
|
||||
public class DumpVenueTest implements Serializable {
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(DumpVenueTest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(DumpVenueTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(DumpVenueTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(DumpVenueTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDumpVenueApi() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
|
||||
.getPath();
|
||||
|
||||
final String workingDir = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDirApiExample/")
|
||||
.getPath();
|
||||
|
||||
DumpVenue
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-outputPath", "/tmp/",
|
||||
"-workingDir", workingDir
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Venue> datasource = sc
|
||||
.textFile(workingDir.toString() + "Venues")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Venue.class));
|
||||
|
||||
Dataset<Venue> datasourceDataset = spark.createDataset(datasource.rdd(), Encoders.bean(Venue.class));
|
||||
|
||||
datasourceDataset
|
||||
.foreach((ForeachFunction<Venue>) d -> System.out.println(OBJECT_MAPPER.writeValueAsString(d)));
|
||||
|
||||
}
|
||||
}
|
|
@ -28,7 +28,7 @@ import eu.dnetlib.dhp.skgif.model.Persons;
|
|||
import eu.dnetlib.dhp.skgif.model.Topic;
|
||||
|
||||
//@Disabled
|
||||
public class EmitFromResultJobTest {
|
||||
public class EmitFromEntitiesJobTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
|
@ -36,15 +36,15 @@ public class EmitFromResultJobTest {
|
|||
|
||||
private static Path workingDir;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(EmitFromResultJobTest.class);
|
||||
private static final Logger log = LoggerFactory.getLogger(EmitFromEntitiesJobTest.class);
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(EmitFromResultJobTest.class.getSimpleName());
|
||||
workingDir = Files.createTempDirectory(EmitFromEntitiesJobTest.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(EmitFromResultJobTest.class.getSimpleName());
|
||||
conf.setAppName(EmitFromEntitiesJobTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
|
@ -55,7 +55,7 @@ public class EmitFromResultJobTest {
|
|||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(EmitFromResultJobTest.class.getSimpleName())
|
||||
.appName(EmitFromEntitiesJobTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
@ -104,7 +104,7 @@ public class EmitFromResultJobTest {
|
|||
.filter(
|
||||
(FilterFunction<Persons>) p -> p
|
||||
.getLocal_identifier()
|
||||
.equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
|
||||
.equalsIgnoreCase("temp_person_::2c1eea261f7d9a97ab7ca8c4200781db"))
|
||||
.first();
|
||||
|
||||
Assertions
|
||||
|
@ -124,7 +124,7 @@ public class EmitFromResultJobTest {
|
|||
&& p.getFamily_name().equalsIgnoreCase("borer")
|
||||
&& !p
|
||||
.getLocal_identifier()
|
||||
.equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
|
||||
.equalsIgnoreCase("temp_person_::2c1eea261f7d9a97ab7ca8c4200781db"))
|
||||
.count());
|
||||
Assertions.assertEquals("claudia", claudiaBorer.getGiven_name().toLowerCase());
|
||||
Assertions.assertEquals("borer", claudiaBorer.getFamily_name().toLowerCase());
|
||||
|
@ -157,7 +157,7 @@ public class EmitFromResultJobTest {
|
|||
Dataset<EmitPerManifestation> manifestationDataset = spark
|
||||
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
|
||||
manifestationDataset.show(false);
|
||||
Assertions.assertEquals(4, manifestationDataset.count());
|
||||
Assertions.assertEquals(5, manifestationDataset.count());
|
||||
|
||||
Dataset<Topic> topicDataset = spark
|
||||
.createDataset(topics.rdd(), Encoders.bean(Topic.class));
|
||||
|
@ -208,26 +208,50 @@ public class EmitFromResultJobTest {
|
|||
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
|
||||
manifestationDataset.show(false);
|
||||
|
||||
// Persons claudiaBorer = personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
|
||||
// .first();
|
||||
//
|
||||
// Assertions.assertEquals(2, personsDataset.filter((FilterFunction<Persons>) p -> p.getGiven_name().equalsIgnoreCase("claudia") && p.getFamily_name().equalsIgnoreCase("borer")).count());
|
||||
// Assertions.assertEquals(1, personsDataset.filter((FilterFunction<Persons>) p -> p.getGiven_name().equalsIgnoreCase("claudia") && p.getFamily_name().equalsIgnoreCase("borer") && !p.getLocal_identifier().equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db")).count());
|
||||
// Assertions.assertEquals("claudia", claudiaBorer.getGiven_name().toLowerCase());
|
||||
// Assertions.assertEquals("borer", claudiaBorer.getFamily_name().toLowerCase());
|
||||
//
|
||||
// Assertions.assertEquals(2, personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person")).count());
|
||||
// Assertions.assertEquals(1, personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person") && p.getIdentifiers().get(0).getValue().equals("0000-0002-5597-4916")).count());
|
||||
// Persons orcidPerson = personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person") && p.getIdentifiers().get(0).getValue().equals("0000-0002-5597-4916")).first();
|
||||
// Assertions.assertEquals("M.", orcidPerson.getGiven_name());
|
||||
// Assertions.assertEquals("Kooi", orcidPerson.getFamily_name());
|
||||
// Assertions.assertEquals(1, orcidPerson.getIdentifiers().size());
|
||||
// Assertions.assertEquals("orcid", orcidPerson.getIdentifiers().get(0).getScheme());
|
||||
// Assertions.assertEquals("0000-0002-5597-4916", orcidPerson.getIdentifiers().get(0).getValue());
|
||||
//
|
||||
}
|
||||
|
||||
// Assertions.assertEquals(4, manifestationDataset.count());
|
||||
//
|
||||
@Test
|
||||
public void testEmitFromResultApiSubset() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graphForAPIExample/")
|
||||
.getPath();
|
||||
|
||||
EmitFromEntities
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-outputPath", workingDir.toString() + "/result/",
|
||||
"-workingDir", workingDir.toString() + "/"
|
||||
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Persons> persons = sc
|
||||
.textFile(workingDir.toString() + "/result/Persons")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Persons.class));
|
||||
org.apache.spark.sql.Dataset<Persons> personsDataset = spark
|
||||
.createDataset(persons.rdd(), Encoders.bean(Persons.class));
|
||||
|
||||
personsDataset.foreach((ForeachFunction<Persons>) p -> System.out.println(OBJECT_MAPPER.writeValueAsString(p)));
|
||||
|
||||
JavaRDD<Topic> topics = sc
|
||||
.textFile(workingDir.toString() + "/result/Topic")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Topic.class));
|
||||
Dataset<Topic> topicDataset = spark
|
||||
.createDataset(topics.rdd(), Encoders.bean(Topic.class));
|
||||
// Assertions.assertEquals(3, topicDataset.count());
|
||||
|
||||
topicDataset.foreach((ForeachFunction<Topic>) t -> System.out.println(OBJECT_MAPPER.writeValueAsString(t)));
|
||||
|
||||
JavaRDD<EmitPerManifestation> manifestation = sc
|
||||
.textFile(workingDir.toString() + "/publication/manifestation")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, EmitPerManifestation.class));
|
||||
|
||||
Dataset<EmitPerManifestation> manifestationDataset = spark
|
||||
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
|
||||
manifestationDataset.show(false);
|
||||
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,23 @@
|
|||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::4f7dd940bce9e9209645e6816dc4e081", "lastupdatetimestamp": 1706531034696, "relType": "resultResult", "source": "50|doi_________::479f67164e2f1d21baafbfbc3b12851d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::233bd897c6654f274059272b8740b47f", "lastupdatetimestamp": 0, "relType": "resultResult", "source": "50|doi_________::479f67164e2f1d21baafbfbc3b12851d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_________::1647515a356946e4857c817afc7bfa38", "lastupdatetimestamp": 1705539801728, "relType": "resultResult", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::7a40bb2e9ad2e96d6de26787ab1c310b", "lastupdatetimestamp": 1702883945826, "relType": "resultResult", "source": "50|doi_________::1f56d1b5a2e7c9223c3479476c9c3491", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::893d0a5c08409822dbb82b8973a42909", "lastupdatetimestamp": 1698123701963, "relType": "resultResult", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::8b0484d2afbd9b09116ef60c18044964", "lastupdatetimestamp": 1698123701963, "relType": "resultResult", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::6481edac8ee74c1f1bfa9c0fdd3e6f11", "lastupdatetimestamp": 1702883945826, "relType": "resultResult", "source": "50|doi_________::1f56d1b5a2e7c9223c3479476c9c3491", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_dedup___::5ec916a2cec1ed474d7eb3619c3899d1", "lastupdatetimestamp": 1698123701963, "relType": "resultResult", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_________::1034996a07e1496dde6048008e2187de", "lastupdatetimestamp": 1706725621178, "relType": "resultResult", "source": "50|doi_________::0941e6d7170c6bbfb40721aa9ecff245", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "citation", "relClass": "Cites", "dataInfo": {"invisible": false, "deletedbyinference": false}, "target": "50|doi_________::81a64d0689f4a2f93995b81574d2fcbe", "lastupdatetimestamp": 0, "relType": "resultResult", "source": "50|doi_________::86d7aecc7029c70836d268b4ea9b974d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "OpenCitations", "key": "10|openaire____::c06df618c5de1c786535ccf3f8b7b059"}], "validated": false, "properties": []}
|
||||
{"subRelType": "outcome", "relClass": "isProducedBy", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "40|wt__________::1e5e62235d094afd01cd56e65112fc63", "lastupdatetimestamp": 1704362206417, "relType": "resultProject", "source": "50|doi_________::a83b2a6d96883a5f2d360fb5fb7db8ae", "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
|
||||
{"subRelType": "outcome", "relClass": "isProducedBy", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "40|sshrc_______::1e5e62235d094afd01cd56e65112fc63", "lastupdatetimestamp": 1706725621178, "relType": "resultProject", "source": "50|doi_________::0941e6d7170c6bbfb40721aa9ecff245", "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
|
||||
{"subRelType": "outcome", "relClass": "isProducedBy", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "40|nsf_________::7a5b4fbb6ed8545679194fe45a984ec9", "lastupdatetimestamp": 1705539801728, "relType": "resultProject", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "collectedfrom": [{"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::0ff89de99d4a8f4b04cb162bcb5740cf", "relType": "resultOrganization", "source": "50|doi_________::7078f75053e8c1bff562adbce2d81fe2", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::45327df9862294107f84a90daa1e0cbc", "lastupdatetimestamp": 0, "relType": "resultOrganization", "source": "50|doi_________::86d7aecc7029c70836d268b4ea9b974d", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "result:organization:semrel", "classname": "Inferred by OpenAIRE", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "propagation", "invisible": false, "trust": "0.85"}, "target": "20|openorgs____::33f2ec599c4e086e750c126f19552f67", "relType": "resultOrganization", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::b8b8ca674452579f3f593d9f5e557483", "relType": "resultOrganization", "source": "50|doi_________::0363a8e9be16f62cdfd58b3def423a57", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:datasetarchive", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::6a7b1b4c40a067a1f209de6867fe094d", "lastupdatetimestamp": 1708723553432, "relType": "resultOrganization", "source": "50|doi_________::1f56d1b5a2e7c9223c3479476c9c3491", "collectedfrom": [{"value": "OpenAPC Global Initiative", "key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}, {"value": "Pubmed", "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357"}, {"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": [{"value": "4729.73", "key": "apc_amount"}, {"value": "EUR", "key": "apc_currency"}]}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "iis", "classname": "Inferred by OpenAIRE", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "iis::document_affiliations", "invisible": false, "trust": "0.9"}, "target": "20|openorgs____::013584bf907fdcb40bcec00b5a78fc12", "lastupdatetimestamp": 1709018933662, "relType": "resultOrganization", "source": "50|doi_________::0888444d70ea8e3bcbc12d9d281fce22", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Crossref", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"}], "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::0ae431b820e4c33db8967fbb2b919150", "lastupdatetimestamp": 0, "relType": "resultOrganization", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}, {"value": "Pubmed", "key": "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357"}], "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "result:organization:semrel", "classname": "Inferred by OpenAIRE", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": true, "inferenceprovenance": "propagation", "invisible": false, "trust": "0.85"}, "target": "20|openorgs____::4dee7ea0d1fd1b48c199ee744e21dc13", "relType": "resultOrganization", "source": "50|doi_________::7078f75053e8c1bff562adbce2d81fe2", "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::9d3eb607509fcacae1c5a7f25e36e3e5", "relType": "resultOrganization", "source": "50|doi_dedup___::079ebd73b673282559d6f4befe003d47", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}
|
||||
{"subRelType": "affiliation", "relClass": "hasAuthorInstitution", "dataInfo": {"invisible": false, "trust": "0.9", "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "deletedbyinference": false}, "target": "20|openorgs____::c9f6cad9752276cb556e88a5111e86aa", "lastupdatetimestamp": 0, "relType": "resultOrganization", "source": "50|doi_________::7078f75053e8c1bff562adbce2d81fe2", "collectedfrom": [{"value": "Microsoft Academic Graph", "key": "10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a"}], "validated": false, "properties": []}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue