first entities

This commit is contained in:
Miriam Baglioni 2023-09-18 08:59:02 +02:00
parent 4885d36b3b
commit b1b48a90dc
37 changed files with 1322 additions and 30 deletions

View File

@ -0,0 +1,19 @@
package eu.dnetlib.dhp.skgif.model;
/**
 * Access-right vocabulary of the skgif model. Every constant carries the
 * lowercase text exposed through {@link #label}.
 *
 * @author miriam.baglioni
 * @Date 04/09/23
 */
public enum AccessRight {
    OPEN("open"), CLOSED("closed"), EMBARGO("embargo"), RESTRICTED("restricted"), UNAVAILABLE("unavailable");

    /** Textual value associated with the constant. */
    public final String label;

    AccessRight(String label) {
        this.label = label;
    }
}

View File

@ -0,0 +1,41 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
/**
 * Serializable bean pairing an organization reference with a start and an end
 * date, all kept as plain strings. The two dates are (de)serialized with the
 * snake_case names {@code start_date} and {@code end_date}.
 *
 * @author miriam.baglioni
 * @Date 05/09/23
 */
public class Affiliation implements Serializable {

    private String organization;

    @JsonProperty("start_date")
    private String startDate;

    @JsonProperty("end_date")
    private String endDate;

    /** @return the organization reference, or {@code null} when unset */
    public String getOrganization() {
        return this.organization;
    }

    /** @param organization the organization reference to store */
    public void setOrganization(String organization) {
        this.organization = organization;
    }

    /** @return the start date string, or {@code null} when unset */
    public String getStartDate() {
        return this.startDate;
    }

    /** @param startDate the start date string to store */
    public void setStartDate(String startDate) {
        this.startDate = startDate;
    }

    /** @return the end date string, or {@code null} when unset */
    public String getEndDate() {
        return this.endDate;
    }

    /** @param endDate the end date string to store */
    public void setEndDate(String endDate) {
        this.endDate = endDate;
    }
}

View File

@ -0,0 +1,86 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
/**
 * Serializable bean collecting bibliographic details (issue, pages, volume,
 * edition, number, publisher, series) as plain strings. The page fields are
 * (de)serialized as {@code start_page} and {@code end_page}.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Biblio implements Serializable {

    private String issue;

    @JsonProperty("start_page")
    private String startPage;

    @JsonProperty("end_page")
    private String endPage;

    private String volume;

    private String edition;

    private String number;

    private String publisher;

    private String series;

    /** @return the issue, or {@code null} when unset */
    public String getIssue() {
        return this.issue;
    }

    /** @param issue the issue to store */
    public void setIssue(String issue) {
        this.issue = issue;
    }

    /** @return the first page, or {@code null} when unset */
    public String getStartPage() {
        return this.startPage;
    }

    /** @param startPage the first page to store */
    public void setStartPage(String startPage) {
        this.startPage = startPage;
    }

    /** @return the last page, or {@code null} when unset */
    public String getEndPage() {
        return this.endPage;
    }

    /** @param endPage the last page to store */
    public void setEndPage(String endPage) {
        this.endPage = endPage;
    }

    /** @return the volume, or {@code null} when unset */
    public String getVolume() {
        return this.volume;
    }

    /** @param volume the volume to store */
    public void setVolume(String volume) {
        this.volume = volume;
    }

    /** @return the edition, or {@code null} when unset */
    public String getEdition() {
        return this.edition;
    }

    /** @param edition the edition to store */
    public void setEdition(String edition) {
        this.edition = edition;
    }

    /** @return the number, or {@code null} when unset */
    public String getNumber() {
        return this.number;
    }

    /** @param number the number to store */
    public void setNumber(String number) {
        this.number = number;
    }

    /** @return the publisher, or {@code null} when unset */
    public String getPublisher() {
        return this.publisher;
    }

    /** @param publisher the publisher to store */
    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    /** @return the series, or {@code null} when unset */
    public String getSeries() {
        return this.series;
    }

    /** @param series the series to store */
    public void setSeries(String series) {
        this.series = series;
    }
}

View File

@ -0,0 +1,50 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean describing one contribution: a person reference, the
 * declared affiliations (serialized as {@code declared_affiliations}), the
 * roles and the rank.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Contribution implements Serializable {

    private String person;

    @JsonProperty("declared_affiliations")
    private List<String> declaredAffiliation;

    private List<String> roles;

    private Integer rank;

    /** @return the person reference, or {@code null} when unset */
    public String getPerson() {
        return this.person;
    }

    /** @param person the person reference to store */
    public void setPerson(String person) {
        this.person = person;
    }

    /** @return the declared affiliations, or {@code null} when unset */
    public List<String> getDeclaredAffiliation() {
        return this.declaredAffiliation;
    }

    /** @param declaredAffiliation the declared affiliations to store */
    public void setDeclaredAffiliation(List<String> declaredAffiliation) {
        this.declaredAffiliation = declaredAffiliation;
    }

    /** @return the roles, or {@code null} when unset */
    public List<String> getRoles() {
        return this.roles;
    }

    /** @param roles the roles to store */
    public void setRoles(List<String> roles) {
        this.roles = roles;
    }

    /** @return the rank, or {@code null} when unset */
    public Integer getRank() {
        return this.rank;
    }

    /** @param rank the rank to store */
    public void setRank(Integer rank) {
        this.rank = rank;
    }
}

View File

@ -0,0 +1,28 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
 * Serializable bean holding a date value together with the type of that date,
 * both as plain strings.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Dates implements Serializable {

    private String value;

    private String type;

    /** @return the date value, or {@code null} when unset */
    public String getValue() {
        return this.value;
    }

    /** @param value the date value to store */
    public void setValue(String value) {
        this.value = value;
    }

    /** @return the date type, or {@code null} when unset */
    public String getType() {
        return this.type;
    }

    /** @param type the date type to store */
    public void setType(String type) {
        this.type = type;
    }
}

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
 * Serializable bean holding an identifier as a scheme/value pair of strings.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Identifier implements Serializable {

    private String scheme;

    private String value;

    /** @return the identifier scheme, or {@code null} when unset */
    public String getScheme() {
        return this.scheme;
    }

    /** @param scheme the identifier scheme to store */
    public void setScheme(String scheme) {
        this.scheme = scheme;
    }

    /** @return the identifier value, or {@code null} when unset */
    public String getValue() {
        return this.value;
    }

    /** @param value the identifier value to store */
    public void setValue(String value) {
        this.value = value;
    }
}

View File

@ -0,0 +1,138 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import java.net.URL;
import java.util.List;
/**
 * Serializable bean describing one manifestation of a research product: its
 * local type, dates, peer-review and curation information, url, pid, access
 * right, licence, bibliographic details, venue and hosting datasource.
 * Multi-word properties are (de)serialized with snake_case names.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Manifestation implements Serializable {

    @JsonProperty("product_local_type")
    private String productLocalType;

    @JsonProperty("product_local_type_schema")
    private String productLocalTypeSchema;

    private List<Dates> dates;

    @JsonProperty("peer_review")
    private String peerReview;

    @JsonProperty("metadata_curation")
    private String metadataCuration;

    private URL url;

    private String pid;

    @JsonProperty("access_right")
    private String accessRight;

    private String licence;

    // was "licance_schema": the misspelling ended up verbatim in the serialized key
    @JsonProperty("licence_schema")
    private String licenceSchema;

    private Biblio biblio;

    private String venue;

    @JsonProperty("hosting_datasource")
    private String hostingDatasource;

    /** @return the local product type, or {@code null} when unset */
    public String getProductLocalType() {
        return productLocalType;
    }

    /** @param productLocalType the local product type to store */
    public void setProductLocalType(String productLocalType) {
        this.productLocalType = productLocalType;
    }

    /** @return the schema of the local product type, or {@code null} when unset */
    public String getProductLocalTypeSchema() {
        return productLocalTypeSchema;
    }

    /** @param productLocalTypeSchema the schema of the local product type to store */
    public void setProductLocalTypeSchema(String productLocalTypeSchema) {
        this.productLocalTypeSchema = productLocalTypeSchema;
    }

    /** @return the dates, or {@code null} when unset */
    public List<Dates> getDates() {
        return dates;
    }

    /** @param dates the dates to store */
    public void setDates(List<Dates> dates) {
        this.dates = dates;
    }

    /** @return the peer-review value, or {@code null} when unset */
    public String getPeerReview() {
        return peerReview;
    }

    /** @param peerReview the peer-review value to store */
    public void setPeerReview(String peerReview) {
        this.peerReview = peerReview;
    }

    /** @return the metadata-curation value, or {@code null} when unset */
    public String getMetadataCuration() {
        return metadataCuration;
    }

    /** @param metadataCuration the metadata-curation value to store */
    public void setMetadataCuration(String metadataCuration) {
        this.metadataCuration = metadataCuration;
    }

    /** @return the url, or {@code null} when unset */
    public URL getUrl() {
        return url;
    }

    /** @param url the url to store */
    public void setUrl(URL url) {
        this.url = url;
    }

    /** @return the pid, or {@code null} when unset */
    public String getPid() {
        return pid;
    }

    /** @param pid the pid to store */
    public void setPid(String pid) {
        this.pid = pid;
    }

    /** @return the access right, or {@code null} when unset */
    public String getAccessRight() {
        return accessRight;
    }

    /** @param accessRight the access right to store */
    public void setAccessRight(String accessRight) {
        this.accessRight = accessRight;
    }

    /** @return the licence, or {@code null} when unset */
    public String getLicence() {
        return licence;
    }

    /** @param licence the licence to store */
    public void setLicence(String licence) {
        this.licence = licence;
    }

    /** @return the licence schema, or {@code null} when unset */
    public String getLicenceSchema() {
        return licenceSchema;
    }

    /** @param licenceSchema the licence schema to store */
    public void setLicenceSchema(String licenceSchema) {
        this.licenceSchema = licenceSchema;
    }

    /** @return the bibliographic details, or {@code null} when unset */
    public Biblio getBiblio() {
        return biblio;
    }

    /** @param biblio the bibliographic details to store */
    public void setBiblio(Biblio biblio) {
        this.biblio = biblio;
    }

    /** @return the venue reference, or {@code null} when unset */
    public String getVenue() {
        return venue;
    }

    /** @param venue the venue reference to store */
    public void setVenue(String venue) {
        this.venue = venue;
    }

    /** @return the hosting datasource reference, or {@code null} when unset */
    public String getHostingDatasource() {
        return hostingDatasource;
    }

    /** @param hostingDatasource the hosting datasource reference to store */
    public void setHostingDatasource(String hostingDatasource) {
        this.hostingDatasource = hostingDatasource;
    }
}

View File

@ -0,0 +1,17 @@
package eu.dnetlib.dhp.skgif.model;
/**
 * Metadata-curation vocabulary of the skgif model. Every constant carries the
 * lowercase text exposed through {@link #label}.
 *
 * @author miriam.baglioni
 * @Date 04/09/23
 */
public enum MetadataCuration {
    YES("yes"), NO("no"), UNAVAILABLE("unavailable");

    /** Textual value associated with the constant. */
    public final String label;

    MetadataCuration(String label) {
        this.label = label;
    }
}

View File

@ -0,0 +1,19 @@
package eu.dnetlib.dhp.skgif.model;
/**
 * Peer-review vocabulary of the skgif model. Every constant carries the text
 * exposed through {@link #label}.
 *
 * @author miriam.baglioni
 * @Date 04/09/23
 */
public enum PeerReview {
    PEER_REVIEWED("peer reviewed"),
    // the label used to be "open", apparently copied from AccessRight.OPEN;
    // NOTE(review): wording chosen to mirror "peer reviewed" - confirm against the target vocabulary
    NON_PEER_REVIEWED("not peer reviewed"),
    DOUBLE_BLIND("double-blind"),
    SINGLE_BLIND("single-blind"),
    UNAVAILABLE("unavailable");

    /** Textual value associated with the constant. */
    public final String label;

    private PeerReview(String label) {
        this.label = label;
    }
}

View File

@ -0,0 +1,71 @@
package eu.dnetlib.dhp.skgif.model;
import org.codehaus.jackson.annotate.JsonProperty;
import java.io.Serializable;
import java.util.List;
/**
* @author miriam.baglioni
* @Date 05/09/23
*/
public class Persons implements Serializable {
@JsonProperty("local_identifier")
private String localIdentifier;
private List<Identifier> identifiers;
@JsonProperty("given_name")
private String givenName;
@JsonProperty("family_name")
private String familyName;
private String agent;
@JsonProperty("declared_affiliations")
private List<Affiliation>declaredAffiliations;
public String getLocalIdentifier() {
return localIdentifier;
}
public void setLocalIdentifier(String localIdentifier) {
this.localIdentifier = localIdentifier;
}
public List<Identifier> getIdentifiers() {
return identifiers;
}
public void setIdentifiers(List<Identifier> identifiers) {
this.identifiers = identifiers;
}
public String getGivenName() {
return givenName;
}
public void setGivenName(String givenName) {
this.givenName = givenName;
}
public String getFamilyName() {
return familyName;
}
public void setFamilyName(String familyName) {
this.familyName = familyName;
}
public String getAgent() {
return agent;
}
public void setAgent(String agent) {
this.agent = agent;
}
public List<Affiliation> getDeclaredAffiliations() {
return declaredAffiliations;
}
public void setDeclaredAffiliations(List<Affiliation> declaredAffiliations) {
this.declaredAffiliations = declaredAffiliations;
}
}

View File

@ -0,0 +1,28 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
 * Serializable bean holding a provenance type (string) and a trust value
 * (primitive double, so it defaults to {@code 0.0}).
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Provenance implements Serializable {

    private String type;

    private double trust;

    /** @return the provenance type, or {@code null} when unset */
    public String getType() {
        return this.type;
    }

    /** @param type the provenance type to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the trust value */
    public double getTrust() {
        return this.trust;
    }

    /** @param trust the trust value to store */
    public void setTrust(double trust) {
        this.trust = trust;
    }
}

View File

@ -0,0 +1,22 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
 * Relation classes handled when products are extended with their relations.
 * Every constant carries, in {@link #label}, the relation class string it is
 * compared against.
 *
 * @author miriam.baglioni
 * @Date 05/09/23
 */
public enum RelationType implements Serializable {
    OUTCOME("outcome"),
    AFFILIATION("hasAuthorInstitution"),
    SUPPLEMENT("IsSupplementedBy"),
    DOCUMENTS("IsDocumentedBy"),
    PART("IsPartOf"),
    // was "IsNewVersioneOf": the misspelled label could never match the relation class
    VERSION("IsNewVersionOf");

    /** Relation class string associated with the constant. */
    public final String label;

    private RelationType(String label) {
        this.label = label;
    }
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean grouping a list of product references under one relation
 * type; serialized as {@code relation_type} and {@code product_list}.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Relations implements Serializable {

    @JsonProperty("relation_type")
    private String relationType;

    @JsonProperty("product_list")
    private List<String> productList;

    /** @return the relation type, or {@code null} when unset */
    public String getRelationType() {
        return this.relationType;
    }

    /** @param relationType the relation type to store */
    public void setRelationType(String relationType) {
        this.relationType = relationType;
    }

    /** @return the product references, or {@code null} when unset */
    public List<String> getProductList() {
        return this.productList;
    }

    /** @param productList the product references to store */
    public void setProductList(List<String> productList) {
        this.productList = productList;
    }
}

View File

@ -0,0 +1,116 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean for a research product: local identifier, identifiers,
 * titles, abstracts, product type, topics, contributions, manifestations,
 * relevant organizations, funding and related products. Multi-word properties
 * are (de)serialized with snake_case names.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class ResearchProduct implements Serializable {

    @JsonProperty("local_identifier")
    private String localIdentifier;

    private List<Identifier> identifiers;

    private List<String> titles;

    private List<String> abstracts;

    @JsonProperty("product_type")
    private String productType;

    private List<Topic> topics;

    private List<Contribution> contributions;

    private List<Manifestation> manifestations;

    @JsonProperty("relevant_organizations")
    private List<String> relevantOrganizations;

    private List<String> funding;

    @JsonProperty("related_products")
    private List<Relations> relatedProducts;

    /** @return the local identifier, or {@code null} when unset */
    public String getLocalIdentifier() {
        return this.localIdentifier;
    }

    /** @param localIdentifier the local identifier to store */
    public void setLocalIdentifier(String localIdentifier) {
        this.localIdentifier = localIdentifier;
    }

    /** @return the identifiers, or {@code null} when unset */
    public List<Identifier> getIdentifiers() {
        return this.identifiers;
    }

    /** @param identifiers the identifiers to store */
    public void setIdentifiers(List<Identifier> identifiers) {
        this.identifiers = identifiers;
    }

    /** @return the titles, or {@code null} when unset */
    public List<String> getTitles() {
        return this.titles;
    }

    /** @param titles the titles to store */
    public void setTitles(List<String> titles) {
        this.titles = titles;
    }

    /** @return the abstracts, or {@code null} when unset */
    public List<String> getAbstracts() {
        return this.abstracts;
    }

    /** @param abstracts the abstracts to store */
    public void setAbstracts(List<String> abstracts) {
        this.abstracts = abstracts;
    }

    /** @return the product type, or {@code null} when unset */
    public String getProductType() {
        return this.productType;
    }

    /** @param productType the product type to store */
    public void setProductType(String productType) {
        this.productType = productType;
    }

    /** @return the topics, or {@code null} when unset */
    public List<Topic> getTopics() {
        return this.topics;
    }

    /** @param topics the topics to store */
    public void setTopics(List<Topic> topics) {
        this.topics = topics;
    }

    /** @return the contributions, or {@code null} when unset */
    public List<Contribution> getContributions() {
        return this.contributions;
    }

    /** @param contributions the contributions to store */
    public void setContributions(List<Contribution> contributions) {
        this.contributions = contributions;
    }

    /** @return the manifestations, or {@code null} when unset */
    public List<Manifestation> getManifestations() {
        return this.manifestations;
    }

    /** @param manifestations the manifestations to store */
    public void setManifestations(List<Manifestation> manifestations) {
        this.manifestations = manifestations;
    }

    /** @return the relevant organizations, or {@code null} when unset */
    public List<String> getRelevantOrganizations() {
        return this.relevantOrganizations;
    }

    /** @param relevantOrganizations the relevant organizations to store */
    public void setRelevantOrganizations(List<String> relevantOrganizations) {
        this.relevantOrganizations = relevantOrganizations;
    }

    /** @return the funding references, or {@code null} when unset */
    public List<String> getFunding() {
        return this.funding;
    }

    /** @param funding the funding references to store */
    public void setFunding(List<String> funding) {
        this.funding = funding;
    }

    /** @return the related products, or {@code null} when unset */
    public List<Relations> getRelatedProducts() {
        return this.relatedProducts;
    }

    /** @param relatedProducts the related products to store */
    public void setRelatedProducts(List<Relations> relatedProducts) {
        this.relatedProducts = relatedProducts;
    }
}

View File

@ -0,0 +1,19 @@
package eu.dnetlib.dhp.skgif.model;
/**
 * Product-type vocabulary of the skgif model. Every constant carries the
 * lowercase text exposed through {@link #label}.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public enum ResearchTypes {
    LITERATURE("literature"), RESEARCH_DATA("research data"), RESEARCH_SOFTWARE("research software"), OTHER("other");

    /** Textual value associated with the constant. */
    public final String label;

    ResearchTypes(String label) {
        this.label = label;
    }
}

View File

@ -0,0 +1,28 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
 * Serializable bean pairing a topic reference (string) with its
 * {@link Provenance}.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class Topic implements Serializable {

    private String topic;

    private Provenance provenance;

    /** @return the topic reference, or {@code null} when unset */
    public String getTopic() {
        return this.topic;
    }

    /** @param topic the topic reference to store */
    public void setTopic(String topic) {
        this.topic = topic;
    }

    /** @return the provenance, or {@code null} when unset */
    public Provenance getProvenance() {
        return this.provenance;
    }

    /** @param provenance the provenance to store */
    public void setProvenance(Provenance provenance) {
        this.provenance = provenance;
    }
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.common;
package eu.dnetlib.dhp.common;
import java.io.BufferedInputStream;
import java.io.IOException;

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi;
package eu.dnetlib.dhp.common.zenodoapi;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi;
package eu.dnetlib.dhp.common.zenodoapi;
public class MissingConceptDoiException extends Throwable {
public MissingConceptDoiException(String message) {

View File

@ -1,31 +1,20 @@
package eu.dnetlib.dhp.oa.zenodoapi;
package eu.dnetlib.dhp.common.zenodoapi;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
// import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jetbrains.annotations.NotNull;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModel;
import eu.dnetlib.dhp.oa.zenodoapi.model.ZenodoModelList;
import eu.dnetlib.dhp.common.zenodoapi.model.ZenodoModel;
import eu.dnetlib.dhp.common.zenodoapi.model.ZenodoModelList;
import okhttp3.*;
public class ZenodoAPIClient implements Serializable {

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.oa.zenodoapi.model;
package eu.dnetlib.dhp.common.zenodoapi.model;
/**
* @author miriam.baglioni

View File

@ -15,7 +15,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.common.MakeTarArchive;
import eu.dnetlib.dhp.common.MakeTarArchive;
public class MakeTar implements Serializable {

View File

@ -0,0 +1,126 @@
package eu.dnetlib.dhp.skgif;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.skgif.model.RelationType;
import eu.dnetlib.dhp.skgif.model.Relations;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import java.io.Serializable;
import java.util.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
 * Spark job that enriches the research products found under
 * {@code <sourcePath>/result} with the relations found under
 * {@code <sourcePath>/preparedRelations} and writes the outcome, one JSON
 * document per line and gzip compressed, to {@code outputPath}.
 *
 * <p>Relations of class {@code outcome} feed the funding list, relations of
 * class {@code hasAuthorInstitution} feed the relevant organizations, every
 * other relation class is collected among the related products.
 *
 * @author miriam.baglioni
 * @Date 05/09/23
 */
public class ExtendResult implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(ExtendResult.class);

    /**
     * Parses the job arguments ({@code isSparkSessionManaged}, {@code sourcePath},
     * {@code outputPath}), cleans the output directory and runs the extension.
     *
     * @param args the command line arguments described by extend_result_parameters.json
     * @throws Exception when the parameters cannot be read or the Spark job fails
     */
    public static void main(String[] args) throws Exception {

        String jsonConfiguration = IOUtils
            .toString(
                ExtendResult.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/extend_result_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                extendResult(spark, inputPath, outputPath);
            });
    }

    /**
     * Left-joins the products with the prepared relations, folds all the relations
     * of a product into the product itself and serializes the result.
     *
     * @param spark the active session
     * @param inputPath the folder containing the {@code result} and {@code preparedRelations} sub-folders
     * @param outputPath the folder the extended products are written to
     */
    private static void extendResult(SparkSession spark, String inputPath, String outputPath) {
        ObjectMapper mapper = new ObjectMapper();

        // NOTE(review): the join below uses the bean property name "localIdentifier" while the
        // bean is serialized as "local_identifier"; confirm the column names of <inputPath>/result
        Dataset<ResearchProduct> result = spark
            .read()
            .json(inputPath + "/result")
            .as(Encoders.bean(ResearchProduct.class));

        // the third column must be relClass: it is the name addRels reads and the name
        // PrepareResultRelation selects (it used to be declared as resultClass)
        final StructType structureSchema = StructType
            .fromDDL("`resultId` STRING, `target` STRING, `relClass` STRING");

        Dataset<Row> relations = spark
            .read()
            .schema(structureSchema)
            .json(inputPath + "/preparedRelations");

        result
            .joinWith(
                relations, result
                    .col("localIdentifier")
                    .equalTo(relations.col("resultId")),
                "left")
            .groupByKey(
                (MapFunction<Tuple2<ResearchProduct, Row>, String>) t2 -> t2._1().getLocalIdentifier(),
                Encoders.STRING())
            .mapGroups((MapGroupsFunction<String, Tuple2<ResearchProduct, Row>, ResearchProduct>) (key, it) -> {
                Tuple2<ResearchProduct, Row> first = it.next();
                ResearchProduct rp = first._1();
                addRels(rp, first._2());
                it.forEachRemaining(t2 -> addRels(rp, t2._2()));
                return rp;
            }, Encoders.bean(ResearchProduct.class))
            .map((MapFunction<ResearchProduct, String>) r -> mapper.writeValueAsString(r), Encoders.STRING())
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .text(outputPath);
    }

    /**
     * Adds to the product the target of one prepared relation, choosing the
     * destination according to the relation class.
     *
     * @param rp the product to extend
     * @param row the prepared relation; {@code null} (or carrying null values) for
     *            products the left join could not match with any relation
     */
    private static void addRels(ResearchProduct rp, Row row) {
        if (row == null) {
            return;
        }
        String relClass = row.getAs("relClass");
        String target = row.getAs("target");
        if (relClass == null || target == null) {
            return;
        }

        if (relClass.equals(RelationType.OUTCOME.label)) {
            if (rp.getFunding() == null) {
                rp.setFunding(new ArrayList<>());
            }
            rp.getFunding().add(target);
        } else if (relClass.equals(RelationType.AFFILIATION.label)) {
            // compare with the label: a String is never equal to the enum constant itself
            if (rp.getRelevantOrganizations() == null) {
                rp.setRelevantOrganizations(new ArrayList<>());
            }
            rp.getRelevantOrganizations().add(target);
        } else {
            relationsFor(rp, relClass).getProductList().add(target);
        }
    }

    /**
     * Returns the related-products entry of the given relation class, creating and
     * registering it on the product when it does not exist yet, so that successive
     * relations of the same product accumulate instead of overwriting each other.
     */
    private static Relations relationsFor(ResearchProduct rp, String relClass) {
        if (rp.getRelatedProducts() == null) {
            rp.setRelatedProducts(new ArrayList<>());
        }
        for (Relations existing : rp.getRelatedProducts()) {
            if (relClass.equals(existing.getRelationType())) {
                if (existing.getProductList() == null) {
                    existing.setProductList(new ArrayList<>());
                }
                return existing;
            }
        }
        Relations created = new Relations();
        created.setRelationType(relClass);
        created.setProductList(new ArrayList<>());
        rp.getRelatedProducts().add(created);
        return created;
    }
}

View File

@ -0,0 +1,93 @@
package eu.dnetlib.dhp.skgif;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.Relation;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
 * Spark job that selects, among the relations of the graph, those of interest
 * for the research products (affiliation, funding outcome, supplement,
 * documentation, part-of and new-version relations) whose source is a valid
 * result, and writes them as gzip compressed JSON with the columns
 * {@code resultId}, {@code target} and {@code relClass}.
 *
 * @author miriam.baglioni
 * @Date 04/09/23
 */
public class PrepareResultRelation implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(PrepareResultRelation.class);

    /**
     * Parses the job arguments ({@code isSparkSessionManaged}, {@code sourcePath},
     * {@code outputPath}), cleans the output directory and runs the preparation.
     *
     * @param args the command line arguments described by result_relation_parameters.json
     * @throws Exception when the parameters cannot be read or the Spark job fails
     */
    public static void main(String[] args) throws Exception {

        String jsonConfiguration = IOUtils
            .toString(
                PrepareResultRelation.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/result_relation_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                prepareResultRelationList(spark, inputPath, outputPath);
            });
    }

    /**
     * Joins the selected relations with the identifiers of the valid results
     * (publication, dataset, otherresearchproduct, software) on the relation source.
     *
     * @param spark the active session
     * @param inputPath the graph folder containing one sub-folder per entity type
     * @param outputPath the folder the prepared relations are written to
     */
    private static void prepareResultRelationList(SparkSession spark, String inputPath, String outputPath) {
        final StructType structureSchema = StructType
            .fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");

        // NOTE(review): relations are assumed to live in the "relation" sub-folder, next to
        // the entity sub-folders read below; the graph root itself was read before - confirm
        Dataset<Relation> relation = spark
            .read()
            .json(inputPath + "/relation")
            .as(Encoders.bean(Relation.class))
            .filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")
            // the last two conditions lacked the joining "or" and the quotes around the literal
            .filter(
                "relClass == 'hasAuthorInstitution' or relClass == 'outcome' or " +
                    "relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' or " +
                    "relClass == 'IsNewVersionOf'");

        org.apache.spark.sql.Dataset<Row> df = spark.createDataFrame(new ArrayList<Row>(), structureSchema);
        List<String> entities = Arrays
            .asList(
                "publication", "dataset", "otherresearchproduct", "software");
        for (String e : entities) {
            df = df
                .union(
                    spark
                        .read()
                        .schema(structureSchema)
                        .json(inputPath + "/" + e)
                        .filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true"));
        }

        // joinWith yields a dataset of pairs whose columns are _1 (the relation) and
        // _2 (the result), so the selected fields must be addressed through them
        relation
            .joinWith(df, relation.col("source").equalTo(df.col("id")))
            .select(
                new Column("_2.id").as("resultId"),
                new Column("_1.target").as("target"),
                new Column("_1.relClass").as("relClass"))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }
}

View File

@ -0,0 +1,282 @@
package eu.dnetlib.dhp.skgif;
import eu.dnetlib.dhp.oa.graph.dump.csv.AuthorResult;
import eu.dnetlib.dhp.oa.model.ResultPid;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.skgif.exception.NoAllowedTypeException;
import eu.dnetlib.dhp.skgif.exception.NoTitleFoundException;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.skgif.model.AccessRight;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
import scala.reflect.internal.Trees;
import javax.management.RuntimeErrorException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.oa.graph.dump.ResultMapper.*;
/**
 * Maps a graph {@link Result} onto a {@link ResearchProduct}: identifiers,
 * title, abstracts, product type, topics, contributions and manifestations.
 *
 * @author miriam.baglioni
 * @Date 01/09/23
 */
public class ResultMapper implements Serializable {

    /**
     * Builds the research product corresponding to the given result.
     *
     * @param input the result to map
     * @param <E> the concrete result type
     * @return the mapped product, or {@code null} when the result has no result type,
     *         has a result type that is not allowed, or ends up without titles or
     *         without contributions
     * @throws Exception when the mapping fails unexpectedly
     */
    public static <E extends Result> ResearchProduct map(
        E input)
        throws Exception {

        if (input.getResulttype() == null) {
            return null;
        }

        ResearchProduct out = new ResearchProduct();
        try {
            out.setLocalIdentifier(input.getId());
            mapPid(out, input);
            mapTitle(out, input);
            mapAbstract(out, input);
            mapType(out, input);
            mapTopic(out, input);
            mapContribution(out, input);

            if (out.getTitles() == null || out.getContributions() == null) {
                return null;
            }
            // TODO map the manifestation directly from the instances
            // it is not completed
            mapManifestation(out, input);

            // TODO extend the mapping to consider relations between these entities and the results
            // private List<String> relevantOrganizations;
            // private List<String> funding;
            // private List<Relations> relatedProducts;
        } catch (NoAllowedTypeException e) {
            return null;
        }

        // the mapped product used to be dropped: every path returned null
        return out;
    }

    /** Sets one manifestation per instance of the result; does nothing without instances. */
    private static <E extends Result> void mapManifestation(ResearchProduct out, E input) {
        if (input.getInstance() == null) {
            return;
        }
        out
            .setManifestations(
                input
                    .getInstance()
                    .stream()
                    .map(i -> {
                        try {
                            return getManifestation(i);
                        } catch (MalformedURLException e) {
                            throw new RuntimeException(e);
                        }
                    })
                    .collect(Collectors.toList()));
    }

    /**
     * Builds the manifestation of one instance. Every piece of information is
     * mapped only when the instance provides it.
     *
     * @throws MalformedURLException when the first url of the instance is not a valid URL
     */
    private static Manifestation getManifestation(Instance i) throws MalformedURLException {
        Manifestation manifestation = new Manifestation();

        if (i.getInstancetype() != null) {
            manifestation.setProductLocalType(i.getInstancetype().getClassname());
            manifestation.setProductLocalTypeSchema(i.getInstancetype().getSchemename());
        }

        if (i.getDateofacceptance() != null) {
            Dates dates = new Dates();
            dates.setType("publishing");
            dates.setValue(i.getDateofacceptance().getValue());
            manifestation.setDates(Arrays.asList(dates));
        }

        String refereed = i.getRefereed() != null ? i.getRefereed().getClassid() : null;
        if (refereed != null) {
            switch (refereed) {
                case "0000":
                    manifestation.setPeerReview(PeerReview.UNAVAILABLE.label);
                    break;
                case "0001":
                    manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label);
                    break;
                case "0002":
                    manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label);
                    break;
                default:
                    // any other code leaves the peer review unset
                    break;
            }
        }

        manifestation.setMetadataCuration(MetadataCuration.UNAVAILABLE.label);

        // TODO filter out the URL that refer to pids. If nothing remains, decide what to do
        if (i.getUrl() != null && !i.getUrl().isEmpty()) {
            manifestation.setUrl(new URL(i.getUrl().get(0)));
        }

        if (i.getPid() != null && !i.getPid().isEmpty()) {
            manifestation.setPid(i.getPid().get(0).getValue());
        }

        manifestation
            .setAccessRight(
                accessRightLabel(i.getAccessright() != null ? i.getAccessright().getClassid() : null));

        if (i.getLicense() != null) {
            manifestation.setLicence(i.getLicense().getValue());
        }

        // TODO to fill the biblio in case it is a journal, we need to join with the datasource and verify the type
        Biblio biblio = null;

        String hostedBy = i.getHostedby() != null ? i.getHostedby().getKey() : null;
        manifestation.setHostingDatasource(hostedBy);

        // TODO verify if the result is published in journals or conferences. In that case the venue is the
        // identifier of the journal/conference. In case it is not, the venue is the datasource
        if (biblio == null) {
            manifestation.setVenue(hostedBy);
        } else {
            manifestation.setVenue("insert the id of the venue");
        }
        return manifestation;
    }

    /** Translates the access right code of an instance; unknown or missing codes map to unavailable. */
    private static String accessRightLabel(String classid) {
        if (classid == null) {
            return AccessRight.UNAVAILABLE.label;
        }
        switch (classid) {
            case "OPEN":
            case "OPEN DATA":
            case "OPEN SOURCE":
                return AccessRight.OPEN.label;
            case "CLOSED":
                return AccessRight.CLOSED.label;
            case "RESTRICTED":
                return AccessRight.RESTRICTED.label;
            case "EMBARGO":
            case "12MONTHS":
            case "6MONTHS":
                return AccessRight.EMBARGO.label;
            default:
                return AccessRight.UNAVAILABLE.label;
        }
    }

    /**
     * Looks for an orcid among the pids, preferring the confirmed one.
     *
     * @return the orcid paired with {@code TRUE} when confirmed or {@code FALSE} when
     *         pending; {@code null} when the pids carry no orcid
     */
    private static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
        if (pid == null || pid.isEmpty()) {
            return null;
        }
        for (StructuredProperty p : pid) {
            if (p.getQualifier() != null && ModelConstants.ORCID.equals(p.getQualifier().getClassid())) {
                return new Tuple2<>(p.getValue(), Boolean.TRUE);
            }
        }
        for (StructuredProperty p : pid) {
            if (p.getQualifier() != null && ModelConstants.ORCID_PENDING.equals(p.getQualifier().getClassid())) {
                return new Tuple2<>(p.getValue(), Boolean.FALSE);
            }
        }
        return null;
    }

    /**
     * Creates one contribution per author. The person identifier is derived from the
     * orcid when available, otherwise from the result identifier and the author rank
     * (or its position in the list when the rank is missing). The contributions are
     * set only when at least one author exists.
     */
    private static <E extends Result> void mapContribution(ResearchProduct out, E input) {
        if (input.getAuthor() == null) {
            return;
        }
        List<Contribution> contributions = new ArrayList<>();
        int count = 0;
        for (Author a : input.getAuthor()) {
            count += 1;
            int position = Optional.ofNullable(a.getRank()).orElse(count);

            Contribution contribution = new Contribution();
            Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
            if (orcid != null) {
                contribution.setPerson(DHPUtils.md5(orcid._1() + orcid._2()));
            } else {
                // also reached by authors without pids, that used to get no person at all
                contribution.setPerson(DHPUtils.md5(input.getId() + position));
            }
            contribution.setRank(position);
            // TODO declared_affiliations and roles are not mapped yet
            contributions.add(contribution);
        }
        if (!contributions.isEmpty()) {
            out.setContributions(contributions);
        }
    }

    /** Sets one topic per subject of the result, with the provenance taken from the subject dataInfo. */
    private static <E extends Result> void mapTopic(ResearchProduct out, E input) {
        if (input.getSubject() == null) {
            return;
        }
        out.setTopics(input.getSubject().stream().map(s -> {
            Topic topic = new Topic();
            topic.setTopic(getIdentifier(s));
            if (s.getDataInfo() != null) {
                Provenance provenance = new Provenance();
                if (s.getDataInfo().getTrust() != null) {
                    provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
                }
                provenance.setType(s.getDataInfo().getInferenceprovenance());
                topic.setProvenance(provenance);
            }
            return topic;
        }).collect(Collectors.toList()));
    }

    /** Computes the md5 of the qualifier class id concatenated with the value. */
    private static String getIdentifier(StructuredProperty s) {
        return DHPUtils.md5(s.getQualifier().getClassid() + s.getValue());
    }

    /**
     * Translates the result type into the product type.
     *
     * @throws NoAllowedTypeException when the result type is missing or not among the handled ones
     */
    private static <E extends Result> void mapType(ResearchProduct out, E input) throws NoAllowedTypeException {
        String classid = input.getResulttype().getClassid();
        if (classid == null) {
            throw new NoAllowedTypeException("Result type not present or not allowed");
        }
        switch (classid) {
            case "publication":
                out.setProductType(ResearchTypes.LITERATURE.label);
                break;
            case "dataset":
                out.setProductType(ResearchTypes.RESEARCH_DATA.label);
                break;
            case "software":
                out.setProductType(ResearchTypes.RESEARCH_SOFTWARE.label);
                break;
            case "other":
                out.setProductType(ResearchTypes.OTHER.label);
                break;
            default:
                throw new NoAllowedTypeException("Result type not present or not allowed");
        }
    }

    /** Copies the pids of the result as scheme/value identifiers; does nothing without pids. */
    private static void mapPid(ResearchProduct out, Result input) {
        Optional
            .ofNullable(input.getPid())
            .ifPresent(
                value -> out
                    .setIdentifiers(
                        value
                            .stream()
                            .map(
                                p -> {
                                    Identifier identifier = new Identifier();
                                    identifier.setValue(p.getValue());
                                    identifier.setScheme(p.getQualifier().getSchemeid());
                                    return identifier;
                                })
                            .collect(Collectors.toList())));
    }

    /**
     * Sets as title the first main title of the result or, when there is none, the
     * first subtitle. The titles are left unset when neither is found.
     */
    private static void mapTitle(ResearchProduct out, Result input) throws NoTitleFoundException {
        List<StructuredProperty> titles = input.getTitle();
        if (titles == null) {
            return;
        }
        String chosen = firstTitle(titles, "main title");
        if (chosen == null) {
            chosen = firstTitle(titles, "subtitle");
        }
        if (chosen != null) {
            out.setTitles(Arrays.asList(chosen));
        }
    }

    /** Returns the value of the first title whose qualifier matches the class id, or null. */
    private static String firstTitle(List<StructuredProperty> titles, String classid) {
        for (StructuredProperty t : titles) {
            if (t.getQualifier() != null && classid.equalsIgnoreCase(t.getQualifier().getClassid())) {
                return t.getValue();
            }
        }
        return null;
    }

    /** Copies the descriptions of the result as abstracts; the list is empty when there are none. */
    private static void mapAbstract(ResearchProduct out, Result input) {
        final List<String> descriptionList = new ArrayList<>();
        Optional
            .ofNullable(input.getDescription())
            .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
        out.setAbstracts(descriptionList);
    }
}

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.skgif.exception;
/**
 * Checked exception signalling a type that is not allowed. It only mirrors
 * the constructors of {@link Exception}.
 */
public class NoAllowedTypeException extends Exception {

    /** Creates the exception without message nor cause. */
    public NoAllowedTypeException() {
        super();
    }

    /**
     * @param message the detail message
     * @param cause the originating throwable
     * @param enableSuppression whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    public NoAllowedTypeException(
        final String message,
        final Throwable cause,
        final boolean enableSuppression,
        final boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }

    /**
     * @param message the detail message
     * @param cause the originating throwable
     */
    public NoAllowedTypeException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /** @param message the detail message */
    public NoAllowedTypeException(final String message) {
        super(message);
    }

    /** @param cause the originating throwable */
    public NoAllowedTypeException(final Throwable cause) {
        super(cause);
    }
}

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.skgif.exception;
/**
 * Checked exception signalling a missing title. It only mirrors the
 * constructors of {@link Exception}.
 */
public class NoTitleFoundException extends Exception {

    /** Creates the exception without message nor cause. */
    public NoTitleFoundException() {
        super();
    }

    /**
     * @param message the detail message
     * @param cause the originating throwable
     * @param enableSuppression whether suppression is enabled
     * @param writableStackTrace whether the stack trace is writable
     */
    public NoTitleFoundException(
        final String message,
        final Throwable cause,
        final boolean enableSuppression,
        final boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }

    /**
     * @param message the detail message
     * @param cause the originating throwable
     */
    public NoTitleFoundException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /** @param message the detail message */
    public NoTitleFoundException(final String message) {
        super(message);
    }

    /** @param cause the originating throwable */
    public NoTitleFoundException(final Throwable cause) {
        super(cause);
    }
}

View File

@ -17,8 +17,8 @@ import org.junit.jupiter.api.Test;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;
import eu.dnetlib.dhp.common.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.common.zenodoapi.ZenodoAPIClient;
@Disabled
public class ZenodoUploadTest {