diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/ProvisionUtil.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/ProvisionUtil.java index db14aa671..cd797f44c 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/ProvisionUtil.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/ProvisionUtil.java @@ -1,6 +1,6 @@ package eu.dnetlib.dhp.provision; -import eu.dnetlib.dhp.provision.scholix.Typology; +import eu.dnetlib.dhp.provision.scholix.summary.Typology; import eu.dnetlib.dhp.utils.DHPUtils; import org.apache.commons.lang3.StringUtils; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java new file mode 100644 index 000000000..5ace02bbc --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateScholix.java @@ -0,0 +1,72 @@ +package eu.dnetlib.dhp.provision; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.provision.scholix.Scholix; +import eu.dnetlib.dhp.utils.DHPUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.sql.SparkSession; +import scala.Tuple2; + +public class SparkGenerateScholix { + + private static final String jsonIDPath = "$.id"; + private static final String sourceIDPath = "$.source"; + private static final String targetIDPath = "$.target"; + + + + public static void main(String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkGenerateScholix.class.getResourceAsStream("/eu/dnetlib/dhp/provision/input_generate_summary_parameters.json"))); + parser.parseArgument(args); + final SparkSession spark = SparkSession + .builder() + .appName(SparkExtractRelationCount.class.getSimpleName()) + .master(parser.get("master")) + .getOrCreate(); + + + final String graphPath = parser.get("graphPath"); + final String workingDirPath = parser.get("workingDirPath"); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + + final JavaRDD relationToExport = sc.textFile(graphPath + "/relation").filter(ProvisionUtil::isNotDeleted); + final JavaPairRDD scholixSummary = sc.textFile(workingDirPath + "/summary").mapToPair((PairFunction) i -> new Tuple2<>(DHPUtils.getJPathString(jsonIDPath, i), i)); + + + PairFunction, String, Scholix> k = + summaryRelation -> + new Tuple2<>( + DHPUtils.getJPathString(targetIDPath,summaryRelation._2()), + Scholix.generateScholixWithSource(summaryRelation._1(), summaryRelation._2())); + + scholixSummary.join( + relationToExport + .mapToPair((PairFunction) i -> new Tuple2<>(DHPUtils.getJPathString(sourceIDPath, i), i))) + .map(Tuple2::_2) + .mapToPair(k) + .join(scholixSummary) + .map(Tuple2::_2) + .map(i -> i._1().addTarget(i._2())) + .map(s-> { + ObjectMapper mapper = new ObjectMapper(); + return mapper.writeValueAsString(s); + }) + .saveAsTextFile(workingDirPath + "/scholix", GzipCodec.class); + + + ; + + + } + + + +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateSummary.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateSummary.java index 7245a9064..a8cdf6dd5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateSummary.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkGenerateSummary.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.provision; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.provision.scholix.ScholixSummary; +import eu.dnetlib.dhp.provision.scholix.summary.ScholixSummary; import eu.dnetlib.dhp.utils.DHPUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkIndexCollectionOnES.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkIndexCollectionOnES.java index aa1734b2f..e7c97ee1c 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkIndexCollectionOnES.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/SparkIndexCollectionOnES.java @@ -1,8 +1,6 @@ package eu.dnetlib.dhp.provision; -import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.provision.scholix.ScholixSummary; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/Scholix.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/Scholix.java new file mode 100644 index 000000000..70467abb6 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/Scholix.java @@ -0,0 +1,119 @@ +package eu.dnetlib.dhp.provision.scholix; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.provision.scholix.summary.ScholixSummary; +import eu.dnetlib.dhp.schema.oaf.Relation; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class Scholix implements Serializable { + private String publicationDate; + + private List publisher; + + private List linkprovider; + + private ScholixRelationship relationship; + + private ScholixResource source; + + private ScholixResource target; + + private String identifier; + + + public static Scholix generateScholixWithSource(final String sourceSummaryJson, final String relation) { + final ObjectMapper mapper = new ObjectMapper(); + + try { + ScholixSummary scholixSummary = mapper.readValue(sourceSummaryJson, ScholixSummary.class); + Relation rel = mapper.readValue(sourceSummaryJson, Relation.class); + final Scholix s = new Scholix(); + if (scholixSummary.getDate() != null) + s.setPublicationDate(scholixSummary.getDate().stream().findFirst().orElse(null)); + + + s.setLinkprovider(rel.getCollectedFrom().stream().map(cf -> + new ScholixEntityId(cf.getValue(), Collections.singletonList( + new ScholixIdentifier(cf.getKey(), "dnet_identifier") + ))).collect(Collectors.toList())); + + + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + public Scholix addTarget(final String targetSummaryJson) { + return this; + } + + + public String getPublicationDate() { + return publicationDate; + } + + public Scholix setPublicationDate(String publicationDate) { + this.publicationDate = publicationDate; + return this; + } + + public List getPublisher() { + return publisher; + } + + public Scholix setPublisher(List publisher) { + this.publisher = publisher; + return this; + } + + public List getLinkprovider() { + return linkprovider; + } + + public Scholix setLinkprovider(List linkprovider) { + this.linkprovider = linkprovider; + return this; + } + + public ScholixRelationship getRelationship() { + return relationship; + } + + public Scholix setRelationship(ScholixRelationship relationship) { + this.relationship = relationship; + return this; + } + + public ScholixResource getSource() { + return source; + } + + public Scholix setSource(ScholixResource source) { + this.source = source; + return this; + } + + public ScholixResource getTarget() { + return target; + } + + public Scholix setTarget(ScholixResource target) { + this.target = target; + return this; + } + + public String getIdentifier() { + return identifier; + } + + public Scholix setIdentifier(String identifier) { + this.identifier = identifier; + return this; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixCollectedFrom.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixCollectedFrom.java new file mode 100644 index 000000000..62da993ba --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixCollectedFrom.java @@ -0,0 +1,46 @@ +package eu.dnetlib.dhp.provision.scholix; + +import java.io.Serializable; + +public class ScholixCollectedFrom implements Serializable { + + private ScholixEntityId provider; + private String provisionMode; + private String completionStatus; + + public ScholixCollectedFrom() { + } + + public ScholixCollectedFrom(ScholixEntityId provider, String provisionMode, String completionStatus) { + this.provider = provider; + this.provisionMode = provisionMode; + this.completionStatus = completionStatus; + } + + public ScholixEntityId getProvider() { + return provider; + } + + public ScholixCollectedFrom setProvider(ScholixEntityId provider) { + this.provider = provider; + return this; + } + + public String getProvisionMode() { + return provisionMode; + } + + public ScholixCollectedFrom setProvisionMode(String provisionMode) { + this.provisionMode = provisionMode; + return this; + } + + public String getCompletionStatus() { + return completionStatus; + } + + public ScholixCollectedFrom setCompletionStatus(String completionStatus) { + this.completionStatus = completionStatus; + return this; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixEntityId.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixEntityId.java new file mode 100644 index 000000000..a2e307e6e --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixEntityId.java @@ -0,0 +1,35 @@ +package eu.dnetlib.dhp.provision.scholix; + +import java.io.Serializable; +import java.util.List; + +public class ScholixEntityId implements Serializable { + private String name; + private List identifiers; + + public ScholixEntityId() { + } + + public ScholixEntityId(String name, List identifiers) { + this.name = name; + this.identifiers = identifiers; + } + + public String getName() { + return name; + } + + public ScholixEntityId setName(String name) { + this.name = name; + return this; + } + + public List getIdentifiers() { + return identifiers; + } + + public ScholixEntityId setIdentifiers(List identifiers) { + this.identifiers = identifiers; + return this; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixIdentifier.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixIdentifier.java new file mode 100644 index 000000000..9adac698d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixIdentifier.java @@ -0,0 +1,34 @@ +package eu.dnetlib.dhp.provision.scholix; + +import java.io.Serializable; + +public class ScholixIdentifier implements Serializable { + private String identifier; + private String schema; + + public ScholixIdentifier() { + } + + public ScholixIdentifier(String identifier, String schema) { + this.identifier = identifier; + this.schema = schema; + } + + public String getIdentifier() { + return identifier; + } + + public ScholixIdentifier setIdentifier(String identifier) { + this.identifier = identifier; + return this; + } + + public String getSchema() { + return schema; + } + + public ScholixIdentifier setSchema(String schema) { + this.schema = schema; + return this; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixRelationship.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixRelationship.java new file mode 100644 index 000000000..9bcb9222b --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixRelationship.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.provision.scholix; + +import java.io.Serializable; + +public class ScholixRelationship implements Serializable { + private String name; + private String schema; + private String inverse; + + public ScholixRelationship() { + } + + public ScholixRelationship(String name, String schema, String inverse) { + this.name = name; + this.schema = schema; + this.inverse = inverse; + } + + public String getName() { + return name; + } + + public ScholixRelationship setName(String name) { + this.name = name; + return this; + } + + public String getSchema() { + return schema; + } + + public ScholixRelationship setSchema(String schema) { + this.schema = schema; + return this; + } + + public String getInverse() { + return inverse; + } + + public ScholixRelationship setInverse(String inverse) { + this.inverse = inverse; + return this; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixResource.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixResource.java new file mode 100644 index 000000000..74cb361f6 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixResource.java @@ -0,0 +1,99 @@ +package eu.dnetlib.dhp.provision.scholix; + +import java.io.Serializable; +import java.util.List; + +public class ScholixResource implements Serializable { + + private ScholixIdentifier identifier ; + private String dnetIdentifier ; + private String objectType ; + private String objectSubType ; + private String title ; + private List creator ; + private String publicationDate ; + private List publisher ; + private List collectedFrom ; + + + public ScholixIdentifier getIdentifier() { + return identifier; + } + + public ScholixResource setIdentifier(ScholixIdentifier identifier) { + this.identifier = identifier; + return this; + } + + public String getDnetIdentifier() { + return dnetIdentifier; + } + + public ScholixResource setDnetIdentifier(String dnetIdentifier) { + this.dnetIdentifier = dnetIdentifier; + return this; + } + + public String getObjectType() { + return objectType; + } + + public ScholixResource setObjectType(String objectType) { + this.objectType = objectType; + return this; + } + + public String getObjectSubType() { + return objectSubType; + } + + public ScholixResource setObjectSubType(String objectSubType) { + this.objectSubType = objectSubType; + return this; + } + + public String getTitle() { + return title; + } + + public ScholixResource setTitle(String title) { + this.title = title; + return this; + } + + public List getCreator() { + return creator; + } + + public ScholixResource setCreator(List creator) { + this.creator = creator; + return this; + } + + public String getPublicationDate() { + return publicationDate; + } + + public ScholixResource setPublicationDate(String publicationDate) { + this.publicationDate = publicationDate; + return this; + } + + public List getPublisher() { + return publisher; + } + + public ScholixResource setPublisher(List publisher) { + this.publisher = publisher; + return this; + } + + public List getCollectedFrom() { + return collectedFrom; + } + + public ScholixResource setCollectedFrom(List collectedFrom) { + this.collectedFrom = collectedFrom; + return this; + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/CollectedFromType.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/CollectedFromType.java similarity index 95% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/CollectedFromType.java rename to dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/CollectedFromType.java index 2a6f0ab8d..6fc0c7b29 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/CollectedFromType.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/CollectedFromType.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.provision.scholix; +package eu.dnetlib.dhp.provision.scholix.summary; import java.io.Serializable; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/SchemeValue.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/SchemeValue.java similarity index 91% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/SchemeValue.java rename to dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/SchemeValue.java index 6e77fea70..95a292b9d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/SchemeValue.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/SchemeValue.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.provision.scholix; +package eu.dnetlib.dhp.provision.scholix.summary; import java.io.Serializable; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixSummary.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/ScholixSummary.java similarity index 99% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixSummary.java rename to dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/ScholixSummary.java index 690566823..577126cd5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/ScholixSummary.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/ScholixSummary.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.provision.scholix; +package eu.dnetlib.dhp.provision.scholix.summary; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.DeserializationFeature; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/TypedIdentifier.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/TypedIdentifier.java similarity index 91% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/TypedIdentifier.java rename to dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/TypedIdentifier.java index 5d9ced6cf..fd6c05ce3 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/TypedIdentifier.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/TypedIdentifier.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.provision.scholix; +package eu.dnetlib.dhp.provision.scholix.summary; import java.io.Serializable; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/Typology.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/Typology.java similarity index 70% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/Typology.java rename to dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/Typology.java index 78ddcae51..bba4b6ddf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/Typology.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/provision/scholix/summary/Typology.java @@ -1,4 +1,4 @@ -package eu.dnetlib.dhp.provision.scholix; +package eu.dnetlib.dhp.provision.scholix.summary; import java.io.Serializable; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/provision/ExtractInfoTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/provision/ExtractInfoTest.java index a45ee5d18..d4b185fdf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/provision/ExtractInfoTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/provision/ExtractInfoTest.java @@ -1,9 +1,7 @@ package eu.dnetlib.dhp.provision; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.provision.scholix.ScholixSummary; +import eu.dnetlib.dhp.provision.scholix.summary.ScholixSummary; import org.apache.commons.io.IOUtils; import org.junit.Ignore; import org.junit.Test;