diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/ScholixFlat.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/ScholixFlat.java new file mode 100644 index 000000000..f1e2796d0 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/ScholixFlat.java @@ -0,0 +1,194 @@ +package eu.dnetlib.dhp.sx.provision; + + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.schema.sx.scholix.Scholix; +import eu.dnetlib.dhp.schema.sx.scholix.ScholixResource; + +import java.util.ArrayList; +import java.util.List; + +public class ScholixFlat { + private static ObjectMapper MAPPER = new ObjectMapper(); + private List linkProvider= new ArrayList<>(); + + private String publicationDate; + + private List sourceLinkPublisher = new ArrayList<>(); + private List targetLinkPublisher = new ArrayList<>(); + + private String sourceDnetIdentifier ; + private String targetDnetIdentifier ; + private List sourcePids = new ArrayList<>(); + private List sourcePidTypes = new ArrayList<>(); + private List targetPids = new ArrayList<>(); + private List targetPidTypes = new ArrayList<>(); + + private String json; + + + + public void addLinkProvider(final String providerName) { + addStringToList(providerName, this.linkProvider); + } + + public void addSourceLinkPublisher(final String linkPublisher) { + addStringToList(linkPublisher, sourceLinkPublisher); + + } + public void addTargetLinkPublisher(final String linkPublisher) { + addStringToList(linkPublisher, targetLinkPublisher); + + } + + public void addSourcePid(final String pid) { + addStringToList(pid, sourcePids); + } + + public void addSourcePidType(final String pidType) { + addStringToList(pidType, sourcePidTypes); + } + + public void addTargetPidType(final String pidType) { + addStringToList(pidType, targetPidTypes); + } + + + public void 
addTargetPid(final String pid) { + addStringToList(pid, targetPids); + } + + public void addStringToList(final String s, final Listl ) { + if (l!= null && !l.contains(s)) + l.add(s); + } + + public String getSourceDnetIdentifier() { + return sourceDnetIdentifier; + } + + public void setSourceDnetIdentifier(String sourceDnetIdentifier) { + this.sourceDnetIdentifier = sourceDnetIdentifier; + } + + public String getTargetDnetIdentifier() { + return targetDnetIdentifier; + } + + public void setTargetDnetIdentifier(String targetDnetIdentifier) { + this.targetDnetIdentifier = targetDnetIdentifier; + } + + public List getSourcePids() { + return sourcePids; + } + + public void setSourcePids(List sourcePids) { + this.sourcePids = sourcePids; + } + + public List getSourcePidTypes() { + return sourcePidTypes; + } + + public void setSourcePidTypes(List sourcePidTypes) { + this.sourcePidTypes = sourcePidTypes; + } + + public List getTargetPids() { + return targetPids; + } + + public void setTargetPids(List targetPids) { + this.targetPids = targetPids; + } + + public List getTargetPidTypes() { + return targetPidTypes; + } + + public void setTargetPidTypes(List targetPidTypes) { + this.targetPidTypes = targetPidTypes; + } + + public List getSourceLinkPublisher() { + return sourceLinkPublisher; + } + + public void setSourceLinkPublisher(List sourceLinkPublisher) { + this.sourceLinkPublisher = sourceLinkPublisher; + } + + public List getTargetLinkPublisher() { + return targetLinkPublisher; + } + + public void setTargetLinkPublisher(List targetLinkPublisher) { + this.targetLinkPublisher = targetLinkPublisher; + } + + + public List getLinkProvider() { + return linkProvider; + } + + public void setLinkProvider(List linkProvider) { + this.linkProvider = linkProvider; + } + + public String getPublicationDate() { + return publicationDate; + } + + public void setPublicationDate(String publicationDate) { + this.publicationDate = publicationDate; + } + + public String getJson() { + return 
json; + } + + public void setJson(String json) { + this.json = json; + } + + public static ScholixFlat fromScholix(final Scholix scholix) throws JsonProcessingException { + if (scholix== null || scholix.getSource()==null || scholix.getTarget()== null) + return null; + final ScholixFlat flat = new ScholixFlat(); + if (scholix.getLinkprovider()!= null) + scholix.getLinkprovider().forEach(l ->flat.addLinkProvider(l.getName())); + + flat.setPublicationDate(scholix.getPublicationDate()); + + final ScholixResource source = scholix.getSource(); + flat.setSourceDnetIdentifier(source.getDnetIdentifier()); + if (source.getIdentifier()!= null) { + source.getIdentifier().forEach(i -> { + flat.addSourcePid(i.getIdentifier()); + flat.addSourcePidType(i.getSchema()); + }); + } + if (source.getPublisher()!= null) { + source.getPublisher().forEach(p -> flat.addSourceLinkPublisher(p.getName())); + } + + + + final ScholixResource target = scholix.getSource(); + flat.setTargetDnetIdentifier(target.getDnetIdentifier()); + if (target.getIdentifier()!= null) { + target.getIdentifier().forEach(i -> { + flat.addTargetPid(i.getIdentifier()); + flat.addTargetPidType(i.getSchema()); + }); + } + if (target.getPublisher()!= null) { + target.getPublisher().forEach(p -> flat.addTargetLinkPublisher(p.getName())); + } + flat.setJson(MAPPER.writeValueAsString(scholix)); + return flat; + } + +} diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/sx/provision/scholix_dump.zip b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/sx/provision/scholix_dump.zip new file mode 100644 index 000000000..e9afa5a55 Binary files /dev/null and b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/sx/provision/scholix_dump.zip differ diff --git a/dhp-workflows/dhp-graph-provision/src/test/scala/eu/dnetlib/dhp/sx/provision/ScholixFlatTest.scala b/dhp-workflows/dhp-graph-provision/src/test/scala/eu/dnetlib/dhp/sx/provision/ScholixFlatTest.scala new file 
package eu.dnetlib.dhp.sx.provision

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.junit.Before

import org.junit.jupiter.api.{AfterEach, Test}

/** Smoke test that starts a local Spark session and reads the bundled Scholix dump resource. */
class ScholixFlatTest {

  /** Local session, created on demand by `initSpark()` and released by `after()`. */
  var spark: SparkSession = null

  /** Creates the local SparkSession on first use; later calls are no-ops. */
  def initSpark(): Unit = {
    if (spark == null) {
      println("SONO QUI")
      val conf = new SparkConf
      conf.setAppName(getClass.getSimpleName)
      conf.setMaster("local[*]")
      conf.set("spark.driver.host", "localhost")
      conf.set("hive.metastore.local", "true")
      conf.set("spark.ui.enabled", "false")

      spark = SparkSession
        .builder()
        .appName(getClass.getSimpleName)
        .config(conf)
        .getOrCreate()
    }
  }

  /**
   * Stops the SparkSession after each test.
   *
   * Registered with `@AfterEach` so JUnit actually invokes it; without the annotation the
   * session was never stopped. Safe to call when no session was created.
   */
  @AfterEach
  def after(): Unit = {
    if (spark != null) {
      spark.stop()
      spark = null
    }
  }

  /** Reads the test dump as text and prints the number of rows found. */
  @Test
  def testScholixConversion(): Unit = {
    initSpark()
    val p = getClass.getResource("/eu/dnetlib/dhp/sx/provision/scholix_dump.zip").getPath

    // NOTE(review): the resource is a .zip archive read through the plain text reader, and the
    // resulting count is only printed, never asserted — confirm this is the intended check.
    val t = spark.read.text(p).count
    println(s"total =$t")
  }

}