implemented scholix Flat mapping
This commit is contained in:
parent
ef82b8362d
commit
ab1842e5dc
|
@ -0,0 +1,158 @@
|
||||||
|
package eu.dnetlib.dhp.sx.graph.scholix;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ScholixFlat {
|
||||||
|
private String identifier;
|
||||||
|
private String relationType;
|
||||||
|
private String sourceId;
|
||||||
|
private String sourceType;
|
||||||
|
private String sourceSubType;
|
||||||
|
private List<String> sourcePid;
|
||||||
|
private List<String> sourcePidType;
|
||||||
|
private List<String> sourcePublisher;
|
||||||
|
private String targetId;
|
||||||
|
private String targetType;
|
||||||
|
private String targetSubType;
|
||||||
|
private List<String> targetPid;
|
||||||
|
private List<String> targetPidType;
|
||||||
|
private List<String> targetPublisher;
|
||||||
|
private List<String> linkProviders;
|
||||||
|
private String publicationDate;
|
||||||
|
private String blob;
|
||||||
|
|
||||||
|
public String getIdentifier() {
|
||||||
|
return identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setIdentifier(String identifier) {
|
||||||
|
this.identifier = identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRelationType() {
|
||||||
|
return relationType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRelationType(String relationType) {
|
||||||
|
this.relationType = relationType;
|
||||||
|
}
|
||||||
|
public String getSourceId() {
|
||||||
|
return sourceId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourceId(String sourceId) {
|
||||||
|
this.sourceId = sourceId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSourceType() {
|
||||||
|
return sourceType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourceType(String sourceType) {
|
||||||
|
this.sourceType = sourceType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSourceSubType() {
|
||||||
|
return sourceSubType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourceSubType(String sourceSubType) {
|
||||||
|
this.sourceSubType = sourceSubType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getSourcePid() {
|
||||||
|
return sourcePid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourcePid(List<String> sourcePid) {
|
||||||
|
this.sourcePid = sourcePid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getSourcePidType() {
|
||||||
|
return sourcePidType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourcePidType(List<String> sourcePidType) {
|
||||||
|
this.sourcePidType = sourcePidType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getSourcePublisher() {
|
||||||
|
return sourcePublisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSourcePublisher(List<String> sourcePublisher) {
|
||||||
|
this.sourcePublisher = sourcePublisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTargetId() {
|
||||||
|
return targetId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTargetId(String targetId) {
|
||||||
|
this.targetId = targetId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTargetType() {
|
||||||
|
return targetType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTargetType(String targetType) {
|
||||||
|
this.targetType = targetType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTargetSubType() {
|
||||||
|
return targetSubType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTargetSubType(String targetSubType) {
|
||||||
|
this.targetSubType = targetSubType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getTargetPid() {
|
||||||
|
return targetPid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTargetPid(List<String> targetPid) {
|
||||||
|
this.targetPid = targetPid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getTargetPidType() {
|
||||||
|
return targetPidType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTargetPidType(List<String> targetPidType) {
|
||||||
|
this.targetPidType = targetPidType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getTargetPublisher() {
|
||||||
|
return targetPublisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTargetPublisher(List<String> targetPublisher) {
|
||||||
|
this.targetPublisher = targetPublisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getLinkProviders() {
|
||||||
|
return linkProviders;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLinkProviders(List<String> linkProviders) {
|
||||||
|
this.linkProviders = linkProviders;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPublicationDate() {
|
||||||
|
return publicationDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPublicationDate(String publicationDate) {
|
||||||
|
this.publicationDate = publicationDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBlob() {
|
||||||
|
return blob;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBlob(String blob) {
|
||||||
|
this.blob = blob;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,14 +1,15 @@
|
||||||
package eu.dnetlib.dhp.sx.graph.scholix
|
package eu.dnetlib.dhp.sx.graph.scholix
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty}
|
import eu.dnetlib.dhp.schema.oaf.{Dataset, OtherResearchProduct, Publication, Relation, Result, Software, StructuredProperty}
|
||||||
import eu.dnetlib.dhp.schema.sx.scholix._
|
import eu.dnetlib.dhp.schema.sx.scholix._
|
||||||
import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology}
|
import eu.dnetlib.dhp.schema.sx.summary.{AuthorPid, CollectedFromType, SchemeValue, ScholixSummary, Typology}
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import org.apache.spark.sql.expressions.Aggregator
|
import org.apache.spark.sql.expressions.Aggregator
|
||||||
import org.apache.spark.sql.{Encoder, Encoders}
|
import org.apache.spark.sql.{Encoder, Encoders}
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.jackson.JsonMethods.parse
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
|
@ -59,6 +60,36 @@ object ScholixUtils extends Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Flattens a Scholix link into a [[ScholixFlat]] record made only of strings and string lists.
  *
  * The source and target of the link must both be present and carry a non-null identifier
  * list; otherwise no flat record can be built and `null` is returned. Pid values, pid
  * schemas, publisher names and link-provider names are each de-duplicated independently,
  * so the resulting lists are not positionally aligned with one another.
  *
  * @param input the Scholix link to flatten
  * @param json  string stored verbatim in the blob field of the result
  * @return the flat record, or `null` when `input`, its source, its target, or one of their
  *         identifier lists is missing
  */
def flattenizeScholix(input: Scholix, json: String): ScholixFlat = {
  // Guard everything that is dereferenced below, so incomplete records yield null
  // instead of a NullPointerException.
  if (input == null)
    return null
  val source = input.getSource
  val target = input.getTarget
  if (source == null || source.getIdentifier == null || target == null || target.getIdentifier == null)
    return null

  val flat: ScholixFlat = new ScholixFlat
  flat.setIdentifier(input.getIdentifier)
  // The relationship may be absent; in that case the relation type is left unset.
  if (input.getRelationship != null)
    flat.setRelationType(input.getRelationship.getName)

  // Source side
  flat.setSourceId(source.getDnetIdentifier)
  flat.setSourcePid(source.getIdentifier.asScala.map(p => p.getIdentifier).distinct.toList.asJava)
  flat.setSourcePidType(source.getIdentifier.asScala.map(p => p.getSchema).distinct.toList.asJava)
  flat.setSourceType(source.getObjectType)
  flat.setSourceSubType(source.getObjectSubType)
  if (source.getPublisher != null)
    // De-duplicated, consistently with target publishers and link providers.
    flat.setSourcePublisher(source.getPublisher.asScala.map(p => p.getName).distinct.toList.asJava)

  // Target side
  flat.setTargetId(target.getDnetIdentifier)
  flat.setTargetPid(target.getIdentifier.asScala.map(p => p.getIdentifier).distinct.toList.asJava)
  flat.setTargetPidType(target.getIdentifier.asScala.map(p => p.getSchema).distinct.toList.asJava)
  flat.setTargetType(target.getObjectType)
  flat.setTargetSubType(target.getObjectSubType)
  if (target.getPublisher != null)
    flat.setTargetPublisher(target.getPublisher.asScala.map(p => p.getName).distinct.toList.asJava)

  flat.setPublicationDate(input.getPublicationDate)
  if (input.getLinkprovider != null)
    flat.setLinkProviders(input.getLinkprovider.asScala.map(l => l.getName).distinct.toList.asJava)
  flat.setBlob(json)
  flat
}
|
||||||
|
|
||||||
def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = {
|
def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = {
|
||||||
new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName)
|
new ScholixRelationship(rel.getInverse, rel.getSchema, rel.getName)
|
||||||
|
|
||||||
|
@ -232,7 +263,14 @@ object ScholixUtils extends Serializable {
|
||||||
|
|
||||||
if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) {
|
if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) {
|
||||||
val l: List[ScholixEntityId] =
|
val l: List[ScholixEntityId] =
|
||||||
summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList
|
summaryObject.getAuthor.asScala.map(a => {
|
||||||
|
if (a.getORCID != null)
|
||||||
|
new ScholixEntityId(
|
||||||
|
a.getFullname,
|
||||||
|
List(new ScholixIdentifier(a.getORCID, "ORCID", s"https://orcid.org/${a.getORCID}")).asJava
|
||||||
|
)
|
||||||
|
else new ScholixEntityId(a.getFullname, null)
|
||||||
|
}).toList
|
||||||
if (l.nonEmpty)
|
if (l.nonEmpty)
|
||||||
r.setCreator(l.asJava)
|
r.setCreator(l.asJava)
|
||||||
}
|
}
|
||||||
|
@ -377,10 +415,13 @@ object ScholixUtils extends Serializable {
|
||||||
if (persistentIdentifiers.isEmpty)
|
if (persistentIdentifiers.isEmpty)
|
||||||
return null
|
return null
|
||||||
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
||||||
if (r.isInstanceOf[Publication])
|
r match {
|
||||||
s.setTypology(Typology.publication)
|
case _: Publication => s.setTypology(Typology.publication)
|
||||||
else
|
case _: Dataset => s.setTypology(Typology.dataset)
|
||||||
s.setTypology(Typology.dataset)
|
case _: Software => s.setTypology(Typology.software)
|
||||||
|
case _: OtherResearchProduct => s.setTypology(Typology.otherresearchproduct)
|
||||||
|
case _ =>
|
||||||
|
}
|
||||||
|
|
||||||
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
||||||
|
|
||||||
|
@ -393,7 +434,20 @@ object ScholixUtils extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r.getAuthor != null && !r.getAuthor.isEmpty) {
|
if (r.getAuthor != null && !r.getAuthor.isEmpty) {
|
||||||
val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname).toList
|
val authors: List[AuthorPid] = r.getAuthor.asScala
|
||||||
|
.map(a => {
|
||||||
|
var ORCID: String = null
|
||||||
|
if (a.getPid != null) {
|
||||||
|
val result = a.getPid.asScala.find(p =>
|
||||||
|
p.getQualifier != null && p.getQualifier.getClassid != null && p.getQualifier.getClassid.toLowerCase
|
||||||
|
.contains("orcid")
|
||||||
|
)
|
||||||
|
if (result.isDefined)
|
||||||
|
ORCID = result.get.getValue
|
||||||
|
}
|
||||||
|
new AuthorPid(a.getFullname, ORCID)
|
||||||
|
})
|
||||||
|
.toList
|
||||||
if (authors.nonEmpty)
|
if (authors.nonEmpty)
|
||||||
s.setAuthor(authors.asJava)
|
s.setAuthor(authors.asJava)
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.sx.graph.scholix;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
|
||||||
|
|
||||||
|
public class ScholixFlatTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void flattenScholixTest() throws IOException {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
InputStream gzipStream = new GZIPInputStream(getClass().getResourceAsStream("scholix_records.gz"));
|
||||||
|
Reader decoder = new InputStreamReader(gzipStream, "UTF-8");
|
||||||
|
BufferedReader buffered = new BufferedReader(decoder);
|
||||||
|
String line;
|
||||||
|
FileWriter myWriter = new FileWriter("/Users/sandro/Downloads/records");
|
||||||
|
while ((line = buffered.readLine()) != null) {
|
||||||
|
final Scholix s = mapper.readValue(line, Scholix.class);
|
||||||
|
final ScholixFlat flat = ScholixUtils.flattenizeScholix(s, line);
|
||||||
|
assertNotNull(s);
|
||||||
|
assertNotNull(flat);
|
||||||
|
assertEquals(s.getIdentifier(), flat.getIdentifier());
|
||||||
|
assertEquals(s.getRelationship().getName(), flat.getRelationType());
|
||||||
|
assertEquals(s.getSource().getObjectType(), flat.getSourceType());
|
||||||
|
assertEquals(s.getSource().getObjectSubType(), flat.getSourceSubType());
|
||||||
|
myWriter.write(mapper.writeValueAsString(flat));
|
||||||
|
myWriter.write("\n");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
myWriter.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
Binary file not shown.
|
@ -1,194 +1,190 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.sx.provision;
|
package eu.dnetlib.dhp.sx.provision;
|
||||||
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
|
|
||||||
import eu.dnetlib.dhp.schema.sx.scholix.ScholixResource;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix.Scholix;
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix.ScholixResource;
|
||||||
|
|
||||||
public class ScholixFlat {
|
public class ScholixFlat {
|
||||||
private static ObjectMapper MAPPER = new ObjectMapper();
|
private static ObjectMapper MAPPER = new ObjectMapper();
|
||||||
private List<String> linkProvider= new ArrayList<>();
|
private List<String> linkProvider = new ArrayList<>();
|
||||||
|
|
||||||
private String publicationDate;
|
private String publicationDate;
|
||||||
|
|
||||||
private List<String> sourceLinkPublisher = new ArrayList<>();
|
private List<String> sourceLinkPublisher = new ArrayList<>();
|
||||||
private List<String> targetLinkPublisher = new ArrayList<>();
|
private List<String> targetLinkPublisher = new ArrayList<>();
|
||||||
|
|
||||||
private String sourceDnetIdentifier ;
|
private String sourceDnetIdentifier;
|
||||||
private String targetDnetIdentifier ;
|
private String targetDnetIdentifier;
|
||||||
private List<String> sourcePids = new ArrayList<>();
|
private List<String> sourcePids = new ArrayList<>();
|
||||||
private List<String> sourcePidTypes = new ArrayList<>();
|
private List<String> sourcePidTypes = new ArrayList<>();
|
||||||
private List<String> targetPids = new ArrayList<>();
|
private List<String> targetPids = new ArrayList<>();
|
||||||
private List<String> targetPidTypes = new ArrayList<>();
|
private List<String> targetPidTypes = new ArrayList<>();
|
||||||
|
|
||||||
private String json;
|
private String json;
|
||||||
|
|
||||||
|
public void addLinkProvider(final String providerName) {
|
||||||
|
addStringToList(providerName, this.linkProvider);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addSourceLinkPublisher(final String linkPublisher) {
|
||||||
|
addStringToList(linkPublisher, sourceLinkPublisher);
|
||||||
|
|
||||||
public void addLinkProvider(final String providerName) {
|
}
|
||||||
addStringToList(providerName, this.linkProvider);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addSourceLinkPublisher(final String linkPublisher) {
|
public void addTargetLinkPublisher(final String linkPublisher) {
|
||||||
addStringToList(linkPublisher, sourceLinkPublisher);
|
addStringToList(linkPublisher, targetLinkPublisher);
|
||||||
|
|
||||||
}
|
}
|
||||||
public void addTargetLinkPublisher(final String linkPublisher) {
|
|
||||||
addStringToList(linkPublisher, targetLinkPublisher);
|
|
||||||
|
|
||||||
}
|
public void addSourcePid(final String pid) {
|
||||||
|
addStringToList(pid, sourcePids);
|
||||||
|
}
|
||||||
|
|
||||||
public void addSourcePid(final String pid) {
|
public void addSourcePidType(final String pidType) {
|
||||||
addStringToList(pid, sourcePids);
|
addStringToList(pidType, sourcePidTypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addSourcePidType(final String pidType) {
|
public void addTargetPidType(final String pidType) {
|
||||||
addStringToList(pidType, sourcePidTypes);
|
addStringToList(pidType, targetPidTypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addTargetPidType(final String pidType) {
|
public void addTargetPid(final String pid) {
|
||||||
addStringToList(pidType, targetPidTypes);
|
addStringToList(pid, targetPids);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addStringToList(final String s, final List<String> l) {
|
||||||
|
if (l != null && !l.contains(s))
|
||||||
|
l.add(s);
|
||||||
|
}
|
||||||
|
|
||||||
public void addTargetPid(final String pid) {
|
public String getSourceDnetIdentifier() {
|
||||||
addStringToList(pid, targetPids);
|
return sourceDnetIdentifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addStringToList(final String s, final List<String>l ) {
|
public void setSourceDnetIdentifier(String sourceDnetIdentifier) {
|
||||||
if (l!= null && !l.contains(s))
|
this.sourceDnetIdentifier = sourceDnetIdentifier;
|
||||||
l.add(s);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public String getSourceDnetIdentifier() {
|
public String getTargetDnetIdentifier() {
|
||||||
return sourceDnetIdentifier;
|
return targetDnetIdentifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSourceDnetIdentifier(String sourceDnetIdentifier) {
|
public void setTargetDnetIdentifier(String targetDnetIdentifier) {
|
||||||
this.sourceDnetIdentifier = sourceDnetIdentifier;
|
this.targetDnetIdentifier = targetDnetIdentifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTargetDnetIdentifier() {
|
public List<String> getSourcePids() {
|
||||||
return targetDnetIdentifier;
|
return sourcePids;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTargetDnetIdentifier(String targetDnetIdentifier) {
|
public void setSourcePids(List<String> sourcePids) {
|
||||||
this.targetDnetIdentifier = targetDnetIdentifier;
|
this.sourcePids = sourcePids;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getSourcePids() {
|
public List<String> getSourcePidTypes() {
|
||||||
return sourcePids;
|
return sourcePidTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSourcePids(List<String> sourcePids) {
|
public void setSourcePidTypes(List<String> sourcePidTypes) {
|
||||||
this.sourcePids = sourcePids;
|
this.sourcePidTypes = sourcePidTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getSourcePidTypes() {
|
public List<String> getTargetPids() {
|
||||||
return sourcePidTypes;
|
return targetPids;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSourcePidTypes(List<String> sourcePidTypes) {
|
public void setTargetPids(List<String> targetPids) {
|
||||||
this.sourcePidTypes = sourcePidTypes;
|
this.targetPids = targetPids;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getTargetPids() {
|
public List<String> getTargetPidTypes() {
|
||||||
return targetPids;
|
return targetPidTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTargetPids(List<String> targetPids) {
|
public void setTargetPidTypes(List<String> targetPidTypes) {
|
||||||
this.targetPids = targetPids;
|
this.targetPidTypes = targetPidTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getTargetPidTypes() {
|
public List<String> getSourceLinkPublisher() {
|
||||||
return targetPidTypes;
|
return sourceLinkPublisher;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTargetPidTypes(List<String> targetPidTypes) {
|
public void setSourceLinkPublisher(List<String> sourceLinkPublisher) {
|
||||||
this.targetPidTypes = targetPidTypes;
|
this.sourceLinkPublisher = sourceLinkPublisher;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getSourceLinkPublisher() {
|
public List<String> getTargetLinkPublisher() {
|
||||||
return sourceLinkPublisher;
|
return targetLinkPublisher;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSourceLinkPublisher(List<String> sourceLinkPublisher) {
|
public void setTargetLinkPublisher(List<String> targetLinkPublisher) {
|
||||||
this.sourceLinkPublisher = sourceLinkPublisher;
|
this.targetLinkPublisher = targetLinkPublisher;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getTargetLinkPublisher() {
|
public List<String> getLinkProvider() {
|
||||||
return targetLinkPublisher;
|
return linkProvider;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setTargetLinkPublisher(List<String> targetLinkPublisher) {
|
public void setLinkProvider(List<String> linkProvider) {
|
||||||
this.targetLinkPublisher = targetLinkPublisher;
|
this.linkProvider = linkProvider;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getPublicationDate() {
|
||||||
|
return publicationDate;
|
||||||
|
}
|
||||||
|
|
||||||
public List<String> getLinkProvider() {
|
public void setPublicationDate(String publicationDate) {
|
||||||
return linkProvider;
|
this.publicationDate = publicationDate;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLinkProvider(List<String> linkProvider) {
|
public String getJson() {
|
||||||
this.linkProvider = linkProvider;
|
return json;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getPublicationDate() {
|
public void setJson(String json) {
|
||||||
return publicationDate;
|
this.json = json;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPublicationDate(String publicationDate) {
|
public static ScholixFlat fromScholix(final Scholix scholix) throws JsonProcessingException {
|
||||||
this.publicationDate = publicationDate;
|
if (scholix == null || scholix.getSource() == null || scholix.getTarget() == null)
|
||||||
}
|
return null;
|
||||||
|
final ScholixFlat flat = new ScholixFlat();
|
||||||
|
if (scholix.getLinkprovider() != null)
|
||||||
|
scholix.getLinkprovider().forEach(l -> flat.addLinkProvider(l.getName()));
|
||||||
|
|
||||||
public String getJson() {
|
flat.setPublicationDate(scholix.getPublicationDate());
|
||||||
return json;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setJson(String json) {
|
final ScholixResource source = scholix.getSource();
|
||||||
this.json = json;
|
flat.setSourceDnetIdentifier(source.getDnetIdentifier());
|
||||||
}
|
if (source.getIdentifier() != null) {
|
||||||
|
source.getIdentifier().forEach(i -> {
|
||||||
|
flat.addSourcePid(i.getIdentifier());
|
||||||
|
flat.addSourcePidType(i.getSchema());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (source.getPublisher() != null) {
|
||||||
|
source.getPublisher().forEach(p -> flat.addSourceLinkPublisher(p.getName()));
|
||||||
|
}
|
||||||
|
|
||||||
public static ScholixFlat fromScholix(final Scholix scholix) throws JsonProcessingException {
|
final ScholixResource target = scholix.getSource();
|
||||||
if (scholix== null || scholix.getSource()==null || scholix.getTarget()== null)
|
flat.setTargetDnetIdentifier(target.getDnetIdentifier());
|
||||||
return null;
|
if (target.getIdentifier() != null) {
|
||||||
final ScholixFlat flat = new ScholixFlat();
|
target.getIdentifier().forEach(i -> {
|
||||||
if (scholix.getLinkprovider()!= null)
|
flat.addTargetPid(i.getIdentifier());
|
||||||
scholix.getLinkprovider().forEach(l ->flat.addLinkProvider(l.getName()));
|
flat.addTargetPidType(i.getSchema());
|
||||||
|
});
|
||||||
flat.setPublicationDate(scholix.getPublicationDate());
|
}
|
||||||
|
if (target.getPublisher() != null) {
|
||||||
final ScholixResource source = scholix.getSource();
|
target.getPublisher().forEach(p -> flat.addTargetLinkPublisher(p.getName()));
|
||||||
flat.setSourceDnetIdentifier(source.getDnetIdentifier());
|
}
|
||||||
if (source.getIdentifier()!= null) {
|
flat.setJson(MAPPER.writeValueAsString(scholix));
|
||||||
source.getIdentifier().forEach(i -> {
|
return flat;
|
||||||
flat.addSourcePid(i.getIdentifier());
|
}
|
||||||
flat.addSourcePidType(i.getSchema());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if (source.getPublisher()!= null) {
|
|
||||||
source.getPublisher().forEach(p -> flat.addSourceLinkPublisher(p.getName()));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
final ScholixResource target = scholix.getSource();
|
|
||||||
flat.setTargetDnetIdentifier(target.getDnetIdentifier());
|
|
||||||
if (target.getIdentifier()!= null) {
|
|
||||||
target.getIdentifier().forEach(i -> {
|
|
||||||
flat.addTargetPid(i.getIdentifier());
|
|
||||||
flat.addTargetPidType(i.getSchema());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if (target.getPublisher()!= null) {
|
|
||||||
target.getPublisher().forEach(p -> flat.addTargetLinkPublisher(p.getName()));
|
|
||||||
}
|
|
||||||
flat.setJson(MAPPER.writeValueAsString(scholix));
|
|
||||||
return flat;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,7 +56,7 @@ public class XmlRecordFactoryTest {
|
||||||
|
|
||||||
assertNotNull(doc);
|
assertNotNull(doc);
|
||||||
|
|
||||||
//System.out.println(doc.asXML());
|
// System.out.println(doc.asXML());
|
||||||
|
|
||||||
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
|
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
|
||||||
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
|
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
|
||||||
|
|
|
@ -6,19 +6,17 @@ import org.junit.Before
|
||||||
|
|
||||||
import org.junit.jupiter.api.{Test}
|
import org.junit.jupiter.api.{Test}
|
||||||
|
|
||||||
class ScholixFlatTest{
|
class ScholixFlatTest {
|
||||||
|
|
||||||
|
|
||||||
var spark:SparkSession = null
|
|
||||||
|
|
||||||
|
var spark: SparkSession = null
|
||||||
|
|
||||||
def initSpark(): Unit = {
|
def initSpark(): Unit = {
|
||||||
|
|
||||||
if (spark!= null)
|
if (spark != null)
|
||||||
return
|
return
|
||||||
println("SONO QUI")
|
println("SONO QUI")
|
||||||
val conf = new SparkConf
|
val conf = new SparkConf
|
||||||
conf.setAppName(getClass.getSimpleName )
|
conf.setAppName(getClass.getSimpleName)
|
||||||
conf.setMaster("local[*]")
|
conf.setMaster("local[*]")
|
||||||
conf.set("spark.driver.host", "localhost")
|
conf.set("spark.driver.host", "localhost")
|
||||||
conf.set("hive.metastore.local", "true")
|
conf.set("hive.metastore.local", "true")
|
||||||
|
@ -27,26 +25,22 @@ class ScholixFlatTest{
|
||||||
spark = SparkSession
|
spark = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.appName(getClass.getSimpleName)
|
.appName(getClass.getSimpleName)
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.getOrCreate()
|
.getOrCreate()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def after(): Unit = {
|
def after(): Unit = {
|
||||||
spark.stop()
|
spark.stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testScholixConversion (): Unit = {
|
def testScholixConversion(): Unit = {
|
||||||
initSpark()
|
initSpark()
|
||||||
val p = getClass.getResource("/eu/dnetlib/dhp/sx/provision/scholix_dump.zip").getPath
|
val p = getClass.getResource("/eu/dnetlib/dhp/sx/provision/scholix_dump.zip").getPath
|
||||||
|
|
||||||
val t = spark.read.text(p).count
|
val t = spark.read.text(p).count
|
||||||
println(s"total =$t")
|
println(s"total =$t")
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -807,7 +807,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[3.16.0]</dhp-schemas.version>
|
<dhp-schemas.version>[3.16.1-SNAPSHOT]</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue