forked from D-Net/dnet-hadoop

commit c6fa8598e1
parent 829caee4fd

massive code refactor:
removed modules dhp-*-scholexplorer
@@ -6,7 +6,7 @@ import eu.dnetlib.dhp.utils.DHPUtils
 import org.apache.commons.lang3.StringUtils
 import com.fasterxml.jackson.databind.ObjectMapper
 import eu.dnetlib.dhp.schema.common.ModelConstants
-import eu.dnetlib.dhp.schema.scholexplorer.OafUtils
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
 import org.json4s
 import org.json4s.DefaultFormats
 import org.json4s.jackson.JsonMethods.parse
@@ -118,11 +118,11 @@ object DoiBoostMappingUtil {


   def getOpenAccessQualifier():AccessRight = {
-    OafUtils.createAccessRight("OPEN","Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
+    OafMapperUtils.accessRight("OPEN","Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
   }

   def getRestrictedQualifier():AccessRight = {
-    OafUtils.createAccessRight("RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
+    OafMapperUtils.accessRight("RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
   }


@@ -150,7 +150,7 @@ object DoiBoostMappingUtil {
       if (item.openAccess)
         i.setAccessright(getOpenAccessQualifier())
       val ar = getOpenAccessQualifier()
-      publication.setBestaccessright(OafUtils.createQualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
+      publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
     }
     else {
       hb = ModelConstants.UNKNOWN_REPOSITORY
@@ -162,11 +162,11 @@ object DoiBoostMappingUtil {
     if (ar.nonEmpty) {
       if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){
         val ar = getOpenAccessQualifier()
-        publication.setBestaccessright(OafUtils.createQualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
+        publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
       }
       else {
         val ar = getRestrictedQualifier()
-        publication.setBestaccessright(OafUtils.createQualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
+        publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename))
       }
     }
     publication
@@ -254,7 +254,7 @@ object DoiBoostMappingUtil {
     di.setInferred(false)
     di.setInvisible(false)
     di.setTrust(trust)
-    di.setProvenanceaction(OafUtils.createQualifier(ModelConstants.SYSIMPORT_ACTIONSET, ModelConstants.DNET_PROVENANCE_ACTIONS))
+    di.setProvenanceaction(OafMapperUtils.qualifier(ModelConstants.SYSIMPORT_ACTIONSET,ModelConstants.SYSIMPORT_ACTIONSET, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS))
     di
   }

@@ -262,7 +262,7 @@ object DoiBoostMappingUtil {

   def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String): StructuredProperty = {
     val sp = new StructuredProperty
-    sp.setQualifier(OafUtils.createQualifier(classId,className, schemeId, schemeName))
+    sp.setQualifier(OafMapperUtils.qualifier(classId,className, schemeId, schemeName))
     sp.setValue(value)
     sp

@@ -272,7 +272,7 @@ object DoiBoostMappingUtil {

   def createSP(value: String, classId: String,className:String, schemeId: String, schemeName:String, dataInfo: DataInfo): StructuredProperty = {
     val sp = new StructuredProperty
-    sp.setQualifier(OafUtils.createQualifier(classId,className, schemeId, schemeName))
+    sp.setQualifier(OafMapperUtils.qualifier(classId,className, schemeId, schemeName))
     sp.setValue(value)
     sp.setDataInfo(dataInfo)
     sp
@@ -281,7 +281,7 @@ object DoiBoostMappingUtil {

   def createSP(value: String, classId: String, schemeId: String): StructuredProperty = {
     val sp = new StructuredProperty
-    sp.setQualifier(OafUtils.createQualifier(classId, schemeId))
+    sp.setQualifier(OafMapperUtils.qualifier(classId,classId, schemeId, schemeId))
     sp.setValue(value)
     sp

@@ -291,7 +291,7 @@ object DoiBoostMappingUtil {

   def createSP(value: String, classId: String, schemeId: String, dataInfo: DataInfo): StructuredProperty = {
     val sp = new StructuredProperty
-    sp.setQualifier(OafUtils.createQualifier(classId, schemeId))
+    sp.setQualifier(OafMapperUtils.qualifier(classId,classId, schemeId, schemeId))
     sp.setValue(value)
     sp.setDataInfo(dataInfo)
     sp
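Note on the pattern in the hunks above: the refactor replaces the scholexplorer helper OafUtils with OafMapperUtils from the oaf.utils package. A minimal sketch of the correspondence, assuming only the signatures visible in this diff (qualifier and accessRight each take classid, classname, schemeid, schemename); the values are the ones used above and are illustrative:

    // the removed helper offered a two-argument shortcut:
    //   OafUtils.createQualifier(classId, schemeId)
    // the replacement spells out all four terms, so classid doubles as classname
    // and schemeid doubles as schemename where no label was available
    val q  = OafMapperUtils.qualifier("OPEN", "OPEN", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)
    val ar = OafMapperUtils.accessRight("OPEN", "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)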
@@ -2,7 +2,7 @@ package eu.dnetlib.doiboost.crossref

 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
-import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
+import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
 import eu.dnetlib.dhp.utils.DHPUtils
 import eu.dnetlib.doiboost.DoiBoostMappingUtil._
 import org.apache.commons.lang.StringUtils
@@ -15,8 +15,6 @@ import org.slf4j.{Logger, LoggerFactory}
 import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.util.matching.Regex
-import eu.dnetlib.dhp.schema.scholexplorer.OafUtils
-
 import java.util

 case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
@@ -182,12 +180,12 @@ case object Crossref2Oaf {

     if(has_review != JNothing) {
       instance.setRefereed(
-        OafUtils.createQualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS))
+        OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS))
     }

     instance.setAccessright(getRestrictedQualifier())
-    instance.setInstancetype(OafUtils.createQualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
-    result.setResourcetype(OafUtils.createQualifier(cobjCategory.substring(0, 4),ModelConstants.DNET_DATA_CITE_RESOURCE))
+    instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
+    result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))

     instance.setCollectedfrom(createCrossrefCollectedFrom())
     if (StringUtils.isNotBlank(issuedDate)) {
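The last hunk above also changes behaviour, not just the helper call: setResourcetype previously received only the type code under the DNET_DATA_CITE_RESOURCE scheme, while the new call mirrors setInstancetype. A sketch of the new pattern, assuming (from the substring calls) that cobjCategory is a code followed by a label, e.g. "0001 Article":

    val typeCode  = cobjCategory.substring(0, 4) // e.g. "0001" (illustrative)
    val typeLabel = cobjCategory.substring(5)    // e.g. "Article" (illustrative)
    result.setResourcetype(OafMapperUtils.qualifier(typeCode, typeLabel, ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))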
@@ -0,0 +1,42 @@
+package eu.dnetlib.dhp.sx.graph
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.oaf.Result
+import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary
+import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils
+import org.apache.commons.io.IOUtils
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+object SparkCreateSummaryObject {
+
+  def main(args: Array[String]): Unit = {
+    val log: Logger = LoggerFactory.getLogger(getClass)
+    val conf: SparkConf = new SparkConf()
+    val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/create_summaries_params.json")))
+    parser.parseArgument(args)
+    val spark: SparkSession =
+      SparkSession
+        .builder()
+        .config(conf)
+        .appName(getClass.getSimpleName)
+        .master(parser.get("master")).getOrCreate()
+
+    val sourcePath = parser.get("sourcePath")
+    log.info(s"sourcePath -> $sourcePath")
+    val targetPath = parser.get("targetPath")
+    log.info(s"targetPath -> $targetPath")
+
+    implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result]
+
+    implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
+
+
+    val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result]
+
+    ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).write.mode(SaveMode.Overwrite).save(targetPath)
+
+  }
+
+}
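The new SparkCreateSummaryObject job above is driven by ArgumentApplicationParser and reads three parameters: master, sourcePath and targetPath. A hypothetical local invocation, assuming the long option names defined in create_summaries_params.json (not part of this excerpt) match the keys read via parser.get, with illustrative paths:

    SparkCreateSummaryObject.main(Array(
      "--master", "local[*]",                 // assumed flag name
      "--sourcePath", "/tmp/graph/entities",  // illustrative input path
      "--targetPath", "/tmp/graph/summaries"  // illustrative output path
    ))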
@@ -1,177 +0,0 @@
package eu.dnetlib.dhp.sx.graph.ebi
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.merge.AuthorMerger
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Dataset => OafDataset}
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, DLIUnknown}
|
|
||||||
import org.apache.spark.sql.{Encoder, Encoders}
|
|
||||||
import org.apache.spark.sql.expressions.Aggregator
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
object EBIAggregator {
|
|
||||||
|
|
||||||
def getDatasetAggregator(): Aggregator[(String, OafDataset), OafDataset, OafDataset] = new Aggregator[(String, OafDataset), OafDataset, OafDataset]{
|
|
||||||
|
|
||||||
override def zero: OafDataset = new OafDataset()
|
|
||||||
|
|
||||||
override def reduce(b: OafDataset, a: (String, OafDataset)): OafDataset = {
|
|
||||||
b.mergeFrom(a._2)
|
|
||||||
b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor))
|
|
||||||
if (b.getId == null)
|
|
||||||
b.setId(a._2.getId)
|
|
||||||
b
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
override def merge(wx: OafDataset, wy: OafDataset): OafDataset = {
|
|
||||||
wx.mergeFrom(wy)
|
|
||||||
wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor))
|
|
||||||
if(wx.getId == null && wy.getId.nonEmpty)
|
|
||||||
wx.setId(wy.getId)
|
|
||||||
wx
|
|
||||||
}
|
|
||||||
override def finish(reduction: OafDataset): OafDataset = reduction
|
|
||||||
|
|
||||||
override def bufferEncoder: Encoder[OafDataset] =
|
|
||||||
Encoders.kryo(classOf[OafDataset])
|
|
||||||
|
|
||||||
override def outputEncoder: Encoder[OafDataset] =
|
|
||||||
Encoders.kryo(classOf[OafDataset])
|
|
||||||
}
|
|
||||||
|
|
||||||
def getDLIUnknownAggregator(): Aggregator[(String, DLIUnknown), DLIUnknown, DLIUnknown] = new Aggregator[(String, DLIUnknown), DLIUnknown, DLIUnknown]{
|
|
||||||
|
|
||||||
override def zero: DLIUnknown = new DLIUnknown()
|
|
||||||
|
|
||||||
override def reduce(b: DLIUnknown, a: (String, DLIUnknown)): DLIUnknown = {
|
|
||||||
b.mergeFrom(a._2)
|
|
||||||
if (b.getId == null)
|
|
||||||
b.setId(a._2.getId)
|
|
||||||
b
|
|
||||||
}
|
|
||||||
|
|
||||||
override def merge(wx: DLIUnknown, wy: DLIUnknown): DLIUnknown = {
|
|
||||||
wx.mergeFrom(wy)
|
|
||||||
if(wx.getId == null && wy.getId.nonEmpty)
|
|
||||||
wx.setId(wy.getId)
|
|
||||||
wx
|
|
||||||
}
|
|
||||||
override def finish(reduction: DLIUnknown): DLIUnknown = reduction
|
|
||||||
|
|
||||||
override def bufferEncoder: Encoder[DLIUnknown] =
|
|
||||||
Encoders.kryo(classOf[DLIUnknown])
|
|
||||||
|
|
||||||
override def outputEncoder: Encoder[DLIUnknown] =
|
|
||||||
Encoders.kryo(classOf[DLIUnknown])
|
|
||||||
}
|
|
||||||
|
|
||||||
def getDLIDatasetAggregator(): Aggregator[(String, DLIDataset), DLIDataset, DLIDataset] = new Aggregator[(String, DLIDataset), DLIDataset, DLIDataset]{
|
|
||||||
|
|
||||||
override def zero: DLIDataset = new DLIDataset()
|
|
||||||
|
|
||||||
override def reduce(b: DLIDataset, a: (String, DLIDataset)): DLIDataset = {
|
|
||||||
b.mergeFrom(a._2)
|
|
||||||
b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor))
|
|
||||||
if (b.getId == null)
|
|
||||||
b.setId(a._2.getId)
|
|
||||||
b
|
|
||||||
}
|
|
||||||
|
|
||||||
override def merge(wx: DLIDataset, wy: DLIDataset): DLIDataset = {
|
|
||||||
wx.mergeFrom(wy)
|
|
||||||
wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor))
|
|
||||||
if(wx.getId == null && wy.getId.nonEmpty)
|
|
||||||
wx.setId(wy.getId)
|
|
||||||
wx
|
|
||||||
}
|
|
||||||
override def finish(reduction: DLIDataset): DLIDataset = reduction
|
|
||||||
|
|
||||||
override def bufferEncoder: Encoder[DLIDataset] =
|
|
||||||
Encoders.kryo(classOf[DLIDataset])
|
|
||||||
|
|
||||||
override def outputEncoder: Encoder[DLIDataset] =
|
|
||||||
Encoders.kryo(classOf[DLIDataset])
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def getDLIPublicationAggregator(): Aggregator[(String, DLIPublication), DLIPublication, DLIPublication] = new Aggregator[(String, DLIPublication), DLIPublication, DLIPublication]{
|
|
||||||
|
|
||||||
override def zero: DLIPublication = new DLIPublication()
|
|
||||||
|
|
||||||
override def reduce(b: DLIPublication, a: (String, DLIPublication)): DLIPublication = {
|
|
||||||
b.mergeFrom(a._2)
|
|
||||||
b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor))
|
|
||||||
|
|
||||||
if (b.getId == null)
|
|
||||||
b.setId(a._2.getId)
|
|
||||||
b
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
override def merge(wx: DLIPublication, wy: DLIPublication): DLIPublication = {
|
|
||||||
wx.mergeFrom(wy)
|
|
||||||
wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor))
|
|
||||||
if(wx.getId == null && wy.getId.nonEmpty)
|
|
||||||
wx.setId(wy.getId)
|
|
||||||
wx
|
|
||||||
}
|
|
||||||
override def finish(reduction: DLIPublication): DLIPublication = reduction
|
|
||||||
|
|
||||||
override def bufferEncoder: Encoder[DLIPublication] =
|
|
||||||
Encoders.kryo(classOf[DLIPublication])
|
|
||||||
|
|
||||||
override def outputEncoder: Encoder[DLIPublication] =
|
|
||||||
Encoders.kryo(classOf[DLIPublication])
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def getPublicationAggregator(): Aggregator[(String, Publication), Publication, Publication] = new Aggregator[(String, Publication), Publication, Publication]{
|
|
||||||
|
|
||||||
override def zero: Publication = new Publication()
|
|
||||||
|
|
||||||
override def reduce(b: Publication, a: (String, Publication)): Publication = {
|
|
||||||
b.mergeFrom(a._2)
|
|
||||||
b.setAuthor(AuthorMerger.mergeAuthor(a._2.getAuthor, b.getAuthor))
|
|
||||||
if (b.getId == null)
|
|
||||||
b.setId(a._2.getId)
|
|
||||||
b
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
override def merge(wx: Publication, wy: Publication): Publication = {
|
|
||||||
wx.mergeFrom(wy)
|
|
||||||
wx.setAuthor(AuthorMerger.mergeAuthor(wy.getAuthor, wx.getAuthor))
|
|
||||||
if(wx.getId == null && wy.getId.nonEmpty)
|
|
||||||
wx.setId(wy.getId)
|
|
||||||
wx
|
|
||||||
}
|
|
||||||
override def finish(reduction: Publication): Publication = reduction
|
|
||||||
|
|
||||||
override def bufferEncoder: Encoder[Publication] =
|
|
||||||
Encoders.kryo(classOf[Publication])
|
|
||||||
|
|
||||||
override def outputEncoder: Encoder[Publication] =
|
|
||||||
Encoders.kryo(classOf[Publication])
|
|
||||||
}
|
|
||||||
|
|
||||||
def getRelationAggregator(): Aggregator[(String, Relation), Relation, Relation] = new Aggregator[(String, Relation), Relation, Relation]{
|
|
||||||
|
|
||||||
override def zero: Relation = new Relation()
|
|
||||||
|
|
||||||
override def reduce(b: Relation, a: (String, Relation)): Relation = {
|
|
||||||
a._2
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
override def merge(a: Relation, b: Relation): Relation = {
|
|
||||||
if(b!= null) b else a
|
|
||||||
}
|
|
||||||
override def finish(reduction: Relation): Relation = reduction
|
|
||||||
|
|
||||||
override def bufferEncoder: Encoder[Relation] =
|
|
||||||
Encoders.kryo(classOf[Relation])
|
|
||||||
|
|
||||||
override def outputEncoder: Encoder[Relation] =
|
|
||||||
Encoders.kryo(classOf[Relation])
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -1,248 +0,0 @@
package eu.dnetlib.dhp.sx.graph.ebi
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Author, Instance, Journal, KeyValue, Oaf, Publication, Relation, Dataset => OafDataset}
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils.createQualifier
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication, OafUtils, ProvenaceInfo}
|
|
||||||
import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal}
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
|
||||||
import eu.dnetlib.scholexplorer.relation.RelationMapper
|
|
||||||
import org.apache.commons.io.IOUtils
|
|
||||||
import org.apache.spark.SparkConf
|
|
||||||
import org.apache.spark.sql._
|
|
||||||
import org.json4s
|
|
||||||
import org.json4s.DefaultFormats
|
|
||||||
import org.json4s.JsonAST.{JField, JObject, JString}
|
|
||||||
import org.json4s.jackson.JsonMethods.parse
|
|
||||||
import org.apache.spark.sql.functions._
|
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
|
||||||
|
|
||||||
object SparkAddLinkUpdates {
|
|
||||||
|
|
||||||
val relationMapper: RelationMapper = RelationMapper.load
|
|
||||||
|
|
||||||
|
|
||||||
case class EBILinks(relation:String, pubdate:String, tpid:String, tpidType:String, turl:String, title:String, publisher:String) {}
|
|
||||||
|
|
||||||
|
|
||||||
def generatePubmedDLICollectedFrom(): KeyValue = {
|
|
||||||
OafUtils.generateKeyValue("dli_________::europe_pmc__", "Europe PMC")
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def journalToOAF(pj:PMJournal): Journal = {
|
|
||||||
val j = new Journal
|
|
||||||
j.setIssnPrinted(pj.getIssn)
|
|
||||||
j.setVol(pj.getVolume)
|
|
||||||
j.setName(pj.getTitle)
|
|
||||||
j.setIss(pj.getIssue)
|
|
||||||
j.setDataInfo(OafUtils.generateDataInfo())
|
|
||||||
j
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def pubmedTOPublication(input:PMArticle):DLIPublication = {
|
|
||||||
|
|
||||||
|
|
||||||
val dnetPublicationId = s"50|${DHPUtils.md5(s"${input.getPmid}::pmid")}"
|
|
||||||
|
|
||||||
val p = new DLIPublication
|
|
||||||
p.setId(dnetPublicationId)
|
|
||||||
p.setDataInfo(OafUtils.generateDataInfo())
|
|
||||||
p.setPid(List(OafUtils.createSP(input.getPmid.toLowerCase.trim, "pmid", ModelConstants.DNET_PID_TYPES)).asJava)
|
|
||||||
p.setCompletionStatus("complete")
|
|
||||||
val pi = new ProvenaceInfo
|
|
||||||
pi.setId("dli_________::europe_pmc__")
|
|
||||||
pi.setName( "Europe PMC")
|
|
||||||
pi.setCompletionStatus("complete")
|
|
||||||
pi.setCollectionMode("collected")
|
|
||||||
p.setDlicollectedfrom(List(pi).asJava)
|
|
||||||
p.setCollectedfrom(List(generatePubmedDLICollectedFrom()).asJava)
|
|
||||||
|
|
||||||
if (input.getAuthors != null && input.getAuthors.size() >0) {
|
|
||||||
var aths: List[Author] = List()
|
|
||||||
input.getAuthors.asScala.filter(a=> a!= null).foreach(a => {
|
|
||||||
val c = new Author
|
|
||||||
c.setFullname(a.getFullName)
|
|
||||||
c.setName(a.getForeName)
|
|
||||||
c.setSurname(a.getLastName)
|
|
||||||
aths = aths ::: List(c)
|
|
||||||
})
|
|
||||||
if (aths.nonEmpty)
|
|
||||||
p.setAuthor(aths.asJava)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (input.getJournal != null)
|
|
||||||
p.setJournal(journalToOAF(input.getJournal))
|
|
||||||
p.setTitle(List(OafUtils.createSP(input.getTitle, "main title", ModelConstants.DNET_DATACITE_TITLE)).asJava)
|
|
||||||
p.setDateofacceptance(OafUtils.asField(input.getDate))
|
|
||||||
val i = new Instance
|
|
||||||
i.setCollectedfrom(generatePubmedDLICollectedFrom())
|
|
||||||
i.setDateofacceptance(p.getDateofacceptance)
|
|
||||||
i.setUrl(List(s"https://pubmed.ncbi.nlm.nih.gov/${input.getPmid}").asJava)
|
|
||||||
i.setInstancetype(createQualifier("0001", "Article", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
|
|
||||||
p.setInstance(List(i).asJava)
|
|
||||||
p
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def ebiLinksToOaf(input:(String, String)):List[Oaf] = {
|
|
||||||
val pmid :String = input._1
|
|
||||||
val input_json :String = input._2
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
|
||||||
lazy val json: json4s.JValue = parse(input_json)
|
|
||||||
|
|
||||||
|
|
||||||
val targets:List[EBILinks] = for {
|
|
||||||
JObject(link) <- json \\ "Category" \\ "Link"
|
|
||||||
JField("PublicationDate", JString(pubdate)) <- link
|
|
||||||
JField("RelationshipType", JObject(relationshipType)) <- link
|
|
||||||
JField("Name", JString(relname)) <- relationshipType
|
|
||||||
JField("Target", JObject(target)) <- link
|
|
||||||
JField("Identifier", JObject(identifier)) <- target
|
|
||||||
JField("ID", JString(tpid)) <- identifier
|
|
||||||
JField("IDScheme", JString(tpidtype)) <- identifier
|
|
||||||
JField("IDURL", JString(turl)) <- identifier
|
|
||||||
JField("Title", JString(title)) <- target
|
|
||||||
JField("Publisher", JObject(pub)) <- target
|
|
||||||
JField("Name", JString(publisher)) <- pub
|
|
||||||
} yield EBILinks(relname, pubdate, tpid, tpidtype, turl,title, publisher)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val dnetPublicationId = s"50|${DHPUtils.md5(s"$pmid::pmid")}"
|
|
||||||
|
|
||||||
targets.flatMap(l => {
|
|
||||||
val relation = new Relation
|
|
||||||
val inverseRelation = new Relation
|
|
||||||
val targetDnetId = s"50|${DHPUtils.md5(s"${l.tpid.toLowerCase.trim}::${l.tpidType.toLowerCase.trim}")}"
|
|
||||||
val relInfo = relationMapper.get(l.relation.toLowerCase)
|
|
||||||
val relationSemantic = relInfo.getOriginal
|
|
||||||
val inverseRelationSemantic = relInfo.getInverse
|
|
||||||
|
|
||||||
relation.setSource(dnetPublicationId)
|
|
||||||
relation.setTarget(targetDnetId)
|
|
||||||
relation.setRelClass("datacite")
|
|
||||||
relation.setRelType(relationSemantic)
|
|
||||||
relation.setCollectedfrom(List(generatePubmedDLICollectedFrom()).asJava)
|
|
||||||
|
|
||||||
inverseRelation.setSource(targetDnetId)
|
|
||||||
inverseRelation.setTarget(dnetPublicationId)
|
|
||||||
inverseRelation.setRelClass("datacite")
|
|
||||||
inverseRelation.setRelType(inverseRelationSemantic)
|
|
||||||
inverseRelation.setCollectedfrom(List(generatePubmedDLICollectedFrom()).asJava)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val d = new DLIDataset
|
|
||||||
d.setId(targetDnetId)
|
|
||||||
d.setDataInfo(OafUtils.generateDataInfo())
|
|
||||||
d.setPid(List(OafUtils.createSP(l.tpid.toLowerCase.trim, l.tpidType.toLowerCase.trim, ModelConstants.DNET_PID_TYPES)).asJava)
|
|
||||||
d.setCompletionStatus("complete")
|
|
||||||
val pi = new ProvenaceInfo
|
|
||||||
pi.setId("dli_________::europe_pmc__")
|
|
||||||
pi.setName( "Europe PMC")
|
|
||||||
pi.setCompletionStatus("complete")
|
|
||||||
pi.setCollectionMode("collected")
|
|
||||||
d.setDlicollectedfrom(List(pi).asJava)
|
|
||||||
d.setCollectedfrom(List(generatePubmedDLICollectedFrom()).asJava)
|
|
||||||
d.setPublisher(OafUtils.asField(l.publisher))
|
|
||||||
d.setTitle(List(OafUtils.createSP(l.title, "main title", ModelConstants.DNET_DATACITE_TITLE)).asJava)
|
|
||||||
d.setDateofacceptance(OafUtils.asField(l.pubdate))
|
|
||||||
val i = new Instance
|
|
||||||
i.setCollectedfrom(generatePubmedDLICollectedFrom())
|
|
||||||
i.setDateofacceptance(d.getDateofacceptance)
|
|
||||||
i.setUrl(List(l.turl).asJava)
|
|
||||||
i.setInstancetype(createQualifier("0021", "Dataset", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE))
|
|
||||||
d.setInstance(List(i).asJava)
|
|
||||||
List(relation, inverseRelation, d)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def main(args: Array[String]): Unit = {
|
|
||||||
val conf: SparkConf = new SparkConf()
|
|
||||||
val parser = new ArgumentApplicationParser(IOUtils.toString(SparkEBILinksToOaf.getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/ebi/ebi_to_df_params.json")))
|
|
||||||
parser.parseArgument(args)
|
|
||||||
val spark: SparkSession =
|
|
||||||
SparkSession
|
|
||||||
.builder()
|
|
||||||
.config(conf)
|
|
||||||
.appName(SparkEBILinksToOaf.getClass.getSimpleName)
|
|
||||||
.master(parser.get("master")).getOrCreate()
|
|
||||||
|
|
||||||
|
|
||||||
val workingPath = parser.get("workingPath")
|
|
||||||
implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf])
|
|
||||||
implicit val oafpubEncoder: Encoder[Publication] = Encoders.kryo[Publication]
|
|
||||||
implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation])
|
|
||||||
implicit val datEncoder: Encoder[DLIDataset] = Encoders.kryo(classOf[DLIDataset])
|
|
||||||
implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo(classOf[DLIPublication])
|
|
||||||
implicit val atEncoder: Encoder[Author] = Encoders.kryo(classOf[Author])
|
|
||||||
implicit val strEncoder:Encoder[String] = Encoders.STRING
|
|
||||||
implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle])
|
|
||||||
implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal])
|
|
||||||
implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor])
|
|
||||||
|
|
||||||
|
|
||||||
val ds:Dataset[(String,String)] = spark.read.load(s"$workingPath/baseline_links_updates").as[(String,String)](Encoders.tuple(Encoders.STRING, Encoders.STRING))
|
|
||||||
|
|
||||||
ds.flatMap(l =>ebiLinksToOaf(l)).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_oaf")
|
|
||||||
|
|
||||||
ds.filter(s => s.isInstanceOf)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val oDataset:Dataset[Oaf] = spark.read.load(s"$workingPath/baseline_links_updates_oaf").as[Oaf]
|
|
||||||
|
|
||||||
oDataset.filter(p =>p.isInstanceOf[Relation]).map(p => p.asInstanceOf[Relation]).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_relation")
|
|
||||||
oDataset.filter(p =>p.isInstanceOf[DLIDataset]).map(p => p.asInstanceOf[DLIDataset]).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_dataset")
|
|
||||||
|
|
||||||
|
|
||||||
val idPublicationSolved:Dataset[String] = spark.read.load(s"$workingPath/baseline_links_updates").where(col("links").isNotNull).select("pmid").as[String]
|
|
||||||
val baseline:Dataset[(String, PMArticle)]= spark.read.load(s"$workingPath/baseline_dataset").as[PMArticle].map(p=> (p.getPmid, p))(Encoders.tuple(strEncoder,PMEncoder))
|
|
||||||
idPublicationSolved.joinWith(baseline, idPublicationSolved("pmid").equalTo(baseline("_1"))).map(k => pubmedTOPublication(k._2._2)).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_links_updates_publication")
|
|
||||||
|
|
||||||
|
|
||||||
val pmaDatasets = spark.read.load("/user/sandro.labruzzo/scholix/EBI/ebi_garr/baseline_dataset").as[PMArticle]
|
|
||||||
|
|
||||||
pmaDatasets.map(p => pubmedTOPublication(p)).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_publication_all")
|
|
||||||
|
|
||||||
val pubs: Dataset[(String,Publication)] = spark.read.load("/user/sandro.labruzzo/scholix/EBI/publication").as[Publication].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING,oafpubEncoder))
|
|
||||||
val pubdate:Dataset[(String,DLIPublication)] = spark.read.load(s"$workingPath/baseline_publication_all").as[DLIPublication].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING,pubEncoder))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
pubs.joinWith(pubdate, pubs("_1").equalTo(pubdate("_1"))).map(k => k._2._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_publication_ebi")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val dt : Dataset[DLIDataset] = spark.read.load(s"$workingPath/dataset").as[DLIDataset]
|
|
||||||
val update : Dataset[DLIDataset] = spark.read.load(s"$workingPath/ebi_garr/baseline_links_updates_dataset").as[DLIDataset]
|
|
||||||
|
|
||||||
|
|
||||||
dt.union(update).map(d => (d.getId,d))(Encoders.tuple(Encoders.STRING, datEncoder))
|
|
||||||
.groupByKey(_._1)(Encoders.STRING)
|
|
||||||
.agg(EBIAggregator.getDLIDatasetAggregator().toColumn)
|
|
||||||
.map(p => p._2)
|
|
||||||
.write.mode(SaveMode.Overwrite).save(s"$workingPath/baseline_dataset_ebi")
|
|
||||||
|
|
||||||
|
|
||||||
val rel: Dataset[Relation] = spark.read.load(s"$workingPath/relation").as[Relation]
|
|
||||||
val relupdate : Dataset[Relation] = spark.read.load(s"$workingPath/ebi_garr/baseline_links_updates_relation").as[Relation]
|
|
||||||
|
|
||||||
|
|
||||||
rel.union(relupdate)
|
|
||||||
.map(d => (s"${d.getSource}::${d.getRelType}::${d.getTarget}", d))(Encoders.tuple(Encoders.STRING, relEncoder))
|
|
||||||
.groupByKey(_._1)(Encoders.STRING)
|
|
||||||
.agg(EBIAggregator.getRelationAggregator().toColumn)
|
|
||||||
.map(p => p._2)
|
|
||||||
.write.mode(SaveMode.Overwrite)
|
|
||||||
.save(s"$workingPath/baseline_relation_ebi")
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -1,223 +0,0 @@
|
|
||||||
package eu.dnetlib.dhp.sx.graph.parser;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import javax.xml.stream.XMLStreamReader;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIUnknown;
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.ProvenaceInfo;
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
import eu.dnetlib.scholexplorer.relation.RelInfo;
|
|
||||||
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
|
||||||
|
|
||||||
public abstract class AbstractScholexplorerParser {
|
|
||||||
|
|
||||||
protected static final Log log = LogFactory.getLog(AbstractScholexplorerParser.class);
|
|
||||||
static final Pattern pattern = Pattern.compile("10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$", Pattern.CASE_INSENSITIVE);
|
|
||||||
private final List<String> datasetSubTypes = Arrays
|
|
||||||
.asList(
|
|
||||||
"dataset",
|
|
||||||
"software",
|
|
||||||
"film",
|
|
||||||
"sound",
|
|
||||||
"physicalobject",
|
|
||||||
"audiovisual",
|
|
||||||
"collection",
|
|
||||||
"other",
|
|
||||||
"study",
|
|
||||||
"metadata");
|
|
||||||
|
|
||||||
public abstract List<Oaf> parseObject(final String record, final RelationMapper relMapper);
|
|
||||||
|
|
||||||
protected Map<String, String> getAttributes(final XMLStreamReader parser) {
|
|
||||||
final Map<String, String> attributesMap = new HashMap<>();
|
|
||||||
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
|
||||||
attributesMap.put(parser.getAttributeLocalName(i), parser.getAttributeValue(i));
|
|
||||||
}
|
|
||||||
return attributesMap;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected List<StructuredProperty> extractSubject(List<VtdUtilityParser.Node> subjects) {
|
|
||||||
final List<StructuredProperty> subjectResult = new ArrayList<>();
|
|
||||||
if (subjects != null && subjects.size() > 0) {
|
|
||||||
subjects
|
|
||||||
.forEach(
|
|
||||||
subjectMap -> {
|
|
||||||
final StructuredProperty subject = new StructuredProperty();
|
|
||||||
subject.setValue(subjectMap.getTextValue());
|
|
||||||
final Qualifier schema = new Qualifier();
|
|
||||||
schema.setClassid("dnet:subject");
|
|
||||||
schema.setClassname("dnet:subject");
|
|
||||||
schema.setSchemeid(subjectMap.getAttributes().get("subjectScheme"));
|
|
||||||
schema.setSchemename(subjectMap.getAttributes().get("subjectScheme"));
|
|
||||||
subject.setQualifier(schema);
|
|
||||||
subjectResult.add(subject);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return subjectResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected StructuredProperty extractIdentifier(
|
|
||||||
List<VtdUtilityParser.Node> identifierType, final String fieldName) {
|
|
||||||
final StructuredProperty pid = new StructuredProperty();
|
|
||||||
if (identifierType != null && identifierType.size() > 0) {
|
|
||||||
final VtdUtilityParser.Node result = identifierType.get(0);
|
|
||||||
pid.setValue(result.getTextValue());
|
|
||||||
final Qualifier pidType = new Qualifier();
|
|
||||||
pidType.setClassname(result.getAttributes().get(fieldName));
|
|
||||||
pidType.setClassid(result.getAttributes().get(fieldName));
|
|
||||||
pidType.setSchemename(ModelConstants.DNET_PID_TYPES);
|
|
||||||
pidType.setSchemeid(ModelConstants.DNET_PID_TYPES);
|
|
||||||
pid.setQualifier(pidType);
|
|
||||||
return pid;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void inferPid(final StructuredProperty input) {
|
|
||||||
final Matcher matcher = pattern.matcher(input.getValue());
|
|
||||||
if (matcher.find()) {
|
|
||||||
input.setValue(matcher.group());
|
|
||||||
if (input.getQualifier() == null) {
|
|
||||||
input.setQualifier(new Qualifier());
|
|
||||||
input.getQualifier().setSchemename(ModelConstants.DNET_PID_TYPES);
|
|
||||||
input.getQualifier().setSchemeid(ModelConstants.DNET_PID_TYPES);
|
|
||||||
}
|
|
||||||
input.getQualifier().setClassid("doi");
|
|
||||||
input.getQualifier().setClassname("doi");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected String generateId(final String pid, final String pidType, final String entityType) {
|
|
||||||
String type;
|
|
||||||
switch (entityType) {
|
|
||||||
case "publication":
|
|
||||||
type = "50|";
|
|
||||||
break;
|
|
||||||
case "dataset":
|
|
||||||
type = "60|";
|
|
||||||
break;
|
|
||||||
case "unknown":
|
|
||||||
type = "70|";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("unexpected value " + entityType);
|
|
||||||
}
|
|
||||||
if ("dnet".equalsIgnoreCase(pidType))
|
|
||||||
return type + StringUtils.substringAfter(pid, "::");
|
|
||||||
|
|
||||||
return type
|
|
||||||
+ DHPUtils
|
|
||||||
.md5(
|
|
||||||
String.format("%s::%s", pid.toLowerCase().trim(), pidType.toLowerCase().trim()));
|
|
||||||
}
|
|
||||||
|
|
||||||
protected DLIUnknown createUnknownObject(
|
|
||||||
final String pid,
|
|
||||||
final String pidType,
|
|
||||||
final KeyValue cf,
|
|
||||||
final DataInfo di,
|
|
||||||
final String dateOfCollection) {
|
|
||||||
final DLIUnknown uk = new DLIUnknown();
|
|
||||||
uk.setId(generateId(pid, pidType, "unknown"));
|
|
||||||
ProvenaceInfo pi = new ProvenaceInfo();
|
|
||||||
pi.setId(cf.getKey());
|
|
||||||
pi.setName(cf.getValue());
|
|
||||||
pi.setCompletionStatus("incomplete");
|
|
||||||
uk.setDataInfo(di);
|
|
||||||
uk.setDlicollectedfrom(Collections.singletonList(pi));
|
|
||||||
final StructuredProperty sourcePid = new StructuredProperty();
|
|
||||||
sourcePid.setValue(pid);
|
|
||||||
final Qualifier pt = new Qualifier();
|
|
||||||
pt.setClassname(pidType);
|
|
||||||
pt.setClassid(pidType);
|
|
||||||
pt.setSchemename(ModelConstants.DNET_PID_TYPES);
|
|
||||||
pt.setSchemeid(ModelConstants.DNET_PID_TYPES);
|
|
||||||
sourcePid.setQualifier(pt);
|
|
||||||
uk.setPid(Collections.singletonList(sourcePid));
|
|
||||||
uk.setDateofcollection(dateOfCollection);
|
|
||||||
return uk;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected Qualifier generateQualifier(final String classId, final String className, final String schemeId,
|
|
||||||
final String schemeName) {
|
|
||||||
final Qualifier q = new Qualifier();
|
|
||||||
q.setClassid(classId);
|
|
||||||
q.setClassid(className);
|
|
||||||
q.setSchemeid(schemeId);
|
|
||||||
q.setSchemename(schemeName);
|
|
||||||
return q;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void generateRelations(
|
|
||||||
RelationMapper relationMapper,
|
|
||||||
Result parsedObject,
|
|
||||||
List<Oaf> result,
|
|
||||||
DataInfo di,
|
|
||||||
String dateOfCollection,
|
|
||||||
List<VtdUtilityParser.Node> relatedIdentifiers) {
|
|
||||||
if (relatedIdentifiers != null) {
|
|
||||||
result
|
|
||||||
.addAll(
|
|
||||||
relatedIdentifiers
|
|
||||||
.stream()
|
|
||||||
.flatMap(
|
|
||||||
n -> {
|
|
||||||
final List<Relation> rels = new ArrayList<>();
|
|
||||||
Relation r = new Relation();
|
|
||||||
r.setSource(parsedObject.getId());
|
|
||||||
final String relatedPid = n.getTextValue();
|
|
||||||
final String relatedPidType = n.getAttributes().get("relatedIdentifierType");
|
|
||||||
final String relatedType = n.getAttributes().getOrDefault("entityType", "unknown");
|
|
||||||
String relationSemantic = n.getAttributes().get("relationType");
|
|
||||||
String inverseRelation;
|
|
||||||
final String targetId = generateId(relatedPid, relatedPidType, relatedType);
|
|
||||||
if (relationMapper.containsKey(relationSemantic.toLowerCase())) {
|
|
||||||
RelInfo relInfo = relationMapper.get(relationSemantic.toLowerCase());
|
|
||||||
relationSemantic = relInfo.getOriginal();
|
|
||||||
inverseRelation = relInfo.getInverse();
|
|
||||||
} else {
|
|
||||||
relationSemantic = "Unknown";
|
|
||||||
inverseRelation = "Unknown";
|
|
||||||
}
|
|
||||||
r.setTarget(targetId);
|
|
||||||
r.setRelType(relationSemantic);
|
|
||||||
r.setRelClass("datacite");
|
|
||||||
r.setCollectedfrom(parsedObject.getCollectedfrom());
|
|
||||||
r.setDataInfo(di);
|
|
||||||
rels.add(r);
|
|
||||||
r = new Relation();
|
|
||||||
r.setDataInfo(di);
|
|
||||||
r.setSource(targetId);
|
|
||||||
r.setTarget(parsedObject.getId());
|
|
||||||
r.setRelType(inverseRelation);
|
|
||||||
r.setRelClass("datacite");
|
|
||||||
r.setCollectedfrom(parsedObject.getCollectedfrom());
|
|
||||||
rels.add(r);
|
|
||||||
if ("unknown".equalsIgnoreCase(relatedType))
|
|
||||||
result
|
|
||||||
.add(
|
|
||||||
createUnknownObject(
|
|
||||||
relatedPid,
|
|
||||||
relatedPidType,
|
|
||||||
parsedObject.getCollectedfrom().get(0),
|
|
||||||
di,
|
|
||||||
dateOfCollection));
|
|
||||||
return rels.stream();
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -1,340 +0,0 @@
|
|
||||||
package eu.dnetlib.dhp.sx.graph.parser;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import com.ximpleware.AutoPilot;
|
|
||||||
import com.ximpleware.VTDGen;
|
|
||||||
import com.ximpleware.VTDNav;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
|
||||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser.Node;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIDataset;
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.ProvenaceInfo;
|
|
||||||
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
|
||||||
|
|
||||||
public class DatasetScholexplorerParser extends AbstractScholexplorerParser {
|
|
||||||
@Override
|
|
||||||
public List<Oaf> parseObject(String record, final RelationMapper relationMapper) {
|
|
||||||
try {
|
|
||||||
final DLIDataset parsedObject = new DLIDataset();
|
|
||||||
final VTDGen vg = new VTDGen();
|
|
||||||
vg.setDoc(record.getBytes());
|
|
||||||
final List<Oaf> result = new ArrayList<>();
|
|
||||||
vg.parse(true);
|
|
||||||
|
|
||||||
final VTDNav vn = vg.getNav();
|
|
||||||
final AutoPilot ap = new AutoPilot(vn);
|
|
||||||
|
|
||||||
DataInfo di = new DataInfo();
|
|
||||||
di.setTrust("0.9");
|
|
||||||
di.setDeletedbyinference(false);
|
|
||||||
di.setInvisible(false);
|
|
||||||
parsedObject.setDataInfo(di);
|
|
||||||
|
|
||||||
parsedObject
|
|
||||||
.setOriginalId(
|
|
||||||
Collections
|
|
||||||
.singletonList(
|
|
||||||
VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='recordIdentifier']")));
|
|
||||||
|
|
||||||
parsedObject
|
|
||||||
.setOriginalObjIdentifier(
|
|
||||||
VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='objIdentifier']"));
|
|
||||||
String dateOfCollection = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='dateOfCollection']");
|
|
||||||
parsedObject.setDateofcollection(dateOfCollection);
|
|
||||||
|
|
||||||
final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
|
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(resolvedDate)) {
|
|
||||||
StructuredProperty currentDate = new StructuredProperty();
|
|
||||||
currentDate.setValue(resolvedDate);
|
|
||||||
final Qualifier dateQualifier = new Qualifier();
|
|
||||||
dateQualifier.setClassname("resolvedDate");
|
|
||||||
dateQualifier.setClassid("resolvedDate");
|
|
||||||
dateQualifier.setSchemename("dnet::date");
|
|
||||||
dateQualifier.setSchemeid("dnet::date");
|
|
||||||
currentDate.setQualifier(dateQualifier);
|
|
||||||
parsedObject.setRelevantdate(Collections.singletonList(currentDate));
|
|
||||||
}
|
|
||||||
final String completionStatus = VtdUtilityParser
|
|
||||||
.getSingleValue(ap, vn, "//*[local-name()='completionStatus']");
|
|
||||||
final String provisionMode = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='provisionMode']");
|
|
||||||
|
|
||||||
final String publisher = VtdUtilityParser
|
|
||||||
.getSingleValue(
|
|
||||||
ap, vn, "//*[local-name()='resource']/*[local-name()='publisher']");
|
|
||||||
|
|
||||||
List<VtdUtilityParser.Node> collectedFromNodes = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='collectedFrom']",
|
|
||||||
Arrays.asList("name", "id", "mode", "completionStatus"));
|
|
||||||
|
|
||||||
List<VtdUtilityParser.Node> resolvededFromNodes = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='resolvedFrom']",
|
|
||||||
Arrays.asList("name", "id", "mode", "completionStatus"));
|
|
||||||
|
|
||||||
Field<String> pf = new Field<>();
|
|
||||||
pf.setValue(publisher);
|
|
||||||
|
|
||||||
parsedObject.setPublisher(pf);
|
|
||||||
final List<ProvenaceInfo> provenances = new ArrayList<>();
|
|
||||||
if (collectedFromNodes != null && collectedFromNodes.size() > 0) {
|
|
||||||
collectedFromNodes
|
|
||||||
.forEach(
|
|
||||||
it -> {
|
|
||||||
final ProvenaceInfo provenance = new ProvenaceInfo();
|
|
||||||
provenance.setId(it.getAttributes().get("id"));
|
|
||||||
provenance.setName(it.getAttributes().get("name"));
|
|
||||||
provenance.setCollectionMode(provisionMode);
|
|
||||||
provenance.setCompletionStatus(it.getAttributes().get("completionStatus"));
|
|
||||||
provenances.add(provenance);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (resolvededFromNodes != null && resolvededFromNodes.size() > 0) {
|
|
||||||
resolvededFromNodes
|
|
||||||
.forEach(
|
|
||||||
it -> {
|
|
||||||
final ProvenaceInfo provenance = new ProvenaceInfo();
|
|
||||||
provenance.setId(it.getAttributes().get("id"));
|
|
||||||
provenance.setName(it.getAttributes().get("name"));
|
|
||||||
provenance.setCollectionMode("resolved");
|
|
||||||
provenance.setCompletionStatus(it.getAttributes().get("completionStatus"));
|
|
||||||
provenances.add(provenance);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
parsedObject.setDlicollectedfrom(provenances);
|
|
||||||
parsedObject
|
|
||||||
.setCollectedfrom(
|
|
||||||
parsedObject
|
|
||||||
.getDlicollectedfrom()
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
p -> {
|
|
||||||
final KeyValue cf = new KeyValue();
|
|
||||||
cf.setKey(p.getId());
|
|
||||||
cf.setValue(p.getName());
|
|
||||||
return cf;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
parsedObject
|
|
||||||
.setCompletionStatus(
|
|
||||||
VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='completionStatus']"));
|
|
||||||
|
|
||||||
final List<Node> identifierType = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='resource']/*[local-name()='identifier']",
|
|
||||||
Collections.singletonList("identifierType"));
|
|
||||||
|
|
||||||
StructuredProperty currentPid = extractIdentifier(identifierType, "identifierType");
|
|
||||||
if (currentPid == null)
|
|
||||||
return null;
|
|
||||||
inferPid(currentPid);
|
|
||||||
parsedObject.setPid(Collections.singletonList(currentPid));
|
|
||||||
|
|
||||||
String resolvedURL = null;
|
|
||||||
|
|
||||||
switch (currentPid.getQualifier().getClassname().toLowerCase()) {
|
|
||||||
case "uniprot":
|
|
||||||
resolvedURL = "https://www.uniprot.org/uniprot/" + currentPid.getValue();
|
|
||||||
break;
|
|
||||||
case "ena":
|
|
||||||
if (StringUtils.isNotBlank(currentPid.getValue()) && currentPid.getValue().length() > 7)
|
|
||||||
resolvedURL = "https://www.ebi.ac.uk/ena/data/view/" + currentPid.getValue().substring(0, 8);
|
|
||||||
break;
|
|
||||||
case "chembl":
|
|
||||||
resolvedURL = "https://www.ebi.ac.uk/chembl/compound_report_card/" + currentPid.getValue();
|
|
||||||
break;
|
|
||||||
|
|
||||||
case "ncbi-n":
|
|
||||||
resolvedURL = "https://www.ncbi.nlm.nih.gov/nuccore/" + currentPid.getValue();
|
|
||||||
break;
|
|
||||||
case "ncbi-p":
|
|
||||||
resolvedURL = "https://www.ncbi.nlm.nih.gov/nuccore/" + currentPid.getValue();
|
|
||||||
break;
|
|
||||||
case "genbank":
|
|
||||||
resolvedURL = "https://www.ncbi.nlm.nih.gov/nuccore/" + currentPid.getValue();
|
|
||||||
break;
|
|
||||||
case "pdb":
|
|
||||||
resolvedURL = "https://www.ncbi.nlm.nih.gov/nuccore/" + currentPid.getValue();
|
|
||||||
break;
|
|
||||||
case "url":
|
|
||||||
resolvedURL = currentPid.getValue();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String sourceId = generateId(
|
|
||||||
currentPid.getValue(), currentPid.getQualifier().getClassid(), "dataset");
|
|
||||||
parsedObject.setId(sourceId);
|
|
||||||
|
|
||||||
List<String> descs = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='description']");
|
|
||||||
if (descs != null && descs.size() > 0)
|
|
||||||
parsedObject
|
|
||||||
.setDescription(
|
|
||||||
descs
|
|
||||||
.stream()
|
|
||||||
// .map(it -> it.length() < 10000 ? it : it.substring(0, 10000))
|
|
||||||
.map(
|
|
||||||
it -> {
|
|
||||||
final Field<String> d = new Field<>();
|
|
||||||
d.setValue(it);
|
|
||||||
return d;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
|
|
||||||
final List<Node> relatedIdentifiers = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='relatedIdentifier']",
|
|
||||||
Arrays
|
|
||||||
.asList(
|
|
||||||
"relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
|
|
||||||
|
|
||||||
generateRelations(
|
|
||||||
relationMapper, parsedObject, result, di, dateOfCollection, relatedIdentifiers);
|
|
||||||
|
|
||||||
final List<Node> hostedBy = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap, vn, "//*[local-name()='hostedBy']", Arrays.asList("id", "name"));
|
|
||||||
|
|
||||||
if (hostedBy != null) {
|
|
||||||
parsedObject
|
|
||||||
.setInstance(
|
|
||||||
hostedBy
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
it -> {
|
|
||||||
final Instance i = new Instance();
|
|
||||||
i.setUrl(Collections.singletonList(currentPid.getValue()));
|
|
||||||
KeyValue h = new KeyValue();
|
|
||||||
i.setHostedby(h);
|
|
||||||
h.setKey(it.getAttributes().get("id"));
|
|
||||||
h.setValue(it.getAttributes().get("name"));
|
|
||||||
return i;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
List<StructuredProperty> subjects = extractSubject(
|
|
||||||
VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='resource']//*[local-name()='subject']",
|
|
||||||
Collections.singletonList("subjectScheme")));
|
|
||||||
|
|
||||||
parsedObject.setSubject(subjects);
|
|
||||||
|
|
||||||
Qualifier q = new Qualifier();
|
|
||||||
q.setClassname("dataset");
|
|
||||||
q.setClassid("dataset");
|
|
||||||
q.setSchemename("dataset");
|
|
||||||
q.setSchemeid("dataset");
|
|
||||||
parsedObject.setResulttype(q);
|
|
||||||
|
|
||||||
parsedObject.setCompletionStatus(completionStatus);
|
|
||||||
|
|
||||||
final List<String> creators = VtdUtilityParser
|
|
||||||
.getTextValue(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='resource']//*[local-name()='creator']/*[local-name()='creatorName']");
|
|
||||||
if (creators != null && creators.size() > 0) {
|
|
||||||
parsedObject
|
|
||||||
.setAuthor(
|
|
||||||
creators
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
a -> {
|
|
||||||
final Author author = new Author();
|
|
||||||
author.setFullname(a);
|
|
||||||
return author;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
final List<String> titles = VtdUtilityParser
|
|
||||||
.getTextValue(
|
|
||||||
ap, vn, "//*[local-name()='resource']//*[local-name()='title']");
|
|
||||||
if (titles != null && titles.size() > 0) {
|
|
||||||
parsedObject
|
|
||||||
.setTitle(
|
|
||||||
titles
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
t -> {
|
|
||||||
final StructuredProperty st = new StructuredProperty();
|
|
||||||
st.setValue(t);
|
|
||||||
st.setQualifier(ModelConstants.MAIN_TITLE_QUALIFIER);
|
|
||||||
return st;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
final List<String> dates = VtdUtilityParser
|
|
||||||
.getTextValue(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='resource']/*[local-name()='dates']/*[local-name()='date']");
|
|
||||||
|
|
||||||
if (dates != null && dates.size() > 0) {
|
|
||||||
parsedObject
|
|
||||||
.setRelevantdate(
|
|
||||||
dates
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
cd -> {
|
|
||||||
StructuredProperty date = new StructuredProperty();
|
|
||||||
date.setValue(cd);
|
|
||||||
final Qualifier dq = new Qualifier();
|
|
||||||
dq.setClassname("date");
|
|
||||||
dq.setClassid("date");
|
|
||||||
dq.setSchemename("dnet::date");
|
|
||||||
dq.setSchemeid("dnet::date");
|
|
||||||
date.setQualifier(dq);
|
|
||||||
return date;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// TERRIBLE HACK TO AVOID EMPTY COLLECTED FROM
|
|
||||||
if (parsedObject.getDlicollectedfrom() == null) {
|
|
||||||
|
|
||||||
final KeyValue cf = new KeyValue();
|
|
||||||
cf.setKey("dli_________::europe_pmc__");
|
|
||||||
cf.setValue("Europe PMC");
|
|
||||||
parsedObject.setCollectedfrom(Collections.singletonList(cf));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(resolvedURL)) {
|
|
||||||
Instance i = new Instance();
|
|
||||||
i.setCollectedfrom(parsedObject.getCollectedfrom().get(0));
|
|
||||||
i.setUrl(Collections.singletonList(resolvedURL));
|
|
||||||
parsedObject.setInstance(Collections.singletonList(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
result.add(parsedObject);
|
|
||||||
return result;
|
|
||||||
} catch (Throwable e) {
|
|
||||||
log.error("Error on parsing record " + record, e);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -1,264 +0,0 @@
|
|
||||||
package eu.dnetlib.dhp.sx.graph.parser;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import com.ximpleware.AutoPilot;
|
|
||||||
import com.ximpleware.VTDGen;
|
|
||||||
import com.ximpleware.VTDNav;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
|
||||||
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser.Node;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication;
|
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.ProvenaceInfo;
|
|
||||||
import eu.dnetlib.scholexplorer.relation.RelationMapper;
|
|
||||||
|
|
||||||
public class PublicationScholexplorerParser extends AbstractScholexplorerParser {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<Oaf> parseObject(final String record, final RelationMapper relationMapper) {
|
|
||||||
try {
|
|
||||||
final List<Oaf> result = new ArrayList<>();
|
|
||||||
final DLIPublication parsedObject = new DLIPublication();
|
|
||||||
final VTDGen vg = new VTDGen();
|
|
||||||
vg.setDoc(record.getBytes());
|
|
||||||
vg.parse(true);
|
|
||||||
|
|
||||||
final VTDNav vn = vg.getNav();
|
|
||||||
final AutoPilot ap = new AutoPilot(vn);
|
|
||||||
|
|
||||||
final DataInfo di = new DataInfo();
|
|
||||||
di.setTrust("0.9");
|
|
||||||
di.setDeletedbyinference(false);
|
|
||||||
di.setInvisible(false);
|
|
||||||
|
|
||||||
String dateOfCollection = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='dateOfCollection']");
|
|
||||||
parsedObject.setDateofcollection(dateOfCollection);
|
|
||||||
|
|
||||||
final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
|
|
||||||
parsedObject
|
|
||||||
.setOriginalId(
|
|
||||||
Collections
|
|
||||||
.singletonList(
|
|
||||||
VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='recordIdentifier']")));
|
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(resolvedDate)) {
|
|
||||||
StructuredProperty currentDate = new StructuredProperty();
|
|
||||||
currentDate.setValue(resolvedDate);
|
|
||||||
final Qualifier dateQualifier = new Qualifier();
|
|
||||||
dateQualifier.setClassname("resolvedDate");
|
|
||||||
dateQualifier.setClassid("resolvedDate");
|
|
||||||
dateQualifier.setSchemename("dnet::date");
|
|
||||||
dateQualifier.setSchemeid("dnet::date");
|
|
||||||
currentDate.setQualifier(dateQualifier);
|
|
||||||
parsedObject.setRelevantdate(Collections.singletonList(currentDate));
|
|
||||||
}
|
|
||||||
|
|
||||||
final List<Node> pid = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap, vn, "//*[local-name()='pid']", Arrays.asList("type"));
|
|
||||||
|
|
||||||
StructuredProperty currentPid = extractIdentifier(pid, "type");
|
|
||||||
if (currentPid == null)
|
|
||||||
return null;
|
|
||||||
inferPid(currentPid);
|
|
||||||
parsedObject.setPid(Collections.singletonList(currentPid));
|
|
||||||
final String sourceId = generateId(
|
|
||||||
currentPid.getValue(), currentPid.getQualifier().getClassid(), "publication");
|
|
||||||
parsedObject.setId(sourceId);
|
|
||||||
|
|
||||||
parsedObject
|
|
||||||
.setOriginalObjIdentifier(
|
|
||||||
VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='objIdentifier']"));
|
|
||||||
|
|
||||||
String provisionMode = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='provisionMode']");
|
|
||||||
|
|
||||||
List<Node> collectedFromNodes = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='collectedFrom']",
|
|
||||||
Arrays.asList("name", "id", "mode", "completionStatus"));
|
|
||||||
|
|
||||||
List<Node> resolvededFromNodes = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='resolvedFrom']",
|
|
||||||
Arrays.asList("name", "id", "mode", "completionStatus"));
|
|
||||||
|
|
||||||
final String publisher = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='publisher']");
|
|
||||||
Field<String> pf = new Field<>();
|
|
||||||
pf.setValue(publisher);
|
|
||||||
|
|
||||||
parsedObject.setPublisher(pf);
|
|
||||||
final List<ProvenaceInfo> provenances = new ArrayList<>();
|
|
||||||
if (collectedFromNodes != null && collectedFromNodes.size() > 0) {
|
|
||||||
collectedFromNodes
|
|
||||||
.forEach(
|
|
||||||
it -> {
|
|
||||||
final ProvenaceInfo provenance = new ProvenaceInfo();
|
|
||||||
provenance.setId(it.getAttributes().get("id"));
|
|
||||||
provenance.setName(it.getAttributes().get("name"));
|
|
||||||
provenance.setCollectionMode(provisionMode);
|
|
||||||
provenance.setCompletionStatus(it.getAttributes().get("completionStatus"));
|
|
||||||
provenances.add(provenance);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (resolvededFromNodes != null && resolvededFromNodes.size() > 0) {
|
|
||||||
resolvededFromNodes
|
|
||||||
.forEach(
|
|
||||||
it -> {
|
|
||||||
final ProvenaceInfo provenance = new ProvenaceInfo();
|
|
||||||
provenance.setId(it.getAttributes().get("id"));
|
|
||||||
provenance.setName(it.getAttributes().get("name"));
|
|
||||||
provenance.setCollectionMode("resolved");
|
|
||||||
provenance.setCompletionStatus(it.getAttributes().get("completionStatus"));
|
|
||||||
provenances.add(provenance);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
parsedObject.setDlicollectedfrom(provenances);
|
|
||||||
parsedObject
|
|
||||||
.setCompletionStatus(
|
|
||||||
VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='completionStatus']"));
|
|
||||||
|
|
||||||
parsedObject
|
|
||||||
.setCollectedfrom(
|
|
||||||
parsedObject
|
|
||||||
.getDlicollectedfrom()
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
p -> {
|
|
||||||
final KeyValue cf = new KeyValue();
|
|
||||||
cf.setKey(p.getId());
|
|
||||||
cf.setValue(p.getName());
|
|
||||||
return cf;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
|
|
||||||
final List<Node> relatedIdentifiers = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap,
|
|
||||||
vn,
|
|
||||||
"//*[local-name()='relatedIdentifier']",
|
|
||||||
Arrays
|
|
||||||
.asList(
|
|
||||||
"relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
|
|
||||||
generateRelations(
|
|
||||||
relationMapper, parsedObject, result, di, dateOfCollection, relatedIdentifiers);
|
|
||||||
|
|
||||||
final List<Node> hostedBy = VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap, vn, "//*[local-name()='hostedBy']", Arrays.asList("id", "name"));
|
|
||||||
|
|
||||||
if (hostedBy != null) {
|
|
||||||
parsedObject
|
|
||||||
.setInstance(
|
|
||||||
hostedBy
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
it -> {
|
|
||||||
final Instance i = new Instance();
|
|
||||||
i.setUrl(Collections.singletonList(currentPid.getValue()));
|
|
||||||
KeyValue h = new KeyValue();
|
|
||||||
i.setHostedby(h);
|
|
||||||
h.setKey(it.getAttributes().get("id"));
|
|
||||||
h.setValue(it.getAttributes().get("name"));
|
|
||||||
return i;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
final List<String> authorsNode = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='creator']");
|
|
||||||
if (authorsNode != null)
|
|
||||||
parsedObject
|
|
||||||
.setAuthor(
|
|
||||||
authorsNode
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
a -> {
|
|
||||||
final Author author = new Author();
|
|
||||||
author.setFullname(a);
|
|
||||||
return author;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
|
|
||||||
final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='title']");
|
|
||||||
if (titles != null) {
|
|
||||||
parsedObject
|
|
||||||
.setTitle(
|
|
||||||
titles
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
t -> {
|
|
||||||
final StructuredProperty st = new StructuredProperty();
|
|
||||||
st.setValue(t);
|
|
||||||
st
|
|
||||||
.setQualifier(
|
|
||||||
generateQualifier(
|
|
||||||
"main title", "main title", "dnet:dataCite_title",
|
|
||||||
"dnet:dataCite_title"));
|
|
||||||
return st;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
Field<String> description = new Field<>();
|
|
||||||
|
|
||||||
description
|
|
||||||
.setValue(
|
|
||||||
VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='description']"));
|
|
||||||
|
|
||||||
// if (StringUtils.isNotBlank(description.getValue())
|
|
||||||
// && description.getValue().length() > 10000) {
|
|
||||||
// description.setValue(description.getValue().substring(0, 10000));
|
|
||||||
// }
|
|
||||||
|
|
||||||
parsedObject.setDescription(Collections.singletonList(description));
|
|
||||||
|
|
||||||
final String cd = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='date']");
|
|
||||||
|
|
||||||
StructuredProperty date = new StructuredProperty();
|
|
||||||
date.setValue(cd);
|
|
||||||
final Qualifier dq = new Qualifier();
|
|
||||||
dq.setClassname("date");
|
|
||||||
dq.setClassid("date");
|
|
||||||
dq.setSchemename("dnet::date");
|
|
||||||
dq.setSchemeid("dnet::date");
|
|
||||||
date.setQualifier(dq);
|
|
||||||
parsedObject.setRelevantdate(Collections.singletonList(date));
|
|
||||||
|
|
||||||
List<StructuredProperty> subjects = extractSubject(
|
|
||||||
VtdUtilityParser
|
|
||||||
.getTextValuesWithAttributes(
|
|
||||||
ap, vn, "//*[local-name()='subject']", Collections.singletonList("scheme")));
|
|
||||||
parsedObject.setSubject(subjects);
|
|
||||||
|
|
||||||
parsedObject.setDataInfo(di);
|
|
||||||
|
|
||||||
parsedObject.setSubject(subjects);
|
|
||||||
Qualifier q = new Qualifier();
|
|
||||||
q.setClassname("publication");
|
|
||||||
q.setClassid("publication");
|
|
||||||
q.setSchemename("publication");
|
|
||||||
q.setSchemeid("publication");
|
|
||||||
parsedObject.setResulttype(q);
|
|
||||||
result.add(parsedObject);
|
|
||||||
return result;
|
|
||||||
|
|
||||||
} catch (Throwable e) {
|
|
||||||
log.error("Input record: " + record);
|
|
||||||
log.error("Error on parsing record ", e);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,54 @@
package eu.dnetlib.dhp.sx.graph.scholix

import eu.dnetlib.dhp.schema.oaf.{Dataset, Result}
import eu.dnetlib.dhp.schema.sx.summary.{SchemeValue, ScholixSummary, TypedIdentifier, Typology}

import scala.collection.JavaConverters._

object ScholixUtils {

  def resultToSummary(r:Result):ScholixSummary = {
    val s = new ScholixSummary
    s.setId(r.getId)
    s.setLocalIdentifier(r.getPid.asScala.map(p => new TypedIdentifier(p.getValue, p.getQualifier.getClassid)).asJava)

    if (r.isInstanceOf[Dataset])
      s.setTypology(Typology.dataset)
    else
      s.setTypology(Typology.publication)

    s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)

    if (r.getTitle!= null && r.getTitle.asScala.nonEmpty) {
      s.setTitle(r.getTitle.asScala.map(t => t.getValue).asJava)
    }

    if(r.getAuthor!= null && !r.getAuthor.isEmpty) {
      s.setAuthor(r.getAuthor.asScala.map(a=> a.getFullname).asJava)
    }
    if (r.getInstance() != null) {
      val dt:List[String] = r.getInstance().asScala.filter(i => i.getDateofacceptance != null).map(i => i.getDateofacceptance.getValue)(collection.breakOut)
      if (dt.nonEmpty)
        s.setDate(dt.asJava)
    }
    if (r.getDescription!= null && !r.getDescription.isEmpty) {
      val d = r.getDescription.asScala.find(f => f.getValue!=null)
      if (d.isDefined)
        s.setDescription(d.get.getValue)
    }

    if (r.getSubject!= null && !r.getSubject.isEmpty)
      s.setSubject(r.getSubject.asScala.map(s => new SchemeValue(s.getQualifier.getClassname, s.getValue)).asJava)

    if (r.getPublisher!= null)
      s.setPublisher(List(r.getPublisher.getValue).asJava)

    s.setRelatedDatasets(0)
    s.setRelatedPublications(0)
    s.setRelatedUnknown(0)

    s
  }

}
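The new ScholixUtils.resultToSummary helper maps an OAF Result to a ScholixSummary. The following is a minimal usage sketch, not part of this commit: it assumes the dedup output is a kryo-encoded Dataset[Result] (the convention used elsewhere in dnet-hadoop), and it filters out records without instances because resultToSummary dereferences getInstance().get(0). Class and path names in the sketch are illustrative only.

import eu.dnetlib.dhp.schema.oaf.Result
import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary
import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}

object ResultToSummarySketch {

  def main(args: Array[String]): Unit = {
    // Hypothetical paths; the real job wires these through its parameter JSON.
    val sourcePath = args(0)
    val targetPath = args(1)

    val spark: SparkSession = SparkSession.builder().appName("ResultToSummarySketch").master("local[*]").getOrCreate()

    implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result]
    implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]

    // Assumes the input was written as a kryo-encoded Dataset[Result] (single binary column).
    val results: Dataset[Result] = spark.read.load(sourcePath).as[Result]

    results
      .filter(r => r.getInstance() != null && !r.getInstance().isEmpty) // resultToSummary reads getInstance().get(0)
      .map(r => ScholixUtils.resultToSummary(r))
      .write
      .mode("overwrite")
      .save(targetPath)
  }
}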
@ -0,0 +1,5 @@
[
  {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
  {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true},
  {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}
]
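These are the three parameters consumed by the new summaries job. Below is a hedged sketch of how they would typically be parsed with ArgumentApplicationParser, the parser class already imported elsewhere in this commit; the exact wiring inside SparkCreateSummaryObject is not shown here, and the object name is illustrative. The JSON is inlined to keep the sketch self-contained and mirrors the file above.

import eu.dnetlib.dhp.application.ArgumentApplicationParser

object CreateSummariesArgsSketch {

  def main(args: Array[String]): Unit = {
    // Same parameter definitions as the JSON file above, inlined for the sketch.
    val json =
      """[
        |  {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
        |  {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true},
        |  {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true}
        |]""".stripMargin

    val parser = new ArgumentApplicationParser(json)
    parser.parseArgument(args)

    val master = parser.get("master")         // e.g. "yarn", matching the workflow's --master argument
    val sourcePath = parser.get("sourcePath") // e.g. the dedup output path
    val targetPath = parser.get("targetPath") // e.g. the provision/summaries output path
  }
}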
@ -10,7 +10,7 @@
        </property>
    </parameters>

    <start to="ResolveRelations"/>
    <start to="CreateSummaries"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
@ -64,9 +64,38 @@
            <arg>--workingPath</arg><arg>${targetPath}/resolved/</arg>
            <arg>--entityPath</arg><arg>${targetPath}/dedup</arg>
        </spark>
        <ok to="CreateSummaries"/>
        <error to="Kill"/>
    </action>

    <action name="CreateSummaries">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Convert Entities to summaries</name>
            <class>eu.dnetlib.dhp.sx.graph.SparkCreateSummaryObject</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.shuffle.partitions=5000
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--master</arg><arg>yarn</arg>
            <arg>--sourcePath</arg><arg>${targetPath}/dedup</arg>
            <arg>--targetPath</arg><arg>${targetPath}/provision/summaries</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
@ -1,63 +0,0 @@

package eu.dnetlib.dhp.sx.graph;

import java.util.List;

import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;

import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.sx.graph.parser.DatasetScholexplorerParser;
import eu.dnetlib.dhp.sx.graph.parser.PublicationScholexplorerParser;
import eu.dnetlib.scholexplorer.relation.RelationMapper;

public class ScholexplorerParserTest {

	@Test
	public void testDataciteParser() throws Exception {
		String xml = IOUtils.toString(this.getClass().getResourceAsStream("dmf.xml"));

		DatasetScholexplorerParser p = new DatasetScholexplorerParser();
		List<Oaf> oaves = p.parseObject(xml, RelationMapper.load());

		ObjectMapper m = new ObjectMapper();
		m.enable(SerializationFeature.INDENT_OUTPUT);

		oaves
			.forEach(
				oaf -> {
					try {
						System.out.println(m.writeValueAsString(oaf));
						System.out.println("----------------------------");
					} catch (JsonProcessingException e) {

					}
				});
	}

	@Test
	public void testPublicationParser() throws Exception {
		String xml = IOUtils.toString(this.getClass().getResourceAsStream("pmf.xml"));

		PublicationScholexplorerParser p = new PublicationScholexplorerParser();
		List<Oaf> oaves = p.parseObject(xml, RelationMapper.load());

		ObjectMapper m = new ObjectMapper();
		m.enable(SerializationFeature.INDENT_OUTPUT);

		oaves
			.forEach(
				oaf -> {
					try {
						System.out.println(m.writeValueAsString(oaf));
						System.out.println("----------------------------");
					} catch (JsonProcessingException e) {

					}
				});
	}
}
@ -1,54 +0,0 @@
package eu.dnetlib.dhp.sx.graph

import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import eu.dnetlib.dhp.schema.scholexplorer.DLIPublication
import eu.dnetlib.dhp.sx.graph.ebi.EBIAggregator
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.Test

import scala.io.Source

class SparkScholexplorerAggregationTest {


  @Test
  def testFunderRelationshipsMapping(): Unit = {
    val publications = Source.fromInputStream(getClass.getResourceAsStream("publication.json")).mkString

    var s: List[DLIPublication] = List[DLIPublication]()

    val m: ObjectMapper = new ObjectMapper()

    m.enable(SerializationFeature.INDENT_OUTPUT)

    for (line <- publications.lines) {
      s = m.readValue(line, classOf[DLIPublication]) :: s

    }

    implicit val pubEncoder: Encoder[DLIPublication] = Encoders.kryo[DLIPublication]
    val spark: SparkSession = SparkSession.builder().appName("Test").master("local[*]").config("spark.driver.bindAddress", "127.0.0.1").getOrCreate()

    val ds: Dataset[DLIPublication] = spark.createDataset(spark.sparkContext.parallelize(s)).as[DLIPublication]

    val unique = ds.map(d => (d.getId, d))(Encoders.tuple(Encoders.STRING, pubEncoder))
      .groupByKey(_._1)(Encoders.STRING)
      .agg(EBIAggregator.getDLIPublicationAggregator().toColumn)
      .map(p => p._2)

    val uniquePubs: DLIPublication = unique.first()

    s.foreach(pp => assertFalse(pp.getAuthor.isEmpty))

    assertNotNull(uniquePubs.getAuthor)
    assertFalse(uniquePubs.getAuthor.isEmpty)

  }

}
@ -1,6 +0,0 @@

package eu.dnetlib.dhp.sx.graph;

public class SparkScholexplorerGraphImporterTest {

}
@ -1,5 +0,0 @@

package eu.dnetlib.dhp.sx.graph;

public class SparkScholexplorerMergeEntitiesJobTest {
}
@ -8,10 +8,9 @@ import eu.dnetlib.dhp.common.PacePerson
import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{Author, Dataset, ExternalReference, Field, Instance, KeyValue, Oaf, Publication, Qualifier, Relation, Result, StructuredProperty}
import eu.dnetlib.dhp.schema.scholexplorer.{DLIDataset, DLIPublication}
import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.commons.lang3.StringUtils
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils._

import scala.collection.JavaConverters._
@ -15,7 +15,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class SparkIndexCollectionOnES {
public class
SparkIndexCollectionOnES {

	public static void main(String[] args) throws Exception {
@ -24,8 +24,8 @@
		<module>dhp-dedup-openaire</module>
		<module>dhp-enrichment</module>
		<module>dhp-graph-provision</module>
		<module>dhp-dedup-scholexplorer</module>
		<!-- <module>dhp-dedup-scholexplorer</module>-->
		<module>dhp-graph-provision-scholexplorer</module>
		<!-- <module>dhp-graph-provision-scholexplorer</module>-->
		<module>dhp-blacklist</module>
		<module>dhp-stats-update</module>
		<module>dhp-stats-promote</module>

pom.xml
@ -736,7 +736,7 @@
		<mockito-core.version>3.3.3</mockito-core.version>
		<mongodb.driver.version>3.4.2</mongodb.driver.version>
		<vtd.version>[2.12,3.0)</vtd.version>
		<dhp-schemas.version>[2.6.13]</dhp-schemas.version>
		<dhp-schemas.version>[2.6.14]</dhp-schemas.version>
		<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
		<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
		<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>