Implemented new data model including all the OpenAIRE typologies
This commit is contained in:
parent
9c1df15071
commit
1bbf408a08
|
@ -1,14 +1,23 @@
|
|||
package eu.dnetlib.dhp.sx.graph.scholix
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty}
|
||||
import eu.dnetlib.dhp.schema.oaf.{
|
||||
Dataset,
|
||||
OtherResearchProduct,
|
||||
Publication,
|
||||
Relation,
|
||||
Result,
|
||||
Software,
|
||||
StructuredProperty
|
||||
}
|
||||
import eu.dnetlib.dhp.schema.sx.scholix._
|
||||
import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology}
|
||||
import eu.dnetlib.dhp.schema.sx.summary.{AuthorPid, CollectedFromType, SchemeValue, ScholixSummary, Typology}
|
||||
import eu.dnetlib.dhp.utils.DHPUtils
|
||||
import org.apache.spark.sql.expressions.Aggregator
|
||||
import org.apache.spark.sql.{Encoder, Encoders}
|
||||
import org.json4s
|
||||
import org.json4s.DefaultFormats
|
||||
import org.json4s.jackson.JsonMethods.parse
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.io.Source
|
||||
|
||||
|
@ -232,7 +241,16 @@ object ScholixUtils extends Serializable {
|
|||
|
||||
if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) {
|
||||
val l: List[ScholixEntityId] =
|
||||
summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList
|
||||
summaryObject.getAuthor.asScala
|
||||
.map(a => {
|
||||
if (a.getORCID != null)
|
||||
new ScholixEntityId(
|
||||
a.getFullname,
|
||||
List(new ScholixIdentifier(a.getORCID, "ORCID", s"https://orcid.org/${a.getORCID}")).asJava
|
||||
)
|
||||
else new ScholixEntityId(a.getFullname, null)
|
||||
})
|
||||
.toList
|
||||
if (l.nonEmpty)
|
||||
r.setCreator(l.asJava)
|
||||
}
|
||||
|
@ -377,11 +395,13 @@ object ScholixUtils extends Serializable {
|
|||
if (persistentIdentifiers.isEmpty)
|
||||
return null
|
||||
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
||||
if (r.isInstanceOf[Publication])
|
||||
s.setTypology(Typology.publication)
|
||||
else
|
||||
s.setTypology(Typology.dataset)
|
||||
|
||||
r match {
|
||||
case _: Publication => s.setTypology(Typology.publication)
|
||||
case _: Dataset => s.setTypology(Typology.dataset)
|
||||
case _: Software => s.setTypology(Typology.software)
|
||||
case _: OtherResearchProduct => s.setTypology(Typology.otherresearchproduct)
|
||||
case _ =>
|
||||
}
|
||||
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
||||
|
||||
if (r.getTitle != null && r.getTitle.asScala.nonEmpty) {
|
||||
|
@ -393,7 +413,20 @@ object ScholixUtils extends Serializable {
|
|||
}
|
||||
|
||||
if (r.getAuthor != null && !r.getAuthor.isEmpty) {
|
||||
val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname).toList
|
||||
val authors: List[AuthorPid] = r.getAuthor.asScala
|
||||
.map(a => {
|
||||
var ORCID: String = null;
|
||||
if (a.getPid != null) {
|
||||
val result = a.getPid.asScala.find(p =>
|
||||
p.getQualifier != null && p.getQualifier.getClassid != null && p.getQualifier.getClassid.toLowerCase
|
||||
.contains("orcid")
|
||||
)
|
||||
if (result.isDefined)
|
||||
ORCID = result.get.getValue
|
||||
}
|
||||
new AuthorPid(a.getFullname, ORCID)
|
||||
})
|
||||
.toList
|
||||
if (authors.nonEmpty)
|
||||
s.setAuthor(authors.asJava)
|
||||
}
|
||||
|
|
|
@ -58,7 +58,7 @@ object SparkConvertObjectToJson {
|
|||
case "scholix" =>
|
||||
log.info("Serialize Scholix")
|
||||
val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix]
|
||||
// val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
|
||||
val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
|
||||
if (maxPidNumberFilter != null && toInt(maxPidNumberFilter).isDefined) {
|
||||
val mp = toInt(maxPidNumberFilter).get
|
||||
d
|
||||
|
|
|
@ -34,7 +34,6 @@ object SparkCreateSummaryObject {
|
|||
log.info(s"targetPath -> $targetPath")
|
||||
|
||||
implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result]
|
||||
implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
|
||||
|
||||
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
|
||||
|
||||
|
|
Loading…
Reference in New Issue