Implemented new data model including all the OpenAIRE typologies
This commit is contained in:
parent
9c1df15071
commit
1bbf408a08
|
@ -1,14 +1,23 @@
|
||||||
package eu.dnetlib.dhp.sx.graph.scholix
|
package eu.dnetlib.dhp.sx.graph.scholix
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty}
|
import eu.dnetlib.dhp.schema.oaf.{
|
||||||
|
Dataset,
|
||||||
|
OtherResearchProduct,
|
||||||
|
Publication,
|
||||||
|
Relation,
|
||||||
|
Result,
|
||||||
|
Software,
|
||||||
|
StructuredProperty
|
||||||
|
}
|
||||||
import eu.dnetlib.dhp.schema.sx.scholix._
|
import eu.dnetlib.dhp.schema.sx.scholix._
|
||||||
import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology}
|
import eu.dnetlib.dhp.schema.sx.summary.{AuthorPid, CollectedFromType, SchemeValue, ScholixSummary, Typology}
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils
|
import eu.dnetlib.dhp.utils.DHPUtils
|
||||||
import org.apache.spark.sql.expressions.Aggregator
|
import org.apache.spark.sql.expressions.Aggregator
|
||||||
import org.apache.spark.sql.{Encoder, Encoders}
|
import org.apache.spark.sql.{Encoder, Encoders}
|
||||||
import org.json4s
|
import org.json4s
|
||||||
import org.json4s.DefaultFormats
|
import org.json4s.DefaultFormats
|
||||||
import org.json4s.jackson.JsonMethods.parse
|
import org.json4s.jackson.JsonMethods.parse
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.Source
|
import scala.io.Source
|
||||||
|
|
||||||
|
@ -232,7 +241,16 @@ object ScholixUtils extends Serializable {
|
||||||
|
|
||||||
if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) {
|
if (summaryObject.getAuthor != null && !summaryObject.getAuthor.isEmpty) {
|
||||||
val l: List[ScholixEntityId] =
|
val l: List[ScholixEntityId] =
|
||||||
summaryObject.getAuthor.asScala.map(a => new ScholixEntityId(a, null)).toList
|
summaryObject.getAuthor.asScala
|
||||||
|
.map(a => {
|
||||||
|
if (a.getORCID != null)
|
||||||
|
new ScholixEntityId(
|
||||||
|
a.getFullname,
|
||||||
|
List(new ScholixIdentifier(a.getORCID, "ORCID", s"https://orcid.org/${a.getORCID}")).asJava
|
||||||
|
)
|
||||||
|
else new ScholixEntityId(a.getFullname, null)
|
||||||
|
})
|
||||||
|
.toList
|
||||||
if (l.nonEmpty)
|
if (l.nonEmpty)
|
||||||
r.setCreator(l.asJava)
|
r.setCreator(l.asJava)
|
||||||
}
|
}
|
||||||
|
@ -377,11 +395,13 @@ object ScholixUtils extends Serializable {
|
||||||
if (persistentIdentifiers.isEmpty)
|
if (persistentIdentifiers.isEmpty)
|
||||||
return null
|
return null
|
||||||
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
s.setLocalIdentifier(persistentIdentifiers.asJava)
|
||||||
if (r.isInstanceOf[Publication])
|
r match {
|
||||||
s.setTypology(Typology.publication)
|
case _: Publication => s.setTypology(Typology.publication)
|
||||||
else
|
case _: Dataset => s.setTypology(Typology.dataset)
|
||||||
s.setTypology(Typology.dataset)
|
case _: Software => s.setTypology(Typology.software)
|
||||||
|
case _: OtherResearchProduct => s.setTypology(Typology.otherresearchproduct)
|
||||||
|
case _ =>
|
||||||
|
}
|
||||||
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
s.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
|
||||||
|
|
||||||
if (r.getTitle != null && r.getTitle.asScala.nonEmpty) {
|
if (r.getTitle != null && r.getTitle.asScala.nonEmpty) {
|
||||||
|
@ -393,7 +413,20 @@ object ScholixUtils extends Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r.getAuthor != null && !r.getAuthor.isEmpty) {
|
if (r.getAuthor != null && !r.getAuthor.isEmpty) {
|
||||||
val authors: List[String] = r.getAuthor.asScala.map(a => a.getFullname).toList
|
val authors: List[AuthorPid] = r.getAuthor.asScala
|
||||||
|
.map(a => {
|
||||||
|
var ORCID: String = null;
|
||||||
|
if (a.getPid != null) {
|
||||||
|
val result = a.getPid.asScala.find(p =>
|
||||||
|
p.getQualifier != null && p.getQualifier.getClassid != null && p.getQualifier.getClassid.toLowerCase
|
||||||
|
.contains("orcid")
|
||||||
|
)
|
||||||
|
if (result.isDefined)
|
||||||
|
ORCID = result.get.getValue
|
||||||
|
}
|
||||||
|
new AuthorPid(a.getFullname, ORCID)
|
||||||
|
})
|
||||||
|
.toList
|
||||||
if (authors.nonEmpty)
|
if (authors.nonEmpty)
|
||||||
s.setAuthor(authors.asJava)
|
s.setAuthor(authors.asJava)
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,7 +58,7 @@ object SparkConvertObjectToJson {
|
||||||
case "scholix" =>
|
case "scholix" =>
|
||||||
log.info("Serialize Scholix")
|
log.info("Serialize Scholix")
|
||||||
val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix]
|
val d: Dataset[Scholix] = spark.read.load(sourcePath).as[Scholix]
|
||||||
// val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
|
val u: Dataset[Scholix] = spark.read.load(s"$scholixUpdatePath/scholix").as[Scholix]
|
||||||
if (maxPidNumberFilter != null && toInt(maxPidNumberFilter).isDefined) {
|
if (maxPidNumberFilter != null && toInt(maxPidNumberFilter).isDefined) {
|
||||||
val mp = toInt(maxPidNumberFilter).get
|
val mp = toInt(maxPidNumberFilter).get
|
||||||
d
|
d
|
||||||
|
|
|
@ -34,7 +34,6 @@ object SparkCreateSummaryObject {
|
||||||
log.info(s"targetPath -> $targetPath")
|
log.info(s"targetPath -> $targetPath")
|
||||||
|
|
||||||
implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result]
|
implicit val resultEncoder: Encoder[Result] = Encoders.kryo[Result]
|
||||||
implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
|
|
||||||
|
|
||||||
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
|
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue