Use SparkSQL in place of Hive for executing step16-createIndicatorsTables.sql of stats update wf #386

Merged
claudio.atzori merged 2 commits from stats_with_spark_sql into beta 2024-01-29 09:12:00 +01:00
3 changed files with 11 additions and 3 deletions
Showing only changes of commit 926903b06b - Show all commits

View File

@ -25,7 +25,7 @@ case class mappingAffiliation(name: String) {}
case class mappingAuthor(
given: Option[String],
family: String,
family: Option[String],
sequence: Option[String],
ORCID: Option[String],
affiliation: Option[mappingAffiliation]
@ -226,14 +226,14 @@ case object Crossref2Oaf {
//Mapping Author
val authorList: List[mappingAuthor] =
(json \ "author").extractOrElse[List[mappingAuthor]](List())
(json \ "author").extract[List[mappingAuthor]].filter(a => a.family.isDefined)
val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) =>
a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first")
)
result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) =>
generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)
generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index)
}.asJava)
// Mapping instance

File diff suppressed because one or more lines are too long

View File

@ -22,6 +22,13 @@ class CrossrefMappingTest {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
val mapper = new ObjectMapper()
@Test
def testMissingAuthorParser():Unit = {
val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString
val result = Crossref2Oaf.convert(json)
result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0))
}
@Test
def testFunderRelationshipsMapping(): Unit = {
val template = Source