Master branch updates from beta September 2023 #337

Manually merged
claudio.atzori merged 1271 commits from beta into master 2023-09-06 11:31:09 +02:00
13 changed files with 27 additions and 89 deletions
Showing only changes of commit 57e2c4b749 - Show all commits

View File

@ -3,8 +3,7 @@ package eu.dnetlib.dhp.datacite
import org.json4s.jackson.JsonMethods.{compact, parse, render}
import org.json4s.{DefaultFormats, JValue}
class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1)
extends AbstractRestClient {
class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) extends AbstractRestClient {
override def extractInfo(input: String): Unit = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

View File

@ -327,9 +327,7 @@ object DataciteToOAFTransformation {
a.setFullname(c.name.orNull)
a.setName(c.givenName.orNull)
a.setSurname(c.familyName.orNull)
if (
c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null
) {
if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
a.setPid(
c.nameIdentifiers.get
.map(ni => {
@ -395,9 +393,7 @@ object DataciteToOAFTransformation {
.find(d => d.dateType.get.equalsIgnoreCase("issued"))
.map(d => extract_date(d.date.get))
val a_date: Option[String] = dates
.filter(d =>
d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")
)
.filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
.map(d => extract_date(d.date.get))
.find(d => d != null && d.isDefined)
.map(d => d.get)

View File

@ -2,12 +2,7 @@ package eu.dnetlib.dhp.sx.bio.pubmed
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{
GraphCleaningFunctions,
IdentifierFactory,
OafMapperUtils,
PidType
}
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
import eu.dnetlib.dhp.schema.oaf._
import collection.JavaConverters._
@ -169,9 +164,7 @@ object PubMedToOaf {
pubmedInstance.setInstancetype(cojbCategory)
} else {
val i_type = article.getPublicationTypes.asScala
.map(s =>
getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)
)
.map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue))
.find(q => q != null)
if (i_type.isDefined)
pubmedInstance.setInstancetype(i_type.get)

View File

@ -59,8 +59,7 @@ object SparkGenerateDoiBoost {
val workingDirPath = parser.get("workingPath")
val openaireOrganizationPath = parser.get("openaireOrganizationPath")
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication]
with Serializable {
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
override def zero: Publication = new Publication
override def reduce(b: Publication, a: (String, Publication)): Publication = {

View File

@ -438,11 +438,10 @@ case object Crossref2Oaf {
funders.foreach(funder => {
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
funder.DOI.get match {
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" |
"10.13039/501100000780" | "10.13039/100010665" =>
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
"10.13039/100010665" =>
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" |
"10.13039/501100000780" =>
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
case "10.13039/501100000781" =>
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
@ -512,8 +511,7 @@ case object Crossref2Oaf {
case "European Union's" =>
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
case "The French National Research Agency (ANR)" |
"The French National Research Agency" =>
case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
generateSimpleRelationFromAward(funder, "anr_________", a => a)
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)

View File

@ -15,9 +15,7 @@ object SparkProcessMAG {
def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = {
d.where(col("Doi").isNotNull)
.groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING)
.reduceGroups((p1: MagPapers, p2: MagPapers) =>
ConversionUtil.choiceLatestMagArtitcle(p1, p2)
)
.reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2))
.map(_._2)(Encoders.product[MagPapers])
.map(mp => {
MagPapers(

View File

@ -223,9 +223,7 @@ class CrossrefMappingTest {
val collectedFromList = result.getCollectedfrom.asScala
assert(
collectedFromList.exists(c =>
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
),
collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
"Wrong collected from assertion"
)
@ -301,9 +299,7 @@ class CrossrefMappingTest {
val collectedFromList = result.getCollectedfrom.asScala
assert(
collectedFromList.exists(c =>
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
),
collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
"Wrong collected from assertion"
)
@ -435,9 +431,7 @@ class CrossrefMappingTest {
val collectedFromList = result.getCollectedfrom.asScala
assert(
collectedFromList.exists(c =>
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
),
collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
"Wrong collected from assertion"
)
@ -553,9 +547,7 @@ class CrossrefMappingTest {
println(mapper.writeValueAsString(item))
assertTrue(
item.getInstance().asScala exists (i =>
i.getLicense.getValue.equals("https://www.springer.com/vor")
)
item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))
)
assertTrue(
item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED"))
@ -590,9 +582,7 @@ class CrossrefMappingTest {
)
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN")))
assertTrue(
item.getInstance().asScala exists (i =>
i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid
)
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
)
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
println(mapper.writeValueAsString(item))
@ -627,9 +617,7 @@ class CrossrefMappingTest {
)
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN")))
assertTrue(
item.getInstance().asScala exists (i =>
i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid
)
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
)
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
println(mapper.writeValueAsString(item))

View File

@ -2,14 +2,7 @@ package eu.dnetlib.dhp.sx.graph
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{
OtherResearchProduct,
Publication,
Relation,
Result,
Software,
Dataset => OafDataset
}
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset}
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}

View File

@ -90,8 +90,7 @@ object PangaeaUtils {
)
}
def getDatasetAggregator()
: Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] {
override def zero: PangaeaDataModel = null

View File

@ -1,11 +1,7 @@
package eu.dnetlib.dhp.oa.graph.hostedbymap
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{
joinResHBM,
prepareResultInfo,
toEntityInfo
}
import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo}
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}

View File

@ -51,8 +51,7 @@ class TestPreprocess extends java.io.Serializable {
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1
)
assertTrue(
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata"))
.count == 1
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1
)
assertTrue(
ds.filter(hbi =>

View File

@ -189,11 +189,7 @@ class ResolveEntitiesTest extends Serializable {
var ct = pubDS.count()
var et = pubDS
.filter(p =>
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
.count()
assertEquals(ct, et)
@ -208,11 +204,7 @@ class ResolveEntitiesTest extends Serializable {
.count()
ct = datDS.count()
et = datDS
.filter(p =>
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
.count()
assertEquals(ct, et)
@ -226,11 +218,7 @@ class ResolveEntitiesTest extends Serializable {
.count()
ct = softDS.count()
et = softDS
.filter(p =>
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
.count()
assertEquals(ct, et)
@ -245,11 +233,7 @@ class ResolveEntitiesTest extends Serializable {
ct = orpDS.count()
et = orpDS
.filter(p =>
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
.count()
assertEquals(ct, et)

View File

@ -56,9 +56,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
assertNotNull(result)
assertEquals(result.size, items.size)
val d = result.find(s =>
s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty)
)
val d = result.find(s => s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty))
assertFalse(d.isDefined)
println(mapper.writeValueAsString(result.head))
@ -74,9 +72,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
val result: List[(Relation, ScholixSummary)] = inputRelations.lines
.sliding(2)
.map(s => (s.head, s(1)))
.map(p =>
(mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))
)
.map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))
.toList
assertNotNull(result)
assertTrue(result.nonEmpty)