1
0
Fork 0

formatted code

This commit is contained in:
Sandro La Bruzzo 2022-01-12 09:40:28 +01:00
parent b78d2b71f0
commit 57e2c4b749
13 changed files with 27 additions and 89 deletions

View File

@ -3,8 +3,7 @@ package eu.dnetlib.dhp.datacite
import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.jackson.JsonMethods.{compact, parse, render}
import org.json4s.{DefaultFormats, JValue} import org.json4s.{DefaultFormats, JValue}
class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) extends AbstractRestClient {
extends AbstractRestClient {
override def extractInfo(input: String): Unit = { override def extractInfo(input: String): Unit = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

View File

@ -327,9 +327,7 @@ object DataciteToOAFTransformation {
a.setFullname(c.name.orNull) a.setFullname(c.name.orNull)
a.setName(c.givenName.orNull) a.setName(c.givenName.orNull)
a.setSurname(c.familyName.orNull) a.setSurname(c.familyName.orNull)
if ( if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null
) {
a.setPid( a.setPid(
c.nameIdentifiers.get c.nameIdentifiers.get
.map(ni => { .map(ni => {
@ -395,9 +393,7 @@ object DataciteToOAFTransformation {
.find(d => d.dateType.get.equalsIgnoreCase("issued")) .find(d => d.dateType.get.equalsIgnoreCase("issued"))
.map(d => extract_date(d.date.get)) .map(d => extract_date(d.date.get))
val a_date: Option[String] = dates val a_date: Option[String] = dates
.filter(d => .filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")
)
.map(d => extract_date(d.date.get)) .map(d => extract_date(d.date.get))
.find(d => d != null && d.isDefined) .find(d => d != null && d.isDefined)
.map(d => d.get) .map(d => d.get)

View File

@ -2,12 +2,7 @@ package eu.dnetlib.dhp.sx.bio.pubmed
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{ import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
GraphCleaningFunctions,
IdentifierFactory,
OafMapperUtils,
PidType
}
import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf._
import collection.JavaConverters._ import collection.JavaConverters._
@ -169,9 +164,7 @@ object PubMedToOaf {
pubmedInstance.setInstancetype(cojbCategory) pubmedInstance.setInstancetype(cojbCategory)
} else { } else {
val i_type = article.getPublicationTypes.asScala val i_type = article.getPublicationTypes.asScala
.map(s => .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue))
getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)
)
.find(q => q != null) .find(q => q != null)
if (i_type.isDefined) if (i_type.isDefined)
pubmedInstance.setInstancetype(i_type.get) pubmedInstance.setInstancetype(i_type.get)

View File

@ -59,8 +59,7 @@ object SparkGenerateDoiBoost {
val workingDirPath = parser.get("workingPath") val workingDirPath = parser.get("workingPath")
val openaireOrganizationPath = parser.get("openaireOrganizationPath") val openaireOrganizationPath = parser.get("openaireOrganizationPath")
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
with Serializable {
override def zero: Publication = new Publication override def zero: Publication = new Publication
override def reduce(b: Publication, a: (String, Publication)): Publication = { override def reduce(b: Publication, a: (String, Publication)): Publication = {

View File

@ -438,11 +438,10 @@ case object Crossref2Oaf {
funders.foreach(funder => { funders.foreach(funder => {
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) { if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
funder.DOI.get match { funder.DOI.get match {
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
"10.13039/501100000780" | "10.13039/100010665" => "10.13039/100010665" =>
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
"10.13039/501100000780" =>
generateSimpleRelationFromAward(funder, "corda_______", extractECAward) generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
case "10.13039/501100000781" => case "10.13039/501100000781" =>
generateSimpleRelationFromAward(funder, "corda_______", extractECAward) generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
@ -512,8 +511,7 @@ case object Crossref2Oaf {
case "European Union's" => case "European Union's" =>
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward) generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
generateSimpleRelationFromAward(funder, "corda_______", extractECAward) generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
case "The French National Research Agency (ANR)" | case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
"The French National Research Agency" =>
generateSimpleRelationFromAward(funder, "anr_________", a => a) generateSimpleRelationFromAward(funder, "anr_________", a => a)
case "CONICYT, Programa de Formación de Capital Humano Avanzado" => case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward) generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)

View File

@ -15,9 +15,7 @@ object SparkProcessMAG {
def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = { def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = {
d.where(col("Doi").isNotNull) d.where(col("Doi").isNotNull)
.groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING) .groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING)
.reduceGroups((p1: MagPapers, p2: MagPapers) => .reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2))
ConversionUtil.choiceLatestMagArtitcle(p1, p2)
)
.map(_._2)(Encoders.product[MagPapers]) .map(_._2)(Encoders.product[MagPapers])
.map(mp => { .map(mp => {
MagPapers( MagPapers(

View File

@ -223,9 +223,7 @@ class CrossrefMappingTest {
val collectedFromList = result.getCollectedfrom.asScala val collectedFromList = result.getCollectedfrom.asScala
assert( assert(
collectedFromList.exists(c => collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
),
"Wrong collected from assertion" "Wrong collected from assertion"
) )
@ -301,9 +299,7 @@ class CrossrefMappingTest {
val collectedFromList = result.getCollectedfrom.asScala val collectedFromList = result.getCollectedfrom.asScala
assert( assert(
collectedFromList.exists(c => collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
),
"Wrong collected from assertion" "Wrong collected from assertion"
) )
@ -435,9 +431,7 @@ class CrossrefMappingTest {
val collectedFromList = result.getCollectedfrom.asScala val collectedFromList = result.getCollectedfrom.asScala
assert( assert(
collectedFromList.exists(c => collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
),
"Wrong collected from assertion" "Wrong collected from assertion"
) )
@ -553,9 +547,7 @@ class CrossrefMappingTest {
println(mapper.writeValueAsString(item)) println(mapper.writeValueAsString(item))
assertTrue( assertTrue(
item.getInstance().asScala exists (i => item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))
i.getLicense.getValue.equals("https://www.springer.com/vor")
)
) )
assertTrue( assertTrue(
item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED")) item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED"))
@ -590,9 +582,7 @@ class CrossrefMappingTest {
) )
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN")))
assertTrue( assertTrue(
item.getInstance().asScala exists (i => item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid
)
) )
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
println(mapper.writeValueAsString(item)) println(mapper.writeValueAsString(item))
@ -627,9 +617,7 @@ class CrossrefMappingTest {
) )
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN")))
assertTrue( assertTrue(
item.getInstance().asScala exists (i => item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid
)
) )
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
println(mapper.writeValueAsString(item)) println(mapper.writeValueAsString(item))

View File

@ -2,14 +2,7 @@ package eu.dnetlib.dhp.sx.graph
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{ import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset}
OtherResearchProduct,
Publication,
Relation,
Result,
Software,
Dataset => OafDataset
}
import org.apache.commons.io.IOUtils import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}

View File

@ -90,8 +90,7 @@ object PangaeaUtils {
) )
} }
def getDatasetAggregator() def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
: Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] { new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] {
override def zero: PangaeaDataModel = null override def zero: PangaeaDataModel = null

View File

@ -1,11 +1,7 @@
package eu.dnetlib.dhp.oa.graph.hostedbymap package eu.dnetlib.dhp.oa.graph.hostedbymap
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{ import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo}
joinResHBM,
prepareResultInfo,
toEntityInfo
}
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
import org.apache.spark.SparkConf import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}

View File

@ -51,8 +51,7 @@ class TestPreprocess extends java.io.Serializable {
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1 ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1
) )
assertTrue( assertTrue(
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")) ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1
.count == 1
) )
assertTrue( assertTrue(
ds.filter(hbi => ds.filter(hbi =>

View File

@ -189,11 +189,7 @@ class ResolveEntitiesTest extends Serializable {
var ct = pubDS.count() var ct = pubDS.count()
var et = pubDS var et = pubDS
.filter(p => .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.count() .count()
assertEquals(ct, et) assertEquals(ct, et)
@ -208,11 +204,7 @@ class ResolveEntitiesTest extends Serializable {
.count() .count()
ct = datDS.count() ct = datDS.count()
et = datDS et = datDS
.filter(p => .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.count() .count()
assertEquals(ct, et) assertEquals(ct, et)
@ -226,11 +218,7 @@ class ResolveEntitiesTest extends Serializable {
.count() .count()
ct = softDS.count() ct = softDS.count()
et = softDS et = softDS
.filter(p => .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.count() .count()
assertEquals(ct, et) assertEquals(ct, et)
@ -245,11 +233,7 @@ class ResolveEntitiesTest extends Serializable {
ct = orpDS.count() ct = orpDS.count()
et = orpDS et = orpDS
.filter(p => .filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
p.getTitle != null && p.getTitle.asScala.forall(t =>
t.getValue != null && t.getValue.nonEmpty
)
)
.count() .count()
assertEquals(ct, et) assertEquals(ct, et)

View File

@ -56,9 +56,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
assertNotNull(result) assertNotNull(result)
assertEquals(result.size, items.size) assertEquals(result.size, items.size)
val d = result.find(s => val d = result.find(s => s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty))
s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty)
)
assertFalse(d.isDefined) assertFalse(d.isDefined)
println(mapper.writeValueAsString(result.head)) println(mapper.writeValueAsString(result.head))
@ -74,9 +72,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
val result: List[(Relation, ScholixSummary)] = inputRelations.lines val result: List[(Relation, ScholixSummary)] = inputRelations.lines
.sliding(2) .sliding(2)
.map(s => (s.head, s(1))) .map(s => (s.head, s(1)))
.map(p => .map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))
(mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))
)
.toList .toList
assertNotNull(result) assertNotNull(result)
assertTrue(result.nonEmpty) assertTrue(result.nonEmpty)