forked from antonis.lempesis/dnet-hadoop
formatted code
This commit is contained in:
parent
b78d2b71f0
commit
57e2c4b749
|
@ -3,8 +3,7 @@ package eu.dnetlib.dhp.datacite
|
|||
import org.json4s.jackson.JsonMethods.{compact, parse, render}
|
||||
import org.json4s.{DefaultFormats, JValue}
|
||||
|
||||
class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1)
|
||||
extends AbstractRestClient {
|
||||
class DataciteAPIImporter(timestamp: Long = 0, blocks: Long = 10, until: Long = -1) extends AbstractRestClient {
|
||||
|
||||
override def extractInfo(input: String): Unit = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
|
|
|
@ -327,9 +327,7 @@ object DataciteToOAFTransformation {
|
|||
a.setFullname(c.name.orNull)
|
||||
a.setName(c.givenName.orNull)
|
||||
a.setSurname(c.familyName.orNull)
|
||||
if (
|
||||
c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null
|
||||
) {
|
||||
if (c.nameIdentifiers != null && c.nameIdentifiers.isDefined && c.nameIdentifiers.get != null) {
|
||||
a.setPid(
|
||||
c.nameIdentifiers.get
|
||||
.map(ni => {
|
||||
|
@ -395,9 +393,7 @@ object DataciteToOAFTransformation {
|
|||
.find(d => d.dateType.get.equalsIgnoreCase("issued"))
|
||||
.map(d => extract_date(d.date.get))
|
||||
val a_date: Option[String] = dates
|
||||
.filter(d =>
|
||||
d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available")
|
||||
)
|
||||
.filter(d => d.date.isDefined && d.dateType.isDefined && d.dateType.get.equalsIgnoreCase("available"))
|
||||
.map(d => extract_date(d.date.get))
|
||||
.find(d => d != null && d.isDefined)
|
||||
.map(d => d.get)
|
||||
|
|
|
@ -2,12 +2,7 @@ package eu.dnetlib.dhp.sx.bio.pubmed
|
|||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{
|
||||
GraphCleaningFunctions,
|
||||
IdentifierFactory,
|
||||
OafMapperUtils,
|
||||
PidType
|
||||
}
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType}
|
||||
import eu.dnetlib.dhp.schema.oaf._
|
||||
import collection.JavaConverters._
|
||||
|
||||
|
@ -169,9 +164,7 @@ object PubMedToOaf {
|
|||
pubmedInstance.setInstancetype(cojbCategory)
|
||||
} else {
|
||||
val i_type = article.getPublicationTypes.asScala
|
||||
.map(s =>
|
||||
getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)
|
||||
)
|
||||
.map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue))
|
||||
.find(q => q != null)
|
||||
if (i_type.isDefined)
|
||||
pubmedInstance.setInstancetype(i_type.get)
|
||||
|
|
|
@ -59,8 +59,7 @@ object SparkGenerateDoiBoost {
|
|||
val workingDirPath = parser.get("workingPath")
|
||||
val openaireOrganizationPath = parser.get("openaireOrganizationPath")
|
||||
|
||||
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication]
|
||||
with Serializable {
|
||||
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
|
||||
override def zero: Publication = new Publication
|
||||
|
||||
override def reduce(b: Publication, a: (String, Publication)): Publication = {
|
||||
|
|
|
@ -438,11 +438,10 @@ case object Crossref2Oaf {
|
|||
funders.foreach(funder => {
|
||||
if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) {
|
||||
funder.DOI.get match {
|
||||
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" |
|
||||
"10.13039/501100000780" | "10.13039/100010665" =>
|
||||
case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
|
||||
"10.13039/100010665" =>
|
||||
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
|
||||
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" |
|
||||
"10.13039/501100000780" =>
|
||||
case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" | "10.13039/501100000780" =>
|
||||
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
|
||||
case "10.13039/501100000781" =>
|
||||
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
|
||||
|
@ -512,8 +511,7 @@ case object Crossref2Oaf {
|
|||
case "European Union's" =>
|
||||
generateSimpleRelationFromAward(funder, "corda__h2020", extractECAward)
|
||||
generateSimpleRelationFromAward(funder, "corda_______", extractECAward)
|
||||
case "The French National Research Agency (ANR)" |
|
||||
"The French National Research Agency" =>
|
||||
case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
|
||||
generateSimpleRelationFromAward(funder, "anr_________", a => a)
|
||||
case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
|
||||
generateSimpleRelationFromAward(funder, "conicytf____", extractECAward)
|
||||
|
|
|
@ -15,9 +15,7 @@ object SparkProcessMAG {
|
|||
def getDistinctResults(d: Dataset[MagPapers]): Dataset[MagPapers] = {
|
||||
d.where(col("Doi").isNotNull)
|
||||
.groupByKey(mp => DoiBoostMappingUtil.normalizeDoi(mp.Doi))(Encoders.STRING)
|
||||
.reduceGroups((p1: MagPapers, p2: MagPapers) =>
|
||||
ConversionUtil.choiceLatestMagArtitcle(p1, p2)
|
||||
)
|
||||
.reduceGroups((p1: MagPapers, p2: MagPapers) => ConversionUtil.choiceLatestMagArtitcle(p1, p2))
|
||||
.map(_._2)(Encoders.product[MagPapers])
|
||||
.map(mp => {
|
||||
MagPapers(
|
||||
|
|
|
@ -223,9 +223,7 @@ class CrossrefMappingTest {
|
|||
|
||||
val collectedFromList = result.getCollectedfrom.asScala
|
||||
assert(
|
||||
collectedFromList.exists(c =>
|
||||
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
|
||||
),
|
||||
collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
|
||||
"Wrong collected from assertion"
|
||||
)
|
||||
|
||||
|
@ -301,9 +299,7 @@ class CrossrefMappingTest {
|
|||
|
||||
val collectedFromList = result.getCollectedfrom.asScala
|
||||
assert(
|
||||
collectedFromList.exists(c =>
|
||||
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
|
||||
),
|
||||
collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
|
||||
"Wrong collected from assertion"
|
||||
)
|
||||
|
||||
|
@ -435,9 +431,7 @@ class CrossrefMappingTest {
|
|||
|
||||
val collectedFromList = result.getCollectedfrom.asScala
|
||||
assert(
|
||||
collectedFromList.exists(c =>
|
||||
c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")
|
||||
),
|
||||
collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
|
||||
"Wrong collected from assertion"
|
||||
)
|
||||
|
||||
|
@ -553,9 +547,7 @@ class CrossrefMappingTest {
|
|||
println(mapper.writeValueAsString(item))
|
||||
|
||||
assertTrue(
|
||||
item.getInstance().asScala exists (i =>
|
||||
i.getLicense.getValue.equals("https://www.springer.com/vor")
|
||||
)
|
||||
item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))
|
||||
)
|
||||
assertTrue(
|
||||
item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("CLOSED"))
|
||||
|
@ -590,9 +582,7 @@ class CrossrefMappingTest {
|
|||
)
|
||||
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN")))
|
||||
assertTrue(
|
||||
item.getInstance().asScala exists (i =>
|
||||
i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid
|
||||
)
|
||||
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
|
||||
)
|
||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
||||
println(mapper.writeValueAsString(item))
|
||||
|
@ -627,9 +617,7 @@ class CrossrefMappingTest {
|
|||
)
|
||||
assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN")))
|
||||
assertTrue(
|
||||
item.getInstance().asScala exists (i =>
|
||||
i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid
|
||||
)
|
||||
item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
|
||||
)
|
||||
mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
|
||||
println(mapper.writeValueAsString(item))
|
||||
|
|
|
@ -2,14 +2,7 @@ package eu.dnetlib.dhp.sx.graph
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.dhp.schema.oaf.{
|
||||
OtherResearchProduct,
|
||||
Publication,
|
||||
Relation,
|
||||
Result,
|
||||
Software,
|
||||
Dataset => OafDataset
|
||||
}
|
||||
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset}
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession}
|
||||
|
|
|
@ -90,8 +90,7 @@ object PangaeaUtils {
|
|||
)
|
||||
}
|
||||
|
||||
def getDatasetAggregator()
|
||||
: Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
|
||||
def getDatasetAggregator(): Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] =
|
||||
new Aggregator[(String, PangaeaDataModel), PangaeaDataModel, PangaeaDataModel] {
|
||||
|
||||
override def zero: PangaeaDataModel = null
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
package eu.dnetlib.dhp.oa.graph.hostedbymap
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper
|
||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{
|
||||
joinResHBM,
|
||||
prepareResultInfo,
|
||||
toEntityInfo
|
||||
}
|
||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo}
|
||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo
|
||||
import org.apache.spark.SparkConf
|
||||
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
|
||||
|
|
|
@ -51,8 +51,7 @@ class TestPreprocess extends java.io.Serializable {
|
|||
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1
|
||||
)
|
||||
assertTrue(
|
||||
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata"))
|
||||
.count == 1
|
||||
ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1
|
||||
)
|
||||
assertTrue(
|
||||
ds.filter(hbi =>
|
||||
|
|
|
@ -189,11 +189,7 @@ class ResolveEntitiesTest extends Serializable {
|
|||
|
||||
var ct = pubDS.count()
|
||||
var et = pubDS
|
||||
.filter(p =>
|
||||
p.getTitle != null && p.getTitle.asScala.forall(t =>
|
||||
t.getValue != null && t.getValue.nonEmpty
|
||||
)
|
||||
)
|
||||
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
|
||||
.count()
|
||||
|
||||
assertEquals(ct, et)
|
||||
|
@ -208,11 +204,7 @@ class ResolveEntitiesTest extends Serializable {
|
|||
.count()
|
||||
ct = datDS.count()
|
||||
et = datDS
|
||||
.filter(p =>
|
||||
p.getTitle != null && p.getTitle.asScala.forall(t =>
|
||||
t.getValue != null && t.getValue.nonEmpty
|
||||
)
|
||||
)
|
||||
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
|
||||
.count()
|
||||
assertEquals(ct, et)
|
||||
|
||||
|
@ -226,11 +218,7 @@ class ResolveEntitiesTest extends Serializable {
|
|||
.count()
|
||||
ct = softDS.count()
|
||||
et = softDS
|
||||
.filter(p =>
|
||||
p.getTitle != null && p.getTitle.asScala.forall(t =>
|
||||
t.getValue != null && t.getValue.nonEmpty
|
||||
)
|
||||
)
|
||||
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
|
||||
.count()
|
||||
assertEquals(ct, et)
|
||||
|
||||
|
@ -245,11 +233,7 @@ class ResolveEntitiesTest extends Serializable {
|
|||
|
||||
ct = orpDS.count()
|
||||
et = orpDS
|
||||
.filter(p =>
|
||||
p.getTitle != null && p.getTitle.asScala.forall(t =>
|
||||
t.getValue != null && t.getValue.nonEmpty
|
||||
)
|
||||
)
|
||||
.filter(p => p.getTitle != null && p.getTitle.asScala.forall(t => t.getValue != null && t.getValue.nonEmpty))
|
||||
.count()
|
||||
assertEquals(ct, et)
|
||||
|
||||
|
|
|
@ -56,9 +56,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
|
|||
assertNotNull(result)
|
||||
|
||||
assertEquals(result.size, items.size)
|
||||
val d = result.find(s =>
|
||||
s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty)
|
||||
)
|
||||
val d = result.find(s => s.getLocalIdentifier.asScala.exists(i => i.getUrl == null || i.getUrl.isEmpty))
|
||||
assertFalse(d.isDefined)
|
||||
println(mapper.writeValueAsString(result.head))
|
||||
|
||||
|
@ -74,9 +72,7 @@ class ScholixGraphTest extends AbstractVocabularyTest {
|
|||
val result: List[(Relation, ScholixSummary)] = inputRelations.lines
|
||||
.sliding(2)
|
||||
.map(s => (s.head, s(1)))
|
||||
.map(p =>
|
||||
(mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary]))
|
||||
)
|
||||
.map(p => (mapper.readValue(p._1, classOf[Relation]), mapper.readValue(p._2, classOf[ScholixSummary])))
|
||||
.toList
|
||||
assertNotNull(result)
|
||||
assertTrue(result.nonEmpty)
|
||||
|
|
Loading…
Reference in New Issue