
applied cherry pick

Sandro La Bruzzo 2024-02-14 11:41:28 +01:00
parent ee1fcb672b
commit 5281f010a5
10 changed files with 3008 additions and 192 deletions


@@ -1,5 +1,6 @@
 package eu.dnetlib.dhp.collection.crossref
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.{field, qualifier, structuredProperty, subject}
@@ -48,6 +49,13 @@ case object Crossref2Oaf {
     json.extract[List[funderInfo]]
   }
 
+  def getIrishId(doi: String): Option[String] = {
+    val id = doi.split("/").last
+    irishFunder
+      .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
+      .map(f => f.id)
+  }
+
   def createCrossrefCollectedFrom(): KeyValue = {
     val cf = new KeyValue
@@ -190,14 +198,8 @@
   }
 
-  def getIrishId(doi: String): Option[String] = {
-    val id = doi.split("/").last
-    irishFunder
-      .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
-      .map(f => f.id)
-  }
-
-  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
+  def mappingResult(result: Result, json: JValue, instanceType:Qualifier, originalType: String): Result = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 
     //MAPPING Crossref DOI into PID
@@ -370,27 +372,13 @@
     instance.setAccessright(
       decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
     )
-    instance.setInstancetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
+    instance.setInstancetype(instanceType)
     //ADD ORIGINAL TYPE to the mapping
     val itm = new InstanceTypeMapping
     itm.setOriginalType(originalType)
     itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
     instance.setInstanceTypeMapping(List(itm).asJava)
-    result.setResourcetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
     instance.setCollectedfrom(createCrossrefCollectedFrom())
 
     if (StringUtils.isNotBlank(issuedDate)) {
@@ -452,7 +440,40 @@
     a
   }
 
-  def convert(input: String): List[Oaf] = {
+  /** *
+    * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
+    * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
+    * to generate one of the following main entities:
+    * - publication
+    * - dataset
+    * - software
+    * - otherresearchproduct
+    *
+    * @param resourceType
+    * @param vocabularies
+    * @return
+    */
+  def getTypeQualifier(
+    resourceType: String,
+    vocabularies: VocabularyGroup
+  ): (Qualifier, Qualifier, String) = {
+    if (resourceType != null && resourceType.nonEmpty) {
+      val typeQualifier =
+        vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
+      if (typeQualifier != null)
+        return (
+          typeQualifier,
+          vocabularies.getSynonymAsQualifier(
+            ModelConstants.DNET_RESULT_TYPOLOGIES,
+            typeQualifier.getClassid
+          ),
+          resourceType
+        )
+    }
+    null
+  }
+
+  def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
@@ -462,17 +483,17 @@
     val objectSubType = (json \ "subtype").extractOrElse[String](null)
     if (objectType == null)
       return resultList
-    val result = generateItemFromType(objectType, objectSubType)
+    val typology =getTypeQualifier(objectType, vocabularies)
+    if (typology == null)
+      return List()
+    val result = generateItemFromType(typology._2)
     if (result == null)
       return List()
-    val cOBJCategory = mappingCrossrefSubType.getOrElse(
-      objectType,
-      mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
-    )
-    val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
-    mappingResult(result, json, cOBJCategory, originalType)
+    mappingResult(result, json, typology._1, typology._3)
     if (result == null || result.getId == null)
       return List()
@@ -490,7 +511,7 @@
     }
 
     result match {
-      case publication: Publication => convertPublication(publication, json, cOBJCategory)
+      case publication: Publication => convertPublication(publication, json, typology._1)
       case dataset: Dataset => convertDataset(dataset)
     }
@@ -720,12 +741,12 @@
     // TODO check if there are other info to map into the Dataset
   }
 
-  def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
+  def convertPublication(publication: Publication, json: JValue, cobjCategory: Qualifier): Unit = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
     //Mapping book
-    if (cobjCategory.toLowerCase.contains("book")) {
+    if (cobjCategory.getClassname.toLowerCase.contains("book")) {
       val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
       if (ISBN.nonEmpty && containerTitles.nonEmpty) {
         val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
@@ -806,12 +827,24 @@
     null
   }
 
-  def generateItemFromType(objectType: String, objectSubType: String): Result = {
-    if (mappingCrossrefType.contains(objectType)) {
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
-        return new Publication()
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))
-        return new Dataset()
+  def generateItemFromType(objectType: Qualifier): Result = {
+    if (objectType.getClassid.equalsIgnoreCase("publication")) {
+      val item = new Publication
+      item.setResourcetype(objectType)
+      return item
+    } else if (objectType.getClassid.equalsIgnoreCase("dataset")) {
+      val item = new Dataset
+      item.setResourcetype(objectType)
+      return item
+    }
+    else if (objectType.getClassid.equalsIgnoreCase("software")){
+      val item = new Software
+      item.setResourcetype(objectType)
+      return item
+    }else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")){
+      val item = new OtherResearchProduct
+      item.setResourcetype(objectType)
+      return item
     }
     null
   }
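Editorial note, not part of the commit: the hunks above replace the hard-coded cobjCategory strings with a vocabulary lookup. A minimal sketch of how the new getTypeQualifier tuple is meant to be read, assuming a VocabularyGroup has already been loaded; the literal "journal-article" is just an illustrative Crossref type and the example qualifier values are assumptions based on the standard dnet vocabularies:

  import eu.dnetlib.dhp.collection.crossref.Crossref2Oaf
  import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
  import eu.dnetlib.dhp.schema.oaf.{Qualifier, Result}

  val vocabularies: VocabularyGroup = ??? // assumed to be loaded beforehand, e.g. via VocabularyGroup.loadVocsFromIS

  // (instance type, result type, original type) resolved through the two vocabularies
  val typology: (Qualifier, Qualifier, String) =
    Crossref2Oaf.getTypeQualifier("journal-article", vocabularies)

  val instanceType: Qualifier = typology._1 // dnet:publication_resource term, e.g. classid "0001" / classname "Article"
  val resultType: Qualifier   = typology._2 // dnet:result_typologies term, e.g. "publication"
  val originalType: String    = typology._3 // the raw Crossref type, recorded in the InstanceTypeMapping

  // the result typology decides which main entity is instantiated
  val result: Result = Crossref2Oaf.generateItemFromType(resultType) // Publication, Dataset, Software or OtherResearchProduct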


@@ -1,6 +1,26 @@
 [
-  {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true},
-  {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source path ", "paramRequired": false},
-  {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the path of the OAF Orcid transformed",
+    "paramRequired": true
+  },
+  {
+    "paramName": "i",
+    "paramLongName": "isLookupUrl",
+    "paramDescription": "the isLookup URL",
+    "paramRequired": true
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the source path ",
+    "paramRequired": false
+  },
+  {
+    "paramName": "m",
+    "paramLongName": "master",
+    "paramDescription": "the master name",
+    "paramRequired": true
+  }
 ]
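Editorial note, not part of the commit: the parameter spec above gains the mandatory isLookupUrl entry. A hypothetical invocation of the job that reads it (SparkMapDumpIntoOAF, changed below), assuming the --paramLongName argument style accepted by ArgumentApplicationParser; every path and URL here is a placeholder:

  import eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF

  SparkMapDumpIntoOAF.main(
    Array(
      "--sourcePath", "/data/doiboost/crossref/dump",                           // placeholder HDFS path
      "--targetPath", "/data/doiboost/crossref/oaf",                            // placeholder HDFS path
      "--isLookupUrl", "http://services.openaire.eu/is/services/isLookUp?wsdl", // placeholder IS lookup endpoint
      "--master", "yarn"
    )
  )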


@@ -1,5 +1,6 @@
 package eu.dnetlib.doiboost.crossref
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
 import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
@@ -47,59 +48,6 @@ case object Crossref2Oaf {
     json.extract[List[funderInfo]]
   }
 
-  val mappingCrossrefType = Map(
-    "book-section" -> "publication",
-    "book" -> "publication",
-    "book-chapter" -> "publication",
-    "book-part" -> "publication",
-    "book-series" -> "publication",
-    "book-set" -> "publication",
-    "book-track" -> "publication",
-    "edited-book" -> "publication",
-    "reference-book" -> "publication",
-    "monograph" -> "publication",
-    "journal-article" -> "publication",
-    "dissertation" -> "publication",
-    "other" -> "publication",
-    "peer-review" -> "publication",
-    "proceedings" -> "publication",
-    "proceedings-article" -> "publication",
-    "reference-entry" -> "publication",
-    "report" -> "publication",
-    "report-series" -> "publication",
-    "standard" -> "publication",
-    "standard-series" -> "publication",
-    "posted-content" -> "publication",
-    "dataset" -> "dataset"
-  )
-
-  val mappingCrossrefSubType = Map(
-    "book-section" -> "0013 Part of book or chapter of book",
-    "book" -> "0002 Book",
-    "book-chapter" -> "0013 Part of book or chapter of book",
-    "book-part" -> "0013 Part of book or chapter of book",
-    "book-series" -> "0002 Book",
-    "book-set" -> "0002 Book",
-    "book-track" -> "0002 Book",
-    "edited-book" -> "0002 Book",
-    "reference-book" -> "0002 Book",
-    "monograph" -> "0002 Book",
-    "journal-article" -> "0001 Article",
-    "dissertation" -> "0044 Thesis",
-    "other" -> "0038 Other literature type",
-    "peer-review" -> "0015 Review",
-    "proceedings" -> "0004 Conference object",
-    "proceedings-article" -> "0004 Conference object",
-    "reference-entry" -> "0013 Part of book or chapter of book",
-    "report" -> "0017 Report",
-    "report-series" -> "0017 Report",
-    "standard" -> "0038 Other literature type",
-    "standard-series" -> "0038 Other literature type",
-    "dataset" -> "0021 Dataset",
-    "preprint" -> "0016 Preprint",
-    "report" -> "0017 Report"
-  )
-
   def getIrishId(doi: String): Option[String] = {
     val id = doi.split("/").last
     irishFunder
@@ -107,7 +55,9 @@ case object Crossref2Oaf {
       .map(f => f.id)
   }
 
-  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
+  def mappingResult(result: Result, json: JValue, instanceType:Qualifier, originalType: String): Result = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     //MAPPING Crossref DOI into PID
@@ -275,27 +225,20 @@
     instance.setAccessright(
       decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
     )
-    instance.setInstancetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
+    instance.setInstancetype(instanceType)
     //ADD ORIGINAL TYPE to the mapping
     val itm = new InstanceTypeMapping
     itm.setOriginalType(originalType)
     itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
     instance.setInstanceTypeMapping(List(itm).asJava)
-    result.setResourcetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
+//    result.setResourcetype(
+//      OafMapperUtils.qualifier(
+//        cobjCategory.substring(0, 4),
+//        cobjCategory.substring(5),
+//        ModelConstants.DNET_PUBLICATION_RESOURCE,
+//        ModelConstants.DNET_PUBLICATION_RESOURCE
+//      )
+//    )
     instance.setCollectedfrom(createCrossrefCollectedFrom())
 
     if (StringUtils.isNotBlank(issuedDate)) {
@@ -354,7 +297,40 @@ case object Crossref2Oaf {
     a
   }
 
-  def convert(input: String): List[Oaf] = {
+  /** *
+    * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
+    * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
+    * to generate one of the following main entities:
+    * - publication
+    * - dataset
+    * - software
+    * - otherresearchproduct
+    *
+    * @param resourceType
+    * @param vocabularies
+    * @return
+    */
+  def getTypeQualifier(
+    resourceType: String,
+    vocabularies: VocabularyGroup
+  ): (Qualifier, Qualifier, String) = {
+    if (resourceType != null && resourceType.nonEmpty) {
+      val typeQualifier =
+        vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
+      if (typeQualifier != null)
+        return (
+          typeQualifier,
+          vocabularies.getSynonymAsQualifier(
+            ModelConstants.DNET_RESULT_TYPOLOGIES,
+            typeQualifier.getClassid
+          ),
+          resourceType
+        )
+    }
+    null
+  }
+
+  def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
@@ -364,17 +340,17 @@ case object Crossref2Oaf {
     val objectSubType = (json \ "subtype").extractOrElse[String](null)
     if (objectType == null)
       return resultList
-    val result = generateItemFromType(objectType, objectSubType)
+    val typology =getTypeQualifier(objectType, vocabularies)
+    if (typology == null)
+      return List()
+    val result = generateItemFromType(typology._2)
     if (result == null)
       return List()
-    val cOBJCategory = mappingCrossrefSubType.getOrElse(
-      objectType,
-      mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
-    )
-    val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
-    mappingResult(result, json, cOBJCategory, originalType)
+    mappingResult(result, json, typology._1, typology._3)
     if (result == null || result.getId == null)
       return List()
@@ -392,7 +368,7 @@ case object Crossref2Oaf {
     }
 
     result match {
-      case publication: Publication => convertPublication(publication, json, cOBJCategory)
+      case publication: Publication => convertPublication(publication, json, typology._1)
       case dataset: Dataset => convertDataset(dataset)
     }
@@ -622,12 +598,12 @@ case object Crossref2Oaf {
     // TODO check if there are other info to map into the Dataset
   }
 
-  def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
+  def convertPublication(publication: Publication, json: JValue, cobjCategory: Qualifier): Unit = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
     //Mapping book
-    if (cobjCategory.toLowerCase.contains("book")) {
+    if (cobjCategory.getClassname.toLowerCase.contains("book")) {
       val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
       if (ISBN.nonEmpty && containerTitles.nonEmpty) {
         val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
@@ -708,12 +684,24 @@ case object Crossref2Oaf {
     null
   }
 
-  def generateItemFromType(objectType: String, objectSubType: String): Result = {
-    if (mappingCrossrefType.contains(objectType)) {
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
-        return new Publication()
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))
-        return new Dataset()
+  def generateItemFromType(objectType: Qualifier): Result = {
+    if (objectType.getClassid.equalsIgnoreCase("publication")) {
+      val item = new Publication
+      item.setResourcetype(objectType)
+      return item
+    } else if (objectType.getClassid.equalsIgnoreCase("dataset")) {
+      val item = new Dataset
+      item.setResourcetype(objectType)
+      return item
+    }
+    else if (objectType.getClassid.equalsIgnoreCase("software")){
+      val item = new Software
+      item.setResourcetype(objectType)
+      return item
+    }else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")){
+      val item = new OtherResearchProduct
+      item.setResourcetype(objectType)
+      return item
     }
     null
   }


@@ -1,8 +1,10 @@
 package eu.dnetlib.doiboost.crossref
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.oaf
 import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
+import eu.dnetlib.dhp.utils.ISLookupClientFactory
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql._
@@ -40,11 +42,17 @@ object SparkMapDumpIntoOAF {
     implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]
 
     val targetPath = parser.get("targetPath")
+    val isLookupUrl: String = parser.get("isLookupUrl")
+    logger.info("isLookupUrl: {}", isLookupUrl)
+    val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
+    val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
+    require(vocabularies != null)
 
     spark.read
       .load(parser.get("sourcePath"))
       .as[CrossrefDT]
-      .flatMap(k => Crossref2Oaf.convert(k.json))
+      .flatMap(k => Crossref2Oaf.convert(k.json, vocabularies))
       .filter(o => o != null)
       .write
       .mode(SaveMode.Overwrite)
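Editorial note, not part of the commit: in the hunk above the vocabularies are resolved once on the driver and then captured by the flatMap closure, so every Crossref record is converted against the same VocabularyGroup. A minimal sketch of that wiring in isolation, with a placeholder endpoint and record:

  import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
  import eu.dnetlib.dhp.schema.oaf.Oaf
  import eu.dnetlib.dhp.utils.ISLookupClientFactory
  import eu.dnetlib.doiboost.crossref.Crossref2Oaf

  val isLookupUrl = "http://services.openaire.eu/is/services/isLookUp?wsdl" // placeholder endpoint
  val isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl)
  val vocabularies: VocabularyGroup = VocabularyGroup.loadVocsFromIS(isLookUpService)

  val crossrefRecord: String = "{ ... }" // placeholder: a single Crossref JSON record
  val oafObjects: List[Oaf] = Crossref2Oaf.convert(crossrefRecord, vocabularies)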


@@ -0,0 +1,51 @@
+package eu.dnetlib.dhp.aggregation;
+
+import static org.mockito.Mockito.lenient;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.mockito.Mock;
+
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+public abstract class AbstractVocabularyTest {
+
+    @Mock
+    protected ISLookUpService isLookUpService;
+
+    protected VocabularyGroup vocabularies;
+
+    public void setUpVocabulary() throws ISLookUpException, IOException {
+        lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
+        lenient()
+            .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
+            .thenReturn(synonyms());
+        vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
+    }
+
+    private static List<String> vocs() throws IOException {
+        return IOUtils
+            .readLines(
+                Objects
+                    .requireNonNull(
+                        AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/terms.txt")));
+    }
+
+    private static List<String> synonyms() throws IOException {
+        return IOUtils
+            .readLines(
+                Objects
+                    .requireNonNull(
+                        AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/synonyms.txt")));
+    }
+}

File diff suppressed because it is too large


@@ -57,7 +57,7 @@
       ]
     ]
   },
-  "type": "posted-content",
+  "type": "journal-article",
   "URL": "http://dx.doi.org/10.1101/030080",
   "is-referenced-by-count": 2,
   "link": [


@@ -1,37 +1,43 @@
 package eu.dnetlib.dhp.doiboost.crossref
 
+import com.fasterxml.jackson.databind.SerializationFeature
+import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
 import eu.dnetlib.dhp.utils.DHPUtils
 import eu.dnetlib.doiboost.crossref.Crossref2Oaf
-import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
 import org.json4s
+import com.fasterxml.jackson.databind.ObjectMapper
 import org.json4s.JsonAST.{JField, JObject, JString}
 import org.json4s.{DefaultFormats, JValue}
 import org.json4s.jackson.JsonMethods
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.extension.ExtendWith
+import org.junit.jupiter.api.{BeforeEach, Test}
+import org.mockito.junit.jupiter.MockitoExtension
 import org.slf4j.{Logger, LoggerFactory}
+import java.nio.file.Files
 
 import scala.collection.JavaConverters._
 import scala.io.Source
 import scala.util.matching.Regex
 
-class CrossrefMappingTest {
+@ExtendWith(Array(classOf[MockitoExtension]))
+class CrossrefMappingTest extends AbstractVocabularyTest{
 
   val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
   val mapper = new ObjectMapper()
 
+  @BeforeEach
+  def setUp(): Unit = {
+    super.setUpVocabulary()
+  }
+
   @Test
   def testMissingAuthorParser():Unit = {
-    val json: String = Source
-      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json"))
-      .mkString
-    val result = Crossref2Oaf.convert(json)
-    result
-      .filter(o => o.isInstanceOf[Publication])
-      .map(p => p.asInstanceOf[Publication])
-      .foreach(p => assertTrue(p.getAuthor.size() > 0))
+    val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString
+    val result = Crossref2Oaf.convert(json, vocabularies)
+    result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0))
   }
 
   @Test
@@ -50,13 +56,13 @@ class CrossrefMappingTest {
     for (line <- funder_doi.linesWithSeparators.map(l => l.stripLineEnd)) {
       val json = template.replace("%s", line)
-      val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+      val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
       assertTrue(resultList.nonEmpty)
       checkRelation(resultList)
     }
     for (line <- funder_name.linesWithSeparators.map(l => l.stripLineEnd)) {
       val json = template.replace("%s", line)
-      val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+      val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
       assertTrue(resultList.nonEmpty)
       checkRelation(resultList)
     }
@@ -96,7 +102,7 @@ class CrossrefMappingTest {
       Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
     assertNotNull(json)
     assertFalse(json.isEmpty)
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Result])
@@ -115,14 +121,14 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty)
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Result])
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    items.foreach(p => println(mapper.writeValueAsString(p)))
+    items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
   }
@@ -142,7 +148,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty)
-    val result: List[Oaf] = Crossref2Oaf.convert(json)
+    val result: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(result.nonEmpty)
@@ -163,8 +169,8 @@ class CrossrefMappingTest {
     assertEquals(doisReference.size, relationList.size)
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    relationList.foreach(p => println(mapper.writeValueAsString(p)))
+    relationList.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
   }
 
   @Test
@@ -178,14 +184,14 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Result])
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    items.foreach(p => println(mapper.writeValueAsString(p)))
+    items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
   }
@@ -194,18 +200,17 @@ class CrossrefMappingTest {
     val json = Source
      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json"))
      .mkString
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Result])
-    items.foreach(p => logger.info(mapper.writeValueAsString(p)))
+    items.foreach(p => logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
   }
@@ -235,7 +240,7 @@ class CrossrefMappingTest {
     assertFalse(json.isEmpty)
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val rels: List[Relation] =
@@ -255,7 +260,7 @@ class CrossrefMappingTest {
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -266,7 +271,7 @@ class CrossrefMappingTest {
     val result: Result = items.head.asInstanceOf[Result]
     assertNotNull(result)
-    logger.info(mapper.writeValueAsString(result));
+    logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(result));
     assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
     assertNotNull(
@@ -331,7 +336,7 @@ class CrossrefMappingTest {
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -415,7 +420,7 @@ class CrossrefMappingTest {
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -463,7 +468,7 @@ class CrossrefMappingTest {
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -542,7 +547,7 @@ class CrossrefMappingTest {
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -568,7 +573,7 @@ class CrossrefMappingTest {
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -578,7 +583,8 @@ class CrossrefMappingTest {
     assert(items.size == 1)
     val result: Result = items.head.asInstanceOf[Publication]
     assertNotNull(result)
-    logger.info(mapper.writeValueAsString(result));
+    logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(result));
   }
 
   @Test
@@ -591,7 +597,7 @@ class CrossrefMappingTest {
     val line: String =
       "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
     val json = template.replace("%s", line)
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Publication])
     val result: Result = items.head.asInstanceOf[Publication]
@@ -610,7 +616,7 @@ class CrossrefMappingTest {
      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json"))
      .mkString
-    val resultList: List[Oaf] = Crossref2Oaf.convert(template)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(template, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Publication])
     val result: Result = items.head.asInstanceOf[Publication]
@@ -634,14 +640,14 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result]
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
     assertTrue(
       item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))
@@ -664,7 +670,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -681,8 +687,8 @@ class CrossrefMappingTest {
     assertTrue(
       item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
     )
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
   }
@@ -699,7 +705,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -716,8 +722,7 @@ class CrossrefMappingTest {
     assertTrue(
       item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
     )
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
   }
@@ -734,7 +739,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -751,8 +756,7 @@ class CrossrefMappingTest {
       item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
     )
     assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
   }
@@ -769,7 +773,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -786,8 +790,8 @@ class CrossrefMappingTest {
      item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
     )
     assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
   }
@@ -802,7 +806,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
@@ -812,9 +816,8 @@ class CrossrefMappingTest {
     assertEquals(1, item.getInstance().get(0).getUrl().size())
     assertEquals(
       "https://doi.org/10.1016/j.jas.2019.105013",
-      item.getInstance().get(0).getUrl().get(0)
+      item.getInstance().get(0).getUrl.get(0)
     )
-    //println(mapper.writeValueAsString(item))
   }