forked from D-Net/dnet-hadoop

commit 5281f010a5 (parent ee1fcb672b): applied cherry pick
@@ -165,7 +165,7 @@ public class OaiIterator implements Iterator<String> {
 		} catch (final DocumentException e1) {
 			final String resumptionToken = extractResumptionToken(xml);
 			if (resumptionToken == null) {
 				report.put(e1.getClass().getName(), e1.getMessage());
-				report.put(e1.getClass().getName(), e1.getMessage());
+				throw new CollectorException("Error parsing cleaned document:\n" + cleaned, e1);
 			}
 			return resumptionToken;
@@ -1,5 +1,6 @@
 package eu.dnetlib.dhp.collection.crossref
 
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.{field, qualifier, structuredProperty, subject}
@@ -48,6 +49,13 @@ case object Crossref2Oaf {
     json.extract[List[funderInfo]]
   }
 
+  def getIrishId(doi: String): Option[String] = {
+    val id = doi.split("/").last
+    irishFunder
+      .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
+      .map(f => f.id)
+  }
+
   def createCrossrefCollectedFrom(): KeyValue = {
 
     val cf = new KeyValue
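
For orientation, a hedged sketch (not part of the commit) of how the getIrishId helper added above behaves; the irishFunder list is defined elsewhere in this object and the funder id below is a placeholder:

    // the DOI suffix is compared case-insensitively against irishFunder ids and synonyms
    Crossref2Oaf.getIrishId("10.13039/someFunderId")   // Some("someFunderId") if listed, None otherwise
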
@@ -190,14 +198,8 @@ case object Crossref2Oaf {
   }
 
 
-  def getIrishId(doi: String): Option[String] = {
-    val id = doi.split("/").last
-    irishFunder
-      .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
-      .map(f => f.id)
-  }
-
-  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
+  def mappingResult(result: Result, json: JValue, instanceType:Qualifier, originalType: String): Result = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 
     //MAPPING Crossref DOI into PID
@@ -370,27 +372,13 @@ case object Crossref2Oaf {
     instance.setAccessright(
       decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
     )
-    instance.setInstancetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
+    instance.setInstancetype(instanceType)
+
     //ADD ORIGINAL TYPE to the mapping
     val itm = new InstanceTypeMapping
     itm.setOriginalType(originalType)
     itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
     instance.setInstanceTypeMapping(List(itm).asJava)
-    result.setResourcetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
 
     instance.setCollectedfrom(createCrossrefCollectedFrom())
     if (StringUtils.isNotBlank(issuedDate)) {
@@ -452,7 +440,40 @@ case object Crossref2Oaf {
       a
     }
 
-  def convert(input: String): List[Oaf] = {
+  /** *
+    * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
+    * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
+    * to generate one of the following main entities:
+    *  - publication
+    *  - dataset
+    *  - software
+    *  - otherresearchproduct
+    *
+    * @param resourceType
+    * @param vocabularies
+    * @return
+    */
+  def getTypeQualifier(
+    resourceType: String,
+    vocabularies: VocabularyGroup
+  ): (Qualifier, Qualifier, String) = {
+    if (resourceType != null && resourceType.nonEmpty) {
+      val typeQualifier =
+        vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
+      if (typeQualifier != null)
+        return (
+          typeQualifier,
+          vocabularies.getSynonymAsQualifier(
+            ModelConstants.DNET_RESULT_TYPOLOGIES,
+            typeQualifier.getClassid
+          ),
+          resourceType
+        )
+    }
+    null
+  }
+
+  def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
 
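
As a reading aid, a minimal sketch (not part of the commit) of how the (Qualifier, Qualifier, String) tuple returned by getTypeQualifier is consumed further down in convert; "journal-article" and json stand in for the parsed Crossref record:

    // ._1 instance type (dnet:publication_resource), ._2 result typology (dnet:result_typologies), ._3 original Crossref type
    val typology = getTypeQualifier("journal-article", vocabularies)
    val result   = generateItemFromType(typology._2)   // Publication, Dataset, Software or OtherResearchProduct
    mappingResult(result, json, typology._1, typology._3)
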
@@ -462,17 +483,17 @@ case object Crossref2Oaf {
     val objectSubType = (json \ "subtype").extractOrElse[String](null)
     if (objectType == null)
       return resultList
+    val typology =getTypeQualifier(objectType, vocabularies)
 
-    val result = generateItemFromType(objectType, objectSubType)
+    if (typology == null)
+      return List()
+
+    val result = generateItemFromType(typology._2)
     if (result == null)
       return List()
-    val cOBJCategory = mappingCrossrefSubType.getOrElse(
-      objectType,
-      mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
-    )
 
-    val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
-    mappingResult(result, json, cOBJCategory, originalType)
+    mappingResult(result, json, typology._1, typology._3)
+
     if (result == null || result.getId == null)
       return List()
 
@@ -490,7 +511,7 @@ case object Crossref2Oaf {
     }
 
     result match {
-      case publication: Publication => convertPublication(publication, json, cOBJCategory)
+      case publication: Publication => convertPublication(publication, json, typology._1)
       case dataset: Dataset => convertDataset(dataset)
     }
 
@@ -720,12 +741,12 @@ case object Crossref2Oaf {
     // TODO check if there are other info to map into the Dataset
   }
 
-  def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
+  def convertPublication(publication: Publication, json: JValue, cobjCategory: Qualifier): Unit = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
 
     //Mapping book
-    if (cobjCategory.toLowerCase.contains("book")) {
+    if (cobjCategory.getClassname.toLowerCase.contains("book")) {
       val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
       if (ISBN.nonEmpty && containerTitles.nonEmpty) {
         val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
@@ -806,12 +827,24 @@ case object Crossref2Oaf {
     null
   }
 
-  def generateItemFromType(objectType: String, objectSubType: String): Result = {
-    if (mappingCrossrefType.contains(objectType)) {
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
-        return new Publication()
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))
-        return new Dataset()
+  def generateItemFromType(objectType: Qualifier): Result = {
+    if (objectType.getClassid.equalsIgnoreCase("publication")) {
+      val item = new Publication
+      item.setResourcetype(objectType)
+      return item
+    } else if (objectType.getClassid.equalsIgnoreCase("dataset")) {
+      val item = new Dataset
+      item.setResourcetype(objectType)
+      return item
+    }
+    else if (objectType.getClassid.equalsIgnoreCase("software")){
+      val item = new Software
+      item.setResourcetype(objectType)
+      return item
+    }else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")){
+      val item = new OtherResearchProduct
+      item.setResourcetype(objectType)
+      return item
     }
     null
   }
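
A hedged illustration (not in the diff) of the new generateItemFromType contract, building the qualifier the same way getTypeQualifier does; the "0001" synonym is an assumption used only for the example:

    val resultType: Qualifier =
      vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, "0001")
    val item: Result = generateItemFromType(resultType)
    // item is a Publication/Dataset/Software/OtherResearchProduct with resourcetype already set, or null for an unknown classid
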
@@ -1,6 +1,26 @@
 [
-  {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true},
-  {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source path ", "paramRequired": false},
-  {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}
+  {
+    "paramName": "t",
+    "paramLongName": "targetPath",
+    "paramDescription": "the path of the OAF Orcid transformed",
+    "paramRequired": true
+  },
+  {
+    "paramName": "i",
+    "paramLongName": "isLookupUrl",
+    "paramDescription": "the isLookup URL",
+    "paramRequired": true
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the source path ",
+    "paramRequired": false
+  },
+  {
+    "paramName": "m",
+    "paramLongName": "master",
+    "paramDescription": "the master name",
+    "paramRequired": true
+  }
 ]
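
For context, a hedged example of an argument vector matching the updated parameter file; option names follow the paramLongName values above, while the paths and the IS lookup URL are placeholders and the workflow wiring itself is not part of this diff:

    val args = Array(
      "--sourcePath", "/path/to/crossref/dump",
      "--targetPath", "/path/to/oaf/output",
      "--isLookupUrl", "<IS lookup service URL>",
      "--master", "yarn"
    )
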
@@ -1,5 +1,6 @@
 package eu.dnetlib.doiboost.crossref
 
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
 import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
@@ -47,67 +48,16 @@ case object Crossref2Oaf {
     json.extract[List[funderInfo]]
   }
 
-  val mappingCrossrefType = Map(
-    "book-section" -> "publication",
-    "book" -> "publication",
-    "book-chapter" -> "publication",
-    "book-part" -> "publication",
-    "book-series" -> "publication",
-    "book-set" -> "publication",
-    "book-track" -> "publication",
-    "edited-book" -> "publication",
-    "reference-book" -> "publication",
-    "monograph" -> "publication",
-    "journal-article" -> "publication",
-    "dissertation" -> "publication",
-    "other" -> "publication",
-    "peer-review" -> "publication",
-    "proceedings" -> "publication",
-    "proceedings-article" -> "publication",
-    "reference-entry" -> "publication",
-    "report" -> "publication",
-    "report-series" -> "publication",
-    "standard" -> "publication",
-    "standard-series" -> "publication",
-    "posted-content" -> "publication",
-    "dataset" -> "dataset"
-  )
-
-  val mappingCrossrefSubType = Map(
-    "book-section" -> "0013 Part of book or chapter of book",
-    "book" -> "0002 Book",
-    "book-chapter" -> "0013 Part of book or chapter of book",
-    "book-part" -> "0013 Part of book or chapter of book",
-    "book-series" -> "0002 Book",
-    "book-set" -> "0002 Book",
-    "book-track" -> "0002 Book",
-    "edited-book" -> "0002 Book",
-    "reference-book" -> "0002 Book",
-    "monograph" -> "0002 Book",
-    "journal-article" -> "0001 Article",
-    "dissertation" -> "0044 Thesis",
-    "other" -> "0038 Other literature type",
-    "peer-review" -> "0015 Review",
-    "proceedings" -> "0004 Conference object",
-    "proceedings-article" -> "0004 Conference object",
-    "reference-entry" -> "0013 Part of book or chapter of book",
-    "report" -> "0017 Report",
-    "report-series" -> "0017 Report",
-    "standard" -> "0038 Other literature type",
-    "standard-series" -> "0038 Other literature type",
-    "dataset" -> "0021 Dataset",
-    "preprint" -> "0016 Preprint",
-    "report" -> "0017 Report"
-  )
-
   def getIrishId(doi: String): Option[String] = {
     val id = doi.split("/").last
     irishFunder
       .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
       .map(f => f.id)
   }
 
-  def mappingResult(result: Result, json: JValue, cobjCategory: String, originalType: String): Result = {
+  def mappingResult(result: Result, json: JValue, instanceType:Qualifier, originalType: String): Result = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
 
     //MAPPING Crossref DOI into PID
@@ -275,27 +225,20 @@ case object Crossref2Oaf {
     instance.setAccessright(
       decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)
     )
-    instance.setInstancetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
+    instance.setInstancetype(instanceType)
     //ADD ORIGINAL TYPE to the mapping
     val itm = new InstanceTypeMapping
     itm.setOriginalType(originalType)
     itm.setVocabularyName(ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1)
     instance.setInstanceTypeMapping(List(itm).asJava)
-    result.setResourcetype(
-      OafMapperUtils.qualifier(
-        cobjCategory.substring(0, 4),
-        cobjCategory.substring(5),
-        ModelConstants.DNET_PUBLICATION_RESOURCE,
-        ModelConstants.DNET_PUBLICATION_RESOURCE
-      )
-    )
+//    result.setResourcetype(
+//      OafMapperUtils.qualifier(
+//        cobjCategory.substring(0, 4),
+//        cobjCategory.substring(5),
+//        ModelConstants.DNET_PUBLICATION_RESOURCE,
+//        ModelConstants.DNET_PUBLICATION_RESOURCE
+//      )
+//    )
 
     instance.setCollectedfrom(createCrossrefCollectedFrom())
     if (StringUtils.isNotBlank(issuedDate)) {
@@ -354,7 +297,40 @@ case object Crossref2Oaf {
       a
     }
 
-  def convert(input: String): List[Oaf] = {
+  /** *
+    * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
+    * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
+    * to generate one of the following main entities:
+    *  - publication
+    *  - dataset
+    *  - software
+    *  - otherresearchproduct
+    *
+    * @param resourceType
+    * @param vocabularies
+    * @return
+    */
+  def getTypeQualifier(
+    resourceType: String,
+    vocabularies: VocabularyGroup
+  ): (Qualifier, Qualifier, String) = {
+    if (resourceType != null && resourceType.nonEmpty) {
+      val typeQualifier =
+        vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
+      if (typeQualifier != null)
+        return (
+          typeQualifier,
+          vocabularies.getSynonymAsQualifier(
+            ModelConstants.DNET_RESULT_TYPOLOGIES,
+            typeQualifier.getClassid
+          ),
+          resourceType
+        )
+    }
+    null
+  }
+
+  def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     lazy val json: json4s.JValue = parse(input)
 
@@ -364,17 +340,17 @@ case object Crossref2Oaf {
     val objectSubType = (json \ "subtype").extractOrElse[String](null)
     if (objectType == null)
       return resultList
+    val typology =getTypeQualifier(objectType, vocabularies)
 
-    val result = generateItemFromType(objectType, objectSubType)
+    if (typology == null)
+      return List()
+
+    val result = generateItemFromType(typology._2)
     if (result == null)
       return List()
-    val cOBJCategory = mappingCrossrefSubType.getOrElse(
-      objectType,
-      mappingCrossrefSubType.getOrElse(objectSubType, "0038 Other literature type")
-    )
 
-    val originalType = if (mappingCrossrefSubType.contains(objectType)) objectType else objectSubType
-    mappingResult(result, json, cOBJCategory, originalType)
 
+    mappingResult(result, json, typology._1, typology._3)
     if (result == null || result.getId == null)
       return List()
 
@@ -392,7 +368,7 @@ case object Crossref2Oaf {
     }
 
     result match {
-      case publication: Publication => convertPublication(publication, json, cOBJCategory)
+      case publication: Publication => convertPublication(publication, json, typology._1)
       case dataset: Dataset => convertDataset(dataset)
     }
 
@@ -622,12 +598,12 @@ case object Crossref2Oaf {
     // TODO check if there are other info to map into the Dataset
   }
 
-  def convertPublication(publication: Publication, json: JValue, cobjCategory: String): Unit = {
+  def convertPublication(publication: Publication, json: JValue, cobjCategory: Qualifier): Unit = {
     implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
     val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct
 
     //Mapping book
-    if (cobjCategory.toLowerCase.contains("book")) {
+    if (cobjCategory.getClassname.toLowerCase.contains("book")) {
       val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
       if (ISBN.nonEmpty && containerTitles.nonEmpty) {
         val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
@@ -708,12 +684,24 @@ case object Crossref2Oaf {
     null
   }
 
-  def generateItemFromType(objectType: String, objectSubType: String): Result = {
-    if (mappingCrossrefType.contains(objectType)) {
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("publication"))
-        return new Publication()
-      if (mappingCrossrefType(objectType).equalsIgnoreCase("dataset"))
-        return new Dataset()
+  def generateItemFromType(objectType: Qualifier): Result = {
+    if (objectType.getClassid.equalsIgnoreCase("publication")) {
+      val item = new Publication
+      item.setResourcetype(objectType)
+      return item
+    } else if (objectType.getClassid.equalsIgnoreCase("dataset")) {
+      val item = new Dataset
+      item.setResourcetype(objectType)
+      return item
+    }
+    else if (objectType.getClassid.equalsIgnoreCase("software")){
+      val item = new Software
+      item.setResourcetype(objectType)
+      return item
+    }else if (objectType.getClassid.equalsIgnoreCase("OtherResearchProduct")){
+      val item = new OtherResearchProduct
+      item.setResourcetype(objectType)
+      return item
     }
     null
   }
@@ -1,8 +1,10 @@
 package eu.dnetlib.doiboost.crossref
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
 import eu.dnetlib.dhp.schema.oaf
 import eu.dnetlib.dhp.schema.oaf.{Oaf, Publication, Relation, Dataset => OafDataset}
+import eu.dnetlib.dhp.utils.ISLookupClientFactory
 import org.apache.commons.io.IOUtils
 import org.apache.spark.SparkConf
 import org.apache.spark.sql._

@@ -40,11 +42,17 @@ object SparkMapDumpIntoOAF {
     implicit val mapEncoderDatasets: Encoder[oaf.Dataset] = Encoders.kryo[OafDataset]
 
     val targetPath = parser.get("targetPath")
+    val isLookupUrl: String = parser.get("isLookupUrl")
+    logger.info("isLookupUrl: {}", isLookupUrl)
+    val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
+    val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService)
+    require(vocabularies != null)
+
 
     spark.read
       .load(parser.get("sourcePath"))
       .as[CrossrefDT]
-      .flatMap(k => Crossref2Oaf.convert(k.json))
+      .flatMap(k => Crossref2Oaf.convert(k.json, vocabularies))
       .filter(o => o != null)
      .write
      .mode(SaveMode.Overwrite)
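
In short, the job now resolves the vocabularies once from the IS lookup service and threads them into every conversion. A minimal sketch of the new call contract, with placeholder names for the lookup URL and the record JSON:

    val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl)
    val vocabularies    = VocabularyGroup.loadVocsFromIS(isLookupService)
    val oafs: List[Oaf] = Crossref2Oaf.convert(crossrefRecordJson, vocabularies)   // previously convert(crossrefRecordJson)
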
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.dhp.aggregation;
+
+import static org.mockito.Mockito.lenient;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
+import org.mockito.Mock;
+
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+public abstract class AbstractVocabularyTest {
+
+    @Mock
+    protected ISLookUpService isLookUpService;
+
+    protected VocabularyGroup vocabularies;
+
+    public void setUpVocabulary() throws ISLookUpException, IOException {
+        lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
+
+        lenient()
+            .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
+            .thenReturn(synonyms());
+        vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
+    }
+
+    private static List<String> vocs() throws IOException {
+        return IOUtils
+            .readLines(
+                Objects
+                    .requireNonNull(
+                        AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/terms.txt")));
+    }
+
+    private static List<String> synonyms() throws IOException {
+        return IOUtils
+            .readLines(
+                Objects
+                    .requireNonNull(
+                        AbstractVocabularyTest.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/synonyms.txt")));
+    }
+
+
+}
Two file diffs suppressed because they are too large.
@@ -57,7 +57,7 @@
       ]
     ]
   },
-  "type": "posted-content",
+  "type": "journal-article",
   "URL": "http://dx.doi.org/10.1101/030080",
   "is-referenced-by-count": 2,
   "link": [
@@ -1,37 +1,43 @@
 package eu.dnetlib.dhp.doiboost.crossref
 
-import com.fasterxml.jackson.databind.SerializationFeature
+import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest
 import eu.dnetlib.dhp.schema.common.ModelConstants
 import eu.dnetlib.dhp.schema.oaf._
 import eu.dnetlib.dhp.utils.DHPUtils
 import eu.dnetlib.doiboost.crossref.Crossref2Oaf
-import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig}
 import org.json4s
+import com.fasterxml.jackson.databind.ObjectMapper
 import org.json4s.JsonAST.{JField, JObject, JString}
 import org.json4s.{DefaultFormats, JValue}
 import org.json4s.jackson.JsonMethods
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.extension.ExtendWith
+import org.junit.jupiter.api.{BeforeEach, Test}
+import org.mockito.junit.jupiter.MockitoExtension
 import org.slf4j.{Logger, LoggerFactory}
 
 import java.nio.file.Files
 import scala.collection.JavaConverters._
 import scala.io.Source
 import scala.util.matching.Regex
 
-class CrossrefMappingTest {
+@ExtendWith(Array(classOf[MockitoExtension]))
+class CrossrefMappingTest extends AbstractVocabularyTest{
 
   val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
   val mapper = new ObjectMapper()
 
+  @BeforeEach
+  def setUp(): Unit = {
+    super.setUpVocabulary()
+  }
+
   @Test
-  def testMissingAuthorParser(): Unit = {
-    val json: String = Source
-      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json"))
-      .mkString
-    val result = Crossref2Oaf.convert(json)
-    result
-      .filter(o => o.isInstanceOf[Publication])
-      .map(p => p.asInstanceOf[Publication])
-      .foreach(p => assertTrue(p.getAuthor.size() > 0))
+  def testMissingAuthorParser():Unit = {
+    val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString
+    val result = Crossref2Oaf.convert(json, vocabularies)
+    result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0))
   }
 
   @Test
@@ -50,13 +56,13 @@ class CrossrefMappingTest {
 
     for (line <- funder_doi.linesWithSeparators.map(l => l.stripLineEnd)) {
       val json = template.replace("%s", line)
-      val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+      val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
       assertTrue(resultList.nonEmpty)
       checkRelation(resultList)
     }
     for (line <- funder_name.linesWithSeparators.map(l => l.stripLineEnd)) {
       val json = template.replace("%s", line)
-      val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+      val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
       assertTrue(resultList.nonEmpty)
       checkRelation(resultList)
     }
@@ -96,7 +102,7 @@ class CrossrefMappingTest {
       Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
     assertNotNull(json)
     assertFalse(json.isEmpty)
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
 
     val items = resultList.filter(p => p.isInstanceOf[Result])

@@ -115,14 +121,14 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty)
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 
     val items = resultList.filter(p => p.isInstanceOf[Result])
 
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    items.foreach(p => println(mapper.writeValueAsString(p)))
+
+    items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
 
   }
 
@@ -142,7 +148,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty)
 
-    val result: List[Oaf] = Crossref2Oaf.convert(json)
+    val result: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(result.nonEmpty)
 

@@ -163,8 +169,8 @@ class CrossrefMappingTest {
 
     assertEquals(doisReference.size, relationList.size)
 
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    relationList.foreach(p => println(mapper.writeValueAsString(p)))
+
+    relationList.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
   }
 
   @Test
@@ -178,14 +184,14 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 
     val items = resultList.filter(p => p.isInstanceOf[Result])
 
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    items.foreach(p => println(mapper.writeValueAsString(p)))
+
+    items.foreach(p => println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
 
   }
 

@@ -194,18 +200,17 @@ class CrossrefMappingTest {
     val json = Source
       .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/prwTest.json"))
       .mkString
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
 
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 
     val items = resultList.filter(p => p.isInstanceOf[Result])
 
-    items.foreach(p => logger.info(mapper.writeValueAsString(p)))
+    items.foreach(p => logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p)))
 
   }
 
@@ -235,7 +240,7 @@ class CrossrefMappingTest {
 
     assertFalse(json.isEmpty)
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
     val rels: List[Relation] =

@@ -255,7 +260,7 @@ class CrossrefMappingTest {
 
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -266,7 +271,7 @@ class CrossrefMappingTest {
     val result: Result = items.head.asInstanceOf[Result]
     assertNotNull(result)
 
-    logger.info(mapper.writeValueAsString(result));
+    logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(result));
 
     assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
     assertNotNull(
@@ -331,7 +336,7 @@ class CrossrefMappingTest {
 
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -415,7 +420,7 @@ class CrossrefMappingTest {
 
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -463,7 +468,7 @@ class CrossrefMappingTest {
 
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -542,7 +547,7 @@ class CrossrefMappingTest {
 
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -568,7 +573,7 @@ class CrossrefMappingTest {
 
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 
@@ -578,7 +583,8 @@ class CrossrefMappingTest {
     assert(items.size == 1)
     val result: Result = items.head.asInstanceOf[Publication]
     assertNotNull(result)
-    logger.info(mapper.writeValueAsString(result));
+
+    logger.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(result));
   }
 
   @Test

@@ -591,7 +597,7 @@ class CrossrefMappingTest {
     val line: String =
       "\"funder\": [{\"name\": \"Wellcome Trust Masters Fellowship\",\"award\": [\"090633\"]}],"
     val json = template.replace("%s", line)
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Publication])
     val result: Result = items.head.asInstanceOf[Publication]
@@ -610,7 +616,7 @@ class CrossrefMappingTest {
       .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/article.json"))
       .mkString
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(template)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(template, vocabularies)
     assertTrue(resultList.nonEmpty)
     val items = resultList.filter(p => p.isInstanceOf[Publication])
     val result: Result = items.head.asInstanceOf[Publication]

@@ -634,14 +640,14 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 
     val item: Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result]
 
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
 
     assertTrue(
       item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))
@@ -664,7 +670,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -681,8 +687,8 @@ class CrossrefMappingTest {
     assertTrue(
       item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
     )
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
 
   }
 

@@ -699,7 +705,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -716,8 +722,7 @@ class CrossrefMappingTest {
     assertTrue(
       item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)
     )
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
 
   }
 
@@ -734,7 +739,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -751,8 +756,7 @@ class CrossrefMappingTest {
       item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
     )
     assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
 
   }
 

@@ -769,7 +773,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -786,8 +790,8 @@ class CrossrefMappingTest {
       item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))
     )
     assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null))
-    mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT)
-    println(mapper.writeValueAsString(item))
+
+    println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(item))
 
   }
 
@@ -802,7 +806,7 @@ class CrossrefMappingTest {
     assertNotNull(json)
     assertFalse(json.isEmpty);
 
-    val resultList: List[Oaf] = Crossref2Oaf.convert(json)
+    val resultList: List[Oaf] = Crossref2Oaf.convert(json, vocabularies)
 
     assertTrue(resultList.nonEmpty)
 

@@ -812,9 +816,8 @@ class CrossrefMappingTest {
     assertEquals(1, item.getInstance().get(0).getUrl().size())
     assertEquals(
       "https://doi.org/10.1016/j.jas.2019.105013",
-      item.getInstance().get(0).getUrl().get(0)
+      item.getInstance().get(0).getUrl.get(0)
     )
-    //println(mapper.writeValueAsString(item))
 
   }
 