exclude FAIRsharing records from Datacite

This commit is contained in:
Alessia Bardi 2022-06-13 16:17:17 +02:00
parent b8cda65487
commit 88d531dc91
3 changed files with 28 additions and 6 deletions

View File

@ -47,13 +47,18 @@ object DataciteToOAFTransformation {
}
/** This method should skip record if json contains invalid text
* defined in gile datacite_filter
* defined in file datacite_filter
*
* @param json
* @param record : unparsed datacite record
* @param json : parsed record
* @return True if the record should be skipped
*/
def skip_record(json: String): Boolean = {
datacite_filter.exists(f => json.contains(f))
def skip_record(record: String, json: org.json4s.JValue): Boolean = {
  // First criterion: the raw, unparsed record contains one of the filter strings.
  val matchesTextFilter = datacite_filter.exists(f => record.contains(f))

  // Second criterion: some "publisher" field of the parsed record is the string
  // "FAIRsharing" (case-insensitive). Every occurrence is inspected: the `\\`
  // operator only returns the bare value when there is exactly one match and
  // wraps several matches into a JObject, which cannot be read as a String and
  // would let such a record through.
  def publishedByFairSharing: Boolean =
    json
      .filterField { case (name, _) => name == "publisher" }
      .exists {
        case (_, org.json4s.JString(publisher)) => "FAIRsharing".equalsIgnoreCase(publisher)
        case _                                  => false
      }

  // `||` keeps the short-circuit: the JSON tree is only walked when the text
  // filter did not already match.
  matchesTextFilter || publishedByFairSharing
}
@deprecated("this method will be removed", "dhp")
@ -304,12 +309,13 @@ object DataciteToOAFTransformation {
vocabularies: VocabularyGroup,
exportLinks: Boolean
): List[Oaf] = {
if (skip_record(input))
return List()
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json = parse(input)
if (skip_record(input, json))
return List()
val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
val resourceTypeGeneral =
(json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)

View File

@ -107,4 +107,19 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
}
@Test
def testFilter(): Unit = {
  // Load the fixture from the test classpath (presumably a Datacite record
  // whose publisher is FAIRsharing, going by the resource name).
  val source = Source.fromInputStream(
    getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json")
  )
  // Close the underlying stream even if reading fails.
  val record =
    try source.mkString
    finally source.close()

  val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true)

  // A filtered record must produce no OAF entities at all.
  assertTrue(res.isEmpty)
}
}