forked from D-Net/dnet-hadoop
exclude FAIRsharing records from Datacite
This commit is contained in:
parent
b8cda65487
commit
88d531dc91
|
@ -47,13 +47,18 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This method should skip record if json contains invalid text
|
/** This method should skip record if json contains invalid text
|
||||||
* defined in gile datacite_filter
|
* defined in file datacite_filter
|
||||||
*
|
*
|
||||||
* @param json
|
* @param record : unparsed datacite record
|
||||||
|
* @param json : parsed record
|
||||||
* @return True if the record should be skipped
|
* @return True if the record should be skipped
|
||||||
*/
|
*/
|
||||||
def skip_record(json: String): Boolean = {
|
def skip_record(record: String, json: org.json4s.JValue): Boolean = {
|
||||||
datacite_filter.exists(f => json.contains(f))
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
|
||||||
|
.extractOrElse[String]("")
|
||||||
|
.equalsIgnoreCase("FAIRsharing")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@deprecated("this method will be removed", "dhp")
|
@deprecated("this method will be removed", "dhp")
|
||||||
|
@ -304,12 +309,13 @@ object DataciteToOAFTransformation {
|
||||||
vocabularies: VocabularyGroup,
|
vocabularies: VocabularyGroup,
|
||||||
exportLinks: Boolean
|
exportLinks: Boolean
|
||||||
): List[Oaf] = {
|
): List[Oaf] = {
|
||||||
if (skip_record(input))
|
|
||||||
return List()
|
|
||||||
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json = parse(input)
|
lazy val json = parse(input)
|
||||||
|
|
||||||
|
if (skip_record(input, json))
|
||||||
|
return List()
|
||||||
|
|
||||||
val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
|
val resourceType = (json \ "attributes" \ "types" \ "resourceType").extractOrElse[String](null)
|
||||||
val resourceTypeGeneral =
|
val resourceTypeGeneral =
|
||||||
(json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
|
(json \ "attributes" \ "types" \ "resourceTypeGeneral").extractOrElse[String](null)
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -107,4 +107,19 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testFilter(): Unit = {
|
||||||
|
val record = Source
|
||||||
|
.fromInputStream(
|
||||||
|
getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/record_fairsharing.json")
|
||||||
|
)
|
||||||
|
.mkString
|
||||||
|
|
||||||
|
val mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT)
|
||||||
|
val res: List[Oaf] = DataciteToOAFTransformation.generateOAF(record, 0L, 0L, vocabularies, true)
|
||||||
|
|
||||||
|
assertTrue(res.isEmpty)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue