This commit is contained in:
Claudio Atzori 2022-10-19 15:21:02 +02:00
commit ec39b84898
1 changed files with 64 additions and 68 deletions

View File

@ -24,11 +24,11 @@ import scala.io.Source
object DataciteToOAFTransformation { object DataciteToOAFTransformation {
case class HostedByMapType( case class HostedByMapType(
openaire_id: String, openaire_id: String,
datacite_name: String, datacite_name: String,
official_name: String, official_name: String,
similarity: Option[Float] similarity: Option[Float]
) {} ) {}
val mapper = new ObjectMapper() val mapper = new ObjectMapper()
@ -47,12 +47,12 @@ object DataciteToOAFTransformation {
} }
/** This method should skip record if json contains invalid text /** This method should skip record if json contains invalid text
* defined in file datacite_filter * defined in file datacite_filter
* *
* @param record : not parsed Datacite record * @param record : not parsed Datacite record
* @param json : parsed record * @param json : parsed record
* @return True if the record should be skipped * @return True if the record should be skipped
*/ */
def skip_record(record: String, json: org.json4s.JValue): Boolean = { def skip_record(record: String, json: org.json4s.JValue): Boolean = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher") datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
@ -98,11 +98,10 @@ object DataciteToOAFTransformation {
} }
/** /** This utility method indicates whether the embargo date has been reached
* This utility method indicates whether the embargo date has been reached * @param embargo_end_date
* @param embargo_end_date * @return True if the embargo date has been reached, false otherwise
* @return True if the embargo date has been reached, false otherwise */
*/
def embargo_end(embargo_end_date: String): Boolean = { def embargo_end(embargo_end_date: String): Boolean = {
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
val td = LocalDate.now() val td = LocalDate.now()
@ -147,27 +146,27 @@ object DataciteToOAFTransformation {
} }
} }
/*** /** *
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type. * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
* to generate one of the following main entities: * to generate one of the following main entities:
* - publication * - publication
* - dataset * - dataset
* - software * - software
* - otherresearchproduct * - otherresearchproduct
*
* @param resourceType * @param resourceType
* @param resourceTypeGeneral * @param resourceTypeGeneral
* @param schemaOrg * @param schemaOrg
* @param vocabularies * @param vocabularies
* @return * @return
*/ */
def getTypeQualifier( def getTypeQualifier(
resourceType: String, resourceType: String,
resourceTypeGeneral: String, resourceTypeGeneral: String,
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): (Qualifier, Qualifier) = { ): (Qualifier, Qualifier) = {
if (resourceType != null && resourceType.nonEmpty) { if (resourceType != null && resourceType.nonEmpty) {
val typeQualifier = val typeQualifier =
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
@ -212,11 +211,11 @@ object DataciteToOAFTransformation {
} }
def getResult( def getResult(
resourceType: String, resourceType: String,
resourceTypeGeneral: String, resourceTypeGeneral: String,
schemaOrg: String, schemaOrg: String,
vocabularies: VocabularyGroup vocabularies: VocabularyGroup
): Result = { ): Result = {
val typeQualifiers: (Qualifier, Qualifier) = val typeQualifiers: (Qualifier, Qualifier) =
getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
if (typeQualifiers == null) if (typeQualifiers == null)
@ -258,11 +257,11 @@ object DataciteToOAFTransformation {
} }
/** As described in ticket #6377 /** As described in ticket #6377
* when the result comes from figshare we need to remove subject * when the result comes from figshare we need to remove subject
* and set Access rights OPEN. * and set Access rights OPEN.
* *
* @param r * @param r
*/ */
def fix_figshare(r: Result): Unit = { def fix_figshare(r: Result): Unit = {
if (r.getInstance() != null) { if (r.getInstance() != null) {
@ -289,12 +288,12 @@ object DataciteToOAFTransformation {
} }
def generateRelation( def generateRelation(
sourceId: String, sourceId: String,
targetId: String, targetId: String,
relClass: String, relClass: String,
cf: KeyValue, cf: KeyValue,
di: DataInfo di: DataInfo
): Relation = { ): Relation = {
val r = new Relation val r = new Relation
r.setSource(sourceId) r.setSource(sourceId)
@ -323,12 +322,12 @@ object DataciteToOAFTransformation {
} }
def generateOAF( def generateOAF(
input: String, input: String,
ts: Long, ts: Long,
dateOfCollection: Long, dateOfCollection: Long,
vocabularies: VocabularyGroup, vocabularies: VocabularyGroup,
exportLinks: Boolean exportLinks: Boolean
): List[Oaf] = { ): List[Oaf] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json = parse(input) lazy val json = parse(input)
@ -413,7 +412,6 @@ object DataciteToOAFTransformation {
return List() return List()
result.setAuthor(authors.asJava) result.setAuthor(authors.asJava)
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
result.setTitle( result.setTitle(
@ -437,8 +435,6 @@ object DataciteToOAFTransformation {
.asJava .asJava
) )
val dates = (json \\ "dates").extract[List[DateType]] val dates = (json \\ "dates").extract[List[DateType]]
val publication_year = (json \\ "publicationYear").extractOrElse[String](null) val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
@ -645,16 +641,16 @@ object DataciteToOAFTransformation {
} }
private def generateRelations( private def generateRelations(
rels: List[RelatedIdentifierType], rels: List[RelatedIdentifierType],
id: String, id: String,
date: String date: String
): List[Relation] = { ): List[Relation] = {
rels rels
.filter(r => .filter(r =>
subRelTypeMapping subRelTypeMapping
.contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") ||
r.relatedIdentifierType.equalsIgnoreCase("pmid") || r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
r.relatedIdentifierType.equalsIgnoreCase("arxiv")) r.relatedIdentifierType.equalsIgnoreCase("arxiv"))
) )
.map(r => { .map(r => {
val rel = new Relation val rel = new Relation
@ -686,4 +682,4 @@ object DataciteToOAFTransformation {
s"10|$b::${DHPUtils.md5(a)}" s"10|$b::${DHPUtils.md5(a)}"
} }
} }