From a1f94530a3a170b8095de027660df0c2d4a35b09 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 13 Oct 2022 11:47:11 +0200 Subject: [PATCH] added documentation --- .../DataciteToOAFTransformation.scala | 122 +++++++++++------- 1 file changed, 74 insertions(+), 48 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala index c29614d33..29f5cb99c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/datacite/DataciteToOAFTransformation.scala @@ -24,11 +24,11 @@ import scala.io.Source object DataciteToOAFTransformation { case class HostedByMapType( - openaire_id: String, - datacite_name: String, - official_name: String, - similarity: Option[Float] - ) {} + openaire_id: String, + datacite_name: String, + official_name: String, + similarity: Option[Float] + ) {} val mapper = new ObjectMapper() @@ -47,12 +47,12 @@ object DataciteToOAFTransformation { } /** This method should skip record if json contains invalid text - * defined in file datacite_filter - * - * @param record : unparsed datacite record - * @param json : parsed record - * @return True if the record should be skipped - */ + * defined in file datacite_filter + * + * @param record : not parsed Datacite record + * @param json : parsed record + * @return True if the record should be skipped + */ def skip_record(record: String, json: org.json4s.JValue): Boolean = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher") @@ -98,6 +98,11 @@ object DataciteToOAFTransformation { } + /** + * This utility method indicates whether the embargo date has been reached + * @param embargo_end_date + * @return True if the embargo date has been reached, false otherwise + */ def embargo_end(embargo_end_date: String): Boolean = { val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]")) val td = LocalDate.now() @@ -142,12 +147,27 @@ object DataciteToOAFTransformation { } } + /*** + * Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type. + * Using the dnet:result_typologies vocabulary, we look up the instance.type synonym + * to generate one of the following main entities: + * - publication + * - dataset + * - software + * - otherresearchproduct + + * @param resourceType + * @param resourceTypeGeneral + * @param schemaOrg + * @param vocabularies + * @return + */ def getTypeQualifier( - resourceType: String, - resourceTypeGeneral: String, - schemaOrg: String, - vocabularies: VocabularyGroup - ): (Qualifier, Qualifier) = { + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): (Qualifier, Qualifier) = { if (resourceType != null && resourceType.nonEmpty) { val typeQualifier = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType) @@ -192,11 +212,11 @@ object DataciteToOAFTransformation { } def getResult( - resourceType: String, - resourceTypeGeneral: String, - schemaOrg: String, - vocabularies: VocabularyGroup - ): Result = { + resourceType: String, + resourceTypeGeneral: String, + schemaOrg: String, + vocabularies: VocabularyGroup + ): Result = { val typeQualifiers: (Qualifier, Qualifier) = getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies) if (typeQualifiers == null) @@ -238,11 +258,11 @@ object DataciteToOAFTransformation { } /** As describe in ticket #6377 - * when the result come from figshare we need to remove subject - * and set Access rights OPEN. - * - * @param r - */ + * when the result come from figshare we need to remove subject + * and set Access rights OPEN. + * + * @param r + */ def fix_figshare(r: Result): Unit = { if (r.getInstance() != null) { @@ -269,12 +289,12 @@ object DataciteToOAFTransformation { } def generateRelation( - sourceId: String, - targetId: String, - relClass: String, - cf: KeyValue, - di: DataInfo - ): Relation = { + sourceId: String, + targetId: String, + relClass: String, + cf: KeyValue, + di: DataInfo + ): Relation = { val r = new Relation r.setSource(sourceId) @@ -303,12 +323,12 @@ object DataciteToOAFTransformation { } def generateOAF( - input: String, - ts: Long, - dateOfCollection: Long, - vocabularies: VocabularyGroup, - exportLinks: Boolean - ): List[Oaf] = { + input: String, + ts: Long, + dateOfCollection: Long, + vocabularies: VocabularyGroup, + exportLinks: Boolean + ): List[Oaf] = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json = parse(input) @@ -330,6 +350,7 @@ object DataciteToOAFTransformation { if (result == null) return List() + // DOI is mapped on a PID inside a Instance object val doi_q = OafMapperUtils.qualifier( "doi", "doi", @@ -338,6 +359,8 @@ object DataciteToOAFTransformation { ) val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) result.setPid(List(pid).asJava) + + // This identifiere will be replaced in a second moment using the PID logic generation result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) result.setOriginalId(List(doi).asJava) @@ -386,6 +409,11 @@ object DataciteToOAFTransformation { a } + if (authors == null || authors.isEmpty || !authors.exists(a => a != null)) + return List() + result.setAuthor(authors.asJava) + + val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List()) result.setTitle( @@ -409,9 +437,7 @@ object DataciteToOAFTransformation { .asJava ) - if (authors == null || authors.isEmpty || !authors.exists(a => a != null)) - return List() - result.setAuthor(authors.asJava) + val dates = (json \\ "dates").extract[List[DateType]] val publication_year = (json \\ "publicationYear").extractOrElse[String](null) @@ -619,16 +645,16 @@ object DataciteToOAFTransformation { } private def generateRelations( - rels: List[RelatedIdentifierType], - id: String, - date: String - ): List[Relation] = { + rels: List[RelatedIdentifierType], + id: String, + date: String + ): List[Relation] = { rels .filter(r => subRelTypeMapping .contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") || - r.relatedIdentifierType.equalsIgnoreCase("pmid") || - r.relatedIdentifierType.equalsIgnoreCase("arxiv")) + r.relatedIdentifierType.equalsIgnoreCase("pmid") || + r.relatedIdentifierType.equalsIgnoreCase("arxiv")) ) .map(r => { val rel = new Relation @@ -660,4 +686,4 @@ object DataciteToOAFTransformation { s"10|$b::${DHPUtils.md5(a)}" } -} +} \ No newline at end of file