package eu.dnetlib.dhp.bioschema

import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.bioschema.BioschemaModelConstants._
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.{Dataset => OafDataset, _}
import eu.dnetlib.dhp.utils.DHPUtils
import org.apache.commons.lang3.StringUtils
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse

import java.time.LocalDate
import java.util.Locale
import scala.collection.JavaConverters._
import scala.util.Try

/** Maps a JSON record (parsed with json4s) into OAF entities: one `Result`
  * plus, optionally, the `Relation`s derived from its related identifiers.
  */
object BioschemaToOAFTransformation {

  val mapper = new ObjectMapper()

  /** Extracts the first date found in `input` and renders it via `LocalDate.toString`.
    *
    * Every pattern in `Date_regex` is tried in order and the first match is used.
    * A 4-character match is expanded to `01-01-<match>` before parsing. The
    * candidate is parsed with `df_en` first and `df_it` as a fallback.
    *
    * @param input free text that may contain a date; `null` yields `None`
    * @return the parsed date, or `None` when nothing matches or parsing fails
    */
  def extract_date(input: String): Option[String] =
    Option(input)
      .flatMap { text =>
        Date_regex
          .map(_.matcher(text))
          .find(_.find())
          .map(_.group(0))
      }
      .flatMap { matched =>
        val candidate = if (matched.length == 4) s"01-01-$matched" else matched
        // Try only traps non-fatal errors, so fatal JVM errors are no longer swallowed.
        Try(LocalDate.parse(candidate, df_en))
          .orElse(Try(LocalDate.parse(candidate, df_it)))
          .toOption
          .map(_.toString)
      }

  /** Builds the empty `Result` skeleton for the given resource class.
    *
    * Only "PROTEIN" (case-insensitive) is supported: it yields a dataset holding
    * a single `Instance`, both typed with `PROTEIN_RESOURCETYPE`.
    *
    * @param resourceClassName name of the resource class; may be `null`
    * @return the initialised result, or `null` when the class is not supported
    *         (the return type is kept nullable for existing callers)
    */
  def getResult(resourceClassName: String): Result =
    // Locale.ROOT keeps the comparison independent of the JVM default locale.
    Option(resourceClassName).map(_.toUpperCase(Locale.ROOT)) match {
      case Some("PROTEIN") =>
        val instance = new Instance
        instance.setInstancetype(PROTEIN_RESOURCETYPE)
        val dataset = new OafDataset
        dataset.setInstance(List(instance).asJava)
        dataset.setResourcetype(PROTEIN_RESOURCETYPE)
        dataset
      // Without this default an unsupported class raised a MatchError.
      case _ => null
    }

  /** Wraps a date string and its qualifier into a `StructuredProperty` (no data info). */
  def generateOAFDate(dt: String, q: Qualifier): StructuredProperty =
    OafMapperUtils.structuredProperty(dt, q, null)

  /** Transforms one JSON record into OAF objects.
    *
    * An empty list is returned when the resource class is unsupported, when the
    * record has no titles, when it has no identifier to use as URL, or when no
    * final identifier can be generated for the result.
    *
    * @param input             the JSON record; it must contain a string field `id`
    *                          (extraction fails with an exception otherwise)
    * @param exportLinks       when true, relations are generated from `relatedIdentifiers`
    * @param datasourceKey     key used to look up the datasource prefix and collected-from entry
    * @param resourceClassName resource class of the record, see [[getResult]]
    * @return the result followed by its relations, or an empty list
    */
  def generateOAF(
    input: String,
    exportLinks: Boolean,
    datasourceKey: String,
    resourceClassName: String
  ): List[Oaf] = {
    implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
    // Lazy: the input is not parsed at all when the resource class is unsupported.
    lazy val json = parse(input)

    val result = getResult(resourceClassName)
    if (result == null)
      return List()

    val pid = (json \ "id").extract[String]
    result.setPid(
      List(
        OafMapperUtils.structuredProperty(
          pid,
          datasourceKey,
          datasourceKey,
          ModelConstants.DNET_PID_TYPES,
          ModelConstants.DNET_PID_TYPES,
          DATA_INFO
        )
      ).asJava
    )
    // Provisional id; replaced further down by the one computed by IdentifierFactory.
    result.setId(
      OafMapperUtils.createOpenaireId(50, s"${datasourceKeyPrefix(datasourceKey)}::$pid", true)
    )
    result.setOriginalId(List(pid).asJava)
    result.setDataInfo(dataInfo)

    val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
    if (titles.isEmpty)
      return List()
    result.setTitle(
      titles
        .flatMap(_.title)
        .map(title =>
          OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo)
        )
        .asJava
    )

    val dates = (json \\ "dates").extract[List[DateType]]

    // Date of collection: first fully-specified date entry typed "collected".
    dates
      .find(d => d.date.isDefined && d.dateType.exists(t => "collected".equalsIgnoreCase(t)))
      .flatMap(_.date)
      .flatMap(extract_date)
      .foreach(collected => result.setDateofcollection(collected))

    // Relevant dates: every entry with both a parsable date and a date type.
    val relevantDates = dates.flatMap { d =>
      for {
        rawDate  <- d.date
        dateType <- d.dateType
        date     <- extract_date(rawDate)
        qualifier <- Option(
          OafMapperUtils.qualifier(
            dateType.toLowerCase(Locale.ROOT),
            dateType.toLowerCase(Locale.ROOT),
            ModelConstants.DNET_DATACITE_DATE,
            ModelConstants.DNET_DATACITE_DATE
          )
        )
      } yield generateOAFDate(date, qualifier)
    }
    result.setRelevantdate(relevantDates.asJava)

    result.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava)

    val descriptions = (json \\ "descriptions").extract[List[DescriptionType]]
    result.setDescription(
      descriptions
        .flatMap(_.description)
        .map(text => OafMapperUtils.field(text, null))
        .filter(_ != null)
        .asJava
    )

    val subjects = (json \\ "subjects").extract[List[SubjectType]]
    result.setSubject(
      subjects.flatMap { s =>
        // Only subjects carrying value, scheme and scheme URI are kept.
        for {
          value     <- s.value
          scheme    <- s.subjectScheme
          schemeURI <- s.schemeURI
        } yield OafMapperUtils.structuredProperty(
          value,
          scheme,
          schemeURI,
          ModelConstants.DNET_SUBJECT_TYPOLOGIES,
          ModelConstants.DNET_SUBJECT_TYPOLOGIES,
          null
        )
      }.asJava
    )

    val instance = result.getInstance().get(0)

    val ids: List[IdentifierType] = for {
      JObject(identifiers)                                 <- json \\ "identifiers"
      JField("identifier", JString(identifier))         <- identifiers
      JField("identifierType", JString(identifierType)) <- identifiers
    } yield IdentifierType(identifier, identifierType)

    instance.setUrl(ids.map(_.identifier).asJava)
    if (instance.getUrl.isEmpty)
      return List()

    instance.setCollectedfrom(collectedFromMap(datasourceKey))
    instance.setPid(result.getPid)
    result.setId(IdentifierFactory.createIdentifier(result))
    if (result.getId == null)
      return List()

    val alternateIdentifierUrls: List[AlternateIdentifierType] = for {
      JObject(alternateIdentifiers)                               <- json \\ "alternateIdentifiers"
      JField("alternateIdentifier", JString(alternateIdentifier)) <- alternateIdentifiers
    } yield AlternateIdentifierType(alternateIdentifier)

    // Only the first alternate identifier is considered. A record without one,
    // or whose URL matches no known pattern, used to abort with an exception;
    // it now simply ends up with the default identifier alone.
    val resolvedAlternateIdentifier: Option[StructuredProperty] =
      alternateIdentifierUrls.headOption.flatMap { alternate =>
        resolveUrlPatterns(alternate.alternateIdentifier).flatMap {
          case (relatedId, pidType) =>
            Option(
              OafMapperUtils.structuredProperty(
                relatedId,
                pidType,
                pidType,
                ModelConstants.DNET_PID_TYPES,
                ModelConstants.DNET_PID_TYPES,
                dataInfo
              )
            )
        }.headOption
      }

    val defaultAlternateIdentifier: StructuredProperty = OafMapperUtils.structuredProperty(
      pid,
      datasourceKey,
      datasourceKey,
      ModelConstants.DNET_PID_TYPES,
      ModelConstants.DNET_PID_TYPES,
      dataInfo
    )

    instance.setAlternateIdentifier(
      (resolvedAlternateIdentifier.toList :+ defaultAlternateIdentifier).asJava
    )

    val relations: List[Relation] =
      if (exportLinks) {
        val rels: List[RelatedIdentifierType] = for {
          JObject(relIdentifier)                                          <- json \\ "relatedIdentifiers"
          JField("relationType", JString(relationType))                   <- relIdentifier
          JField("relatedIdentifierType", JString(relatedIdentifierType)) <- relIdentifier
          JField("relatedIdentifier", JString(relatedIdentifier))         <- relIdentifier
        } yield RelatedIdentifierType(relationType, relatedIdentifier, relatedIdentifierType)

        generateRelations(rels, result.getId, null, pid, datasourceKey)
      } else Nil

    List[Oaf](result) ::: relations
  }

  /** Matches `url` against the prefixes listed in `resolvedURLPattern`.
    *
    * @param url the URL to resolve
    * @return one `(identifier, pidType)` pair per matching prefix, in the iteration
    *         order of `resolvedURLPattern`; the identifier is the part of `url`
    *         that follows the prefix
    */
  private def resolveUrlPatterns(url: String): List[(String, String)] =
    resolvedURLPattern.toList.flatMap { pattern =>
      val prefix = s"${pattern._1}"
      if (url.startsWith(prefix)) Some((StringUtils.substringAfter(url, prefix), pattern._2))
      else None
    }

  /** Builds one `Relation` per related identifier, with `id` as the source.
    *
    * The target is an unresolved identifier derived from the related identifier
    * URL; when several URL patterns match, the last one wins, and when none
    * matches the target is left unset.
    *
    * @param rels          related identifiers extracted from the record
    * @param id            identifier of the source result
    * @param date          value stored under `DATE_RELATION_KEY` in the relation properties
    * @param pid           original identifier of the record (currently unused)
    * @param datasourceKey key used to look up the collected-from entry
    */
  private def generateRelations(
    rels: List[RelatedIdentifierType],
    id: String,
    date: String,
    pid: String,
    datasourceKey: String
  ): List[Relation] =
    rels.map { r =>
      val rel = new Relation
      rel.setCollectedfrom(List(collectedFromMap(datasourceKey)).asJava)
      rel.setDataInfo(dataInfo)

      val subRelType = subRelTypeMapping(r.relationType).relType
      rel.setRelType(REL_TYPE_VALUE)
      rel.setSubRelType(subRelType)
      rel.setRelClass(r.relationType)

      val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
      rel.setProperties(List(dateProps).asJava)

      resolveUrlPatterns(r.relatedIdentifier).foreach { case (relatedId, pidType) =>
        rel.setTarget(DHPUtils.generateUnresolvedIdentifier(relatedId, pidType))
      }

      rel.setSource(id)
      rel
    }
}