dnet-hadoop/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/bioschema/BioschemaModelConstants.scala

283 lines
8.6 KiB
Scala

package eu.dnetlib.dhp.bioschema
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue, Qualifier}
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
import java.time.format.DateTimeFormatter
import java.util.Locale
import java.util.regex.Pattern
/*
The following class are utility class used for the mapping from
bioschema json datacite to OAF Schema
*/
case class RelatedIdentifierType(
relationType: String,
relatedIdentifier: String,
relatedIdentifierType: String
) {}
case class IdentifierType(
identifier: String,
identifierType: String
) {}
case class NameIdentifiersType(
nameIdentifierScheme: Option[String],
schemeUri: Option[String],
nameIdentifier: Option[String]
) {}
case class CreatorType(
nameType: Option[String],
nameIdentifiers: Option[List[NameIdentifiersType]],
name: Option[String],
familyName: Option[String],
givenName: Option[String],
affiliation: Option[List[String]]
) {}
case class TitleType(title: Option[String], titleType: Option[String], lang: Option[String]) {}
case class SubjectType(
schemeURI: Option[String],
value: Option[String],
subjectScheme: Option[String]
) {}
case class DescriptionType(descriptionType: Option[String], description: Option[String]) {}
case class DateType(date: Option[String], dateType: Option[String]) {}
case class OAFRelations(relation: String, inverse: String, relType: String)
class BioschemaModelConstants extends Serializable {}
object BioschemaModelConstants {
val PROTEIN_RESOURCETYPE: Qualifier =
qualifier("protein", "protein", ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(
false,
null,
false,
false,
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER,
"0.9"
)
val PED_PREFIX: String = "ped_________"
val resolvedURL: Map[String, String] = Map(
"uniprot" -> "https://www.uniprot.org/uniprot/",
"pubmed" -> "https://pubmed.ncbi.nlm.nih.gov/"
)
val collectedFromMap: Map[String, KeyValue] = {
val PEDCollectedFrom: KeyValue = OafMapperUtils.keyValue(
//TODO create pedDatasourceId and update this value
"10|ped_________::pedDatasourceId",
"Protein Ensemble Database"
)
PEDCollectedFrom.setDataInfo(DATA_INFO)
Map(
"ped" -> PEDCollectedFrom
)
}
val datasourceKeyPrefix: Map[String, String] = Map(
"ped" -> PED_PREFIX
)
val REL_TYPE_VALUE: String = "resultResult"
val DATE_RELATION_KEY = "RelationDate"
val dataInfo: DataInfo = bioschemaDataInfo("0.9")
val subRelTypeMapping: Map[String, OAFRelations] = Map(
ModelConstants.REFERENCES -> OAFRelations(
ModelConstants.REFERENCES,
ModelConstants.IS_REFERENCED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_REFERENCED_BY -> OAFRelations(
ModelConstants.IS_REFERENCED_BY,
ModelConstants.REFERENCES,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(
ModelConstants.IS_SUPPLEMENTED_BY,
ModelConstants.IS_SUPPLEMENT_TO,
ModelConstants.SUPPLEMENT
),
ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations(
ModelConstants.IS_SUPPLEMENT_TO,
ModelConstants.IS_SUPPLEMENTED_BY,
ModelConstants.SUPPLEMENT
),
ModelConstants.HAS_PART -> OAFRelations(
ModelConstants.HAS_PART,
ModelConstants.IS_PART_OF,
ModelConstants.PART
),
ModelConstants.IS_PART_OF -> OAFRelations(
ModelConstants.IS_PART_OF,
ModelConstants.HAS_PART,
ModelConstants.PART
),
ModelConstants.IS_VERSION_OF -> OAFRelations(
ModelConstants.IS_VERSION_OF,
ModelConstants.HAS_VERSION,
ModelConstants.VERSION
),
ModelConstants.HAS_VERSION -> OAFRelations(
ModelConstants.HAS_VERSION,
ModelConstants.IS_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.IS_IDENTICAL_TO -> OAFRelations(
ModelConstants.IS_IDENTICAL_TO,
ModelConstants.IS_IDENTICAL_TO,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_CONTINUED_BY -> OAFRelations(
ModelConstants.IS_CONTINUED_BY,
ModelConstants.CONTINUES,
ModelConstants.RELATIONSHIP
),
ModelConstants.CONTINUES -> OAFRelations(
ModelConstants.CONTINUES,
ModelConstants.IS_CONTINUED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_NEW_VERSION_OF -> OAFRelations(
ModelConstants.IS_NEW_VERSION_OF,
ModelConstants.IS_PREVIOUS_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.IS_PREVIOUS_VERSION_OF -> OAFRelations(
ModelConstants.IS_PREVIOUS_VERSION_OF,
ModelConstants.IS_NEW_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.DOCUMENTS,
ModelConstants.RELATIONSHIP
),
ModelConstants.DOCUMENTS -> OAFRelations(
ModelConstants.DOCUMENTS,
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_SOURCE_OF -> OAFRelations(
ModelConstants.IS_SOURCE_OF,
ModelConstants.IS_DERIVED_FROM,
ModelConstants.VERSION
),
ModelConstants.IS_DERIVED_FROM -> OAFRelations(
ModelConstants.IS_DERIVED_FROM,
ModelConstants.IS_SOURCE_OF,
ModelConstants.VERSION
),
ModelConstants.CITES -> OAFRelations(
ModelConstants.CITES,
ModelConstants.IS_CITED_BY,
ModelConstants.CITATION
),
ModelConstants.IS_CITED_BY -> OAFRelations(
ModelConstants.IS_CITED_BY,
ModelConstants.CITES,
ModelConstants.CITATION
),
ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(
ModelConstants.IS_VARIANT_FORM_OF,
ModelConstants.IS_DERIVED_FROM,
ModelConstants.VERSION
),
ModelConstants.IS_OBSOLETED_BY -> OAFRelations(
ModelConstants.IS_OBSOLETED_BY,
ModelConstants.IS_NEW_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.REVIEWS -> OAFRelations(
ModelConstants.REVIEWS,
ModelConstants.IS_REVIEWED_BY,
ModelConstants.REVIEW
),
ModelConstants.IS_REVIEWED_BY -> OAFRelations(
ModelConstants.IS_REVIEWED_BY,
ModelConstants.REVIEWS,
ModelConstants.REVIEW
),
ModelConstants.DOCUMENTS -> OAFRelations(
ModelConstants.DOCUMENTS,
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.DOCUMENTS,
ModelConstants.RELATIONSHIP
),
ModelConstants.COMPILES -> OAFRelations(
ModelConstants.COMPILES,
ModelConstants.IS_COMPILED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_COMPILED_BY -> OAFRelations(
ModelConstants.IS_COMPILED_BY,
ModelConstants.COMPILES,
ModelConstants.RELATIONSHIP
)
)
def bioschemaDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo(
false,
null,
false,
false,
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER,
trust
)
val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern(
"[MM-dd-yyyy][MM/dd/yyyy][dd-MM-yy][dd-MMM-yyyy][dd/MMM/yyyy][dd-MMM-yy][dd/MMM/yy][dd-MM-yy][dd/MM/yy][dd-MM-yyyy][dd/MM/yyyy][yyyy-MM-dd][yyyy/MM/dd]",
Locale.ENGLISH
)
val df_it: DateTimeFormatter =
DateTimeFormatter.ofPattern("[dd-MM-yyyy][dd/MM/yyyy]", Locale.ITALIAN)
val Date_regex: List[Pattern] = List(
//Y-M-D
Pattern.compile(
"(18|19|20)\\d\\d([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])",
Pattern.MULTILINE
),
//M-D-Y
Pattern.compile(
"((0[1-9]|1[012])|([1-9]))([- /.])(0[1-9]|[12][0-9]|3[01])([- /.])(18|19|20)?\\d\\d",
Pattern.MULTILINE
),
//D-M-Y
Pattern.compile(
"(?:(?:31(/|-|\\.)(?:0?[13578]|1[02]|(?:Jan|Mar|May|Jul|Aug|Oct|Dec)))\\1|(?:(?:29|30)(/|-|\\.)(?:0?[1,3-9]|1[0-2]|(?:Jan|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))\\2))(?:(?:1[6-9]|[2-9]\\d)?\\d{2})|(?:29(/|-|\\.)(?:0?2|(?:Feb))\\3(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))|(?:0?[1-9]|1\\d|2[0-8])(/|-|\\.)(?:(?:0?[1-9]|(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep))|(?:1[0-2]|(?:Oct|Nov|Dec)))\\4(?:(?:1[6-9]|[2-9]\\d)?\\d{2})",
Pattern.MULTILINE
),
//Y
Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)
)
private def qualifier(classid: String, classname: String, schemeid: String, schemename: String) = {
val q = new Qualifier
q.setClassid(classid)
q.setClassname(classname)
q.setSchemeid(schemeid)
q.setSchemename(schemename)
q
}
}