forked from D-Net/dnet-hadoop
formatted code
This commit is contained in:
parent
9b449110c6
commit
72f0d88d6c
|
@ -24,11 +24,11 @@ import scala.io.Source
|
||||||
object DataciteToOAFTransformation {
|
object DataciteToOAFTransformation {
|
||||||
|
|
||||||
case class HostedByMapType(
|
case class HostedByMapType(
|
||||||
openaire_id: String,
|
openaire_id: String,
|
||||||
datacite_name: String,
|
datacite_name: String,
|
||||||
official_name: String,
|
official_name: String,
|
||||||
similarity: Option[Float]
|
similarity: Option[Float]
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
val mapper = new ObjectMapper()
|
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
@ -47,12 +47,12 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This method should skip record if json contains invalid text
|
/** This method should skip record if json contains invalid text
|
||||||
* defined in file datacite_filter
|
* defined in file datacite_filter
|
||||||
*
|
*
|
||||||
* @param record : not parsed Datacite record
|
* @param record : not parsed Datacite record
|
||||||
* @param json : parsed record
|
* @param json : parsed record
|
||||||
* @return True if the record should be skipped
|
* @return True if the record should be skipped
|
||||||
*/
|
*/
|
||||||
def skip_record(record: String, json: org.json4s.JValue): Boolean = {
|
def skip_record(record: String, json: org.json4s.JValue): Boolean = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
|
datacite_filter.exists(f => record.contains(f)) || (json \\ "publisher")
|
||||||
|
@ -98,11 +98,10 @@ object DataciteToOAFTransformation {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** This utility method indicates whether the embargo date has been reached
|
||||||
* This utility method indicates whether the embargo date has been reached
|
* @param embargo_end_date
|
||||||
* @param embargo_end_date
|
* @return True if the embargo date has been reached, false otherwise
|
||||||
* @return True if the embargo date has been reached, false otherwise
|
*/
|
||||||
*/
|
|
||||||
def embargo_end(embargo_end_date: String): Boolean = {
|
def embargo_end(embargo_end_date: String): Boolean = {
|
||||||
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
|
val dt = LocalDate.parse(embargo_end_date, DateTimeFormatter.ofPattern("[yyyy-MM-dd]"))
|
||||||
val td = LocalDate.now()
|
val td = LocalDate.now()
|
||||||
|
@ -147,27 +146,27 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/***
|
/** *
|
||||||
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
* Use the vocabulary dnet:publication_resource to find a synonym to one of these terms and get the instance.type.
|
||||||
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
* Using the dnet:result_typologies vocabulary, we look up the instance.type synonym
|
||||||
* to generate one of the following main entities:
|
* to generate one of the following main entities:
|
||||||
* - publication
|
* - publication
|
||||||
* - dataset
|
* - dataset
|
||||||
* - software
|
* - software
|
||||||
* - otherresearchproduct
|
* - otherresearchproduct
|
||||||
|
*
|
||||||
* @param resourceType
|
* @param resourceType
|
||||||
* @param resourceTypeGeneral
|
* @param resourceTypeGeneral
|
||||||
* @param schemaOrg
|
* @param schemaOrg
|
||||||
* @param vocabularies
|
* @param vocabularies
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
def getTypeQualifier(
|
def getTypeQualifier(
|
||||||
resourceType: String,
|
resourceType: String,
|
||||||
resourceTypeGeneral: String,
|
resourceTypeGeneral: String,
|
||||||
schemaOrg: String,
|
schemaOrg: String,
|
||||||
vocabularies: VocabularyGroup
|
vocabularies: VocabularyGroup
|
||||||
): (Qualifier, Qualifier) = {
|
): (Qualifier, Qualifier) = {
|
||||||
if (resourceType != null && resourceType.nonEmpty) {
|
if (resourceType != null && resourceType.nonEmpty) {
|
||||||
val typeQualifier =
|
val typeQualifier =
|
||||||
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, resourceType)
|
||||||
|
@ -212,11 +211,11 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
def getResult(
|
def getResult(
|
||||||
resourceType: String,
|
resourceType: String,
|
||||||
resourceTypeGeneral: String,
|
resourceTypeGeneral: String,
|
||||||
schemaOrg: String,
|
schemaOrg: String,
|
||||||
vocabularies: VocabularyGroup
|
vocabularies: VocabularyGroup
|
||||||
): Result = {
|
): Result = {
|
||||||
val typeQualifiers: (Qualifier, Qualifier) =
|
val typeQualifiers: (Qualifier, Qualifier) =
|
||||||
getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
|
getTypeQualifier(resourceType, resourceTypeGeneral, schemaOrg, vocabularies)
|
||||||
if (typeQualifiers == null)
|
if (typeQualifiers == null)
|
||||||
|
@ -258,11 +257,11 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** As described in ticket #6377
|
/** As described in ticket #6377
|
||||||
* when the result comes from figshare we need to remove subject
|
* when the result comes from figshare we need to remove subject
|
||||||
* and set Access rights OPEN.
|
* and set Access rights OPEN.
|
||||||
*
|
*
|
||||||
* @param r
|
* @param r
|
||||||
*/
|
*/
|
||||||
def fix_figshare(r: Result): Unit = {
|
def fix_figshare(r: Result): Unit = {
|
||||||
|
|
||||||
if (r.getInstance() != null) {
|
if (r.getInstance() != null) {
|
||||||
|
@ -289,12 +288,12 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateRelation(
|
def generateRelation(
|
||||||
sourceId: String,
|
sourceId: String,
|
||||||
targetId: String,
|
targetId: String,
|
||||||
relClass: String,
|
relClass: String,
|
||||||
cf: KeyValue,
|
cf: KeyValue,
|
||||||
di: DataInfo
|
di: DataInfo
|
||||||
): Relation = {
|
): Relation = {
|
||||||
|
|
||||||
val r = new Relation
|
val r = new Relation
|
||||||
r.setSource(sourceId)
|
r.setSource(sourceId)
|
||||||
|
@ -323,12 +322,12 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
def generateOAF(
|
def generateOAF(
|
||||||
input: String,
|
input: String,
|
||||||
ts: Long,
|
ts: Long,
|
||||||
dateOfCollection: Long,
|
dateOfCollection: Long,
|
||||||
vocabularies: VocabularyGroup,
|
vocabularies: VocabularyGroup,
|
||||||
exportLinks: Boolean
|
exportLinks: Boolean
|
||||||
): List[Oaf] = {
|
): List[Oaf] = {
|
||||||
|
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json = parse(input)
|
lazy val json = parse(input)
|
||||||
|
@ -413,7 +412,6 @@ object DataciteToOAFTransformation {
|
||||||
return List()
|
return List()
|
||||||
result.setAuthor(authors.asJava)
|
result.setAuthor(authors.asJava)
|
||||||
|
|
||||||
|
|
||||||
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
|
val titles: List[TitleType] = (json \\ "titles").extractOrElse[List[TitleType]](List())
|
||||||
|
|
||||||
result.setTitle(
|
result.setTitle(
|
||||||
|
@ -437,8 +435,6 @@ object DataciteToOAFTransformation {
|
||||||
.asJava
|
.asJava
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val dates = (json \\ "dates").extract[List[DateType]]
|
val dates = (json \\ "dates").extract[List[DateType]]
|
||||||
val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
|
val publication_year = (json \\ "publicationYear").extractOrElse[String](null)
|
||||||
|
|
||||||
|
@ -645,16 +641,16 @@ object DataciteToOAFTransformation {
|
||||||
}
|
}
|
||||||
|
|
||||||
private def generateRelations(
|
private def generateRelations(
|
||||||
rels: List[RelatedIdentifierType],
|
rels: List[RelatedIdentifierType],
|
||||||
id: String,
|
id: String,
|
||||||
date: String
|
date: String
|
||||||
): List[Relation] = {
|
): List[Relation] = {
|
||||||
rels
|
rels
|
||||||
.filter(r =>
|
.filter(r =>
|
||||||
subRelTypeMapping
|
subRelTypeMapping
|
||||||
.contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") ||
|
.contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") ||
|
||||||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
|
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
|
||||||
r.relatedIdentifierType.equalsIgnoreCase("arxiv"))
|
r.relatedIdentifierType.equalsIgnoreCase("arxiv"))
|
||||||
)
|
)
|
||||||
.map(r => {
|
.map(r => {
|
||||||
val rel = new Relation
|
val rel = new Relation
|
||||||
|
@ -686,4 +682,4 @@ object DataciteToOAFTransformation {
|
||||||
s"10|$b::${DHPUtils.md5(a)}"
|
s"10|$b::${DHPUtils.md5(a)}"
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue