forked from antonis.lempesis/dnet-hadoop
reintroduced the hostedby patching to the datacite records
This commit is contained in:
parent
42e8f76778
commit
b37bc277c4
File diff suppressed because it is too large
Load Diff
|
@ -19,11 +19,33 @@ import java.time.chrono.ThaiBuddhistDate
|
||||||
import java.time.format.DateTimeFormatter
|
import java.time.format.DateTimeFormatter
|
||||||
import java.util.{Date, Locale}
|
import java.util.{Date, Locale}
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
import scala.io.{Codec, Source}
|
||||||
|
|
||||||
object DataciteToOAFTransformation {
|
object DataciteToOAFTransformation {
|
||||||
|
|
||||||
|
/** One entry of the hosted-by lookup table extracted from `hostedBy_map.json`.
  *
  * Field names are kept in snake_case because the entries are extracted from
  * JSON by field name.
  *
  * @param openaire_id   identifier passed to `generateDSId` when building the hostedby key/value
  * @param datacite_name name of the repository on the Datacite side (presumably; not read in the visible code)
  * @param official_name name used as the value of the hostedby key/value
  * @param similarity    optional score attached to the mapping (its meaning is not shown here — TODO confirm)
  */
case class HostedByMapType(
  openaire_id: String,
  datacite_name: String,
  official_name: String,
  similarity: Option[Float]
)
|
||||||
|
|
||||||
val mapper = new ObjectMapper()
|
// ObjectMapper instance held as a member of this object
// (presumably Jackson's ObjectMapper; the import is outside this view — TODO confirm).
val mapper = new ObjectMapper()
|
||||||
|
|
||||||
|
/** Fallback entry built from the `ModelConstants` "unknown repository" values.
  *
  * It is used as the default when a lookup in `hostedByMap` finds no entry.
  * Both name fields carry the same `UNKNOWN_REPOSITORY` value.
  */
val unknown_repository: HostedByMapType = HostedByMapType(
  openaire_id = ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID,
  datacite_name = ModelConstants.UNKNOWN_REPOSITORY.getValue,
  official_name = ModelConstants.UNKNOWN_REPOSITORY.getValue,
  similarity = Some(1.0f)
)
|
||||||
|
|
||||||
|
/** Lookup table parsed from the classpath resource `hostedBy_map.json`.
  *
  * The resource is resolved through `getClass.getResourceAsStream`, read fully
  * into a string, parsed as JSON and extracted into a map of
  * [[HostedByMapType]] entries. Elsewhere in this object the map is queried
  * with an upper-cased client identifier.
  *
  * The underlying stream is always closed once the content has been read, and
  * a missing resource is reported with an explicit error instead of a bare
  * NullPointerException.
  */
val hostedByMap: Map[String, HostedByMapType] = {
  val resourceName = "hostedBy_map.json"
  // getResourceAsStream returns null when the resource cannot be found.
  val stream = Option(getClass.getResourceAsStream(resourceName)).getOrElse(
    throw new IllegalStateException(s"Resource $resourceName not found on the classpath")
  )
  val source = Source.fromInputStream(stream)
  // Closing the Source also closes the wrapped input stream.
  val s =
    try source.mkString
    finally source.close()
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
  lazy val json: org.json4s.JValue = parse(s)
  json.extract[Map[String, HostedByMapType]]
}
|
||||||
|
|
||||||
/** This method should skip record if json contains invalid text
|
/** This method should skip record if json contains invalid text
|
||||||
* defined in file datacite_filter
|
* defined in file datacite_filter
|
||||||
*
|
*
|
||||||
|
@ -534,12 +556,9 @@ object DataciteToOAFTransformation {
|
||||||
|
|
||||||
if (client.isDefined) {
|
if (client.isDefined) {
|
||||||
|
|
||||||
instance.setHostedby(
|
val hb = hostedByMap.getOrElse(client.get.toUpperCase(), unknown_repository)
|
||||||
OafMapperUtils.keyValue(
|
instance.setHostedby(OafMapperUtils.keyValue(generateDSId(hb.openaire_id), hb.official_name))
|
||||||
generateDSId(ModelConstants.UNKNOWN_REPOSITORY_ORIGINALID),
|
|
||||||
ModelConstants.UNKNOWN_REPOSITORY.getValue
|
|
||||||
)
|
|
||||||
)
|
|
||||||
instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
|
instance.setCollectedfrom(DATACITE_COLLECTED_FROM)
|
||||||
instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
|
instance.setUrl(List(s"https://dx.doi.org/$doi").asJava)
|
||||||
instance.setAccessright(access_rights_qualifier)
|
instance.setAccessright(access_rights_qualifier)
|
||||||
|
|
Loading…
Reference in New Issue