forked from D-Net/dnet-hadoop
Added a normalization step for the DOI
This commit is contained in:
parent
801763a0fa
commit
cf758f4f91
|
@ -16,9 +16,10 @@ import scala.collection.JavaConverters._
|
|||
import scala.collection.mutable
|
||||
import scala.util.matching.Regex
|
||||
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils
|
||||
|
||||
import java.util
|
||||
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
|
||||
/** A single Crossref record paired with its identifying metadata.
  *
  * @param doi       the DOI of the record
  * @param json      the record's JSON text, kept as a plain string
  * @param timestamp a numeric timestamp associated with the record
  */
case class CrossrefDT(
  doi: String,
  json: String,
  timestamp: Long
)
|
||||
|
||||
/** Holder for an affiliation, carrying only its name.
  *
  * NOTE(review): the lower-case type name departs from Scala naming
  * conventions; it is kept as-is because code outside this view may refer to it.
  *
  * @param name the affiliation name
  */
case class mappingAffiliation(
  name: String
)
|
||||
|
@ -89,7 +90,7 @@ case object Crossref2Oaf {
|
|||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
|
||||
//MAPPING Crossref DOI into PID
|
||||
val doi: String = (json \ "DOI").extract[String]
|
||||
val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||
result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||
|
||||
//MAPPING Crossref DOI into OriginalId
|
||||
|
@ -101,6 +102,7 @@ case object Crossref2Oaf {
|
|||
val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava)
|
||||
result.setOriginalId(originalIds)
|
||||
|
||||
|
||||
// Add DataInfo
|
||||
result.setDataInfo(generateDataInfo())
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import org.apache.commons.io.IOUtils
|
||||
import org.apache.hadoop.io.{IntWritable, Text}
|
||||
import org.apache.spark.SparkConf
|
||||
|
@ -21,7 +22,7 @@ object CrossrefDataset {
|
|||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(input)
|
||||
val ts:Long = (json \ "indexed" \ "timestamp").extract[Long]
|
||||
val doi:String = (json \ "DOI").extract[String]
|
||||
val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||
CrossrefDT(doi, input, ts)
|
||||
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package eu.dnetlib.doiboost.crossref
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||
import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item
|
||||
import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass
|
||||
import org.apache.hadoop.io.{IntWritable, Text}
|
||||
|
@ -27,7 +28,7 @@ object GenerateCrossrefDataset {
|
|||
def crossrefElement(meta: String): CrossrefDT = {
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: json4s.JValue = parse(meta)
|
||||
val doi:String = (json \ "DOI").extract[String]
|
||||
val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||
val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
|
||||
CrossrefDT(doi, meta, timestamp)
|
||||
|
||||
|
|
Loading…
Reference in New Issue