forked from D-Net/dnet-hadoop
Added a normalization step for the DOI (via DoiBoostMappingUtil.normalizeDoi)
This commit is contained in:
parent
801763a0fa
commit
cf758f4f91
|
@ -16,9 +16,10 @@ import scala.collection.JavaConverters._
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
import scala.util.matching.Regex
|
import scala.util.matching.Regex
|
||||||
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils
|
import eu.dnetlib.dhp.schema.scholexplorer.OafUtils
|
||||||
|
|
||||||
import java.util
|
import java.util
|
||||||
|
|
||||||
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||||
|
|
||||||
case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
|
case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
|
||||||
|
|
||||||
case class mappingAffiliation(name: String) {}
|
case class mappingAffiliation(name: String) {}
|
||||||
|
@ -89,7 +90,7 @@ case object Crossref2Oaf {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
|
|
||||||
//MAPPING Crossref DOI into PID
|
//MAPPING Crossref DOI into PID
|
||||||
val doi: String = (json \ "DOI").extract[String]
|
val doi: String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||||
result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
result.setPid(List(createSP(doi, "doi", ModelConstants.DNET_PID_TYPES)).asJava)
|
||||||
|
|
||||||
//MAPPING Crossref DOI into OriginalId
|
//MAPPING Crossref DOI into OriginalId
|
||||||
|
@ -101,6 +102,7 @@ case object Crossref2Oaf {
|
||||||
val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava)
|
val originalIds = new util.ArrayList(tmp.filter(id => id != null).asJava)
|
||||||
result.setOriginalId(originalIds)
|
result.setOriginalId(originalIds)
|
||||||
|
|
||||||
|
|
||||||
// Add DataInfo
|
// Add DataInfo
|
||||||
result.setDataInfo(generateDataInfo())
|
result.setDataInfo(generateDataInfo())
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package eu.dnetlib.doiboost.crossref
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||||
import org.apache.commons.io.IOUtils
|
import org.apache.commons.io.IOUtils
|
||||||
import org.apache.hadoop.io.{IntWritable, Text}
|
import org.apache.hadoop.io.{IntWritable, Text}
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
|
@ -21,7 +22,7 @@ object CrossrefDataset {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json: json4s.JValue = parse(input)
|
lazy val json: json4s.JValue = parse(input)
|
||||||
val ts:Long = (json \ "indexed" \ "timestamp").extract[Long]
|
val ts:Long = (json \ "indexed" \ "timestamp").extract[Long]
|
||||||
val doi:String = (json \ "DOI").extract[String]
|
val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||||
CrossrefDT(doi, input, ts)
|
CrossrefDT(doi, input, ts)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package eu.dnetlib.doiboost.crossref
|
package eu.dnetlib.doiboost.crossref
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||||
import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item
|
import eu.dnetlib.doiboost.crossref.CrossrefDataset.to_item
|
||||||
import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass
|
import eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries.getClass
|
||||||
import org.apache.hadoop.io.{IntWritable, Text}
|
import org.apache.hadoop.io.{IntWritable, Text}
|
||||||
|
@ -27,7 +28,7 @@ object GenerateCrossrefDataset {
|
||||||
def crossrefElement(meta: String): CrossrefDT = {
|
def crossrefElement(meta: String): CrossrefDT = {
|
||||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||||
lazy val json: json4s.JValue = parse(meta)
|
lazy val json: json4s.JValue = parse(meta)
|
||||||
val doi:String = (json \ "DOI").extract[String]
|
val doi:String = DoiBoostMappingUtil.normalizeDoi((json \ "DOI").extract[String])
|
||||||
val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
|
val timestamp: Long = (json \ "indexed" \ "timestamp").extract[Long]
|
||||||
CrossrefDT(doi, meta, timestamp)
|
CrossrefDT(doi, meta, timestamp)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue