DHP-Explorer/src/main/java/eu/dnetlib/doiboost/mag/MagUtility.scala

109 lines
3.4 KiB
Scala

package eu.dnetlib.doiboost.mag
import scala.collection.JavaConverters._
import eu.dnetlib.dhp.schema.oaf.{KeyValue, Relation}
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods
object MagUtility {
/** Regex locating the DOI prefix "10." either at the very start of a string or
  * immediately after a slash (as in a resolver URL path).
  *
  * Fix: the dot in the second alternative is now escaped. Unescaped, it matched
  * "/10" followed by any character (e.g. "/105"), which was then rewritten to "10.".
  */
val DOI_PREFIX_REGEX: String = "(^10\\.|\\/10\\.)"

/** Canonical DOI prefix; also the replacement text for [[DOI_PREFIX_REGEX]] matches. */
val DOI_PREFIX: String = "10."
/** Immutable record describing a single paper.
  *
  * NOTE(review): the field names presumably mirror the columns of the Microsoft
  * Academic Graph "Papers" table one-to-one — confirm against the loader before
  * renaming or reordering anything.
  *
  * Fields typed as `Option` may be absent; the plain `String`, `Integer` and
  * `Timestamp` fields are reference types and can therefore still be `null`.
  */
case class MagPapers(
  // identification
  PaperId: Long,
  Rank: Integer,
  Doi: String,
  DocType: String,
  // titles
  PaperTitle: String,
  OriginalTitle: String,
  BookTitle: String,
  // publication details
  Year: Option[Integer],
  Date: Option[java.sql.Timestamp],
  Publisher: String,
  JournalId: Option[Long],
  ConferenceSeriesId: Option[Long],
  ConferenceInstanceId: Option[Long],
  Volume: String,
  Issue: String,
  FirstPage: String,
  LastPage: String,
  // counters
  ReferenceCount: Option[Long],
  CitationCount: Option[Long],
  EstimatedCitation: Option[Long],
  // provenance
  OriginalVenue: String,
  FamilyId: Option[Long],
  CreatedDate: java.sql.Timestamp
)
/** Immutable record linking a paper to one of the papers it references.
  *
  * Every field is optional.
  *
  * @param PaperId          identifier of the paper holding the reference
  * @param PaperReferenceId identifier of the referenced paper
  * @param CitationContext  text attached to the citation, when available
  */
case class MagPaperCitation(
  PaperId: Option[Long],
  PaperReferenceId: Option[Long],
  CitationContext: Option[String]
)
/** Extracts the `source` and `target` string fields from a JSON document.
  *
  * NOTE(review): extraction is expected to fail with a json4s exception when
  * either field is missing or is not a string — confirm if callers rely on it.
  *
  * @param input a JSON object serialized as a string, carrying string-valued
  *              `source` and `target` fields
  * @return a two-element list: the `source` value followed by the `target` value
  */
def extractST(input: String): List[String] = {
  implicit val formats: DefaultFormats.type = DefaultFormats
  val json: json4s.JValue = JsonMethods.parse(input)
  val source = (json \ "source").extract[String]
  // Fix: this used to read the "source" field again, so the result held the source twice.
  val target = (json \ "target").extract[String]
  List(source, target)
}
/** Builds the pair of citation relations that connect two DOIs.
  *
  * Both relations are of type `resultResult` / `citation` and share the same
  * "Microsoft Academic Graph" collected-from entry. Identifiers are derived from
  * the DOIs through `IdentifierFactory.idFromPid` with prefix "50" and pid type "doi".
  *
  * @param from DOI of the citing side
  * @param to   DOI of the cited side
  * @return a list holding `from Cites to` first and `to IsCitedBy from` second
  */
def createCiteRealtion(from:String, to:String):List[Relation] = {
  val collectedFrom = new KeyValue
  collectedFrom.setValue("Microsoft Academic Graph")
  collectedFrom.setKey("10|openaire____::" + IdentifierFactory.md5("microsoft"))

  // Assembles one directed relation; the identifiers are resolved from the raw DOIs here.
  def relation(sourceDoi: String, targetDoi: String, relClass: String): Relation = {
    val rel = new Relation
    rel.setSource(IdentifierFactory.idFromPid("50", "doi", sourceDoi, true))
    rel.setTarget(IdentifierFactory.idFromPid("50", "doi", targetDoi, true))
    rel.setRelType("resultResult")
    rel.setSubRelType("citation")
    rel.setRelClass(relClass)
    rel.setCollectedfrom(List(collectedFrom).asJava)
    rel
  }

  val isCitedBy = relation(to, from, "IsCitedBy")
  val cites = relation(from, to, "Cites")
  List(cites, isCitedBy)
}
/** Tells whether a string carries no content.
  *
  * @param x the string to inspect; may be `null`
  * @return `true` when `x` is `null` or is empty after trimming, `false` otherwise
  */
def isEmpty(x: String): Boolean =
  Option(x).forall(_.trim.isEmpty)
/** Normalizes a raw DOI string.
  *
  * Steps: remove every whitespace character, lower-case the text, replace the
  * first match of `DOI_PREFIX_REGEX` with `DOI_PREFIX`, then keep only the part
  * starting at the first occurrence of "10.".
  *
  * @param input the raw DOI, possibly wrapped in a URL or padded with whitespace; may be `null`
  * @return the normalized DOI, or `null` when `input` is `null`, is blank after cleaning,
  *         or contains no "10." marker
  */
def normalizeDoi(input: String): String =
  Option(input)
    .map { raw =>
      raw
        .replaceAll("(?:\\n|\\r|\\t|\\s)", "")
        .toLowerCase
        .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
    }
    .filterNot(cleaned => isEmpty(cleaned))
    .map(cleaned => (cleaned, cleaned.indexOf("10.")))
    .collect { case (cleaned, start) if start >= 0 => cleaned.substring(start) }
    .filter(_.startsWith(DOI_PREFIX))
    .orNull
}