109 lines
3.4 KiB
Scala
109 lines
3.4 KiB
Scala
package eu.dnetlib.doiboost.mag
|
|
|
|
import scala.collection.JavaConverters._
|
|
import eu.dnetlib.dhp.schema.oaf.{KeyValue, Relation}
|
|
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
|
|
import org.json4s
|
|
import org.json4s.DefaultFormats
|
|
import org.json4s.jackson.JsonMethods
|
|
|
|
/** Helpers used when importing Microsoft Academic Graph (MAG) data:
  * record shapes for papers and citations, DOI normalisation and the
  * construction of citation relations between two DOIs.
  */
object MagUtility {

  /** Matches a DOI prefix ("10.") either at the very beginning of the string
    * or immediately after a "/". The dot is escaped in both alternatives so
    * that text such as "/100" or "/10/" is not mistaken for a DOI prefix.
    */
  val DOI_PREFIX_REGEX: String = "(^10\\.|/10\\.)"

  /** Literal prefix every normalised DOI starts with. */
  val DOI_PREFIX: String = "10."

  /** Record describing a single MAG paper.
    *
    * NOTE(review): the upper-camel-case field names presumably mirror the
    * column names of the source dataset so rows can be mapped by name —
    * confirm against the loader before renaming anything.
    */
  case class MagPapers(
    PaperId: Long,
    Rank: Integer,
    Doi: String,
    DocType: String,
    PaperTitle: String,
    OriginalTitle: String,
    BookTitle: String,
    Year: Option[Integer],
    Date: Option[java.sql.Timestamp],
    Publisher: String,
    JournalId: Option[Long],
    ConferenceSeriesId: Option[Long],
    ConferenceInstanceId: Option[Long],
    Volume: String,
    Issue: String,
    FirstPage: String,
    LastPage: String,
    ReferenceCount: Option[Long],
    CitationCount: Option[Long],
    EstimatedCitation: Option[Long],
    OriginalVenue: String,
    FamilyId: Option[Long],
    CreatedDate: java.sql.Timestamp
  )

  /** Record describing a citation between two MAG papers; every field is optional.
    *
    * @param PaperId          identifier of the paper holding the reference
    * @param PaperReferenceId identifier of the referenced paper
    * @param CitationContext  optional text associated with the citation
    */
  case class MagPaperCitation(
    PaperId: Option[Long],
    PaperReferenceId: Option[Long],
    CitationContext: Option[String]
  )

  /** Parses `input` as JSON and extracts its top-level "source" and "target" fields.
    *
    * @param input a JSON document containing the fields "source" and "target"
    * @return a two-element list: the source value followed by the target value
    * @note parsing and extraction errors raised by json4s (malformed JSON,
    *       missing field) are not handled here and propagate to the caller
    */
  def extractST(input: String): List[String] = {
    implicit val formats: DefaultFormats.type = DefaultFormats
    val json: json4s.JValue = JsonMethods.parse(input)
    // Read each field by its own name; the target must not be taken from "source".
    List("source", "target").map(field => (json \ field).extract[String])
  }

  /** Builds the pair of citation relations linking two DOIs.
    *
    * Both relations have relType "resultResult", subRelType "citation" and are
    * marked as collected from "Microsoft Academic Graph".
    *
    * @param from DOI of the citing result
    * @param to   DOI of the cited result
    * @return a list holding, in this order, the relation `from` "Cites" `to`
    *         and the inverse relation `to` "IsCitedBy" `from`
    */
  def createCiteRealtion(from: String, to: String): List[Relation] = {
    val collectedFrom = new KeyValue
    collectedFrom.setValue("Microsoft Academic Graph")
    collectedFrom.setKey("10|openaire____::" + IdentifierFactory.md5("microsoft"))

    // Each identifier is needed by both relations, so compute it only once.
    val citingId = IdentifierFactory.idFromPid("50", "doi", from, true)
    val citedId = IdentifierFactory.idFromPid("50", "doi", to, true)

    def citation(source: String, target: String, relClass: String): Relation = {
      val rel = new Relation
      rel.setSource(source)
      rel.setTarget(target)
      rel.setRelType("resultResult")
      rel.setSubRelType("citation")
      rel.setRelClass(relClass)
      rel.setCollectedfrom(List(collectedFrom).asJava)
      rel
    }

    List(
      citation(citingId, citedId, "Cites"),
      citation(citedId, citingId, "IsCitedBy")
    )
  }

  /** Tells whether `x` is null, empty or made only of whitespace. */
  def isEmpty(x: String): Boolean = x == null || x.trim.isEmpty

  /** Normalises a raw DOI string.
    *
    * All whitespace is removed, the text is lower-cased, the first occurrence
    * of [[DOI_PREFIX_REGEX]] is replaced with [[DOI_PREFIX]], and everything
    * preceding the first "10." is dropped.
    *
    * @param input the raw DOI, possibly null or embedded in a URL
    * @return the normalised DOI starting with "10.", or null when `input` is
    *         null or contains no "10." after cleaning (null is kept as the
    *         "no DOI" marker for existing callers)
    */
  def normalizeDoi(input: String): String =
    Option(input)
      .map { raw =>
        raw
          .replaceAll("\\s+", "")
          // Locale.ROOT keeps the result independent of the JVM default locale.
          .toLowerCase(java.util.Locale.ROOT)
          .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
      }
      .flatMap { cleaned =>
        val start = cleaned.indexOf(DOI_PREFIX)
        // An empty string or one without the prefix yields no DOI.
        if (start < 0) None else Some(cleaned.substring(start))
      }
      .orNull

}
|