// DHP-Explorer/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefUtils.scala
//
// 121 lines
// 4.1 KiB
// Scala

package eu.dnetlib.doiboost.crossref
import org.json4s
import org.json4s.JsonAST.JField
import org.json4s.{DefaultFormats, JObject, JString}
import org.json4s.jackson.JsonMethods
/** Immutable record made of three fields.
  *
  * @param doi       identifier string of the record
  * @param json      JSON payload kept as a plain string (presumably the raw
  *                  Crossref document for `doi` -- verify against the producer)
  * @param timestamp numeric timestamp; the unit is not visible in this file
  */
case class CrossrefDT(
  doi: String,
  json: String,
  timestamp: Long
)
/** Helpers that pull a handful of fields out of JSON documents given as strings.
  *
  * Two extraction flavours are used and deliberately kept apart:
  *  - strict: `extract[String]`, which fails (json4s raises an exception) when the
  *    field is missing or not of the requested type;
  *  - lenient: `extractOrElse`, which falls back to a default (`null` for strings,
  *    `false` for the boolean flag) instead of failing.
  *
  * Every method parses its input from scratch, so malformed JSON makes the call
  * fail with whatever exception the json4s Jackson parser raises.
  */
object CrossrefUtils {

  /** One formats instance for all extractors instead of one per call. */
  implicit private val formats: DefaultFormats.type = DefaultFormats

  /** Parses `input` into a json4s AST. */
  private def parse(input: String): json4s.JValue = JsonMethods.parse(input)

  /** Lenient string extraction: yields `null` when `node` cannot be read as a String. */
  private def stringOrNull(node: json4s.JValue): String =
    node.extractOrElse[String](null)

  /** Strictly extracts the type information of a relation document.
    *
    * @param input JSON string with `relationship.name`, `source.objectType`,
    *              `source.objectSubType`, `target.objectType` and `target.objectSubType`
    * @return `(sourceType, sourceSubType, relationName, targetType, targetSubType)`
    * @note all five fields are mandatory here; a missing one makes the call fail.
    *       `extractStats` reads a subset of the same paths leniently.
    */
  def extractInfo(input: String): (String, String, String, String, String) = {
    val json = parse(input)
    val sourceNode = json \ "source"
    val targetNode = json \ "target"
    // Extraction order is kept as relation, source, target so that the first
    // missing field reported on bad input stays the same.
    val relType = (json \ "relationship" \ "name").extract[String]
    val sourceType = (sourceNode \ "objectType").extract[String]
    val sourceSubType = (sourceNode \ "objectSubType").extract[String]
    val targetType = (targetNode \ "objectType").extract[String]
    val targetSubType = (targetNode \ "objectSubType").extract[String]
    (sourceType, sourceSubType, relType, targetType, targetSubType)
  }

  /** Leniently extracts the top-level `source`, `target` and `relClass` strings
    * together with the `dataInfo.deletedbyinference` flag.
    *
    * @param input JSON string of the relation
    * @return `(source, target, relClass, deletedByInference)`; absent strings are
    *         `null`, an absent flag is `false`
    */
  def extractST(input: String): (String, String, String, Boolean) = {
    val json = parse(input)
    val deletedByInference =
      (json \ "dataInfo" \ "deletedbyinference").extractOrElse[Boolean](false)
    (
      stringOrNull(json \ "source"),
      stringOrNull(json \ "target"),
      stringOrNull(json \ "relClass"),
      deletedByInference
    )
  }

  /** Leniently extracts `source.dnetIdentifier` and `target.dnetIdentifier`.
    *
    * @param input JSON string of the relation
    * @return `(sourceId, targetId)`; an absent value is `null`
    */
  def extractSourceTargetId(input: String): (String, String) = {
    val json = parse(input)
    (
      stringOrNull(json \ "source" \ "dnetIdentifier"),
      stringOrNull(json \ "target" \ "dnetIdentifier")
    )
  }

  /** Leniently extracts `source.objectType`, `target.objectType` and
    * `relationship.name`.
    *
    * @param input JSON string of the relation
    * @return `(sourceType, targetType, relationName)`; an absent value is `null`
    */
  def extractStats(input: String): (String, String, String) = {
    val json = parse(input)
    (
      stringOrNull(json \ "source" \ "objectType"),
      stringOrNull(json \ "target" \ "objectType"),
      stringOrNull(json \ "relationship" \ "name")
    )
  }

  /** Leniently extracts the top-level `id` and `typology` fields.
    *
    * @param input JSON string of the record
    * @return `(id, typology)`; an absent value is `null`
    */
  def extractIdType(input: String): (String, String) = {
    val json = parse(input)
    (stringOrNull(json \ "id"), stringOrNull(json \ "typology"))
  }

  /** Leniently extracts the top-level `type` and `subtype` fields.
    *
    * @param input JSON string of the record
    * @return `(type, subtype)`; an absent value is `null`
    */
  def extractTypeSubtype(input: String): (String, String) = {
    val json = parse(input)
    (stringOrNull(json \ "type"), stringOrNull(json \ "subtype"))
  }

  /** Pairs every string found under a `value` key of `collectedfrom` with the
    * record `id`.
    *
    * @param input JSON string with a mandatory `id` and an optional `collectedfrom`
    * @return one `(collectedFromValue, id)` pair per matching `value` field; empty
    *         when nothing matches
    * @note `id` is extracted strictly, so the call fails when it is missing.
    */
  def extractCF(input: String): List[(String, String)] = {
    val json = parse(input)
    val id = (json \ "id").extract[String]
    // NOTE(review): a json4s comprehension over a JValue is understood to visit
    // nested values as well, so objects nested deeper inside `collectedfrom`
    // would also be matched -- confirm this is intended.
    for {
      JObject(fields) <- json \ "collectedfrom"
      JField("value", JString(name)) <- fields
    } yield (name, id)
  }

  /** Strictly extracts the top-level `id` field.
    *
    * @param input JSON string of the record
    * @return the `id` value; the call fails when it is missing or not a string
    */
  def extractId(input: String): String =
    (parse(input) \ "id").extract[String]
}
//scholix.joinWith(id, scholix("_2").equalTo(id("_1")), "leftouter").map(s => {
// if (s._2 != null)
// ( s._1.getString(1) , s._2.getString(1) )
// else
// ("publication", s._1.getString(1))
//} ).where("_1 = 'UKN'").write.mode("Overwrite").save("scholix_prod_join")