From a104d2b6ad1b38f6fe47ff1cab6f05f62ec57a48 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 26 Nov 2020 11:12:00 +0100 Subject: [PATCH] cleanup --- .../eu/dnetlib/dhp/doiboost/QueryTest.scala | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/QueryTest.scala diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/QueryTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/QueryTest.scala deleted file mode 100644 index 698c8cc79..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/QueryTest.scala +++ /dev/null @@ -1,74 +0,0 @@ -package eu.dnetlib.dhp.doiboost - -import eu.dnetlib.dhp.schema.oaf.{Publication, Relation} -import org.apache.spark.SparkContext -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} -import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} -import org.json4s -import org.json4s.DefaultFormats -import org.json4s.jackson.JsonMethods._ - -import scala.collection.JavaConverters._ -class QueryTest { - - def extract_payload(input:String) :String = { - - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: json4s.JValue = parse(input) - - - compact(render((json \ "payload"))) - - - - } - - - def has_ands(r:Relation) :Boolean = { - - r.getCollectedfrom!= null && r.getCollectedfrom.asScala.count(k => k.getValue.contains("Australian")) > 0 - - } - - def hasInstanceWithUrl(p:Publication):Boolean = { - val c = p.getInstance.asScala.map(i => i.getUrl!= null && !i.getUrl.isEmpty).size - !(!p.getInstance.isEmpty && c == p.getInstance().size) - } - - - def hasNullAccessRights(p:Publication):Boolean = { - val c = p.getInstance.asScala.map(i => i.getAccessright!= null && i.getAccessright.getClassname.nonEmpty).size - !p.getInstance.isEmpty && c == p.getInstance().size() - } - - - - def extractId(input:String):String = { - implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats - lazy val json: json4s.JValue = parse(input) - (json \ "id").extractOrElse[String](null) - - - } - - - def myQuery(spark:SparkSession, sc:SparkContext): Unit = { - implicit val mapEncoderPub: Encoder[Publication] = Encoders.kryo[Publication] - val mapper = new ObjectMapper() - mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) - - val ds:Dataset[Publication] = spark.read.load("/tmp/p").as[Publication] - - - val sc = spark.sparkContext - - - ds.filter(p =>p.getBestaccessright!= null && p.getBestaccessright.getClassname.nonEmpty).count() - val typologies =List("dataset","datasource","organization","otherresearchproduct","project","publication","software") - val basePath ="/tt" - - typologies.map(tp => sc.textFile(s"$basePath/dataset").map(s =>extractId(tp) ).distinct.count()).sum() - - } - -}