Added stuff

Sandro La Bruzzo 2023-04-12 10:09:05 +02:00
parent 0dc33def41
commit 00ed8d5b31
26 changed files with 1637 additions and 25 deletions

View File

@@ -15,7 +15,8 @@ def extract_argument(path):
         for line in f:
             if not line.startswith("#"):
                 s = line.strip().split("=")
-                arguments[s[0].strip()] = s[1].strip()
+                if len(s) ==2:
+                    arguments[s[0].strip()] = s[1].strip()
     return arguments
@@ -111,8 +112,12 @@ if __name__ == "__main__":
    j_name = ",".join(["sandro_nb/"+ item for item in jars])
    name = main_jar_path.replace("target/", "")
+   jar_section = ""
+   if len(jars) > 0:
+       jar_section =f"--jars {j_name}"
    class_args = extracting_class_args(script_argument)
-   command = f"spark2-submit --master yarn --jars {j_name} --class {script_argument['reference_class']} sandro_nb/{name} {class_args}"
+   command = f"spark2-submit --master yarn {jar_section} --executor-memory 4G --class {script_argument['reference_class']} --conf \"spark.sql.shuffle.partitions=10000\" sandro_nb/{name} {class_args}"
    print(f"executing command {command}")

View File

@@ -135,7 +135,13 @@
        <dependency>
            <groupId>eu.dnetlib.dhp</groupId>
            <artifactId>dhp-schemas</artifactId>
-           <version>3.15.0</version>
+           <version>2.12.1-patched</version>
        </dependency>
+       <!-- JAR NEED -->
+       <dependency>
+           <groupId>eu.dnetlib.dhp</groupId>
+           <artifactId>dhp-common</artifactId>
+           <version>1.2.5-SNAPSHOT</version>
+       </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>

View File

@@ -14,23 +14,7 @@ trait SparkApp extends Serializable {
    * @param args the list of arguments.properties
    */
  def parseArguments(args: Array[String]): mutable.Map[String, String] = {
-    var currentVariable: String = null
-    val argumentMap: mutable.Map[String, String] = mutable.Map()
-    args.zipWithIndex.foreach {
-      case (x, i) =>
-        if (i % 2 == 0) {
-          // ERROR in case the syntax is wrong
-          if (!x.startsWith("-")) throw new IllegalArgumentException("wrong input syntax expected -variable_name value")
-          if (x.startsWith("--"))
-            currentVariable = x.substring(2)
-          else
-            currentVariable = x.substring(1)
-        }
-        else argumentMap += (currentVariable -> x)
-    }
-    argumentMap
+    SparkUtility.parseArguments(args)
  }

  /** Here all the spark applications runs this method

View File

@@ -0,0 +1,27 @@
package com.sandro.app
import scala.collection.mutable
object SparkUtility {
def parseArguments(args: Array[String]): mutable.Map[String, String] = {
var currentVariable: String = null
val argumentMap: mutable.Map[String, String] = mutable.Map()
args.zipWithIndex.foreach {
case (x, i) =>
if (i % 2 == 0) {
// ERROR in case the syntax is wrong
if (!x.startsWith("-")) throw new IllegalArgumentException("wrong input syntax expected -variable_name value")
if (x.startsWith("--"))
currentVariable = x.substring(2)
else
currentVariable = x.substring(1)
}
else argumentMap += (currentVariable -> x)
}
argumentMap
}
}
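
A minimal usage sketch of SparkUtility.parseArguments, assuming it is called from other code on the same classpath; the flag names and values below are invented for illustration.

import com.sandro.app.SparkUtility

object ParseArgumentsExample {
  def main(cliArgs: Array[String]): Unit = {
    // every "-name value" or "--name value" pair becomes one entry in the mutable Map
    val parsed = SparkUtility.parseArguments(Array("--master", "yarn", "-path", "/user/sandro.labruzzo/stores"))
    println(parsed("master")) // yarn
    println(parsed("path"))   // /user/sandro.labruzzo/stores
    // a token in an even position that does not start with "-" throws IllegalArgumentException
  }
}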

View File

@@ -0,0 +1,119 @@
package com.sandro.app.fs;
import com.amazonaws.services.dynamodbv2.xspec.M;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.sandro.app.SparkUtility;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import scala.collection.mutable.Map;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Objects;
public class FsChecks {
public static Configuration getHadoopConfiguration(String nameNode) {
// ====== Init HDFS File System Object
Configuration conf = new Configuration();
// Set FileSystem URI
conf.set("fs.defaultFS", nameNode);
// Because of Maven
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
System.setProperty("hadoop.home.dir", "/");
return conf;
}
private static MDStoreInfo extractPath(final String path, final String basePath) {
int res = path.indexOf(basePath);
if (res >0){
String[] split = path.substring(res).split("/");
if (split.length > 2) {
final String ts = split[split.length -1];
final String mdStore = split[split.length -2];
return new MDStoreInfo(mdStore, null, Long.parseLong(ts));
}
}
return null;
}
public static void main(String[] args) throws IOException {
Map<String, String> parsedArgs = SparkUtility.parseArguments(args);
final String namenode = parsedArgs.get("namenode").getOrElse(null);
final String master = parsedArgs.getOrElse("master", null);
final SparkConf conf = new SparkConf();
final SparkSession spark =SparkSession
.builder()
.config(conf)
.master(master)
.appName(FsChecks.class.getSimpleName())
.getOrCreate();
spark.sparkContext().setLogLevel("WARN");
final FileSystem fileSystem = FileSystem.get(getHadoopConfiguration(namenode));
final String stores =IOUtils.toString(Objects.requireNonNull(FsChecks.class.getResourceAsStream("/mdstore_info.json")));
final ObjectMapper mapper = new ObjectMapper();
final List<MDStoreInfo> storesINfo =mapper.readValue(stores, new TypeReference<List<MDStoreInfo>>(){});
final String basePath ="/user/sandro.labruzzo/stores/";
Path p = new Path(basePath);
final java.util.Map<String, MDStoreInfo> hdfs_store= new HashMap<>();
final RemoteIterator<LocatedFileStatus> ls = fileSystem.listFiles(p, true);
while (ls.hasNext()){
String current =ls.next().getPath().toString();
final MDStoreInfo info = extractPath(current, basePath);
if (info!= null) {
hdfs_store.put(info.getMdstore(), info);
}
}
storesINfo.stream().filter(s ->s.getLatestTimestamp() != null).forEach( s ->{
if (!hdfs_store.containsKey(s.getMdstore())) {
System.out.printf("Adding mdstore %s\n",s.getMdstore());
try {
fileSystem.mkdirs(new Path(basePath+s.getMdstore()));
fileSystem.create(new Path(basePath+s.getMdstore()+"/"+s.getLatestTimestamp()), true);
System.out.printf("Added path %s/%s/%d\n",basePath, s.getMdstore(),s.getLatestTimestamp());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
else {
final MDStoreInfo current = hdfs_store.get(s.getMdstore());
if (s.getLatestTimestamp() > current.getLatestTimestamp()) {
System.out.println("Updating MDStore "+s.getMdstore());
final String rmPath = String.format("%s%s/%d", basePath, current.getMdstore(), current.getLatestTimestamp());
try {
System.out.println("deleting "+rmPath);
fileSystem.create(new Path(basePath+s.getMdstore()+"/"+s.getLatestTimestamp()), true);
fileSystem.delete(new Path(rmPath), true);
} catch (IOException e) {
throw new RuntimeException("Unable to remove path "+rmPath, e);
}
}
}
});
}
}
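
The directory layout assumed by FsChecks is basePath/mdstoreId/timestamp. A small Scala sketch of the same path-parsing idea follows; the shortened mdstore id is invented for illustration, and extractPath in the class above returns an MDStoreInfo instead of a tuple.

object ExtractPathSketch {
  // returns (mdstoreId, timestamp) when the path contains basePath followed by
  // an mdstore directory and a numeric timestamp leaf, None otherwise
  def extractPath(path: String, basePath: String): Option[(String, Long)] = {
    val idx = path.indexOf(basePath)
    if (idx < 0) None
    else {
      val split = path.substring(idx).split("/")
      if (split.length > 2) Some((split(split.length - 2), split(split.length - 1).toLong))
      else None
    }
  }

  def main(args: Array[String]): Unit = {
    val p = "hdfs://nameservice1/user/sandro.labruzzo/stores/4a0cddf2/1592574025511"
    println(extractPath(p, "/user/sandro.labruzzo/stores/")) // Some((4a0cddf2,1592574025511))
  }
}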

View File

@@ -0,0 +1,53 @@
package com.sandro.app.fs;
public class MDStoreInfo {
private String mdstore;
private String currentId;
private Long latestTimestamp;
public MDStoreInfo() {
}
public MDStoreInfo(String mdstore, String currentId, Long latestTimestamp) {
this.mdstore = mdstore;
this.currentId = currentId;
this.latestTimestamp = latestTimestamp;
}
public String getMdstore() {
return mdstore;
}
public MDStoreInfo setMdstore(String mdstore) {
this.mdstore = mdstore;
return this;
}
public String getCurrentId() {
return currentId;
}
public MDStoreInfo setCurrentId(String currentId) {
this.currentId = currentId;
return this;
}
public Long getLatestTimestamp() {
return latestTimestamp;
}
public MDStoreInfo setLatestTimestamp(Long latestTimestamp) {
this.latestTimestamp = latestTimestamp;
return this;
}
@Override
public String toString() {
return "MDStoreInfo{" +
"mdstore='" + mdstore + '\'' +
", currentId='" + currentId + '\'' +
", latestTimestamp=" + latestTimestamp +
'}';
}
}

View File

@@ -0,0 +1,83 @@
package com.sandro.app.fs
import scala.collection.mutable
import scala.xml.MetaData
import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader}
case class OAFInfo(datasourcePrefix: String ,cobjCategory: String ,openAccess: List[String], identifierTypes: List[String] , hostedBy: List[String], projectid:String) {}
/** @param xml
*/
class OAFParser(xml: XMLEventReader) {
def extractAttributes(attrs: MetaData, key: String): String = {
val res = attrs.get(key)
if (res.isDefined) {
val s = res.get
if (s != null && s.nonEmpty)
s.head.text
else
null
} else null
}
def extractStats(): OAFInfo = {
var currNode: String = null
var datasourcePrefix: String = null
var cobjCategory: String= null
val openAccess: mutable.Set[String] = mutable.Set()
val identifierTypes: mutable.Set[String] = mutable.Set()
val hostedBy: mutable.Set[String] = mutable.Set()
var projectid:String = null
var node_status:String = null
while (xml.hasNext) {
xml.next match {
case EvElemStart(_, label, attrs, _) =>
currNode = label
label match {
case "datasourceprefix" => node_status = "datasourceprefix"
case "CobjCategory" => node_status = "CobjCategory"
case "accessrights" => node_status = "accessrights"
case "projectid" => node_status = "projectid"
case "hostedBy" =>
val it = extractAttributes(attrs, "name")
if (it != null && it.nonEmpty)
hostedBy += it
case "identifier" =>
val it = extractAttributes(attrs, "identifierType")
if (it != null && it.nonEmpty)
identifierTypes += it
case _ =>
}
case EvElemEnd(_, label) =>
label match {
case "datasourceprefix" => node_status = null
case "CobjCategory" => node_status = null
case "accessrights" => node_status = null
case "hostedBy" => node_status = null
case "projectid" => node_status = null
case _ =>
}
case EvText(text) =>
if (node_status != null && text.trim.nonEmpty)
node_status match {
case "projectid" =>
projectid = text
case "datasourceprefix" =>
datasourcePrefix = text
case "CobjCategory" =>
cobjCategory = text
case "accessrights" =>
openAccess += text
case _ =>
}
case _ =>
}
}
OAFInfo(datasourcePrefix = datasourcePrefix, cobjCategory = cobjCategory, openAccess = openAccess.toList, identifierTypes = identifierTypes.toList, hostedBy = hostedBy.toList,projectid)
}
}
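
A self-contained sketch of driving OAFParser over an inline record; the XML fragment is invented and only contains the elements the parser inspects (datasourceprefix, CobjCategory, accessrights, hostedBy, identifier).

import com.sandro.app.fs.OAFParser
import scala.io.Source
import scala.xml.pull.XMLEventReader

object OAFParserExample {
  def main(args: Array[String]): Unit = {
    // tiny invented record, just enough structure for extractStats()
    val xml =
      """<record>
        |  <datasourceprefix>od______2659</datasourceprefix>
        |  <CobjCategory>0021</CobjCategory>
        |  <accessrights>OPEN</accessrights>
        |  <hostedBy name="ZENODO" id="opendoar____::2659"/>
        |  <identifier identifierType="doi">10.5281/zenodo.582984</identifier>
        |</record>""".stripMargin
    val info = new OAFParser(new XMLEventReader(Source.fromString(xml))).extractStats()
    println(info) // OAFInfo(od______2659,0021,List(OPEN),List(doi),List(ZENODO),null)
  }
}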

View File

@@ -0,0 +1,89 @@
package com.sandro.app.fs;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
public class OafStat implements Serializable {
private String datasourcePrefix;
private String cobjCategory;
private List<String> openAccess;
private List<String> identifierTypes;
private List<String> hostedBy;
public String getDatasourcePrefix() {
return datasourcePrefix;
}
public OafStat setDatasourcePrefix(String datasourcePrefix) {
this.datasourcePrefix = datasourcePrefix;
return this;
}
public String getCobjCategory() {
return cobjCategory;
}
public OafStat setCobjCategory(String cobjCategory) {
this.cobjCategory = cobjCategory;
return this;
}
private void add_value_to_list(final String value, final List<String> l) {
if (value==null || StringUtils.isEmpty(value))
return;
final String normalized_value = value.toLowerCase().trim();
// add the value only when it is not already present (case-insensitive)
if (l.stream().noneMatch(s -> s.equalsIgnoreCase(normalized_value))){
l.add(normalized_value);
}
}
public List<String> getOpenAccess() {
return openAccess;
}
public List<String> getIdentifierTypes() {
return identifierTypes;
}
public List<String> getHostedBy() {
return hostedBy;
}
public void addIdentifierType(final String value) {
if (identifierTypes== null)
identifierTypes = new ArrayList<>();
add_value_to_list(value, identifierTypes);
}
public void addOpenAccess(final String value) {
if (openAccess == null)
openAccess= new ArrayList<>();
add_value_to_list(value, openAccess);
}
public void addHostedBy(final String value) {
if (hostedBy == null)
hostedBy = new ArrayList<>();
add_value_to_list(value, hostedBy);
}
public OafStat setOpenAccess(List<String> openAccess) {
this.openAccess = openAccess;
return this;
}
public OafStat setIdentifierTypes(List<String> identifierTypes) {
this.identifierTypes = identifierTypes;
return this;
}
public OafStat setHostedBy(List<String> hostedBy) {
this.hostedBy = hostedBy;
return this;
}
}

View File

@@ -0,0 +1,48 @@
package eu.dnetlib.doiboost.crossref
import com.sandro.app.AbstractScalaApplication
import org.apache.commons.cli.MissingArgumentException
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.count
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods
import org.slf4j.{Logger, LoggerFactory}
class CrossrefStatJob ( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
def extractTypologies(spark:SparkSession, path:String):Unit = {
import spark.implicits._
val df =spark.read.text(path).as[String].map(s => CrossrefUtils.extractTypeSubtype(s)).distinct()
spark.read.text(path).as[String].map(s => CrossrefUtils.extractTypeSubtype(s)).groupBy("_1", "_2").agg(count("_1").alias("total")).show(200,false)
}
/** Here all the spark applications runs this method
* where the whole logic of the spark node is defined
*/
override def run(): Unit = {
val path: String = argumentMap.get("path").orNull
if (path == null) throw new MissingArgumentException("Missing argument path")
extractTypologies(spark, path)
}
}
object CrossrefStatJob {
val log: Logger = LoggerFactory.getLogger(getClass)
def main(args: Array[String]): Unit = {
new CrossrefStatJob(args = args, log = log).initialize().run()
}
}

View File

@@ -0,0 +1,121 @@
package eu.dnetlib.doiboost.crossref
import org.json4s
import org.json4s.JsonAST.JField
import org.json4s.{DefaultFormats, JObject, JString}
import org.json4s.jackson.JsonMethods
case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
object CrossrefUtils {
def extractInfo(input:String):(String,String, String,String,String) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val relType = (json \"relationship"\"name").extract[String]
val sourceType = (json \"source"\"objectType").extract[String]
val sourcesubType = (json \"source"\"objectSubType").extract[String]
val targetType = (json \ "target" \ "objectType").extract[String]
val targetsubType = (json \ "target" \ "objectSubType").extract[String]
(sourceType, sourcesubType, relType, targetType, targetsubType)
}
def extractST(input: String): (String, String,String, Boolean) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val source = (json \ "source").extractOrElse[String](null)
val target = (json \ "target").extractOrElse[String](null)
val relClass = (json \ "relClass").extractOrElse[String](null)
val dbi = (json \ "dataInfo" \"deletedbyinference").extractOrElse[Boolean](false)
(source, target,relClass, dbi)
}
def extractSourceTargetId(input: String): (String, String) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val source = (json \ "source" \ "dnetIdentifier").extractOrElse[String](null)
val target = (json \ "target" \"dnetIdentifier").extractOrElse[String](null)
(source, target)
}
def extractStats(input: String): (String, String, String) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val source = (json \ "source"\"objectType").extractOrElse[String](null)
val target = (json \ "target"\"objectType").extractOrElse[String](null)
val relClass = (json \ "relationship"\"name").extractOrElse[String](null)
(source, target, relClass)
}
def extractIdType(input: String): (String, String) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val source = (json \ "id" ).extractOrElse[String](null)
val target = (json \ "typology" ).extractOrElse[String](null)
(source, target)
}
// def extractId(input: String): String = {
// implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
// lazy val json: json4s.JValue = JsonMethods.parse(input)
// val source = (json \ "id").extractOrElse[String](null)
//
//
// source
//
// }
def extractTypeSubtype(input:String):(String,String) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val objectType = (json \ "type").extractOrElse[String](null)
val objectSubType = (json \ "subtype").extractOrElse[String](null)
(objectType, objectSubType)
}
def extractCF(input: String): List[(String, String)] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val id:String = (json \ "id").extract[String]
val l:List[(String, String)] =for {
JObject(cf) <- json\"collectedfrom"
JField("value", JString(cf_name)) <- cf
} yield (cf_name, id)
l
}
def extractId(input:String):String = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
(json\ "id").extract[String]
}
}
//scholix.joinWith(id, scholix("_2").equalTo(id("_1")), "leftouter").map(s => {
// if (s._2 != null)
// ( s._1.getString(1) , s._2.getString(1) )
// else
// ("publication", s._1.getString(1))
//} ).where("_1 = 'UKN'").write.mode("Overwrite").save("scholix_prod_join")
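
A small sketch of what the json4s extractors above return on hand-written input; both JSON snippets are invented and only contain the fields the extractors read.

import eu.dnetlib.doiboost.crossref.CrossrefUtils

object CrossrefUtilsExample {
  def main(args: Array[String]): Unit = {
    // extractTypeSubtype reads the Crossref "type"/"subtype" fields
    println(CrossrefUtils.extractTypeSubtype("""{"type":"journal-article"}""")) // (journal-article,null)

    // extractCF pairs every collectedfrom.value with the record id
    val oafEntity =
      """{"id":"50|doi_________::abc",
        |"collectedfrom":[{"key":"10|openaire____::123","value":"Crossref"}]}""".stripMargin
    println(CrossrefUtils.extractCF(oafEntity)) // List((Crossref,50|doi_________::abc))
  }
}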

View File

@@ -0,0 +1,108 @@
package eu.dnetlib.doiboost.mag
import scala.collection.JavaConverters._
import eu.dnetlib.dhp.schema.oaf.{KeyValue, Relation}
import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils}
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods
object MagUtility {
val DOI_PREFIX_REGEX = "(^10\\.|\\/10.)"
val DOI_PREFIX = "10."
case class MagPapers(
PaperId: Long,
Rank: Integer,
Doi: String,
DocType: String,
PaperTitle: String,
OriginalTitle: String,
BookTitle: String,
Year: Option[Integer],
Date: Option[java.sql.Timestamp],
Publisher: String,
JournalId: Option[Long],
ConferenceSeriesId: Option[Long],
ConferenceInstanceId: Option[Long],
Volume: String,
Issue: String,
FirstPage: String,
LastPage: String,
ReferenceCount: Option[Long],
CitationCount: Option[Long],
EstimatedCitation: Option[Long],
OriginalVenue: String,
FamilyId: Option[Long],
CreatedDate: java.sql.Timestamp
) {}
case class MagPaperCitation(
PaperId: Option[Long],
PaperReferenceId: Option[Long],
CitationContext: Option[String]
) {}
def extractST(input: String): List[String] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = JsonMethods.parse(input)
val source = (json \ "source").extract[String]
val target = (json \ "target").extract[String]
List(source, target)
}
def createCiteRealtion(from:String, to:String):List[Relation] = {
val cf = new KeyValue
cf.setValue("Microsoft Academic Graph")
cf.setKey("10|openaire____::" + IdentifierFactory.md5("microsoft"))
val b = new Relation
val t = IdentifierFactory.idFromPid("50", "doi", from, true)
val s = IdentifierFactory.idFromPid("50", "doi", to, true)
b.setSource(s)
b.setTarget(t)
b.setRelType("resultResult")
b.setSubRelType("citation")
b.setRelClass("IsCitedBy")
b.setCollectedfrom(List(cf).asJava)
val a = new Relation
val source = IdentifierFactory.idFromPid("50", "doi", from, true)
val target = IdentifierFactory.idFromPid("50", "doi", to, true)
a.setSource(source)
a.setTarget(target)
a.setRelType("resultResult")
a.setSubRelType("citation")
a.setRelClass("Cites")
a.setCollectedfrom(List(cf).asJava)
List(a,b)
}
def isEmpty(x: String) = x == null || x.trim.isEmpty
def normalizeDoi(input: String): String = {
if (input == null)
return null
val replaced = input
.replaceAll("(?:\\n|\\r|\\t|\\s)", "")
.toLowerCase
.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX)
if (isEmpty(replaced))
return null
if (replaced.indexOf("10.") < 0)
return null
val ret = replaced.substring(replaced.indexOf("10."))
if (!ret.startsWith(DOI_PREFIX))
return null
return ret
}
}
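
A quick sketch of normalizeDoi on a few made-up inputs: whitespace is stripped, the value is lower-cased, and everything before the first 10. prefix is dropped; inputs without a valid prefix yield null.

import eu.dnetlib.doiboost.mag.MagUtility

object NormalizeDoiExample {
  def main(args: Array[String]): Unit = {
    println(MagUtility.normalizeDoi("https://doi.org/10.1000/XYZ123 ")) // 10.1000/xyz123
    println(MagUtility.normalizeDoi(" 10.5281/ZENODO.582984\n"))        // 10.5281/zenodo.582984
    println(MagUtility.normalizeDoi("not-a-doi"))                       // null
    println(MagUtility.normalizeDoi(null))                              // null
  }
}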

View File

@@ -0,0 +1,61 @@
package eu.dnetlib.doiboost.mag
import com.fasterxml.jackson.databind.ObjectMapper
import com.sandro.app.AbstractScalaApplication
import eu.dnetlib.dhp.schema.oaf.Relation
import eu.dnetlib.doiboost.mag.MagUtility.{MagPaperCitation, MagPapers, normalizeDoi}
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
class SparkMagCitation ( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
def extractCitationRelationDOI(spark: SparkSession, magBasePath: String, workingDir: String) = {
import spark.implicits._
val papersDs: Dataset[(Long, String)] = spark.read.load(s"$magBasePath/Papers").select("PaperId", "Doi").where(col("PaperId").isNotNull).where(col("Doi").isNotNull).as[(Long, String)]
val citationDS: Dataset[(Long, Long)] = spark.read.load(s"$magBasePath/PaperCitationContexts").select("PaperId", "PaperReferenceId").where(col("PaperId").isNotNull).where(col("PaperReferenceId").isNotNull).as[(Long, Long)]
val DOI_ID = papersDs.map(s => (s._1, normalizeDoi(s._2).toLowerCase.trim)).filter(s => s._2 != null)
citationDS.joinWith(DOI_ID, citationDS("PaperId").equalTo(DOI_ID("_1"))).map(s => (s._2._2, s._1._2)).as[(String, Long)].write.mode(SaveMode.Overwrite).save(s"$workingDir/citation_one_side")
val oneSideRelationDs = spark.read.load(s"$workingDir/citation_one_side").as[(String, Long)]
oneSideRelationDs.joinWith(DOI_ID, oneSideRelationDs("_2").equalTo(DOI_ID("_1")), "inner").map(s => (s._1._1, s._2._2)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDir/citation_mag_doi_doi")
}
def createRelations(spark:SparkSession, workingDir:String) :Unit = {
val mapper = new ObjectMapper()
import spark.implicits._
implicit val resultEncoder:Encoder[Relation] = Encoders.kryo[Relation]
val ctM = spark.read.load(s"$workingDir/citation_mag_doi_doi").as[(String, String)]
ctM.flatMap(t => MagUtility.createCiteRealtion(t._1, t._2)).as[Relation].map(m => mapper.writeValueAsString(m)).write.mode(SaveMode.Overwrite)
.option("compression", "gzip").text(s"$workingDir/relations")
}
def checkRelation(spark:SparkSession, workingDir:String) :Unit = {
import spark.implicits._
spark.read.text(s"$workingDir/relations").as[String].flatMap(s => MagUtility.extractST(s)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDir/distinctID")
}
/** Here all the spark applications runs this method
* where the whole logic of the spark node is defined
*/
override def run(): Unit = {
//extractCitationRelationDOI(spark,"/data/doiboost/input/mag/dataset", "/user/sandro.labruzzo/mag")
//createRelations(spark,"/user/sandro.labruzzo/mag")
checkRelation(spark,"/user/sandro.labruzzo/mag")
}
}
object SparkMagCitation {
val log:Logger = LoggerFactory.getLogger(getClass)
def main(args: Array[String]): Unit = {
new SparkMagCitation(args,log).initialize().run()
}
}

View File

@@ -0,0 +1,45 @@
package eu.dnetlib.graph.raw
import com.sandro.app.AbstractScalaApplication
import eu.dnetlib.doiboost.crossref.CrossrefUtils
import org.apache.commons.cli.MissingArgumentException
import org.apache.spark.sql.{Dataset, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.functions.{count, desc}
class CheckOpenAireFailure ( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
def count_collectedFromByEntity(spark:SparkSession,base_path_1:String,base_path_2:String): Unit = {
import spark.implicits._
// val l_types = List("dataset", "datasource","organization","otherresearchproduct","project","publication","software")
println(s"Publication in $base_path_1")
spark.read.text(s"$base_path_1/publication").as[String].flatMap(s => CrossrefUtils.extractCF(s)).groupBy("_1").agg(count("_2").alias("Total")).orderBy(desc("total")).show(100,false)
println(s"Publication in $base_path_2")
spark.read.text(s"$base_path_2/publication").as[String].flatMap(s => CrossrefUtils.extractCF(s)).groupBy("_1").agg(count("_2").alias("Total")).orderBy(desc("total")).show(100, false)
}
/** Here all the spark applications runs this method
* where the whole logic of the spark node is defined
*/
override def run(): Unit = {
count_collectedFromByEntity(spark, "/tmp/beta_provision/graph/00_prod_graph_aggregator","/tmp/prod_provision/graph/00_graph_aggregator" )
}
}
object CheckOpenAireFailure {
val log = LoggerFactory.getLogger(getClass)
def main(args: Array[String]): Unit = {
new CheckOpenAireFailure(args, log).initialize().run();
}
}

View File

@@ -0,0 +1,25 @@
package eu.dnetlib.graph.raw;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
public class CheckPath {
public static void main(String[] args) {
final SparkConf conf= new SparkConf();
final SparkSession spark = SparkSession
.builder()
.config(conf)
.appName(CheckPath.class.getSimpleName())
.master("yarn")
.getOrCreate();
final String sp ="/data/aggregator_contents/PROD_for_BETA/mdstore/*/*";
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
final Long total = sc.sequenceFile(sp, Text.class, Text.class).count();
System.out.println("total = " + total);
}
}

View File

@@ -0,0 +1,86 @@
package eu.dnetlib.graph.raw
import com.sandro.app.AbstractScalaApplication
import com.sandro.app.fs.{OAFInfo, OafStat}
import eu.dnetlib.scholix.{DHPUtils, Measurement}
import org.apache.commons.cli.MissingArgumentException
import org.apache.hadoop.io.Text
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.slf4j.{Logger, LoggerFactory}
class GenerateMDStoreStats ( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
def hasHostedBy(o: List[String]): Int = {
if (o == null || o.isEmpty)
0
else
o.count(h => !h.equalsIgnoreCase("unknown repository"))
}
def generateMDStoreStats(spark:SparkSession,statsDBPath:String, ts:Long, targetPath:String) :Unit = {
import spark.implicits._
val df:Dataset[OAFInfo] = spark.read.load(statsDBPath).as[OAFInfo]
log.error("Generating Total Item measurement")
df.groupBy("datasourcePrefix").agg(count("datasourcePrefix").as("Total"))
.map(r => Measurement(name = "Total Item", nsprefix = r.getAs[String]("datasourcePrefix"), timestamp = ts, value = r.getAs[Long]("Total")))
.write.mode(SaveMode.Overwrite).save(targetPath)
log.error("Generating Total Item related to a project measurement")
df.groupBy("datasourcePrefix").agg(count("projectid").alias("ItemWithProject"))
.map(r => {
Measurement(name = "Project Relation count", nsprefix = r.getAs[String]("datasourcePrefix"), timestamp = ts, value = r.getAs[Long]("ItemWithProject"))
}).write.mode(SaveMode.Append).save(targetPath)
df.map(o => (o.datasourcePrefix, hasHostedBy(o.hostedBy)))
.groupBy("_1")
.agg(sum("_2"))
.map(r =>
Measurement("Hosted By Record count", nsprefix = r.getString(0), timestamp = ts, value =r.getAs[Long](1) )
).write.mode(SaveMode.Append).save(targetPath)
}
def generateInfoOaf(spark:SparkSession, basePath:String, statsDBPath:String):Unit = {
val sc = spark.sparkContext
import spark.implicits._
println(s"base Path is $basePath")
val mdstores :RDD[OAFInfo] = sc.sequenceFile(basePath, classOf[Text],classOf[Text]).map(x=>x._2.toString).map(x=> DHPUtils.convertTOOAFStat(x))
val df:Dataset[OAFInfo] =spark.createDataset(mdstores)
df.write.mode(SaveMode.Overwrite).save(statsDBPath)
}
/** Here all the spark applications runs this method
* where the whole logic of the spark node is defined
*/
override def run(): Unit = {
val path:String = argumentMap.get("path").orNull
if (path == null) throw new MissingArgumentException("Missing argument path")
println(s"base Path is $path")
generateInfoOaf(spark, path, "/user/sandro.labruzzo/prod_for_beta_stats")
generateMDStoreStats(spark, "/user/sandro.labruzzo/prod_for_beta_stats", System.currentTimeMillis(),"/user/sandro.labruzzo/prod_for_beta_mesaurement")
spark.close()
}
}
object GenerateMDStoreStats{
val log: Logger = LoggerFactory.getLogger(GenerateMDStoreStats.getClass)
def main(args: Array[String]): Unit = {
new GenerateMDStoreStats(args,log ).initialize().run()
}
}

View File

@@ -0,0 +1,59 @@
package eu.dnetlib.scholix
import com.sandro.app.AbstractScalaApplication
import org.apache.spark.sql.functions.{count, desc}
import org.apache.spark.sql.{Dataset, SparkSession}
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
class CheckEBIStats( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
def extractPidSchema(input:String) :String = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(input)
val source = (json \ "source").extractOrElse[String](null)
if (source != null) {
null
}
else {
val l: List[String] = for {
JObject(pids) <- json \\ "pid"
JField("qualifier", JObject(qualifier)) <- pids
JField("classid", JString(classid)) <- qualifier
} yield classid
l.head
}
}
def listPidType(spark:SparkSession, path:String) :Unit = {
import spark.implicits._
val df:Dataset[String] = spark.read.text(path).as[String]
df.map(extractPidSchema).filter(s=> s!=null).groupBy("value").agg(count("value").alias("Total")).orderBy(desc("Total")).show(300, false)
}
override def run(): Unit = {
val path = argumentMap("path")
log.warn(s"path is $path")
listPidType(spark, path)
}
}
object CheckEBIStats {
val log: Logger = LoggerFactory.getLogger(getClass.getName)
def main(args: Array[String]): Unit = {
new CheckEBIStats(args,log).initialize().run()
}
}

View File

@@ -2,11 +2,10 @@ package eu.dnetlib.scholix
import com.sandro.app.AbstractScalaApplication
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse
import org.apache.spark.sql.functions.{count,desc}
import org.slf4j.{Logger, LoggerFactory}
class CheckMDStoreContent( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
@@ -20,7 +19,7 @@ class CheckMDStoreContent( args: Array[String], log: Logger) extends AbstractSc
    val source = (json \ "source").extractOrElse[String](null)
    if (source != null) {
      val rel =(json \"relClass").extract[String]
-     s"Relation:$rel"
+     s"Relation"
    }
    else {
      val l: List[String] = for {
@@ -32,13 +31,55 @@ class CheckMDStoreContent( args: Array[String], log: Logger) extends AbstractSc
}
}
def filter_relationId(input:String):List[String] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(input)
val source = (json \ "source").extractOrElse[String](null)
if (source != null) {
val target =(json \"target").extract[String]
List(source, target)
} else
List()
}
def filter_entity_id(input:String):(String, String) = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(input)
val result_type = (json \ "resulttype" \ "classid").extractOrElse[String](null)
val id = (json \ "id").extractOrElse[String](null)
if (id == null)
null
else
(id,result_type)
}
def show_typologies(spark:SparkSession, path:String): Unit = {
import spark.implicits._
val df = spark.read.text(path).as[String]
df.map(s =>get_type(s)).groupBy("value").agg(count("value").alias("Total")).orderBy(desc("Total")).show(300, false)
val id_rels = df.flatMap(s => filter_relationId(s))
.filter(s=>s.startsWith("unresolved::") && s.contains("pmid"))
.distinct()
log.warn(s"Total pubmed pubs imported in scholexplorer ${id_rels.count}")
// df.map(s =>filter_entity_id(s))
// .filter(s =>s!=null)
// .map(_._2)
// .groupBy("value").agg(count("value").alias("Total")).orderBy(desc("Total")).show(300, false)
// val id_datacite = df.map(s =>filter_entity_id(s))
// .filter(s =>s!=null)
// .filter(s => "publication".equalsIgnoreCase(s._2))
// .map(_._1)
// .distinct()
//
// val total_pubs = id_datacite.joinWith(id_rels, id_datacite("value").equalTo(id_rels("value")), "inner").count()
//
// log.warn(s"total doi rel in datacite : $total_pubs")
}

View File

@@ -60,7 +60,7 @@ class CheckRelation( args: Array[String], log: Logger) extends AbstractScalaAppl
      .filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference)
      .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
      .filter(r => filterRelations(r))
-     .map(r => r.getSubRelType).as[String].groupBy("value")
+     .map(r => r.getRelClass).as[String].groupBy("value")
      .agg(count("value").alias("Total"))
      .orderBy(desc("Total"))
      .show(300, truncate = false)

View File

@@ -0,0 +1,96 @@
package eu.dnetlib.scholix
import com.fasterxml.jackson.databind.ObjectMapper
import com.sandro.app.AbstractScalaApplication
import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation}
import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary
import eu.dnetlib.scholix.CheckRelation.logger
import org.apache.spark.sql.functions.count
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
import org.json4s.DefaultFormats
import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
class CheckSummaries ( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
def filterRelations(r: Relation): Boolean = {
val relClassFilter = List(
"merges",
"isMergedIn",
"HasAmongTopNSimilarDocuments",
"IsAmongTopNSimilarDocuments"
)
if (relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
false
else {
if (r.getCollectedfrom == null || r.getCollectedfrom.size() == 0)
false
else if (r.getCollectedfrom.size() > 1)
true
else if (r.getCollectedfrom.size() == 1 && r.getCollectedfrom.get(0)!=null && "OpenCitations".equalsIgnoreCase(r.getCollectedfrom.get(0).getValue))
false
else
true
}
}
def extractSourceTarget(input:String, path:String) :String = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(input)
(json \ path).extract[String]
}
def countSummaries(basePath:String, spark:SparkSession) :Unit = {
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
implicit val relEncoder: Encoder[Relation] = Encoders.kryo[Relation]
implicit val oafEncoder: Encoder[OafEntity] = Encoders.kryo[OafEntity]
import spark.implicits._
val relPath = s"/tmp/beta_provision/scholix/relation"
val pubPath = s"$basePath/entities/publication"
// val ds:Dataset[ScholixSummary] = spark.read.load(path).as[ScholixSummary]
//
//
// ds.map(s => s.getTypology.toString).groupBy("value").agg(count("value").alias("Total")).show(300, truncate = false)
val mapper = new ObjectMapper()
val df =spark.read.load(relPath).as[Relation]
val totalIDS = df.flatMap(r=> List(r.getSource,r.getTarget))
.filter(s => s.startsWith("50"))
.distinct()
val pubId = spark.read.load(pubPath).as[OafEntity].map(o =>o.getId).distinct()
val idPubsTotal = pubId.joinWith(totalIDS, pubId("value").equalTo(totalIDS("value")), "inner").count()
log.warn(s"Total ids in input Relation of type publication $idPubsTotal")
}
/** Here all the spark applications runs this method
* where the whole logic of the spark node is defined
*/
override def run(): Unit = {
val path = argumentMap("path")
logger.warn(s"path properties is $path")
if (path == null || path.isEmpty)
throw new IllegalArgumentException("missing path arguments.properties -path when launch file, check if it is inside the arguments.properties")
countSummaries(path, spark)
}
}
object CheckSummaries {
val logger: Logger = LoggerFactory.getLogger(CheckRelation.getClass.getName)
def main(args: Array[String]): Unit = {
new CheckSummaries(args,logger).initialize().run()
}
}

View File

@@ -0,0 +1,139 @@
package eu.dnetlib.scholix
import com.sandro.app.fs.{OAFInfo, OAFParser}
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue, Relation, Result}
import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils.DATE_RELATION_KEY
import scala.io.Source
import scala.xml.pull.XMLEventReader
import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.parse
import collection.JavaConverters._
case class Measurement(name:String, nsprefix:String, timestamp:Long, value:Long) {}
object DHPUtils {
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo(
false,
null,
false,
false,
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER,
"0.9"
)
val relations = Map(
"IsSupplementTo"->"IsSupplementedBy",
"IsSupplementedBy"->"IsSupplementTo",
"References"->"IsReferencedBy",
"IsReferencedBy"->"References",
"IsRelatedTo"->"IsRelatedTo" )
val ElsevierCollectedFrom: KeyValue = OafMapperUtils.keyValue("10|openaire____::8f87e10869299a5fe80b315695296b88", "Elsevier")
def createInverseRelationships(r:Relation): List[Relation] = {
val inverse = new Relation()
inverse.setDataInfo(r.getDataInfo)
inverse.setCollectedfrom(r.getCollectedfrom)
inverse.setProperties(r.getProperties)
inverse.setSource(r.getTarget)
inverse.setTarget(r.getSource)
inverse.setRelType(r.getRelType)
inverse.setSubRelType(r.getSubRelType)
inverse.setRelClass(relations.getOrElse(r.getRelClass, r.getRelClass))
List(r, inverse)
}
def extractPidMap(r:Result):List[(String, String)] = {
if (r == null || r.getInstance()==null)
return null
r.getInstance().asScala.filter(i => i.getPid!= null).flatMap(i =>i.getPid.asScala).map(p => (r.getId, generate_unresolved_id(p.getValue, p.getQualifier.getClassid))).toList
}
def extractIdRel(input:String):String = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(input)
val relName: String = (json \ "RelationshipType" \ "Name").extract[String]
val sourcePid = (json \ "Source" \ "Identifier" \ "ID").extract[String]
val sourcePidType = (json \ "Source" \ "Identifier" \ "IDScheme").extract[String]
val targetPid = (json \ "Target" \ "Identifier" \ "ID").extract[String]
val targetPidType = (json \ "Target" \ "Identifier" \ "IDScheme").extract[String]
s"$sourcePid::$sourcePidType::$relName::$targetPid::$targetPidType".toLowerCase()
}
def eventDataToRelation(input:String):Relation = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(input)
val relName:String = (json \ "RelationshipType" \ "Name").extract[String]
val sourcePid = (json\ "Source" \ "Identifier"\ "ID").extract[String]
val sourcePidType = (json\ "Source" \ "Identifier"\ "IDScheme").extract[String]
val targetPid = (json \ "Target" \ "Identifier" \ "ID").extract[String]
val targetPidType = (json \ "Target" \ "Identifier" \ "IDScheme").extract[String]
val date:String = (json\"LinkPublicationDate").extract[String]
createRelation(generate_unresolved_id(sourcePid, sourcePidType),generate_unresolved_id(targetPid, targetPidType), ElsevierCollectedFrom, "relationship",relName, date)
}
def createRelation(
sourceId: String,
targetId:String,
collectedFrom: KeyValue,
subRelType: String,
relClass: String,
date: String
): Relation = {
val rel = new Relation
rel.setCollectedfrom(List(ElsevierCollectedFrom).asJava)
rel.setDataInfo(DATA_INFO)
rel.setRelType(ModelConstants.RESULT_RESULT)
rel.setSubRelType(subRelType)
rel.setRelClass(relClass)
rel.setSource(sourceId)
rel.setTarget(targetId)
val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
rel.setProperties(List(dateProps).asJava)
rel.setCollectedfrom(List(collectedFrom).asJava)
rel
}
def generate_unresolved_id(pid: String, pidType: String): String = {
s"unresolved::${pid.toLowerCase()}::${pidType.toLowerCase()}"
}
def convertTOOAFStat(input: String): OAFInfo = {
val xml = new XMLEventReader(Source.fromString(input))
val parser = new OAFParser(xml)
parser.extractStats()
}
}
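
A short sketch of how the helpers above compose: build an unresolved relation between two PIDs and derive its inverse. The PIDs, date, and relation class are invented for illustration.

import eu.dnetlib.scholix.DHPUtils

object DHPUtilsExample {
  def main(args: Array[String]): Unit = {
    val source = DHPUtils.generate_unresolved_id("10.1000/xyz123", "doi") // unresolved::10.1000/xyz123::doi
    val target = DHPUtils.generate_unresolved_id("PMC1234567", "pmc")     // unresolved::pmc1234567::pmc

    val rel = DHPUtils.createRelation(
      sourceId = source,
      targetId = target,
      collectedFrom = DHPUtils.ElsevierCollectedFrom,
      subRelType = "relationship",
      relClass = "References",
      date = "2023-04-12"
    )

    // the original relation plus its inverse, with relClass flipped through the relations map
    DHPUtils.createInverseRelationships(rel)
      .foreach(r => println(s"${r.getSource} ${r.getRelClass} ${r.getTarget}"))
  }
}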

View File

@@ -0,0 +1,236 @@
package eu.dnetlib.scholix
import com.fasterxml.jackson.databind.ObjectMapper
import com.sandro.app.AbstractScalaApplication
import eu.dnetlib.dhp.schema.oaf.{Relation, Result}
import eu.dnetlib.dhp.schema.sx.scholix.Scholix
import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary
import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils
import eu.dnetlib.doiboost.crossref.CrossrefDT
import org.apache.commons.cli.MissingArgumentException
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.functions.count
class GenerateEventDataRelations ( args: Array[String], log: Logger) extends AbstractScalaApplication( args: Array[String], log: Logger) {
private def convertEventDataToRelations(spark:SparkSession, sourcePath:String, relation_path:String): Unit = {
implicit val relEncoders:Encoder[Relation] = Encoders.kryo[Relation]
import spark.implicits._
spark.read.load(sourcePath)
.select("json")
.map(r => r.getString(0))
.map(r=>DHPUtils.eventDataToRelation(r))
.write.mode(SaveMode.Overwrite).save(relation_path)
}
private def resolveRelations(spark:SparkSession, workingPath:String): Unit = {
val entityPath ="/tmp/beta_provision/scholix/entities/*"
implicit val resultEncoder:Encoder[Result]= Encoders.kryo[Result]
implicit val relEncoders:Encoder[Relation] = Encoders.kryo[Relation]
import spark.implicits._
val df =spark.read.load(entityPath).as[Result]
df.filter(r =>r != null && r.getDataInfo!= null && false == r.getDataInfo.getDeletedbyinference)
.flatMap(r =>DHPUtils.extractPidMap(r))
.write.mode(SaveMode.Overwrite)
.save(s"$workingPath/pidMap")
val pidMap: Dataset[(String, String)] = spark.read.load(s"$workingPath/pidMap").as[(String, String)]
val unresolvedSourceRelation: Dataset[(String,Relation)] = spark.read.load(s"$workingPath/relations").as[Relation].map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING,relEncoders))
unresolvedSourceRelation.joinWith(pidMap, unresolvedSourceRelation("_1").equalTo(pidMap("_2")), "leftouter")
.map(k =>{
if (k._2 == null)
null
else {
val rel = k._1._2
val pid = k._2._1
rel.setSource(pid)
rel
}
}).as[Relation].filter(r => !r.getSource.startsWith("unresolved")).write.mode(SaveMode.Overwrite).save(s"$workingPath/relSourceResolved")
val rsolved: Dataset[(String,Relation)] =spark.read.load(s"$workingPath/relSourceResolved").as[Relation].map(r => (r.getTarget, r))(Encoders.tuple(Encoders.STRING,relEncoders))
rsolved.joinWith(pidMap, rsolved("_1").equalTo(pidMap("_2")), "leftouter")
.map(k => {
if (k._2 == null)
null
else {
val rel = k._1._2
val pid = k._2._1
rel.setTarget(pid)
rel
}
}).as[Relation].filter(r => !r.getTarget.startsWith("unresolved")).flatMap(r=>DHPUtils.createInverseRelationships(r)).write.mode(SaveMode.Overwrite).save(s"$workingPath/relResolved")
val totRels = unresolvedSourceRelation.count()
val resolved = spark.read.load(s"$workingPath/relResolved").as[Relation]
val totResolved = resolved.count()
println(s"RESOLVED $totResolved/$totRels")
}
private def serializeScholix(spark:SparkSession, workingPath:String):Unit = {
implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
val scholix = spark.read.load(s"$workingPath/scholix").as[Scholix]
val mapper = new ObjectMapper()
import spark.implicits._
scholix.map(s => mapper.writeValueAsString(s)).write.mode(SaveMode.Overwrite).text(s"$workingPath/scholix_json")
}
private def checkRelations(spark:SparkSession, workingPath:String, rel_path:String): Unit = {
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
implicit val relEncoders: Encoder[Relation] = Encoders.kryo[Relation]
import spark.implicits._
val rels = spark.read.load(rel_path)
val scholix = spark.read.load(s"$workingPath/scholix").as[Scholix]
println(scholix.count())
scholix.map(s => s.getRelationship.getName).groupBy("value").agg(count("value").alias("Total")).show()
println(rels.count())
println( rels.select("json").map(r=>DHPUtils.extractIdRel(r.getString(0))).distinct().count())
}
private def createScholix(spark:SparkSession, workingPath:String, summaryPath:String):Unit = {
implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary]
implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
implicit val relEncoders: Encoder[Relation] = Encoders.kryo[Relation]
import spark.implicits._
val summaryDS =spark.read.load("/tmp/beta_provision/scholix/provision/summaries").as[ScholixSummary]
.map(s => (s.getId,s))(Encoders.tuple(Encoders.STRING, summaryEncoder))
val relationDS = spark.read.load(s"$workingPath/relResolved").as[Relation].map(r => (r.getSource,r))(Encoders.tuple(Encoders.STRING, relEncoders))
relationDS
.joinWith(summaryDS, relationDS("_1").equalTo(summaryDS("_1")), "left")
.map { input: ((String, Relation), (String, ScholixSummary)) =>
if (input._1 != null && input._2 != null) {
val rel: Relation = input._1._2
val source: ScholixSummary = input._2._2
(rel.getTarget, ScholixUtils.scholixFromSource(rel, source))
} else null
}(Encoders.tuple(Encoders.STRING, scholixEncoder))
.filter(r => r != null)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingPath/scholix_from_source")
val scholixSource: Dataset[(String, Scholix)] = spark.read
.load(s"$workingPath/scholix_from_source")
.as[(String, Scholix)](Encoders.tuple(Encoders.STRING, scholixEncoder))
scholixSource
.joinWith(summaryDS, scholixSource("_1").equalTo(summaryDS("_1")), "left")
.map { input: ((String, Scholix), (String, ScholixSummary)) =>
if (input._2 == null) {
null
} else {
val s: Scholix = input._1._2
val target: ScholixSummary = input._2._2
ScholixUtils.generateCompleteScholix(s, target)
}
}
.filter(s => s != null)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingPath/scholix_one_verse")
val scholix_o_v: Dataset[Scholix] =
spark.read.load(s"$workingPath/scholix_one_verse").as[Scholix]
scholix_o_v
.flatMap(s => List(s, ScholixUtils.createInverseScholixRelation(s)))
.as[Scholix]
.map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, scholixEncoder))
.groupByKey(_._1)
.agg(ScholixUtils.scholixAggregator.toColumn)
.map(s => s._2)
.write
.mode(SaveMode.Overwrite)
.save(s"$workingPath/scholix")
val scholix_final: Dataset[Scholix] = spark.read.load(s"$workingPath/scholix").as[Scholix]
println(scholix_final.count())
}
private def checkCrossrefDOI(spark:SparkSession):Unit ={
implicit val mrEncoder: Encoder[CrossrefDT] = Encoders.kryo[CrossrefDT]
import spark.implicits._
// val df = spark.read.load("/data/doiboost/input/crossref/crossref_ds").as[CrossrefDT]
// val tot = df.filter(d=> d.doi.equalsIgnoreCase("10.1107/s2052252521010563/yc50352sup3.hkl")).count()
// println(s"Found $tot")
val tot2 = spark.read.text("/tmp/beta_provision/graph/19_graph_blacklisted/*").as[String].filter(s=> s.contains("10.1107/s2052252521010563/yc50352sup3.hkl")).count()
println(s"Found in the final graph $tot2")
}
/** Here all the spark applications runs this method
* where the whole logic of the spark node is defined
*/
override def run(): Unit = {
val source_path: String = argumentMap.get("source_path").orNull
val working_path : String = argumentMap.get("working_path").orNull
if (source_path == null) throw new MissingArgumentException("Missing argument path")
if (working_path == null) throw new MissingArgumentException("Missing argument path")
// convertEventDataToRelations(spark, source_path, s"$working_path/relations")
// resolveRelations(spark, working_path )
// checkRelations(spark, working_path, source_path)
//createScholix(spark, working_path, "/tmp/beta_provision/scholix/provision/summaries")
// serializeScholix(spark, working_path)
checkCrossrefDOI(spark)
spark.close()
}
}
object GenerateEventDataRelations {
val log: Logger = LoggerFactory.getLogger(getClass.getName)
def main(args: Array[String]): Unit = {
new GenerateEventDataRelations(args,log).initialize().run()
}
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,40 @@
package com.sandro.app;
import com.sandro.app.fs.MDStoreInfo;
import org.junit.jupiter.api.Test;
public class FSCheckTest {
private MDStoreInfo extractPath(final String path, final String basePath) {
int res = path.indexOf(basePath);
if (res >0){
String[] split = path.substring(res).split("/");
if (split.length > 2) {
final String ts = split[split.length -1];
final String mdStore = split[split.length -2];
return new MDStoreInfo(mdStore, null, Long.parseLong(ts));
}
}
return null;
}
@Test
public void doTest() {
final String basePath = "/user/sandro.labruzzo/stores/";
final String path = "hdfs://nameservice1/user/sandro.labruzzo/stores/4a0cddf2-20e9-4558-a3c1-4d20cfecffa8_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==/1592574025511";
System.out.println(extractPath(path,basePath));
}
}

View File

@@ -0,0 +1,23 @@
package com.sandro.app
import com.sandro.app.fs.OAFParser
import org.junit.jupiter.api.Test
import scala.io.Source
import scala.xml.pull.XMLEventReader
class xmlParser {
@Test
def testParse(): Unit = {
val xml =Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/store/odf.xml")).mkString
val xml_e = new XMLEventReader(Source.fromString(xml))
val parser = new OAFParser(xml_e)
println(parser.extractStats())
}
}

View File

@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oaf="http://namespace.openaire.eu/oaf">
<header xmlns="http://namespace.openaire.eu/">
<dri:objIdentifier>rep______umk::000b9ae2fe88cd81444bdc4b56cd39fe</dri:objIdentifier>
<dri:recordIdentifier>002e31dc-3a44-4a56-a02c-5f60cfa13c9e_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==::oai:repozytorium.umk.pl:item/1440</dri:recordIdentifier>
<dri:dateOfCollection/>
<dri:mdFormat/>
<dri:mdFormatInterpretation/>
<dri:repositoryId>7f1d17f1-4d1d-4df5-bc55-003f203ac6b5_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>
<dr:objectIdentifier/>
<dr:dateOfCollection>2014-07-17T10:09:47Z</dr:dateOfCollection>
<oaf:datasourceprefix>rep______umk</oaf:datasourceprefix>
</header>
<metadata xmlns="http://namespace.openaire.eu/">
<dc:creator>Perlik, Kamila</dc:creator>
<dc:dateAccepted>2011-12-08</dc:dateAccepted>
<dc:description>Artykuł relacjonuje przebieg odbywającej się na początku kwietnia 2011 r. piątej konferencji naukowej Instytutu Informacji Naukowej i Studiów Bibliologicznych Uniwersytetu Warszawskiego. Celem spotkania była diagnoza zmian zachodzących w nauce o informacji i jej polu badawczym oraz w praktyce działalności informacyjnej, wyodrębnienie najważniejszych współczesnych kierunków rozwoju w obu tych płaszczyznach oraz ocena zakresu ich reprezentacji w edukacji specjalistów informacji. Główne obszary dyskusji objęły m.in. teoretyczne podstawy nauki o informacji, metodologię badań w nauce o informacji, zarządzanie informacją i wiedzą, badania użytkowników i użytkowania informacji, społeczną recepcję technologii informacyjnych i rolę ICT w życiu społecznym, prawne i etyczne aspekty działalności informacyjnej, nowe role bibliotekarzy, kształcenie specjalistów informacji, ilościowe badania piśmiennictwa naukowego (webometria, infometria, bibliometria) oraz kwestie tworzenia bibliotek cyfrowych, repozytoriów i elektronicznego publikowania.</dc:description>
<dc:identifier>http://repozytorium.umk.pl/handle/item/1440</dc:identifier>
<dc:language>pol</dc:language>
<dc:title>5. Konferencja Naukowa Instytutu Informacji Naukowej i Studiów Bibliologicznych Uniwersytetu Warszawskiego „Nauka o informacji (informacja naukowa) w okresie zmian” (Warszawa, 4–5 kwietnia 2011 r.)</dc:title>
<dc:subject>biblioteki cyfrowe</dc:subject>
<dc:subject>repozytoria</dc:subject>
<dc:subject>metodologia bedań</dc:subject>
<dc:subject>prawo</dc:subject>
<dc:subject>etyka</dc:subject>
<dr:CobjCategory>0001</dr:CobjCategory>
<dr:CobjIdentifier>Toruńskie Studia Bibliologiczne, No. 2 (7), Vol. 4, pp. 165-169</dr:CobjIdentifier>
<dr:CobjIdentifier>2080-1807</dr:CobjIdentifier>
<dr:CobjIdentifier>doi:10.12775/TSB.2011.026</dr:CobjIdentifier>
<oaf:collectedDatasourceid>driver______::d6a8dc01-db12-48d7-be88-9919f1c912c6</oaf:collectedDatasourceid>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:hostedBy name="Repozytorium Uniwersytetu Mikołaja Kopernika" id="driver______::d6a8dc01-db12-48d7-be88-9919f1c912c6"/>
<oaf:hostedBy name="Repozytorium Uniwersytetu Mikołaja Kopernika" id="driver______::d6a8dc01-db12-48d7-be88-9919f1c912c6"/>
<oaf:identifier identifierType="doi">10.12775/TSB.2011.026</oaf:identifier>
</metadata>
</record>

View File

@@ -0,0 +1,80 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<oai:header xmlns="http://namespace.openaire.eu/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<dri:objIdentifier>od______2659::0000170f23c42f3712d9892367b217fe</dri:objIdentifier>
<dri:recordIdentifier>oai:zenodo.org:582984</dri:recordIdentifier>
<dri:dateOfCollection>2020-01-26T00:05:19.414Z</dri:dateOfCollection>
<oaf:datasourceprefix>od______2659</oaf:datasourceprefix>
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:zenodo.org:582984</identifier>
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2020-01-21T07:23:17Z</datestamp>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">openaire_data</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">user-powertac</setSpec>
</oai:header>
<metadata>
<resource xmlns="http://datacite.org/schema/kernel-3"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<identifier identifierType="DOI">10.5281/zenodo.582984</identifier>
<creators>
<creator>
<creatorName>PowerTAC</creatorName>
</creator>
</creators>
<titles>
<title>PowerTAC 2016-06 Finals Game 64</title>
</titles>
<publisher>Zenodo</publisher>
<publicationYear>2016</publicationYear>
<subjects>
<subject>PowerTAC</subject>
</subjects>
<dates>
<date dateType="Issued">2016-06-07</date>
</dates>
<resourceType resourceTypeGeneral="Dataset"/>
<relatedIdentifiers>
<relatedIdentifier relatedIdentifierType="URL" relationType="IsPartOf">https://zenodo.org/communities/powertac</relatedIdentifier>
</relatedIdentifiers>
<rightsList>
<rights rightsURI="http://creativecommons.org/licenses/by/4.0/legalcode">Creative Commons Attribution 4.0 International</rights>
<rights rightsURI="info:eu-repo/semantics/openAccess">Open Access</rights>
</rightsList>
<descriptions>
<description descriptionType="Abstract">Log and boot files of game 64</description>
</descriptions>
</resource>
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
<oaf:dateAccepted>2016-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language>und</oaf:language>
<oaf:concept id="https://zenodo.org/communities/powertac"/>
<oaf:hostedBy id="opendoar____::2659" name="ZENODO"/>
<oaf:collectedFrom id="opendoar____::2659" name="ZENODO"/>
</metadata>
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2020-01-26T00:05:19.414Z">
<baseURL>https%3A%2F%2Fzenodo.org%2Foai2d</baseURL>
<identifier>oai:zenodo.org:582984</identifier>
<datestamp>2020-01-21T07:23:17Z</datestamp>
<metadataNamespace/>
</originDescription>
</provenance>
<oaf:datainfo>
<oaf:inferred>false</oaf:inferred>
<oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.9</oaf:trust>
<oaf:inferenceprovenance/>
<oaf:provenanceaction classid="sysimport:crosswalk:repository"
classname="sysimport:crosswalk:repository"
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</oaf:datainfo>
</about>
</record>