diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java
index 3f27b94422..58009bfcfc 100644
--- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java
+++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java
@@ -38,8 +38,7 @@ import scala.Tuple2;
/**
* Groups the graph content by entity identifier to ensure ID uniqueness
*/
-public class
-GroupEntitiesSparkJob {
+public class GroupEntitiesSparkJob {
private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class);
diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
index 686a2f1f15..502cb370f8 100644
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala
@@ -245,6 +245,8 @@ object DoiBoostMappingUtil {
if (item != null) {
hb.setValue(item.officialname)
hb.setKey(generateDSId(item.id))
+ //TODO replace the setKey call above with the commented one below as soon as the new HBM will be used
+ //hb.setKey(item.id)
if (item.openAccess) {
i.setAccessright(getOpenAccessQualifier())
i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold)
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala
deleted file mode 100644
index 561d2bbf45..0000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-package eu.dnetlib.dhp.oa.graph.hostebymap
-
-import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn}
-import org.apache.spark.sql.expressions.Aggregator
-
-
-case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
-case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {}
-
-object Aggregators {
-
-
-
- def getId(s1:String, s2:String) : String = {
- if (!s1.equals("")){
- return s1}
- s2
- }
-
-
- def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = {
- val transformedData : Dataset[HostedByItemType] = df
- .groupByKey(_.id)(Encoders.STRING)
- .agg(Aggregators.hostedByAggregator)
- .map{
- case (id:String , res:HostedByItemType) => res
- }(Encoders.product[HostedByItemType])
-
- transformedData
- }
-
- val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] {
- override def zero: HostedByItemType = HostedByItemType("","","","","",false)
- override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = {
- return merge(b, a)
- }
- override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = {
- if (b1 == null){
- return b2
- }
- if(b2 == null){
- return b1
- }
-
- HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess)
-
- }
- override def finish(reduction: HostedByItemType): HostedByItemType = reduction
- override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType]
-
- override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType]
- }.toColumn
-
-}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java
deleted file mode 100644
index b07e33cd19..0000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package eu.dnetlib.dhp.oa.graph.hostebymap;
-
-public class Constants {
-
-
-
- public static final String OPENAIRE = "openaire";
- public static final String DOAJ = "doaj";
- public static final String UNIBI = "unibi";
-
-
- public static final String ISSN = "issn";
- public static final String EISSN = "eissn";
- public static final String ISSNL = "issnl";
-}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java
deleted file mode 100644
index d397886a32..0000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java
+++ /dev/null
@@ -1,111 +0,0 @@
-package eu.dnetlib.dhp.oa.graph.hostebymap;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.opencsv.bean.CsvToBeanBuilder;
-import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import org.apache.hadoop.conf.Configuration;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import java.io.*;
-import java.net.URL;
-import java.net.URLConnection;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.List;
-import java.util.Optional;
-
-public class GetCSV {
- private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class);
-
- public static void main(final String[] args) throws Exception {
- final ArgumentApplicationParser parser = new ArgumentApplicationParser(
- IOUtils
- .toString(
- GetCSV.class
- .getResourceAsStream(
- "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")));
-
- parser.parseArgument(args);
-
- final String fileURL = parser.get("fileURL");
- final String hdfsPath = parser.get("hdfsPath");
- final String hdfsNameNode = parser.get("hdfsNameNode");
- final String classForName = parser.get("classForName");
- final Boolean shouldReplace = Optional.ofNullable((parser.get("replace")))
- .map(Boolean::valueOf)
- .orElse(false);
-
-
- URLConnection connection = new URL(fileURL).openConnection();
- connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
- connection.connect();
-
- BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
-
- if(shouldReplace){
- PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv")));
- String line = null;
- while((line = in.readLine())!= null){
- writer.println(line.replace("\\\"", "\""));
- }
- writer.close();
- in.close();
- in = new BufferedReader(new FileReader("/tmp/DOAJ.csv"));
- }
-
- Configuration conf = new Configuration();
- conf.set("fs.defaultFS", hdfsNameNode);
-
- FileSystem fileSystem = FileSystem.get(conf);
- Path hdfsWritePath = new Path(hdfsPath);
- FSDataOutputStream fsDataOutputStream = null;
- if (fileSystem.exists(hdfsWritePath)) {
- fileSystem.delete(hdfsWritePath, false);
- }
- fsDataOutputStream = fileSystem.create(hdfsWritePath);
-
- BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
-
- Class> clazz = Class.forName(classForName);
-
- ObjectMapper mapper = new ObjectMapper();
-
- new CsvToBeanBuilder(in)
- .withType(clazz)
- .withMultilineLimit(1)
- .build()
- .parse()
- .forEach(line -> {
- try {
- writer.write(mapper.writeValueAsString(line));
- writer.newLine();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- });
-
-
-
- writer.close();
- in.close();
- if(shouldReplace){
- File f = new File("/tmp/DOAJ.csv");
- f.delete();
- }
-
-
- }
-
-
-
-
-}
-
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java
deleted file mode 100644
index fe1d14a763..0000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package eu.dnetlib.dhp.oa.graph.hostebymap.model;
-
-import java.io.Serializable;
-
-import com.opencsv.bean.CsvBindByName;
-
-
-public class DOAJModel implements Serializable {
- @CsvBindByName(column = "Journal title")
- private String journalTitle;
-
- @CsvBindByName(column = "Journal ISSN (print version)")
- private String issn ;
-
- @CsvBindByName(column = "Journal EISSN (online version)")
- private String eissn;
-
- @CsvBindByName(column = "Review process")
- private String reviewProcess;
-
-
- public String getJournalTitle() {
- return journalTitle;
- }
-
- public void setJournalTitle(String journalTitle) {
- this.journalTitle = journalTitle;
- }
-
- public String getIssn() {
- return issn;
- }
-
- public void setIssn(String issn) {
- this.issn = issn;
- }
-
- public String getEissn() {
- return eissn;
- }
-
- public void setEissn(String eissn) {
- this.eissn = eissn;
- }
-
- public String getReviewProcess() {
- return reviewProcess;
- }
-
- public void setReviewProcess(String reviewProcess) {
- this.reviewProcess = reviewProcess;
- }
-}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java
deleted file mode 100644
index 309f74eea5..0000000000
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package eu.dnetlib.dhp.oa.graph.hostebymap.model;
-
-import com.opencsv.bean.CsvBindByName;
-
-import java.io.Serializable;
-
-public class UnibiGoldModel implements Serializable {
- @CsvBindByName(column = "ISSN")
- private String issn;
- @CsvBindByName(column = "ISSN_L")
- private String issn_l;
- @CsvBindByName(column = "TITLE")
- private String title;
- @CsvBindByName(column = "TITLE_SOURCE")
- private String title_source;
-
- public String getIssn() {
- return issn;
- }
-
- public void setIssn(String issn) {
- this.issn = issn;
- }
-
- public String getIssn_l() {
- return issn_l;
- }
-
- public String getTitle() {
- return title;
- }
-
- public void setTitle(String title) {
- this.title = title;
- }
-
- public String getTitle_source() {
- return title_source;
- }
-
- public void setTitle_source(String title_source) {
- this.title_source = title_source;
- }
-}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala
new file mode 100644
index 0000000000..6a9346ed50
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala
@@ -0,0 +1,97 @@
+package eu.dnetlib.dhp.oa.graph.hostedbymap
+
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn}
+import org.apache.spark.sql.expressions.Aggregator
+
+
+case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
+case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {}
+
+object Aggregators {
+
+
+
+  /** Picks the identifier to keep when merging two items:
+    * `s1` if it starts with the "10|" prefix, otherwise `s2`. */
+  def getId(s1:String, s2:String) : String = {
+    if (s1.startsWith("10|")) s1 else s2
+  }
+
+  /** Picks the value to keep when merging two items:
+    * `s1` unless it is the empty string, in which case `s2` is returned.
+    * No null check is performed on `s1`. */
+  def getValue(s1:String, s2:String) : String = {
+    if (s1.equals("")) s2 else s1
+  }
+
+
+  /** Collapses all the items sharing the same `id` into a single item,
+    * combining them with `hostedByAggregator`. */
+  def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = {
+    val byId = df.groupByKey(item => item.id)(Encoders.STRING)
+    val aggregated = byId.agg(Aggregators.hostedByAggregator)
+
+    // drop the grouping key, keep only the aggregated item
+    aggregated.map { case (_: String, merged: HostedByItemType) => merged }(Encoders.product[HostedByItemType])
+
+  }
+
+  /** Aggregator merging HostedByItemType records: prefers a "10|"-prefixed id and, for every other field, the first non-empty value. */
+  val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] {
+    override def zero: HostedByItemType = HostedByItemType("","","","","",false)
+    override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = {
+      return merge(b, a)
+    }
+    override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = {
+      if (b1 == null){
+        return b2
+      }
+      if(b2 == null){
+        return b1
+      }
+      // getId only recognises "10|"-prefixed strings, so names and ISSNs must be merged with getValue:
+      // with getId the values of b1 were always dropped, even when b2 only carried an empty string.
+      HostedByItemType(getId(b1.id, b2.id), getValue(b1.officialname, b2.officialname), getValue(b1.issn, b2.issn), getValue(b1.eissn, b2.eissn), getValue(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess)
+    }
+    override def finish(reduction: HostedByItemType): HostedByItemType = reduction
+    override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType]
+    override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType]
+  }.toColumn
+
+  /** Collapses all the pairs sharing the same key (first element of the tuple)
+    * into a single pair, combining them with `hostedByAggregator1`. */
+  def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = {
+    val pairEncoder = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])
+    val byKey = df.groupByKey(pair => pair._1)(Encoders.STRING)
+
+    byKey
+      .agg(Aggregators.hostedByAggregator1)
+      .map { case (_: String, merged: (String, HostedByItemType)) => merged }(pairEncoder)
+  }
+
+  /** Aggregator for (key, item) pairs: the result is the first pair if its item id starts with "10|",
+    * the second pair otherwise; in both cases `openAccess` is true when either of the two items
+    * is open access. */
+  val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] {
+    override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false))
+
+    override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = merge(b, a)
+
+    override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = {
+      if (b1 == null) {
+        b2
+      } else if (b2 == null) {
+        b1
+      } else {
+        // b1 is kept only when its id carries the "10|" prefix; b2 is kept in every other case
+        val kept = if (b1._2.id.startsWith("10|")) b1 else b2
+        (kept._1, kept._2.copy(openAccess = b1._2.openAccess || b2._2.openAccess))
+      }
+    }
+
+    override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction
+    override def bufferEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType])
+    override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType])
+  }.toColumn
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java
new file mode 100644
index 0000000000..b29877a48f
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java
@@ -0,0 +1,13 @@
+
+package eu.dnetlib.dhp.oa.graph.hostedbymap;
+
+public class Constants {
+ // NOTE(review): presumably the names of the sources feeding the HostedByMap — usages are not visible here, confirm
+ public static final String OPENAIRE = "openaire";
+ public static final String DOAJ = "doaj";
+ public static final String UNIBI = "unibi";
+ // NOTE(review): presumably the names of the ISSN variants (print, electronic, linking) — confirm against the callers
+ public static final String ISSN = "issn";
+ public static final String EISSN = "eissn";
+ public static final String ISSNL = "issnl";
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java
new file mode 100644
index 0000000000..9516cf6f76
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java
@@ -0,0 +1,136 @@
+
+package eu.dnetlib.dhp.oa.graph.hostedbymap;
+
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.opencsv.bean.CsvToBeanBuilder;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * Downloads a csv file from a URL, maps each row onto the bean class given as argument and stores the
+ * rows on HDFS as one JSON document per line.
+ */
+public class GetCSV {
+	private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV.class);
+
+	/** Local scratch file holding the cleaned copy of the csv when the "replace" option is set. */
+	private static final String CLEANED_CSV = "/tmp/DOAJ.csv";
+
+	/**
+	 * Entry point.
+	 *
+	 * @param args the command line arguments described in download_csv_parameters.json: fileURL, workingPath,
+	 *            hdfsNameNode, classForName and the optional replace flag
+	 * @throws Exception if the arguments cannot be parsed, the download fails, the bean class cannot be
+	 *             loaded or the HDFS file cannot be written
+	 */
+	public static void main(final String[] args) throws Exception {
+		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
+			IOUtils
+				.toString(
+					GetCSV.class
+						.getResourceAsStream(
+							"/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")));
+
+		parser.parseArgument(args);
+
+		final String fileURL = parser.get("fileURL");
+		final String hdfsPath = parser.get("workingPath");
+		final String hdfsNameNode = parser.get("hdfsNameNode");
+		final String classForName = parser.get("classForName");
+		final boolean shouldReplace = Optional
+			.ofNullable((parser.get("replace")))
+			.map(Boolean::valueOf)
+			.orElse(false);
+
+		// resolve the bean class first, so that a wrong class name fails before anything is downloaded or deleted
+		final Class<?> clazz = Class.forName(classForName);
+
+		final URLConnection connection = new URL(fileURL).openConnection();
+		connection
+			.setRequestProperty(
+				"User-Agent",
+				"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
+		connection.connect();
+
+		final File cleanedCsv = new File(CLEANED_CSV);
+		try {
+			if (shouldReplace) {
+				cleanCsv(connection.getInputStream(), cleanedCsv);
+			}
+
+			final Configuration conf = new Configuration();
+			conf.set("fs.defaultFS", hdfsNameNode);
+
+			final FileSystem fileSystem = FileSystem.get(conf);
+			final Path hdfsWritePath = new Path(hdfsPath);
+			if (fileSystem.exists(hdfsWritePath)) {
+				fileSystem.delete(hdfsWritePath, false);
+			}
+
+			final ObjectMapper mapper = new ObjectMapper();
+
+			// try-with-resources: both streams are closed (and the HDFS writer flushed) even when parsing fails
+			try (
+				BufferedReader in = new BufferedReader(
+					new InputStreamReader(
+						shouldReplace ? new FileInputStream(cleanedCsv) : connection.getInputStream(),
+						StandardCharsets.UTF_8));
+				BufferedWriter writer = new BufferedWriter(
+					new OutputStreamWriter(fileSystem.create(hdfsWritePath), StandardCharsets.UTF_8))) {
+
+				new CsvToBeanBuilder(in)
+					.withType(clazz)
+					.withMultilineLimit(1)
+					.build()
+					.parse()
+					.forEach(line -> {
+						try {
+							writer.write(mapper.writeValueAsString(line));
+							writer.newLine();
+						} catch (IOException e) {
+							throw new RuntimeException(e);
+						}
+					});
+			}
+		} finally {
+			if (shouldReplace) {
+				cleanedCsv.delete();
+			}
+		}
+	}
+
+	/**
+	 * Copies the csv read from {@code source} to {@code target} replacing every escaped quote (backslash followed by a
+	 * double quote) with a plain double quote. Both sides use UTF-8, so that the result does not depend on the
+	 * default charset of the JVM.
+	 */
+	private static void cleanCsv(final InputStream source, final File target) throws IOException {
+		try (
+			BufferedReader reader = new BufferedReader(new InputStreamReader(source, StandardCharsets.UTF_8));
+			BufferedWriter cleaned = new BufferedWriter(
+				new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8))) {
+			String line;
+			while ((line = reader.readLine()) != null) {
+				cleaned.write(line.replace("\\\"", "\""));
+				cleaned.newLine();
+			}
+		}
+	}
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
similarity index 76%
rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala
rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
index b66e972818..c44f2cbed7 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala
@@ -1,17 +1,23 @@
-package eu.dnetlib.dhp.oa.graph.hostebymap
+package eu.dnetlib.dhp.oa.graph.hostedbymap
import eu.dnetlib.dhp.application.ArgumentApplicationParser
-import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel}
-import eu.dnetlib.dhp.schema.oaf.{Datasource}
+import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel}
+import eu.dnetlib.dhp.schema.oaf.Datasource
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.json4s.DefaultFormats
import org.slf4j.{Logger, LoggerFactory}
-
import com.fasterxml.jackson.databind.ObjectMapper
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+import java.io.PrintWriter
-object SparkPrepareHostedByMapData {
+import org.apache.hadoop.io.compress.GzipCodec
+
+
+object SparkProduceHostedByMap {
implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])
@@ -37,24 +43,32 @@ object SparkPrepareHostedByMapData {
}
}
-// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = {
-// implicit val formats = DefaultFormats
-// val serializedJSON:String = write(input)
-//
-// var hostedBy = new ListBuffer[String]()
-// if(!input.issn.equals("")){
-// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}"
-// }
-// if(!input.eissn.equals("")){
-// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}"
-// }
-// if(!input.lissn.equals("")){
-// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}"
-// }
-//
-// hostedBy
-//
-// }
+  /** Serializes a (key, item) pair as a JSON object with a single field:
+    * the key is the field name and the item is its value. */
+  def toHostedByMap(input: (String, HostedByItemType)): String = {
+    import org.json4s.jackson.Serialization
+
+    implicit val formats = org.json4s.DefaultFormats
+
+    val singleEntry: Map[String, HostedByItemType] = Map(input._1 -> input._2)
+    Serialization.write(singleEntry)
+  }
+
+
+ /**
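+ * NOTE(review): the code below is commented out — a variant of toHostedByMap that takes an already built map as input.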
+ def toHostedByMap(input: Map[String, HostedByItemType]): String = {
+ import org.json4s.jackson.Serialization
+
+ implicit val formats = org.json4s.DefaultFormats
+
+
+
+ Serialization.write(input)
+
+
+ }
+ */
def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = {
if(issn != null){
@@ -166,11 +180,31 @@ object SparkPrepareHostedByMapData {
}
+
+  /** Writes every element of `input` as one UTF-8 encoded line of the file `outputPath`,
+    * created on the file system whose default FS is `hdfsNameNode`; throws an IOException if a write fails. */
+  def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = {
+    val conf = new Configuration()
+    conf.set("fs.defaultFS", hdfsNameNode)
+    val fs= FileSystem.get(conf)
+    // explicit charset: PrintWriter(OutputStream) would encode with the default charset of the JVM
+    val writer = new PrintWriter(new java.io.OutputStreamWriter(fs.create(new Path(outputPath)), java.nio.charset.StandardCharsets.UTF_8))
+    try {
+      input.foreach(hbi => writer.println(hbi))
+      // PrintWriter never throws on write failures: checkError flushes and reports them
+      if (writer.checkError()) throw new java.io.IOException("Error while writing " + outputPath)
+    } finally {
+      writer.close()
+    }
+  }
+
+
+
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(getClass)
val conf: SparkConf = new SparkConf()
- val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json")))
+ val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json")))
parser.parseArgument(args)
val spark: SparkSession =
SparkSession
@@ -179,11 +213,10 @@ object SparkPrepareHostedByMapData {
.appName(getClass.getSimpleName)
.master(parser.get("master")).getOrCreate()
- import spark.implicits._
val datasourcePath = parser.get("datasourcePath")
val workingDirPath = parser.get("workingPath")
-
+ val outputPath = parser.get("outputPath")
implicit val formats = DefaultFormats
@@ -191,29 +224,15 @@ object SparkPrepareHostedByMapData {
logger.info("Getting the Datasources")
- // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel]
- val dats : Dataset[HostedByItemType] =
- oaHostedByDataset(spark, datasourcePath)
+ Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath)
.union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold"))
.union(doajHostedByDataset(spark, workingDirPath + "/doaj"))
- dats.flatMap(hbi => toList(hbi))
- .groupByKey(_._1)
+ .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|"))
+ .map(hbi => toHostedByMap(hbi))(Encoders.STRING)
+ .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec])
-//
-//
-
-//
-
-//
-// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left")
-// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType)
-// .filter(i => i != null))
-// .flatMap(toHostedByMap)
-// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap")
-//
-//
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java
new file mode 100644
index 0000000000..ba804b939d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java
@@ -0,0 +1,52 @@
+
+package eu.dnetlib.dhp.oa.graph.hostedbymap.model;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByName;
+/** Bean holding four columns of a csv file, bound by header name through opencsv's CsvBindByName. */
+public class DOAJModel implements Serializable {
+ @CsvBindByName(column = "Journal title")
+ private String journalTitle;
+
+ @CsvBindByName(column = "Journal ISSN (print version)")
+ private String issn;
+
+ @CsvBindByName(column = "Journal EISSN (online version)")
+ private String eissn;
+
+ @CsvBindByName(column = "Review process")
+ private String reviewProcess;
+
+ public String getJournalTitle() {
+ return journalTitle;
+ }
+
+ public void setJournalTitle(String journalTitle) {
+ this.journalTitle = journalTitle;
+ }
+
+ public String getIssn() {
+ return issn;
+ }
+
+ public void setIssn(String issn) {
+ this.issn = issn;
+ }
+
+ public String getEissn() {
+ return eissn;
+ }
+
+ public void setEissn(String eissn) {
+ this.eissn = eissn;
+ }
+
+ public String getReviewProcess() {
+ return reviewProcess;
+ }
+
+ public void setReviewProcess(String reviewProcess) {
+ this.reviewProcess = reviewProcess;
+ }
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java
new file mode 100644
index 0000000000..0927a136be
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java
@@ -0,0 +1,54 @@
+
+package eu.dnetlib.dhp.oa.graph.hostedbymap.model;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByName;
+
+/**
+ * Bean mapping the ISSN, ISSN_L, TITLE and TITLE_SOURCE columns of a csv file, bound by header name through
+ * opencsv's {@link CsvBindByName}.
+ */
+public class UnibiGoldModel implements Serializable {
+	@CsvBindByName(column = "ISSN")
+	private String issn;
+	@CsvBindByName(column = "ISSN_L")
+	private String issn_l;
+	@CsvBindByName(column = "TITLE")
+	private String title;
+	@CsvBindByName(column = "TITLE_SOURCE")
+	private String title_source;
+
+	public String getIssn() {
+		return issn;
+	}
+
+	public void setIssn(String issn) {
+		this.issn = issn;
+	}
+
+	public String getIssn_l() {
+		return issn_l;
+	}
+
+	// setter added for symmetry with the other properties: issn_l was the only one that could not be set
+	public void setIssn_l(String issn_l) {
+		this.issn_l = issn_l;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+
+	public String getTitle_source() {
+		return title_source;
+	}
+
+	public void setTitle_source(String title_source) {
+		this.title_source = title_source;
+	}
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json
new file mode 100644
index 0000000000..fba048343b
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json
@@ -0,0 +1,37 @@
+
+[
+
+ {
+ "paramName":"fu",
+ "paramLongName":"fileURL",
+ "paramDescription": "the url to download the csv file ",
+ "paramRequired": true
+ },
+
+ {
+ "paramName":"wp",
+ "paramLongName":"workingPath",
+ "paramDescription": "the hdfs path of the file where the downloaded csv is stored, one json record per line",
+ "paramRequired": true
+ },
+ {
+ "paramName": "hnn",
+ "paramLongName": "hdfsNameNode",
+ "paramDescription": "the hdfs name node, set as fs.defaultFS to write the output file",
+ "paramRequired": true
+ },
+ {
+ "paramName": "cfn",
+ "paramLongName": "classForName",
+ "paramDescription": "the fully qualified name of the class the csv rows are mapped to",
+ "paramRequired": true
+ },
+ {
+ "paramName": "sr",
+ "paramLongName": "replace",
+ "paramDescription": "true if the input file has to be cleaned before parsing",
+ "paramRequired": false
+ }
+]
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json
new file mode 100644
index 0000000000..9173b78aed
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json
@@ -0,0 +1,38 @@
+
+[
+
+ {
+ "paramName":"dsp",
+ "paramLongName":"datasourcePath",
+ "paramDescription": "the path to the datasource ",
+ "paramRequired": true
+ },
+
+ {
+ "paramName":"wp",
+ "paramLongName":"workingPath",
+ "paramDescription": "the path where to find the pre-processed data for the unibi gold list and the doaj list",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the path used to store the HostedByMap",
+ "paramRequired": true
+ },
+ {
+ "paramName": "ssm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "true if the spark session is managed, false otherwise",
+ "paramRequired": false
+ },
+ {
+ "paramName": "m",
+ "paramLongName": "master",
+ "paramDescription": "the master of the spark session",
+ "paramRequired": true
+ }
+]
+
+
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml
new file mode 100644
index 0000000000..e5ec3d0aee
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml
@@ -0,0 +1,30 @@
+
+
+ jobTracker
+ yarnRM
+
+
+ nameNode
+ hdfs://nameservice1
+
+
+ oozie.use.system.libpath
+ true
+
+
+ hiveMetastoreUris
+ thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
+
+
+ hiveJdbcUrl
+ jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000
+
+
+ hiveDbName
+ openaire
+
+
+ oozie.launcher.mapreduce.user.classpath.first
+ true
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
new file mode 100644
index 0000000000..ecf6c3b316
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
@@ -0,0 +1,148 @@
+
+
+
+
+ sourcePath
+ the source path
+
+
+ outputPath
+ the output path
+
+
+ sparkDriverMemory
+ memory for driver process
+
+
+ sparkExecutorMemory
+ memory for individual executor
+
+
+ sparkExecutorCores
+ number of cores used by single executor
+
+
+ oozieActionShareLibForSpark2
+ oozie action sharelib for spark 2.*
+
+
+ spark2ExtraListeners
+ com.cloudera.spark.lineage.NavigatorAppListener
+ spark 2.* extra listeners classname
+
+
+ spark2SqlQueryExecutionListeners
+ com.cloudera.spark.lineage.NavigatorQueryListener
+ spark 2.* sql query execution listeners classname
+
+
+ spark2YarnHistoryServerAddress
+ spark 2.* yarn history server address
+
+
+ spark2EventLogDir
+ spark 2.* event log dir location
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+ oozie.action.sharelib.for.spark
+ ${oozieActionShareLibForSpark2}
+
+
+
+
+
+
+
+
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV
+ --hdfsNameNode${nameNode}
+ --fileURL${unibiFileURL}
+ --workingPath${workingDir}/unibi_gold
+ --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV
+ --hdfsNameNode${nameNode}
+ --fileURL${doajFileURL}
+ --workingPath${workingDir}/doaj
+ --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel
+ --replacetrue
+
+
+
+
+
+
+
+
+
+
+
+ yarn-cluster
+ Produce the new HostedByMap
+ eu.dnetlib.dhp.oa.graph.hostedbymap.SparkProduceHostedByMap
+ dhp-graph-mapper-${projectVersion}.jar
+
+ --executor-memory=${sparkExecutorMemory}
+ --executor-cores=${sparkExecutorCores}
+ --driver-memory=${sparkDriverMemory}
+ --conf spark.extraListeners=${spark2ExtraListeners}
+ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+
+ --datasourcePath${sourcePath}/datasource
+ --workingPath${workingDir}
+ --outputPath${outputPath}
+ --masteryarn-cluster
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
index 8e5657bfc1..2ed76a72ac 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
@@ -1,19 +1,14 @@
package eu.dnetlib.dhp.oa.graph.hostedbymap
-import java.sql.Timestamp
-
-import com.fasterxml.jackson.databind.ObjectMapper
-import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData}
+import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap}
import eu.dnetlib.dhp.schema.oaf.Datasource
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
import org.json4s.DefaultFormats
import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue}
import org.junit.jupiter.api.Test
-import org.slf4j.{Logger, LoggerFactory}
-
-import scala.collection.mutable.ListBuffer
-import scala.io.Source
+import org.junit.jupiter.api.Assertions._
+import org.json4s.jackson.Serialization.write
class TestPreprocess extends java.io.Serializable{
@@ -21,19 +16,14 @@ class TestPreprocess extends java.io.Serializable{
implicit val schema = Encoders.product[HostedByInfo]
+ def toHBIString (hbi:HostedByItemType): String = {
+ implicit val formats = DefaultFormats
+ // Render the item as a JSON string; json4s' write picks up the implicit formats declared above.
+ write(hbi)
+ }
@Test
def readDatasource():Unit = {
-
-
- import org.apache.spark.sql.Encoders
- implicit val formats = DefaultFormats
-
- val logger: Logger = LoggerFactory.getLogger(getClass)
- val mapper = new ObjectMapper()
-
-
-
val conf = new SparkConf()
conf.setMaster("local[*]")
conf.set("spark.driver.host", "localhost")
@@ -45,25 +35,29 @@ class TestPreprocess extends java.io.Serializable{
.getOrCreate()
val path = getClass.getResource("datasource.json").getPath
+ val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path)
- println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count)
+ assertEquals(9, ds.count)
+ assertEquals(8, ds.filter(hbi => !hbi.issn.equals("")).count)
+ assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count)
+ assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count)
+ assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count)
+ assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1)
+ assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1)
+ assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1)
+ assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1)
+ assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1)
+ ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|")))
+ ds.foreach(hbi => println(toHBIString(hbi)))
spark.close()
}
@Test
def readGold():Unit = {
-
- implicit val formats = DefaultFormats
-
- val logger: Logger = LoggerFactory.getLogger(getClass)
- val mapper = new ObjectMapper()
-
-
-
val conf = new SparkConf()
conf.setMaster("local[*]")
conf.set("spark.driver.host", "localhost")
@@ -76,23 +70,27 @@ class TestPreprocess extends java.io.Serializable{
val path = getClass.getResource("unibi_transformed.json").getPath
- println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count)
+ val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path)
+ assertEquals(29, ds.count)
+ assertEquals(29, ds.filter(hbi => !hbi.issn.equals("")).count)
+ assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count)
+ assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count)
+
+ assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count)
+
+ assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development."))
+ assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938"))
+ assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1)
+ ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI)))
+ ds.foreach(hbi => println(toHBIString(hbi)))
spark.close()
}
@Test
def readDoaj():Unit = {
-
- implicit val formats = DefaultFormats
-
- val logger: Logger = LoggerFactory.getLogger(getClass)
- val mapper = new ObjectMapper()
-
-
-
val conf = new SparkConf()
conf.setMaster("local[*]")
conf.set("spark.driver.host", "localhost")
@@ -104,14 +102,69 @@ class TestPreprocess extends java.io.Serializable{
.getOrCreate()
val path = getClass.getResource("doaj_transformed.json").getPath
+ val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path)
- println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count)
+ assertEquals(25, ds.count)
+ assertEquals(14, ds.filter(hbi => !hbi.issn.equals("")).count)
+ assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count)
+ assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count)
+ assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count)
+
+ assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology"))
+ assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029"))
+ assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count == 1)
+ assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals(""))
+ ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.DOAJ)))
+ ds.foreach(hbi => println(toHBIString(hbi)))
spark.close()
}
+ @Test
+ def testAggregator() : Unit = {
+ // Expects as many rows after aggregation as there are distinct keys before it (82).
+ val conf = new SparkConf()
+ conf.setMaster("local[*]")
+ conf.set("spark.driver.host", "localhost")
+ val spark: SparkSession =
+ SparkSession
+ .builder()
+ .appName(getClass.getSimpleName)
+ .config(conf)
+ .getOrCreate()
+ // Union the datasource, unibi-gold and DOAJ fixtures, then flatMap each item into (key, item) pairs via toList.
+ // NOTE(review): the key looks like an ISSN (see the "2077-3757" lookup below) -- confirm against toList.
+ val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath)
+ .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath))
+ .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath))
+ .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))
+ // 106 pairs in total, spread over 82 distinct keys.
+ assertEquals(106, tmp.count)
+ assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count)
+ // Rebuild the same pair dataset and run it through Aggregators.explodeHostedByItemType.
+ // NOTE(review): the row count drops from 106 to 82, so it presumably merges pairs sharing a key -- confirm.
+ val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath)
+ .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath))
+ .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath))
+ .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])))
+ // Same number of rows as the distinct keys counted above.
+ assertEquals(82, ds.count)
+ // 13 of the resulting items carry an id starting with "10|".
+ assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count)
+ // Key "2077-3757" must map to exactly one item, with a "10|" id and openAccess set.
+ assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.id.startsWith("10|"))
+ assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess)
+ assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count)
+ // Convert only the "10|"-id entries with toHostedByMap and print them; the output itself is not asserted.
+ val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING)
+
+ hbmap.foreach(entry => println(entry))
+ spark.close()
+
+ }
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java
index 01c70502c7..f886b275b4 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java
@@ -1,111 +1,109 @@
+
package eu.dnetlib.dhp.oa.graph.hostedbymap;
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.opencsv.bean.CsvToBeanBuilder;
-import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV;
-import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel;
-import org.junit.jupiter.api.Test;
-import java.io.*;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.List;
+import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel;
-public class TestReadCSV {
+public class TestReadCSV {
- @Test
- public void testCSVUnibi() throws FileNotFoundException {
+ @Test
+ public void testCSVUnibi() throws FileNotFoundException {
+ final String sourcePath = getClass()
+ .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv")
+ .getPath();
- final String sourcePath = getClass()
- .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv")
- .getPath();
+ List beans = new CsvToBeanBuilder(new FileReader(sourcePath))
+ .withType(UnibiGoldModel.class)
+ .build()
+ .parse();
- List beans = new CsvToBeanBuilder(new FileReader(sourcePath))
- .withType(UnibiGoldModel.class)
- .build()
- .parse();
+ ObjectMapper mapper = new ObjectMapper();
- ObjectMapper mapper = new ObjectMapper();
+ beans.forEach(r -> {
+ try {
+ System.out.println(mapper.writeValueAsString(r));
+ } catch (JsonProcessingException e) {
+ e.printStackTrace();
+ }
+ });
- beans.forEach(r -> {
- try {
- System.out.println(mapper.writeValueAsString(r));
- } catch (JsonProcessingException e) {
- e.printStackTrace();
- }
- });
+ }
+ @Test
+ public void testCSVUrlUnibi() throws IOException {
- }
+ URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv");
- @Test
- public void testCSVUrlUnibi() throws IOException {
+ BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream()));
+ ObjectMapper mapper = new ObjectMapper();
- URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv");
+ new CsvToBeanBuilder(in)
+ .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class)
+ .build()
+ .parse()
+ .forEach(line ->
- BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream()));
- ObjectMapper mapper = new ObjectMapper();
+ {
+ try {
+ System.out.println(mapper.writeValueAsString(line));
+ } catch (JsonProcessingException e) {
+ e.printStackTrace();
+ }
+ }
- new CsvToBeanBuilder(in)
- .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class)
- .build()
- .parse()
- .forEach(line ->
+ );
+ }
- {
- try {
- System.out.println(mapper.writeValueAsString(line));
- } catch (JsonProcessingException e) {
- e.printStackTrace();
- }
- }
+ @Test
+ public void testCSVUrlDOAJ() throws IOException {
+ URLConnection connection = new URL("https://doaj.org/csv").openConnection();
+ connection
+ .setRequestProperty(
+ "User-Agent",
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
+ connection.connect();
- );
- }
+ BufferedReader in = new BufferedReader(
+ new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
+ // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv"));
+ PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")));
+ String line = null;
+ while ((line = in.readLine()) != null) {
+ writer.println(line.replace("\\\"", "\""));
+ }
+ writer.close();
+ in.close();
+ in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv"));
+ ObjectMapper mapper = new ObjectMapper();
- @Test
- public void testCSVUrlDOAJ() throws IOException {
+ new CsvToBeanBuilder(in)
+ .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class)
+ .withMultilineLimit(1)
+ .build()
+ .parse()
+ .forEach(lline ->
- URLConnection connection = new URL("https://doaj.org/csv").openConnection();
- connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
- connection.connect();
+ {
+ try {
+ System.out.println(mapper.writeValueAsString(lline));
+ } catch (JsonProcessingException e) {
+ e.printStackTrace();
+ }
+ }
- BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
- //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv"));
- PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")));
- String line = null;
- while((line = in.readLine())!= null){
- writer.println(line.replace("\\\"", "\""));
- }
- writer.close();
- in.close();
- in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv"));
- ObjectMapper mapper = new ObjectMapper();
-
-
-
- new CsvToBeanBuilder(in)
- .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class)
- .withMultilineLimit(1)
- .build()
- .parse()
- .forEach(lline ->
-
- {
- try {
- System.out.println(mapper.writeValueAsString(lline));
- } catch (JsonProcessingException e) {
- e.printStackTrace();
- }
- }
-
-
- );
- }
+ );
+ }
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
index c41a6c68c0..63f18a803a 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@@ -1,13 +1,13 @@
package eu.dnetlib.dhp.oa.graph.raw;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
-import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.PidType;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.Mockito.lenient;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Optional;
+
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach;
@@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
-import java.io.IOException;
-import java.util.List;
-import java.util.Optional;
+import com.fasterxml.jackson.databind.ObjectMapper;
-import static org.junit.jupiter.api.Assertions.*;
-import static org.mockito.Mockito.lenient;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.PidType;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
public class MappersTest {
@@ -340,7 +342,7 @@ public class MappersTest {
assertEquals(2, p.getOriginalId().size());
assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739")));
- //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0));
+ // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0));
assertValidId(p.getCollectedfrom().get(0).getKey());
assertTrue(p.getAuthor().size() > 0);
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json
index 818aaa7167..4467c702f6 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json
@@ -1,6 +1,6 @@
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of 
philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal 
articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. 
Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}}
-{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public 
Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}}
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public 
Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études 
germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and 
Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}}
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"p
rovenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem
new file mode 100644
index 0000000000..093c57a9c7
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem
@@ -0,0 +1,9 @@
+{"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false}
+{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false}
+{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2640-4613","eissn":"","lissn":"","openAccess":false}
+{"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false}
+{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false}
+{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false}
+{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false}
+{"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false}
+{"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem
new file mode 100644
index 0000000000..effd0dd601
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem
@@ -0,0 +1,25 @@
+{"id":"doaj","officialname":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","lissn":"","openAccess":true}
+{"id":"doaj","officialname":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Membranes","issn":"2077-0375","eissn":"","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Agriculture","issn":"","eissn":"2077-0472","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Religions","issn":"","eissn":"2077-1444","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"UCV-Scientia","issn":"2077-172X","eissn":"","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Granì","issn":"2077-1800","eissn":"2413-8738","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Science Education International","issn":"","eissn":"2077-2327","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Edumecentro","issn":"","eissn":"2077-2874","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Monteverdia","issn":"","eissn":"2077-2890","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Transformación","issn":"","eissn":"2077-2955","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","lissn":"","openAccess":true}
+{"id":"doaj","officialname":"Revue de Primatologie","issn":"","eissn":"2077-3757","lissn":"","openAccess":true}
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem
new file mode 100644
index 0000000000..403ffdf5dc
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem
@@ -0,0 +1,29 @@
+{"id":"unibi","officialname":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","issn":"2502-731X","eissn":"","lissn":"2502-731X","openAccess":true}
+{"id":"unibi","officialname":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","issn":"2502-7409","eissn":"","lissn":"1411-0253","openAccess":true}
+{"id":"unibi","officialname":"At-Tadbir : jurnal ilmiah manajemen","issn":"2502-7433","eissn":"","lissn":"2502-7433","openAccess":true}
+{"id":"unibi","officialname":"Jurnal Kesehatan Panrita Husada.","issn":"2502-745X","eissn":"","lissn":"2502-745X","openAccess":true}
+{"id":"unibi","officialname":"ELang journal (An English Education journal)","issn":"2502-7549","eissn":"","lissn":"2502-7549","openAccess":true}
+{"id":"unibi","officialname":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","issn":"2423-3633","eissn":"","lissn":"2423-3625","openAccess":true}
+{"id":"unibi","officialname":"Pizhūhishnāmah-i ̒ilm/sanjī.","issn":"2423-5563","eissn":"","lissn":"2423-3773","openAccess":true}
+{"id":"unibi","officialname":"Iranian journal of animal biosystematics.","issn":"1735-434X","eissn":"","lissn":"1735-434X","openAccess":true}
+{"id":"unibi","officialname":"Majallah-i jangal-i Īrān.","issn":"2423-4435","eissn":"","lissn":"2008-6113","openAccess":true}
+{"id":"unibi","officialname":"Ābziyān-i zinatī.","issn":"2423-4575","eissn":"","lissn":"2423-4575","openAccess":true}
+{"id":"unibi","officialname":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","issn":"2423-4974","eissn":"","lissn":"2423-4974","openAccess":true}
+{"id":"unibi","officialname":"AIHM journal club.","issn":"2380-0607","eissn":"","lissn":"2380-0607","openAccess":true}
+{"id":"unibi","officialname":"Frontiers.","issn":"1085-4568","eissn":"","lissn":"1085-4568","openAccess":true}
+{"id":"unibi","officialname":"The journal of contemporary archival studies.","issn":"2380-8845","eissn":"","lissn":"2380-8845","openAccess":true}
+{"id":"unibi","officialname":"International journal of complementary & alternative medicine.","issn":"2381-1803","eissn":"","lissn":"2381-1803","openAccess":true}
+{"id":"unibi","officialname":"Palapala.","issn":"2381-2478","eissn":"","lissn":"2381-2478","openAccess":true}
+{"id":"unibi","officialname":"Asia pacific journal of environment ecology and sustainable development.","issn":"2382-5170","eissn":"","lissn":"2382-5170","openAccess":true}
+{"id":"unibi","officialname":"Majallah-i salāmat va bihdāsht","issn":"2382-9737","eissn":"","lissn":"2382-9737","openAccess":true}
+{"id":"unibi","officialname":"UCT journal of research in science ,engineering and technology","issn":"2382-977X","eissn":"","lissn":"2382-977X","openAccess":true}
+{"id":"unibi","officialname":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","issn":"2382-9974","eissn":"","lissn":"2382-9974","openAccess":true}
+{"id":"unibi","officialname":"Problemi endokrinnoï patologìï.","issn":"2227-4782","eissn":"","lissn":"2227-4782","openAccess":true}
+{"id":"unibi","officialname":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","issn":"2685-0079","eissn":"","lissn":"2597-4971","openAccess":true}
+{"id":"unibi","officialname":"Hypermedia magazine.","issn":"2574-0075","eissn":"","lissn":"2574-0075","openAccess":true}
+{"id":"unibi","officialname":"The museum review.","issn":"2574-0296","eissn":"","lissn":"2574-0296","openAccess":true}
+{"id":"unibi","officialname":"Bioactive compounds in health and disease.","issn":"2574-0334","eissn":"","lissn":"2574-0334","openAccess":true}
+{"id":"unibi","officialname":"Journal of computer science integration.","issn":"2574-108X","eissn":"","lissn":"2574-108X","openAccess":true}
+{"id":"unibi","officialname":"Child and adolescent obesity.","issn":"2574-254X","eissn":"","lissn":"2574-254X","openAccess":true}
+{"id":"unibi","officialname":"Journal of research on the college president.","issn":"2574-3325","eissn":"","lissn":"2574-3325","openAccess":true}
+{"id":"unibi","officialname":"European journal of sustainable development.","issn":"2239-6101","eissn":"","lissn":"2239-5938","openAccess":true}
\ No newline at end of file