From baad01cadcc499909d68651ff03a60213b7b8c76 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 29 Jul 2021 13:04:39 +0200 Subject: [PATCH] hostedbymap --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../doiboost/DoiBoostMappingUtil.scala | 2 + .../dhp/oa/graph/hostebymap/Aggregators.scala | 54 ------ .../dhp/oa/graph/hostebymap/Constants.java | 15 -- .../dhp/oa/graph/hostebymap/GetCSV.java | 111 ------------ .../oa/graph/hostebymap/model/DOAJModel.java | 53 ------ .../hostebymap/model/UnibiGoldModel.java | 44 ----- .../oa/graph/hostedbymap/Aggregators.scala | 97 ++++++++++ .../dhp/oa/graph/hostedbymap/Constants.java | 13 ++ .../dhp/oa/graph/hostedbymap/GetCSV.java | 107 +++++++++++ .../SparkProduceHostedByMap.scala} | 107 ++++++----- .../oa/graph/hostedbymap/model/DOAJModel.java | 52 ++++++ .../hostedbymap/model/UnibiGoldModel.java | 45 +++++ .../hostedbymap/download_csv_parameters.json | 37 ++++ .../oa/graph/hostedbymap/hostedby_params.json | 38 ++++ .../hostedbymap/oozie_app/config-default.xml | 30 ++++ .../graph/hostedbymap/oozie_app/workflow.xml | 148 +++++++++++++++ .../oa/graph/hostedbymap/TestPreprocess.scala | 127 +++++++++---- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 168 +++++++++--------- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 28 +-- .../dhp/oa/graph/hostedbymap/datasource.json | 2 +- .../graph/hostedbymap/datasourceHostedByItem | 9 + .../dhp/oa/graph/hostedbymap/doajHostedByItem | 25 +++ .../oa/graph/hostedbymap/unibyHostedByItem | 29 +++ 24 files changed, 885 insertions(+), 459 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java rename dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/{hostebymap/SparkPrepareHostedByMapData.scala => hostedbymap/SparkProduceHostedByMap.scala} (76%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 686a2f1f1..502cb370f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -245,6 +245,8 @@ object DoiBoostMappingUtil { if (item != null) { hb.setValue(item.officialname) hb.setKey(generateDSId(item.id)) + //TODO replace with the one above as soon as the new HBM will be used + //hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala deleted file mode 100644 index 561d2bbf4..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} -import org.apache.spark.sql.expressions.Aggregator - - -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} -case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} - -object Aggregators { - - - - def getId(s1:String, s2:String) : String = { - if (!s1.equals("")){ - return s1} - s2 - } - - - def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - - transformedData - } - - val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - - HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java deleted file mode 100644 index b07e33cd1..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java +++ /dev/null @@ -1,15 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -public class Constants { - - - - public static final String OPENAIRE = "openaire"; - public static final String DOAJ = "doaj"; - public static final String UNIBI = "unibi"; - - - public static final String ISSN = "issn"; - public static final String EISSN = "eissn"; - public static final String ISSNL = "issnl"; -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java deleted file mode 100644 index d397886a3..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java +++ /dev/null @@ -1,111 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.Configuration; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Optional; - -public class GetCSV { - private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - final Boolean shouldReplace = Optional.ofNullable((parser.get("replace"))) - .map(Boolean::valueOf) - .orElse(false); - - - URLConnection connection = new URL(fileURL).openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); - - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - - if(shouldReplace){ - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - } - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - - Class clazz = Class.forName(classForName); - - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(clazz) - .withMultilineLimit(1) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - - - writer.close(); - in.close(); - if(shouldReplace){ - File f = new File("/tmp/DOAJ.csv"); - f.delete(); - } - - - } - - - - -} - diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java deleted file mode 100644 index fe1d14a76..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java +++ /dev/null @@ -1,53 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - - -public class DOAJModel implements Serializable { - @CsvBindByName(column = "Journal title") - private String journalTitle; - - @CsvBindByName(column = "Journal ISSN (print version)") - private String issn ; - - @CsvBindByName(column = "Journal EISSN (online version)") - private String eissn; - - @CsvBindByName(column = "Review process") - private String reviewProcess; - - - public String getJournalTitle() { - return journalTitle; - } - - public void setJournalTitle(String journalTitle) { - this.journalTitle = journalTitle; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getReviewProcess() { - return reviewProcess; - } - - public void setReviewProcess(String reviewProcess) { - this.reviewProcess = reviewProcess; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java deleted file mode 100644 index 309f74eea..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java +++ /dev/null @@ -1,44 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import com.opencsv.bean.CsvBindByName; - -import java.io.Serializable; - -public class UnibiGoldModel implements Serializable { - @CsvBindByName(column = "ISSN") - private String issn; - @CsvBindByName(column = "ISSN_L") - private String issn_l; - @CsvBindByName(column = "TITLE") - private String title; - @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getIssn_l() { - return issn_l; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getTitle_source() { - return title_source; - } - - public void setTitle_source(String title_source) { - this.title_source = title_source; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala new file mode 100644 index 000000000..6a9346ed5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -0,0 +1,97 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} +case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (s1.startsWith("10|")){ + return s1} + s2 + } + + def getValue(s1:String, s2:String) : String = { + if(!s1.equals("")){ + return s1 + } + s2 + } + + + def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + + HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + + def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { + val transformedData : Dataset[(String, HostedByItemType)] = df + .groupByKey(_._1)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator1) + .map{ + case (id:String , res:(String, HostedByItemType)) => res + }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + transformedData + } + + val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) + override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { + return merge(b, a) + } + override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(b1._2.id.startsWith("10|")){ + return (b1._1, HostedByItemType(b1._2.id, b1._2.officialname, b1._2.issn, b1._2.eissn, b1._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + return (b2._1, HostedByItemType(b2._2.id, b2._2.officialname, b2._2.issn, b2._2.eissn, b2._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction + override def bufferEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + + override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + }.toColumn + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java new file mode 100644 index 000000000..b29877a48 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +public class Constants { + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java new file mode 100644 index 000000000..9516cf6f7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java @@ -0,0 +1,107 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("workingPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + final Boolean shouldReplace = Optional + .ofNullable((parser.get("replace"))) + .map(Boolean::valueOf) + .orElse(false); + + URLConnection connection = new URL(fileURL).openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + + if (shouldReplace) { + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + } + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + Class clazz = Class.forName(classForName); + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(clazz) + .withMultilineLimit(1) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + writer.close(); + in.close(); + if (shouldReplace) { + File f = new File("/tmp/DOAJ.csv"); + f.delete(); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index b66e97281..c44f2cbed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,17 +1,23 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap +package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} -import eu.dnetlib.dhp.schema.oaf.{Datasource} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import java.io.PrintWriter -object SparkPrepareHostedByMapData { +import org.apache.hadoop.io.compress.GzipCodec + + +object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) @@ -37,24 +43,32 @@ object SparkPrepareHostedByMapData { } } -// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { -// implicit val formats = DefaultFormats -// val serializedJSON:String = write(input) -// -// var hostedBy = new ListBuffer[String]() -// if(!input.issn.equals("")){ -// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" -// } -// if(!input.eissn.equals("")){ -// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" -// } -// if(!input.lissn.equals("")){ -// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" -// } -// -// hostedBy -// -// } + def toHostedByMap(input: (String, HostedByItemType)): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + + Serialization.write(map) + + + } + + /** + * + def toHostedByMap(input: Map[String, HostedByItemType]): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + + + Serialization.write(input) + + + } + */ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { if(issn != null){ @@ -166,11 +180,31 @@ object SparkPrepareHostedByMapData { } + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + val conf = new Configuration() + + conf.set("fs.defaultFS", hdfsNameNode) + val fs= FileSystem.get(conf) + val output = fs.create(new Path(outputPath)) + val writer = new PrintWriter(output) + try { + input.foreach(hbi => writer.println(hbi)) + } + finally { + writer.close() + + } + + } + + + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -179,11 +213,10 @@ object SparkPrepareHostedByMapData { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - import spark.implicits._ val datasourcePath = parser.get("datasourcePath") val workingDirPath = parser.get("workingPath") - + val outputPath = parser.get("outputPath") implicit val formats = DefaultFormats @@ -191,29 +224,15 @@ object SparkPrepareHostedByMapData { logger.info("Getting the Datasources") - // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel] - val dats : Dataset[HostedByItemType] = - oaHostedByDataset(spark, datasourcePath) + Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) - dats.flatMap(hbi => toList(hbi)) - .groupByKey(_._1) + .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) + .map(hbi => toHostedByMap(hbi))(Encoders.STRING) + .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec]) -// -// - -// - -// -// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") -// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) -// .filter(i => i != null)) -// .flatMap(toHostedByMap) -// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") -// -// } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java new file mode 100644 index 000000000..ba804b939 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java new file mode 100644 index 000000000..0927a136b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json new file mode 100644 index 000000000..fba048343 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"fu", + "paramLongName":"fileURL", + "paramDescription": "the url to download the csv file ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "cfn", + "paramLongName": "classForName", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "sr", + "paramLongName": "replace", + "paramDescription": "true if the input file has to be cleaned before parsing", + "paramRequired": false + } +] + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json new file mode 100644 index 000000000..9173b78ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json @@ -0,0 +1,38 @@ + +[ + + { + "paramName":"dsp", + "paramLongName":"datasourcePath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml new file mode 100644 index 000000000..ecf6c3b31 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -0,0 +1,148 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + --workingPath${workingDir}/unibi_gold + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${doajFileURL} + --workingPath${workingDir}/doaj + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + --replacetrue + + + + + + + + + + + + yarn-cluster + Produce the new HostedByMap + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkProduceHostedByMap + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --datasourcePath${sourcePath}/datasource + --workingPath${workingDir} + --outputPath${outputPath} + --masteryarn-cluster + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 8e5657bfc..2ed76a72a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -1,19 +1,14 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import java.sql.Timestamp - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData} +import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.mutable.ListBuffer -import scala.io.Source +import org.junit.jupiter.api.Assertions._ +import org.json4s.jackson.Serialization.write class TestPreprocess extends java.io.Serializable{ @@ -21,19 +16,14 @@ class TestPreprocess extends java.io.Serializable{ implicit val schema = Encoders.product[HostedByInfo] + def toHBIString (hbi:HostedByItemType): String = { + implicit val formats = DefaultFormats + + write(hbi) + } @Test def readDatasource():Unit = { - - - import org.apache.spark.sql.Encoders - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -45,25 +35,29 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("datasource.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count) + assertEquals(9, ds.count) + assertEquals(8, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|"))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readGold():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -76,23 +70,27 @@ class TestPreprocess extends java.io.Serializable{ val path = getClass.getResource("unibi_transformed.json").getPath - println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count) + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path) + assertEquals(29, ds.count) + assertEquals(29, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development.")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readDoaj():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -104,14 +102,69 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("doaj_transformed.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count) + assertEquals(25, ds.count) + assertEquals(14, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals("")) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.DOAJ))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } + @Test + def testAggregator() : Unit = { + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + + + val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + assertEquals(106, tmp.count) + assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count) + + + val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))) + + assertEquals(82, ds.count) + + assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count) + + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.id.startsWith("10|")) + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess) + assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count) + + val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) + + hbmap.foreach(entry => println(entry)) + spark.close() + + } + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index 01c70502c..f886b275b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -1,111 +1,109 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap; +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.util.List; + +import org.junit.jupiter.api.Test; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.junit.jupiter.api.Test; -import java.io.*; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; -public class TestReadCSV { +public class TestReadCSV { - @Test - public void testCSVUnibi() throws FileNotFoundException { + @Test + public void testCSVUnibi() throws FileNotFoundException { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") + .getPath(); - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") - .getPath(); + List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) + .withType(UnibiGoldModel.class) + .build() + .parse(); - List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) - .withType(UnibiGoldModel.class) - .build() - .parse(); + ObjectMapper mapper = new ObjectMapper(); - ObjectMapper mapper = new ObjectMapper(); + beans.forEach(r -> { + try { + System.out.println(mapper.writeValueAsString(r)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); - beans.forEach(r -> { - try { - System.out.println(mapper.writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); + } + @Test + public void testCSVUrlUnibi() throws IOException { - } + URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); - @Test - public void testCSVUrlUnibi() throws IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); + ObjectMapper mapper = new ObjectMapper(); - URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class) + .build() + .parse() + .forEach(line -> - BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); - ObjectMapper mapper = new ObjectMapper(); + { + try { + System.out.println(mapper.writeValueAsString(line)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class) - .build() - .parse() - .forEach(line -> + ); + } - { - try { - System.out.println(mapper.writeValueAsString(line)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } + @Test + public void testCSVUrlDOAJ() throws IOException { + URLConnection connection = new URL("https://doaj.org/csv").openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); - ); - } + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); + ObjectMapper mapper = new ObjectMapper(); - @Test - public void testCSVUrlDOAJ() throws IOException { + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class) + .withMultilineLimit(1) + .build() + .parse() + .forEach(lline -> - URLConnection connection = new URL("https://doaj.org/csv").openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); + { + try { + System.out.println(mapper.writeValueAsString(lline)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); - ObjectMapper mapper = new ObjectMapper(); - - - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class) - .withMultilineLimit(1) - .build() - .parse() - .forEach(lline -> - - { - try { - System.out.println(mapper.writeValueAsString(lline)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - - ); - } + ); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c41a6c68c..63f18a803 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +342,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 818aaa716..4467c702f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,6 +1,6 @@ {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem new file mode 100644 index 000000000..093c57a9c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2640-4613","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem new file mode 100644 index 000000000..effd0dd60 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem @@ -0,0 +1,25 @@ +{"id":"doaj","officialname":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","lissn":"","openAccess":true} +{"id":"doaj","officialname":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Membranes","issn":"2077-0375","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Agriculture","issn":"","eissn":"2077-0472","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Religions","issn":"","eissn":"2077-1444","lissn":"","openAccess":true} +{"id":"doaj","officialname":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","lissn":"","openAccess":true} +{"id":"doaj","officialname":"UCV-Scientia","issn":"2077-172X","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Granì","issn":"2077-1800","eissn":"2413-8738","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Science Education International","issn":"","eissn":"2077-2327","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Edumecentro","issn":"","eissn":"2077-2874","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Monteverdia","issn":"","eissn":"2077-2890","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Transformación","issn":"","eissn":"2077-2955","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue de Primatologie","issn":"","eissn":"2077-3757","lissn":"","openAccess":true} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem new file mode 100644 index 000000000..403ffdf5d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem @@ -0,0 +1,29 @@ +{"id":"unibi","officialname":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","issn":"2502-731X","eissn":"","lissn":"2502-731X","openAccess":true} +{"id":"unibi","officialname":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","issn":"2502-7409","eissn":"","lissn":"1411-0253","openAccess":true} +{"id":"unibi","officialname":"At-Tadbir : jurnal ilmiah manajemen","issn":"2502-7433","eissn":"","lissn":"2502-7433","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kesehatan Panrita Husada.","issn":"2502-745X","eissn":"","lissn":"2502-745X","openAccess":true} +{"id":"unibi","officialname":"ELang journal (An English Education journal)","issn":"2502-7549","eissn":"","lissn":"2502-7549","openAccess":true} +{"id":"unibi","officialname":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","issn":"2423-3633","eissn":"","lissn":"2423-3625","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ̒ilm/sanjī.","issn":"2423-5563","eissn":"","lissn":"2423-3773","openAccess":true} +{"id":"unibi","officialname":"Iranian journal of animal biosystematics.","issn":"1735-434X","eissn":"","lissn":"1735-434X","openAccess":true} +{"id":"unibi","officialname":"Majallah-i jangal-i Īrān.","issn":"2423-4435","eissn":"","lissn":"2008-6113","openAccess":true} +{"id":"unibi","officialname":"Ābziyān-i zinatī.","issn":"2423-4575","eissn":"","lissn":"2423-4575","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","issn":"2423-4974","eissn":"","lissn":"2423-4974","openAccess":true} +{"id":"unibi","officialname":"AIHM journal club.","issn":"2380-0607","eissn":"","lissn":"2380-0607","openAccess":true} +{"id":"unibi","officialname":"Frontiers.","issn":"1085-4568","eissn":"","lissn":"1085-4568","openAccess":true} +{"id":"unibi","officialname":"˜The œjournal of contemporary archival studies.","issn":"2380-8845","eissn":"","lissn":"2380-8845","openAccess":true} +{"id":"unibi","officialname":"International journal of complementary & alternative medicine.","issn":"2381-1803","eissn":"","lissn":"2381-1803","openAccess":true} +{"id":"unibi","officialname":"Palapala.","issn":"2381-2478","eissn":"","lissn":"2381-2478","openAccess":true} +{"id":"unibi","officialname":"Asia pacific journal of environment ecology and sustainable development.","issn":"2382-5170","eissn":"","lissn":"2382-5170","openAccess":true} +{"id":"unibi","officialname":"Majallah-i salāmat va bihdāsht","issn":"2382-9737","eissn":"","lissn":"2382-9737","openAccess":true} +{"id":"unibi","officialname":"UCT journal of research in science ,engineering and technology","issn":"2382-977X","eissn":"","lissn":"2382-977X","openAccess":true} +{"id":"unibi","officialname":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","issn":"2382-9974","eissn":"","lissn":"2382-9974","openAccess":true} +{"id":"unibi","officialname":"Problemi endokrinnoï patologìï.","issn":"2227-4782","eissn":"","lissn":"2227-4782","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","issn":"2685-0079","eissn":"","lissn":"2597-4971","openAccess":true} +{"id":"unibi","officialname":"Hypermedia magazine.","issn":"2574-0075","eissn":"","lissn":"2574-0075","openAccess":true} +{"id":"unibi","officialname":"˜The œmuseum review.","issn":"2574-0296","eissn":"","lissn":"2574-0296","openAccess":true} +{"id":"unibi","officialname":"Bioactive compounds in health and disease.","issn":"2574-0334","eissn":"","lissn":"2574-0334","openAccess":true} +{"id":"unibi","officialname":"Journal of computer science integration.","issn":"2574-108X","eissn":"","lissn":"2574-108X","openAccess":true} +{"id":"unibi","officialname":"Child and adolescent obesity.","issn":"2574-254X","eissn":"","lissn":"2574-254X","openAccess":true} +{"id":"unibi","officialname":"Journal of research on the college president.","issn":"2574-3325","eissn":"","lissn":"2574-3325","openAccess":true} +{"id":"unibi","officialname":"European journal of sustainable development.","issn":"2239-6101","eissn":"","lissn":"2239-5938","openAccess":true} \ No newline at end of file