From bc4b86c27c1a080fa487c163df7671a7effaf757 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 13 Jul 2021 11:52:24 +0200 Subject: [PATCH 01/70] updated URL in the issueManagement tag --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 45e230c4f..6e4526e41 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ Redmine - https://issue.openaire.research-infrastructures.eu/projects/openaire + https://support.openaire.eu/projects/openaire From c35c1176011be0086ce199793346a43dd0809ebd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 14 Jul 2021 09:44:32 +0200 Subject: [PATCH 02/70] fixed process doiboost workflow: - splitted OrcidToOAF into two phase preprocess and process - updated workflow used in production --- .../crossref/SparkMapDumpIntoOAF.scala | 2 +- .../orcid/SparkConvertORCIDToOAF.scala | 56 +++------------ .../doiboost/orcid/SparkPreprocessORCID.scala | 70 +++++++++++++++++++ .../doiboost/uw/SparkMapUnpayWallToOAF.scala | 2 +- .../convert_crossref_dump_to_oaf_params.json | 6 ++ .../doiboost/convert_orcid_to_oaf_params.json | 6 ++ .../doiboost/convert_uw_to_oaf_params.json | 6 ++ .../dhp/doiboost/oozie_app/workflow.xml | 2 +- .../doiboost/orcid_oaf/oozie_app/workflow.xml | 2 +- ...rams.json => preprocess_orcid_params.json} | 3 +- .../orcid/MappingORCIDToOAFTest.scala | 2 +- 11 files changed, 103 insertions(+), 54 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json rename 
dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{convert_map_to_oaf_params.json => preprocess_orcid_params.json} (59%) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala index 57acaf404..c65916610 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/SparkMapDumpIntoOAF.scala @@ -21,7 +21,7 @@ object SparkMapDumpIntoOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index a359eb3c6..9117bcb34 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -1,61 +1,18 @@ package eu.dnetlib.doiboost.orcid -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.merge.AuthorMerger import eu.dnetlib.dhp.schema.oaf.Publication -import eu.dnetlib.dhp.schema.orcid.OrcidDOI import org.apache.commons.io.IOUtils import 
org.apache.spark.SparkConf -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) - def fixORCIDItem(item :ORCIDItem):ORCIDItem = { - new ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) - - } - - - def run(spark:SparkSession,sourcePath:String,workingPath:String, targetPath:String):Unit = { - import spark.implicits._ - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - - val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) - - spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") - - val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) - - spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") - - val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] - - val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] - - works.joinWith(authors, authors("oid").equalTo(works("oid"))) - .map(i =>{ - val doi = i._1.doi - var author = i._2 - (doi, author) - }).groupBy(col("_1").alias("doi")) - .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] - .map(s => fixORCIDItem(s)) - .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") - - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] - - logger.info("Converting ORCID to OAF") - dataset.map(o => 
ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) - } - def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -64,11 +21,16 @@ object SparkConvertORCIDToOAF { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + import spark.implicits._ - val sourcePath = parser.get("sourcePath") val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - run(spark, sourcePath, workingPath, targetPath) + + val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + + logger.info("Converting ORCID to OAF") + dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala new file mode 100644 index 000000000..d6911cfa7 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -0,0 +1,70 @@ +package eu.dnetlib.doiboost.orcid + +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.merge.AuthorMerger +import eu.dnetlib.dhp.schema.oaf.Publication +import eu.dnetlib.dhp.schema.orcid.OrcidDOI +import org.apache.commons.io.IOUtils +import 
org.apache.spark.SparkConf +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +object SparkPreprocessORCID { + val logger: Logger = LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + + def fixORCIDItem(item :ORCIDItem):ORCIDItem = { + ORCIDItem(item.doi, item.authors.groupBy(_.oid).map(_._2.head).toList) + + } + + + def run(spark:SparkSession,sourcePath:String,workingPath:String):Unit = { + import spark.implicits._ + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + + val inputRDD:RDD[OrcidAuthor] = spark.sparkContext.textFile(s"$sourcePath/authors").map(s => ORCIDToOAF.convertORCIDAuthor(s)).filter(s => s!= null).filter(s => ORCIDToOAF.authorValid(s)) + + spark.createDataset(inputRDD).as[OrcidAuthor].write.mode(SaveMode.Overwrite).save(s"$workingPath/author") + + val res = spark.sparkContext.textFile(s"$sourcePath/works").flatMap(s => ORCIDToOAF.extractDOIWorks(s)).filter(s => s!= null) + + spark.createDataset(res).as[OrcidWork].write.mode(SaveMode.Overwrite).save(s"$workingPath/works") + + val authors :Dataset[OrcidAuthor] = spark.read.load(s"$workingPath/author").as[OrcidAuthor] + + val works :Dataset[OrcidWork] = spark.read.load(s"$workingPath/works").as[OrcidWork] + + works.joinWith(authors, authors("oid").equalTo(works("oid"))) + .map(i =>{ + val doi = i._1.doi + val author = i._2 + (doi, author) + }).groupBy(col("_1").alias("doi")) + .agg(collect_list(col("_2")).alias("authors")).as[ORCIDItem] + .map(s => fixORCIDItem(s)) + .write.mode(SaveMode.Overwrite).save(s"$workingPath/orcidworksWithAuthor") + } + + def main(args: Array[String]): Unit = { + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) + 
parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + + run(spark, sourcePath, workingPath) + + } + +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala index a72e4b0d6..4530926f1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/SparkMapUnpayWallToOAF.scala @@ -18,7 +18,7 @@ object SparkMapUnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(SparkMapDumpIntoOAF.getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkMapDumpIntoOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json new file mode 100644 index 000000000..da324f8c4 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_crossref_dump_to_oaf_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": 
"the source path ", "paramRequired": false}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json new file mode 100644 index 000000000..6c9ca5ede --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path ", "paramRequired": false}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json new file mode 100644 index 000000000..da324f8c4 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_uw_to_oaf_params.json @@ -0,0 +1,6 @@ +[ + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the OAF Orcid transformed", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source path ", "paramRequired": false}, + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml index 
fa47e142d..34b4b5c5e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml @@ -368,7 +368,7 @@ yarn-cluster cluster Convert ORCID to Dataset - eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF + eu.dnetlib.doiboost.orcid.SparkPreprocessORCID dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml index bffde793b..0670e18de 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid_oaf/oozie_app/workflow.xml @@ -34,7 +34,7 @@ yarn-cluster cluster Convert ORCID to Dataset - eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF + eu.dnetlib.doiboost.orcid.SparkPreprocessORCID dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json similarity index 59% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json index 152103570..fdc1e2f20 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/convert_map_to_oaf_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json @@ -1,7 +1,6 @@ [ {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the path of the Orcid Input 
file", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path ", "paramRequired": false}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the working dir path", "paramRequired": true}, - {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} + {"paramName":"m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala index 7628fb853..076a01526 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -46,7 +46,7 @@ class MappingORCIDToOAFTest { implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] import spark.implicits._ - SparkConvertORCIDToOAF.run( spark,sourcePath, workingPath, targetPath) + SparkPreprocessORCID.run( spark,sourcePath, workingPath) val mapper = new ObjectMapper() From 441701c85c4e41c50bbdd49334234a28ff554c01 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:36:30 +0200 Subject: [PATCH 03/70] DoiBoost AccessRigh #4362 - If multiple licenses are available, take the one applied to 'vor' --- .../doiboost/crossref/Crossref2Oaf.scala | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index fb96717d9..3f2f63fe6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -168,12 +168,22 @@ case object Crossref2Oaf { // Mapping instance val instance = new Instance() val license = for { - JString(lic) <- json \ "license" \ "URL" - } yield asField(lic) - val l = license.filter(d => StringUtils.isNotBlank(d.getValue)) - if (l.nonEmpty) - instance.setLicense(l.head) - + JObject(license) <- json \ "license" + JField("URL", JString(lic)) <- license + JField("content-version", JString(content_version)) <- license + } yield (asField(lic), content_version) + val l = license.filter(d => StringUtils.isNotBlank(d._1.getValue)) + if (l.nonEmpty){ + if (l exists (d => d._2.equals("vor"))){ + for(d <- l){ + if (d._2.equals("vor")){ + instance.setLicense(d._1) + } + } + } + else{ + instance.setLicense(l.head._1)} + } // Ticket #6281 added pid to Instance instance.setPid(result.getPid) From 3d8e2aa1468c3b112f49f30ae8532522ef67470c Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 14 Jul 2021 14:37:06 +0200 Subject: [PATCH 04/70] Code refactor: - removed old workflows in doiboost - splitted workflow of doiboost in preprocess and process --- .../orcid/SparkConvertORCIDToOAF.scala | 17 +- .../doiboost/orcid/SparkPreprocessORCID.scala | 2 +- .../doiboost/crossref/oozie_app/workflow.xml | 101 -------- .../intersection/oozie_app/config-default.xml | 38 --- .../intersection/oozie_app/workflow.xml | 96 -------- .../doiboost/mag/oozie_app/config-default.xml | 42 ---- .../dhp/doiboost/mag/oozie_app/workflow.xml | 92 -------- .../oozie_app/config-default.xml | 0 .../preprocess/oozie_app/workflow.xml | 216 ++++++++++++++++++ .../oozie_app/config-default.xml | 0 .../{ => process}/oozie_app/workflow.xml | 206 +---------------- .../unpaywall/oozie_app/config-default.xml | 38 --- .../doiboost/unpaywall/oozie_app/workflow.xml | 55 ----- .../dnetlib/doiboost/mag/MAGMappingTest.scala | 49 ++-- .../orcid/MappingORCIDToOAFTest.scala | 6 +- 15 files changed, 264 
insertions(+), 694 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{crossref => preprocess}/oozie_app/config-default.xml (100%) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{ => process}/oozie_app/config-default.xml (100%) rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{ => process}/oozie_app/workflow.xml (54%) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala index 9117bcb34..fa4a93e00 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkConvertORCIDToOAF.scala @@ -10,6 +10,16 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertORCIDToOAF { val logger: Logger = 
LoggerFactory.getLogger(SparkConvertORCIDToOAF.getClass) + + def run(spark:SparkSession, workingPath:String, targetPath:String) :Unit = { + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] + import spark.implicits._ + val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] + + logger.info("Converting ORCID to OAF") + dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) + } + def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) @@ -21,16 +31,11 @@ object SparkConvertORCIDToOAF { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - implicit val mapEncoderPubs: Encoder[Publication] = Encoders.kryo[Publication] - import spark.implicits._ val workingPath = parser.get("workingPath") val targetPath = parser.get("targetPath") - val dataset: Dataset[ORCIDItem] =spark.read.load(s"$workingPath/orcidworksWithAuthor").as[ORCIDItem] - - logger.info("Converting ORCID to OAF") - dataset.map(o => ORCIDToOAF.convertTOOAF(o)).write.mode(SaveMode.Overwrite).save(targetPath) + run(spark,workingPath, targetPath) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala index d6911cfa7..31f331912 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkPreprocessORCID.scala @@ -50,7 +50,7 @@ object SparkPreprocessORCID { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf() - val parser = new 
ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/convert_orcid_to_oaf_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(SparkConvertORCIDToOAF.getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/preprocess_orcid_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml deleted file mode 100644 index 63c2e9ef2..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/workflow.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - - workingPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - timestamp - Timestamp for incremental Harvesting - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.CrossrefImporter - -t${workingPath}/input/crossref/index_update - -n${nameNode} - -ts${timestamp} - - - - - - - - yarn-cluster - cluster - ExtractCrossrefToOAF - eu.dnetlib.doiboost.crossref.CrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --workingPath/data/doiboost/input/crossref - --masteryarn-cluster - - - - - - - - - - - - - - - - - - yarn-cluster - cluster - ConvertCrossrefToOAF - eu.dnetlib.doiboost.crossref.SparkMapDumpIntoOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - 
--executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${workingPath}/input/crossref/crossref_ds - --targetPath${workingPath}/process/ - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml deleted file mode 100644 index cf617a84c..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/config-default.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml deleted file mode 100644 index dcde62c9d..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/intersection/oozie_app/workflow.xml +++ /dev/null @@ -1,96 +0,0 @@ - - - - hostedByMapPath - the Hosted By Map Path - - - affiliationPath - the Affliation Path - - - paperAffiliationPath - the paperAffiliation Path - - - workingDirPath - the Working Path - - - sparkDriverMemory - memory for driver process - - - 
sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - yarn-cluster - cluster - Create DOIBoost Infospace - eu.dnetlib.doiboost.SparkGenerateDoiBoost - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --hostedByMapPath${hostedByMapPath} - --affiliationPath${affiliationPath} - --paperAffiliationPath${paperAffiliationPath} - --workingDirPath${workingDirPath} - --masteryarn-cluster - - - - - - - - - yarn-cluster - cluster - Generate DOIBoost ActionSet - eu.dnetlib.doiboost.SparkGenerateDOIBoostActionSet - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --dbPublicationPath${workingDirPath}/doiBoostPublicationFiltered - --dbDatasetPath${workingDirPath}/crossrefDataset - --crossRefRelation${workingDirPath}/crossrefRelation - --dbaffiliationRelationPath${workingDirPath}/doiBoostPublicationAffiliation - -do${workingDirPath}/doiBoostOrganization - --targetPath${workingDirPath}/actionDataSet - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml deleted file mode 100644 index 59e5c059f..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - 
- - oozie.action.sharelib.for.spark - spark2 - - - oozie.wf.rerun.failnodes - false - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml deleted file mode 100644 index 9d19dddc7..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/mag/oozie_app/workflow.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - sourcePath - the working dir base path - - - targetPath - the working dir base path - - - workingPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - yarn-cluster - cluster - Convert Mag to Dataset - eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - ${sparkExtraOPT} - - --sourcePath${sourcePath} - --targetPath${workingPath} - --masteryarn-cluster - - - - - - - - - - yarn-cluster - cluster - Convert Mag to OAF Dataset - eu.dnetlib.doiboost.mag.SparkPreProcessMAG - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf 
spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${workingPath} - --workingPath${workingPath}/process - --targetPath${targetPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref/oozie_app/config-default.xml rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml new file mode 100644 index 000000000..03f7b7566 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -0,0 +1,216 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + inputPathCrossref + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + + + MAGDumpPath + the MAG dump working path + + + + inputPathMAG + the MAG working path + + + + + + inputPathOrcid + the ORCID input path + + + + workingPathOrcid + the ORCID working path + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} + ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} + ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} + ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} + ${wf:conf('resumeFrom') eq 'PreProcessORCID'} + + + + + 
+ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + yarn-cluster + cluster + SparkGenerateCrossrefDataset + eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=7G + --executor-cores=2 + --driver-memory=7G + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/crossref_unpack/ + --targetPath${inputPathCrossref}/crossref_ds + + + + + + + + + + + + + + + + + + + + + + + + + + + + yarn-cluster + cluster + Convert Mag to Dataset + eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + 
--driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${MAGDumpPath} + --targetPath${inputPathMAG}/dataset + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Convert ORCID to Dataset + eu.dnetlib.doiboost.orcid.SparkPreprocessORCID + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${inputPathOrcid} + --workingPath${workingPathOrcid} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/config-default.xml rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml similarity index 54% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml rename to 
dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 34b4b5c5e..f845d97f3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory @@ -17,8 +17,6 @@ sparkExecutorCores number of cores used by single executor - - workingPath @@ -40,29 +38,8 @@ inputPathCrossref the Crossref input path - - crossrefDumpPath - the Crossref dump path - - - - - - - - - - - - - - - MAGDumpPath - the MAG dump working path - - inputPathMAG the MAG working path @@ -76,11 +53,6 @@ - - inputPathOrcid - the ORCID input path - - workingPathOrcid the ORCID working path @@ -103,15 +75,12 @@ - ${wf:conf('resumeFrom') eq 'ConvertCrossrefToOAF'} - ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'PreprocessMag'} ${wf:conf('resumeFrom') eq 'PreprocessUW'} - ${wf:conf('resumeFrom') eq 'PreprocessORCID'} + ${wf:conf('resumeFrom') eq 'ProcessORCID'} ${wf:conf('resumeFrom') eq 'CreateDOIBoost'} ${wf:conf('resumeFrom') eq 'GenerateActionSet'} - ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} - + @@ -119,170 +88,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf 
spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=7G - --executor-cores=2 - --driver-memory=7G - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/crossref_unpack/ - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - yarn-cluster - cluster - Convert Mag to Dataset - eu.dnetlib.doiboost.mag.SparkImportMagIntoDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${MAGDumpPath} - --targetPath${inputPathMAG}/dataset - --masteryarn-cluster - - - - - - yarn-cluster @@ -326,7 +131,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${inputPathMAG}/dataset - 
--workingPath${inputPathMAG}/process + --workingPath${inputPathMAG}/process_p --targetPath${workingPath} --masteryarn-cluster @@ -368,7 +173,7 @@ yarn-cluster cluster Convert ORCID to Dataset - eu.dnetlib.doiboost.orcid.SparkPreprocessORCID + eu.dnetlib.doiboost.orcid.SparkConvertORCIDToOAF dhp-doiboost-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -380,7 +185,6 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePath${inputPathOrcid} --workingPath${workingPathOrcid} --targetPath${workingPath}/orcidPublication --masteryarn-cluster diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml deleted file mode 100644 index cf617a84c..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/config-default.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml deleted file mode 100644 index d2a69752e..000000000 --- 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/unpaywall/oozie_app/workflow.xml +++ /dev/null @@ -1,55 +0,0 @@ - - - - sourcePath - the working dir base path - - - targetPath - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - yarn-cluster - cluster - Convert UnpayWall to Dataset - eu.dnetlib.doiboost.uw.SparkMapUnpayWallToOAF - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - ${sparkExtraOPT} - - --sourcePath${sourcePath}/uw_extracted - --targetPath${targetPath} - --masteryarn-cluster - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala index 7eb50665e..46d4ec08d 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/mag/MAGMappingTest.scala @@ -1,22 +1,15 @@ package eu.dnetlib.doiboost.mag -import java.sql.Timestamp - -import eu.dnetlib.dhp.schema.oaf.Publication -import org.apache.htrace.fasterxml.jackson.databind.SerializationFeature -import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.api.java.function.MapFunction -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} -import org.codehaus.jackson.map.{ObjectMapper, SerializationConfig} -import org.junit.jupiter.api.Test -import org.slf4j.{Logger, LoggerFactory} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import 
org.codehaus.jackson.map.ObjectMapper import org.junit.jupiter.api.Assertions._ -import org.apache.spark.sql.functions._ +import org.junit.jupiter.api.Test +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} -import scala.collection.JavaConverters._ +import java.sql.Timestamp import scala.io.Source -import scala.reflect.ClassTag -import scala.util.matching.Regex @@ -65,14 +58,19 @@ class MAGMappingTest { @Test def normalizeDoiTest():Unit = { - import org.json4s.jackson.Serialization.write - import org.json4s.DefaultFormats + implicit val formats = DefaultFormats - val conf = new SparkConf().setAppName("test").setMaster("local[2]") - val sc = new SparkContext(conf) - val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() val path = getClass.getResource("magPapers.json").getPath import org.apache.spark.sql.Encoders @@ -90,14 +88,19 @@ class MAGMappingTest { @Test def normalizeDoiTest2():Unit = { - import org.json4s.jackson.Serialization.write import org.json4s.DefaultFormats implicit val formats = DefaultFormats - val conf = new SparkConf().setAppName("test").setMaster("local[2]") - val sc = new SparkContext(conf) - val spark = SparkSession.builder.config(sc.getConf).getOrCreate() + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() val path = getClass.getResource("duplicatedMagPapers.json").getPath import org.apache.spark.sql.Encoders diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala 
index 076a01526..b484dc087 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/MappingORCIDToOAFTest.scala @@ -48,6 +48,8 @@ class MappingORCIDToOAFTest { SparkPreprocessORCID.run( spark,sourcePath, workingPath) + SparkConvertORCIDToOAF.run(spark, workingPath,targetPath) + val mapper = new ObjectMapper() @@ -61,6 +63,8 @@ class MappingORCIDToOAFTest { assertTrue(oA == p.count()) println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(p.first())) + spark.close() + } @@ -78,7 +82,7 @@ class MappingORCIDToOAFTest { val oaf = ORCIDToOAF.convertTOOAF(orcid) assert(oaf.getPid.size() == 1) oaf.getPid.toList.foreach(pid => assert(pid.getQualifier.getClassid.equals("doi"))) - oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876".toLowerCase()))) + oaf.getPid.toList.foreach(pid => assert(pid.getValue.equals("10.1042/BCJ20160876"))) //println(mapper.writeValueAsString(ORCIDToOAF.convertTOOAF(orcid))) From 981b1018f6459b73205e0c2a90fbcf3fc72c52e8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:43:00 +0200 Subject: [PATCH 05/70] DoiBoost AccessRigh #4362 - decide access right according to licence. 
Default access right is Unknown --- .../doiboost/DoiBoostMappingUtil.scala | 65 ++++++++++++++++++- .../doiboost/crossref/Crossref2Oaf.scala | 4 +- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index c0939fec1..2558e1c67 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -1,12 +1,16 @@ package eu.dnetlib.doiboost +import java.time.LocalDate +import java.time.format.DateTimeFormatter + import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.oaf.{AccessRight, DataInfo, Dataset, Field, Instance, KeyValue, Oaf, OpenAccessRoute, Organization, Publication, Qualifier, Relation, Result, StructuredProperty} import eu.dnetlib.dhp.utils.DHPUtils import org.apache.commons.lang3.StringUtils import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{getClosedAccessQualifier, getEmbargoedAccessQualifier, getUnknownQualifier} import org.json4s import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse @@ -118,6 +122,51 @@ object DoiBoostMappingUtil { } + def decideAccessRight(lic : Field[String], date:String) : AccessRight = { + if(lic == null){ + //Default value Unknown + return getUnknownQualifier() + } + val license : String = lic.getValue + //CC licenses + if(license.startsWith("cc") || + license.startsWith("http://creativecommons.org/licenses") || + 
license.startsWith("https://creativecommons.org/licenses") || + + //ACS Publications Author choice licenses (considered OPEN also by Unpaywall) + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") || + license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") || + + //APA (considered OPEN also by Unpaywall) + license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")){ + + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + + //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) + if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ + val now = java.time.LocalDate.now + val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") + + val pub_date = LocalDate.parse(date, formatter) + + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + } + + return getClosedAccessQualifier() + + } + def getOpenAccessQualifier():AccessRight = { @@ -129,6 +178,20 @@ object DoiBoostMappingUtil { } + def getUnknownQualifier():AccessRight = { + OafMapperUtils.accessRight("UNKNOWN","not available",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + + def getEmbargoedAccessQualifier():AccessRight = { + OafMapperUtils.accessRight("EMBARGO","Embargo",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + def getClosedAccessQualifier():AccessRight = { + OafMapperUtils.accessRight("CLOSED","Closed Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + } + + def extractInstance(r:Result):Option[Instance] = { 
r.getInstance().asScala.find(i => i.getInstancetype != null && i.getInstancetype.getClassid.nonEmpty) } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index 3f2f63fe6..25f0ff381 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -4,7 +4,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf.utils.{IdentifierFactory, OafMapperUtils} import eu.dnetlib.dhp.utils.DHPUtils -import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.DoiBoostMappingUtil.{decideAccessRight, _} import org.apache.commons.lang.StringUtils import org.json4s import org.json4s.DefaultFormats @@ -195,7 +195,7 @@ case object Crossref2Oaf { OafMapperUtils.qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS, ModelConstants.DNET_REVIEW_LEVELS)) } - instance.setAccessright(getRestrictedQualifier()) + instance.setAccessright(decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue)) instance.setInstancetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) result.setResourcetype(OafMapperUtils.qualifier(cobjCategory.substring(0, 4), cobjCategory.substring(5), ModelConstants.DNET_PUBLICATION_RESOURCE, ModelConstants.DNET_PUBLICATION_RESOURCE)) From 6222adf17609ffe956be0bf6df2976363f182d3e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 11:46:31 +0200 Subject: [PATCH 06/70] DoiBoost AccessRigh #4362 - added resources and test for crossref mapping (licence part included) --- .../crossref/CrossrefMappingTest.scala | 97 ++ 
.../crossref/publication_license_embargo.json | 1537 +++++++++++++++++ .../publication_license_embargo_open.json | 1537 +++++++++++++++++ .../crossref/publication_license_open.json | 1537 +++++++++++++++++ .../crossref/publication_license_vor.json | 1537 +++++++++++++++++ 5 files changed, 6245 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 0fa34d88e..63555bcbd 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -492,6 +492,103 @@ class CrossrefMappingTest { } + @Test + def testLicenseVorClosed() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_vor.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://www.springer.com/vor"))) + assertTrue(item.getInstance().asScala 
exists (i => i.getAccessright.getClassid.equals("CLOSED"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + + + + + } + + @Test + def testLicenseOpen() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_open.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + @Test + def testLicenseEmbargoOpen() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_open.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("OPEN"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == OpenAccessRoute.hybrid)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + 
println(mapper.writeValueAsString(item)) + + } + + @Test + def testLicenseEmbargo() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo.json")).mkString + + + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + + } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json new file mode 100644 index 000000000..47ca55f34 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { 
+"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. 
A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. 
Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. 
Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. 
H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": 
"Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. 
GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. 
Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 
12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. 
Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. 
Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." 
+}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. 
robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. 
A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. 
Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 
829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 
365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." 
+}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." 
+}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" 
+} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json new file mode 100644 index 000000000..e667f3c7f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_open.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", 
+"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. 
A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. 
Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. 
Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. 
H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": 
"Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. 
GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. 
Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 
12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. 
Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. 
Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." 
+}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. 
robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. 
A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. 
Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 
829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 
365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." 
+}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." 
+}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2020, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" 
+} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json new file mode 100644 index 000000000..225a36b1f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_open.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": 
"http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. 
A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. 
Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. 
Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. 
H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": 
"Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. 
GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. 
Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 
12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. 
Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. 
Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." 
+}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. 
robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. 
A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. 
Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 
829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 
365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." 
+}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." 
+}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" 
+} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json new file mode 100644 index 000000000..f2e91a23f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_vor.json @@ -0,0 +1,1537 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://www.springer.com/vor", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", 
+"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. 
A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. 
Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. 
Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. 
H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": 
"Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. 
GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. 
Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 
12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. 
Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. 
Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." 
+}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. 
robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. 
A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. 
Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 
829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 
365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." 
+}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." 
+}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": "ArticleHistory", +"label": "Article History" 
+} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file From f4f7c6f9d34ac050e60bf35b42518fdb2082b280 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 13:52:52 +0200 Subject: [PATCH 07/70] DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is --- .../dnetlib/doiboost/uw/UnpayWallToOAF.scala | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index cc758bcae..0a5ba063f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -11,6 +11,7 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ import eu.dnetlib.doiboost.DoiBoostMappingUtil._ +import eu.dnetlib.doiboost.uw.UnpayWallToOAF.get_unpaywall_color @@ -23,6 +24,21 @@ case class OALocation(evidence:Option[String], host_type:Option[String], is_best object UnpayWallToOAF { val logger: Logger = LoggerFactory.getLogger(getClass) + + def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { + if(input.equalsIgnoreCase("close")) + return None + if(input.equalsIgnoreCase("green")) + return Some(OpenAccessRoute.green) + if(input.equalsIgnoreCase("bronze")) + return Some(OpenAccessRoute.bronze) + if(input.equalsIgnoreCase("hybrid")) + return Some(OpenAccessRoute.hybrid) + else + return Some(OpenAccessRoute.gold) + + } + 
def get_color(is_oa:Boolean, location: OALocation, journal_is_oa:Boolean):Option[OpenAccessRoute] = { if (is_oa) { if (location.host_type.isDefined) { @@ -65,7 +81,7 @@ object UnpayWallToOAF { val oaLocation:OALocation = (json \ "best_oa_location").extractOrElse[OALocation](null) - val colour = get_color(is_oa, oaLocation, journal_is_oa) + val colour = get_unpaywall_color((json \ "oa_status").extractOrElse[String](null)) pub.setCollectedfrom(List(createUnpayWallCollectedFrom()).asJava) pub.setDataInfo(generateDataInfo()) From 09ad7b2a9e3bc6c64c12d345da56aaccaf3b1fa1 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Jul 2021 13:58:05 +0200 Subject: [PATCH 08/70] DoiBoost AccessRigh #4362 - Unpaywall mapped to OAF with OPEN instance (non oa are filtered out) (unknown hostedby) + map the color as it is --- .../src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala index 0a5ba063f..c8324cde1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/uw/UnpayWallToOAF.scala @@ -26,7 +26,7 @@ object UnpayWallToOAF { def get_unpaywall_color(input:String):Option[OpenAccessRoute] = { - if(input.equalsIgnoreCase("close")) + if(input == null || input.equalsIgnoreCase("close")) return None if(input.equalsIgnoreCase("green")) return Some(OpenAccessRoute.green) From 7e2caafe840ae192d6df8784f2954fb51604dd24 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 15 Jul 2021 09:53:12 +0200 Subject: [PATCH 09/70] Scholexplorer: fixed mapping typologies --- .../dhp/sx/graph/scholix/ScholixUtils.scala | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index 6a7ee7803..4dafd4fa3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.sx.graph.scholix -import eu.dnetlib.dhp.schema.oaf.{Dataset, Relation, Result, StructuredProperty} -import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixCollectedFrom, ScholixEntityId, ScholixIdentifier, ScholixRelationship, ScholixResource} +import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.sx.scholix._ import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology} import eu.dnetlib.dhp.utils.DHPUtils -import org.apache.spark.sql.Encoders.bean import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.{Encoder, Encoders} import org.json4s @@ -301,14 +300,14 @@ object ScholixUtils { if (r.getPid == null || r.getPid.isEmpty) return null - val pids:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) - if (pids.isEmpty) + val persistentIdentifiers:List[ScholixIdentifier] = extractTypedIdentifierFromInstance(r) + if (persistentIdentifiers.isEmpty) return null - s.setLocalIdentifier(pids.asJava) - if (r.isInstanceOf[Dataset]) - s.setTypology(Typology.dataset) - else + s.setLocalIdentifier(persistentIdentifiers.asJava) + if (r.isInstanceOf[Publication] ) s.setTypology(Typology.publication) + else + s.setTypology(Typology.dataset) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) From bf9e0d2d4f663bc0187965f454b9b4b6412d9f4f Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 15 Jul 2021 17:59:41 +0200 Subject: [PATCH 10/70] Merge pull request 
'orcid-no-doi' (#123) from enrico.ottonello/dnet-hadoop:orcid-no-doi into beta Reviewed-on: https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/123 --- .../SparkGenEnrichedOrcidWorks.java | 29 +++++++++++++++---- .../orcidnodoi/oozie_app/workflow.xml | 13 ++++++--- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 9f8727d30..1d47808ef 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -4,6 +4,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -32,10 +33,7 @@ import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.orcid.AuthorData; -import eu.dnetlib.dhp.schema.orcid.AuthorSummary; -import eu.dnetlib.dhp.schema.orcid.Work; -import eu.dnetlib.dhp.schema.orcid.WorkDetail; +import eu.dnetlib.dhp.schema.orcid.*; import eu.dnetlib.doiboost.orcid.json.JsonHelper; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; @@ -111,6 +109,10 @@ public class SparkGenEnrichedOrcidWorks { Encoders.bean(WorkDetail.class)); logger.info("Works data loaded: " + workDataset.count()); + final LongAccumulator warnNotFoundContributors = spark + .sparkContext() + .longAccumulator("warnNotFoundContributors"); + JavaRDD> enrichedWorksRDD = workDataset .joinWith( 
authorDataset, @@ -119,7 +121,21 @@ public class SparkGenEnrichedOrcidWorks { (MapFunction, Tuple2>) value -> { WorkDetail w = value._1; AuthorData a = value._2; - AuthorMatcher.match(a, w.getContributors()); + if (w.getContributors() == null + || (w.getContributors() != null && w.getContributors().size() == 0)) { + Contributor c = new Contributor(); + c.setName(a.getName()); + c.setSurname(a.getSurname()); + c.setCreditName(a.getCreditName()); + c.setOid(a.getOid()); + List contributors = Arrays.asList(c); + w.setContributors(contributors); + if (warnNotFoundContributors != null) { + warnNotFoundContributors.add(1); + } + } else { + AuthorMatcher.match(a, w.getContributors()); + } return new Tuple2<>(a.getOid(), JsonHelper.createOidWork(w)); }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())) @@ -172,7 +188,7 @@ public class SparkGenEnrichedOrcidWorks { OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p)))) .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( - workingPath.concat(outputEnrichedWorksPath), + outputEnrichedWorksPath, Text.class, Text.class, SequenceFileOutputFormat.class, @@ -180,6 +196,7 @@ public class SparkGenEnrichedOrcidWorks { logger.info("parsedPublications: " + parsedPublications.value().toString()); logger.info("enrichedPublications: " + enrichedPublications.value().toString()); + logger.info("warnNotFoundContributors: " + warnNotFoundContributors.value().toString()); logger.info("errorsGeneric: " + errorsGeneric.value().toString()); logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index 365c4d5b4..04ca05af2 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -7,9 +7,14 @@ outputPath + /data/orcid_activities_2020/no_doi_dataset_prod/ path where to store the action set - + + processOutputPath + /data/orcid_activities_2020/process_no_doi_dataset_prod + temporary path where to store the action set + spark2GenNoDoiDatasetMaxExecutors 40 @@ -66,7 +71,7 @@ - + @@ -92,7 +97,7 @@ --workingPath${workingPath}/ --hdfsServerUri${nameNode} --orcidDataFolderlast_orcid_dataset - --outputEnrichedWorksPathno_doi_dataset + --outputEnrichedWorksPath${processOutputPath} @@ -100,7 +105,7 @@ - ${workingPath}/no_doi_dataset/* + ${processOutputPath}/* ${outputPath} From 34506df1b607241f72f699eafeb4472bc6f11d27 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 10:29:51 +0200 Subject: [PATCH 11/70] DoiBoost AccessRigh #4362 - if the journal is open, the OPEN access right is set to all instances and color is GOLD (overwrite if the color was already set in one of the previous steps) --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 2558e1c67..6a2f0e45f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -213,8 +213,10 @@ object DoiBoostMappingUtil { if (item != null) { hb.setValue(item.officialname) hb.setKey(generateDSId(item.id)) - if (item.openAccess) + if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) + i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) + } val ar = 
getOpenAccessQualifier() publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) } From 199123b74b3a988a2cadf5bec843998e734d494b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 17:30:27 +0200 Subject: [PATCH 12/70] DoiBoost AccessRigh #4362 - Fixed issue on date formatting. Added test method and associated resource --- .../doiboost/DoiBoostMappingUtil.scala | 50 +- .../crossref/CrossrefMappingTest.scala | 21 + .../publication_license_embargo_datetime.json | 1538 +++++++++++++++++ 3 files changed, 1600 insertions(+), 9 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 6a2f0e45f..b0c6e009b 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -149,18 +149,50 @@ object DoiBoostMappingUtil { //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED) if(license.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model")){ val now = java.time.LocalDate.now - val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") - val pub_date = LocalDate.parse(date, formatter) + try{ + val pub_date = LocalDate.parse(date, DateTimeFormatter.ofPattern("yyyy-MM-dd")) + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + }catch { + case e: Exception => { + try{ + val pub_date = LocalDate.parse(date, 
DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")) + if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ + val oaq : AccessRight = getOpenAccessQualifier() + oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) + return oaq + } + else{ + return getEmbargoedAccessQualifier() + } + }catch{ + case ex: Exception => return getClosedAccessQualifier() + } + } - if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ - val oaq : AccessRight = getOpenAccessQualifier() - oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) - return oaq - } - else{ - return getEmbargoedAccessQualifier() } + + //val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") + + + + // val pub_date = LocalDate.parse(date, formatter) + +// if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ +// val oaq : AccessRight = getOpenAccessQualifier() +// oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) +// return oaq +// } +// else{ +// return getEmbargoedAccessQualifier() +// } } return getClosedAccessQualifier() diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala index 63555bcbd..75fb3f787 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/crossref/CrossrefMappingTest.scala @@ -589,6 +589,27 @@ class CrossrefMappingTest { } + @Test + def testLicenseEmbargoDateTime() :Unit = { + val json = Source.fromInputStream(getClass.getResourceAsStream("publication_license_embargo_datetime.json")).mkString + assertNotNull(json) + assertFalse(json.isEmpty); + + val resultList: List[Oaf] = Crossref2Oaf.convert(json) + + assertTrue(resultList.nonEmpty) + + + val item : Result = resultList.filter(p => p.isInstanceOf[Result]).head.asInstanceOf[Result] + + assertTrue(item.getInstance().asScala exists (i => 
i.getLicense.getValue.equals("https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getClassid.equals("EMBARGO"))) + assertTrue(item.getInstance().asScala exists (i => i.getAccessright.getOpenAccessRoute == null)) + mapper.getSerializationConfig.enable(SerializationConfig.Feature.INDENT_OUTPUT) + println(mapper.writeValueAsString(item)) + + } + } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json new file mode 100644 index 000000000..c84e16350 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/publication_license_embargo_datetime.json @@ -0,0 +1,1538 @@ +{ +"indexed": { +"date-parts": [ +[ +2021, +7, +2 +] +], +"date-time": "2021-07-02T07:30:10Z", +"timestamp": 1625211010708 +}, +"reference-count": 83, +"publisher": "Springer Science and Business Media LLC", +"issue": "5", +"license": [ +{ +"URL": "https://www.springer.com/tdm", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "tdm" +}, +{ +"URL": "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model", +"start": { +"date-parts": [ +[ +2021, +2, +22 +] +], +"date-time": "2021-02-22T00:00:00Z", +"timestamp": 1613952000000 +}, +"delay-in-days": 0, +"content-version": "vor" +} +], +"content-domain": { +"domain": [ +"link.springer.com" +], +"crossmark-restriction": false +}, +"short-container-title": [ +"Nat Astron" +], +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"DOI": "10.1038/s41550-020-01295-8", +"type": "journal-article", +"created": { +"date-parts": [ +[ +2021, +2, +22 +] +], 
+"date-time": "2021-02-22T17:03:42Z", +"timestamp": 1614013422000 +}, +"page": "510-518", +"update-policy": "http://dx.doi.org/10.1007/springer_crossmark_policy", +"source": "Crossref", +"is-referenced-by-count": 6, +"title": [ +"A tidal disruption event coincident with a high-energy neutrino" +], +"prefix": "10.1038", +"volume": "5", +"author": [ +{ +"ORCID": "http://orcid.org/0000-0003-2434-0387", +"authenticated-orcid": false, +"given": "Robert", +"family": "Stein", +"sequence": "first", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3859-8074", +"authenticated-orcid": false, +"given": "Sjoert van", +"family": "Velzen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8594-8666", +"authenticated-orcid": false, +"given": "Marek", +"family": "Kowalski", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Anna", +"family": "Franckowiak", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3703-5154", +"authenticated-orcid": false, +"given": "Suvi", +"family": "Gezari", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3124-2814", +"authenticated-orcid": false, +"given": "James C. 
A.", +"family": "Miller-Jones", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Sara", +"family": "Frederick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0466-3779", +"authenticated-orcid": false, +"given": "Itai", +"family": "Sfaradi", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael F.", +"family": "Bietenholz", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5936-1156", +"authenticated-orcid": false, +"given": "Assaf", +"family": "Horesh", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rob", +"family": "Fender", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2403-4582", +"authenticated-orcid": false, +"given": "Simone", +"family": "Garrappa", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-2184-6430", +"authenticated-orcid": false, +"given": "Tomás", +"family": "Ahumada", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Igor", +"family": "Andreoni", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Justin", +"family": "Belicki", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8018-5348", +"authenticated-orcid": false, +"given": "Eric C.", +"family": "Bellm", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Markus", +"family": "Böttcher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Valery", +"family": "Brinnel", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Rick", +"family": "Burruss", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1673-970X", +"authenticated-orcid": false, +"given": "S. 
Bradley", +"family": "Cenko", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8262-2924", +"authenticated-orcid": false, +"given": "Michael W.", +"family": "Coughlin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2292-0441", +"authenticated-orcid": false, +"given": "Virginia", +"family": "Cunningham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Andrew", +"family": "Drake", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Glennys R.", +"family": "Farrar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Michael", +"family": "Feeney", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Ryan J.", +"family": "Foley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3653-5598", +"authenticated-orcid": false, +"given": "Avishay", +"family": "Gal-Yam", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "V. 
Zach", +"family": "Golkhou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-4163-4996", +"authenticated-orcid": false, +"given": "Ariel", +"family": "Goobar", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-3168-0139", +"authenticated-orcid": false, +"given": "Matthew J.", +"family": "Graham", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Erica", +"family": "Hammerstein", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-3367-3415", +"authenticated-orcid": false, +"given": "George", +"family": "Helou", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-9878-7889", +"authenticated-orcid": false, +"given": "Tiara", +"family": "Hung", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Mansi M.", +"family": "Kasliwal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5740-7747", +"authenticated-orcid": false, +"given": "Charles D.", +"family": "Kilpatrick", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-5105-344X", +"authenticated-orcid": false, +"given": "Albert K. 
H.", +"family": "Kong", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-6540-1484", +"authenticated-orcid": false, +"given": "Thomas", +"family": "Kupfer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2451-5482", +"authenticated-orcid": false, +"given": "Russ R.", +"family": "Laher", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-2242-0244", +"authenticated-orcid": false, +"given": "Ashish A.", +"family": "Mahabal", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8532-9395", +"authenticated-orcid": false, +"given": "Frank J.", +"family": "Masci", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-0280-7484", +"authenticated-orcid": false, +"given": "Jannis", +"family": "Necker", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-8342-6274", +"authenticated-orcid": false, +"given": "Jakob", +"family": "Nordin", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel A.", +"family": "Perley", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-8121-2560", +"authenticated-orcid": false, +"given": "Mickael", +"family": "Rigault", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7788-628X", +"authenticated-orcid": false, +"given": "Simeon", +"family": "Reusch", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Hector", +"family": "Rodriguez", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0002-7559-315X", +"authenticated-orcid": false, +"given": "César", +"family": "Rojas-Bravo", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-7648-4142", +"authenticated-orcid": false, +"given": 
"Ben", +"family": "Rusholme", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-4401-0430", +"authenticated-orcid": false, +"given": "David L.", +"family": "Shupe", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-9898-5597", +"authenticated-orcid": false, +"given": "Leo P.", +"family": "Singer", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0003-1546-6615", +"authenticated-orcid": false, +"given": "Jesper", +"family": "Sollerman", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Maayane T.", +"family": "Soumagnac", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Daniel", +"family": "Stern", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Kirsty", +"family": "Taggart", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Jakob", +"family": "van Santen", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Charlotte", +"family": "Ward", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"given": "Patrick", +"family": "Woudt", +"sequence": "additional", +"affiliation": [ + +] +}, +{ +"ORCID": "http://orcid.org/0000-0001-6747-8509", +"authenticated-orcid": false, +"given": "Yuhan", +"family": "Yao", +"sequence": "additional", +"affiliation": [ + +] +} +], +"member": "297", +"published-online": { +"date-parts": [ +[ +2021, +2, +22 +] +] +}, +"reference": [ +{ +"key": "1295_CR1", +"doi-asserted-by": "crossref", +"first-page": "P03012", +"DOI": "10.1088/1748-0221/12/03/P03012", +"volume": "12", +"author": "MG Aartsen", +"year": "2017", +"unstructured": "Aartsen, M. G. et al. The IceCube Neutrino Observatory: instrumentation and online systems. J. Instrum. 12, P03012 (2017).", +"journal-title": "J. Instrum." +}, +{ +"key": "1295_CR2", +"unstructured": "Stein, R. IceCube-191001A—IceCube observation of a high-energy neutrino candidate event. 
GCN Circ. 25913 (2019)." +}, +{ +"key": "1295_CR3", +"doi-asserted-by": "crossref", +"first-page": "018002", +"DOI": "10.1088/1538-3873/aaecbe", +"volume": "131", +"author": "EC Bellm", +"year": "2019", +"unstructured": "Bellm, E. C. et al. The Zwicky Transient Facility: system overview, performance, and first results. Publ. Astron. Soc. Pac. 131, 018002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR4", +"doi-asserted-by": "crossref", +"first-page": "533", +"DOI": "10.1016/j.astropartphys.2007.03.005", +"volume": "27", +"author": "M Kowalski", +"year": "2007", +"unstructured": "Kowalski, M. & Mohr, A. Detecting neutrino transients with optical follow-up observations. Astropart. Phys. 27, 533–538 (2007).", +"journal-title": "Astropart. Phys." +}, +{ +"key": "1295_CR5", +"doi-asserted-by": "crossref", +"first-page": "329", +"DOI": "10.1088/0004-637X/693/1/329", +"volume": "693", +"author": "GR Farrar", +"year": "2009", +"unstructured": "Farrar, G. R. & Gruzinov, A. Giant AGN flares and cosmic ray bursts. Astrophys. J. 693, 329–332 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR6", +"doi-asserted-by": "crossref", +"first-page": "1354", +"DOI": "10.1093/mnras/stx863", +"volume": "469", +"author": "L Dai", +"year": "2017", +"unstructured": "Dai, L. & Fang, K. Can tidal disruption events produce the IceCube neutrinos? Mon. Not. R. Astron. Soc. 469, 1354–1359 (2017).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR7", +"doi-asserted-by": "crossref", +"first-page": "114", +"DOI": "10.3847/1538-4357/ab44ca", +"volume": "886", +"author": "K Hayasaki", +"year": "2019", +"unstructured": "Hayasaki, K. & Yamazaki, R. Neutrino emissions from tidal disruption remnants. Astrophys. J. 886, 114 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR8", +"unstructured": "Farrar, G. R. & Piran, T. Tidal disruption jets as the source of Ultra-High Energy Cosmic Rays. 
Preprint at https://arxiv.org/abs/1411.0704 (2014)." +}, +{ +"key": "1295_CR9", +"doi-asserted-by": "crossref", +"first-page": "3", +"DOI": "10.3847/1538-4357/aa6344", +"volume": "838", +"author": "N Senno", +"year": "2017", +"unstructured": "Senno, N., Murase, K. & Mészáros, P. High-energy neutrino flares from X-ray bright and dark tidal disruption events. Astrophys. J. 838, 3 (2017).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR10", +"doi-asserted-by": "crossref", +"first-page": "083005", +"DOI": "10.1103/PhysRevD.93.083005", +"volume": "93", +"author": "XY Wang", +"year": "2016", +"unstructured": "Wang, X. Y. & Liu, R. Y. Tidal disruption jets of supermassive black holes as hidden sources of cosmic rays: explaining the IceCube TeV–PeV neutrinos. Phys. Rev. D 93, 083005 (2016).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR11", +"doi-asserted-by": "crossref", +"first-page": "123001", +"DOI": "10.1103/PhysRevD.95.123001", +"volume": "95", +"author": "C Lunardini", +"year": "2017", +"unstructured": "Lunardini, C. & Winter, W. High energy neutrinos from the tidal disruption of stars. Phys. Rev. D 95, 123001 (2017).", +"journal-title": "Phys. Rev. D" +}, +{ +"key": "1295_CR12", +"unstructured": "Stein, R., Franckowiak, A., Necker, J., Gezari, S. & Velzen, S. V. Candidate counterparts to IceCube-191001A with ZTF. Astron. Telegr. 13160 (2019)." +}, +{ +"key": "1295_CR13", +"doi-asserted-by": "crossref", +"first-page": "078001", +"DOI": "10.1088/1538-3873/ab006c", +"volume": "131", +"author": "MJ Graham", +"year": "2019", +"unstructured": "Graham, M. J. et al. The Zwicky Transient Facility: science objectives. Publ. Astron. Soc. Pac. 131, 078001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR14", +"unstructured": "Nordin, J. et al. TNS Astronomical Transient Report 33340 (2019)." +}, +{ +"key": "1295_CR15", +"unstructured": "Nicholl, M. et al. ePESSTO+ classification of optical transients. Astron. Telegr. 
12752 (2019)." +}, +{ +"key": "1295_CR16", +"unstructured": "van Velzen, S. et al. Seventeen tidal disruption events from the first half of ZTF survey observations: entering a new era of population studies. Preprint at https://arxiv.org/abs/2001.01409 (2020)." +}, +{ +"key": "1295_CR17", +"doi-asserted-by": "crossref", +"first-page": "82", +"DOI": "10.3847/1538-4357/ab1844", +"volume": "878", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. Late-time UV observations of tidal disruption flares reveal unobscured, compact accretion disks. Astrophys. J. 878, 82 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR18", +"doi-asserted-by": "crossref", +"first-page": "5655", +"DOI": "10.1093/mnras/staa192", +"volume": "492", +"author": "A Mummery", +"year": "2020", +"unstructured": "Mummery, A. & Balbus, S. A. The spectral evolution of disc dominated tidal disruption events. Mon. Not. R. Astron. Soc. 492, 5655–5674 (2020).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR19", +"doi-asserted-by": "crossref", +"first-page": "184", +"DOI": "10.1088/0004-637X/764/2/184", +"volume": "764", +"author": "NJ McConnell", +"year": "2013", +"unstructured": "McConnell, N. J. & Ma, C. P. Revisiting the scaling relations of black hole masses and host galaxy properties. Astrophys. J. 764, 184 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR20", +"doi-asserted-by": "crossref", +"first-page": "149", +"DOI": "10.3847/1538-4357/aa633b", +"volume": "838", +"author": "K Auchettl", +"year": "2017", +"unstructured": "Auchettl, K., Guillochon, J. & Ramirez-Ruiz, E. New physical insights about tidal disruption events from a comprehensive observational inventory at X-ray wavelengths. Astrophys. J. 838, 149 (2017).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR21", +"doi-asserted-by": "crossref", +"first-page": "4136", +"DOI": "10.1093/mnras/stz1602", +"volume": "487", +"author": "T Wevers", +"year": "2019", +"unstructured": "Wevers, T. et al. Black hole masses of tidal disruption event host galaxies II. Mon. Not. R. Astron. Soc. 487, 4136–4152 (2019).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR22", +"doi-asserted-by": "crossref", +"first-page": "198", +"DOI": "10.3847/1538-4357/aafe0c", +"volume": "872", +"author": "S van Velzen", +"year": "2019", +"unstructured": "van Velzen, S. et al. The first tidal disruption flare in ZTF: from photometric selection to multi-wavelength characterization. Astrophys. J. 872, 198 (2019).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR23", +"doi-asserted-by": "crossref", +"first-page": "A81", +"DOI": "10.1051/0004-6361/201117855", +"volume": "538", +"author": "G Morlino", +"year": "2012", +"unstructured": "Morlino, G. & Caprioli, D. Strong evidence for hadron acceleration in Tycho’s supernova remnant. Astron. Astrophys. 538, A81 (2012).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR24", +"doi-asserted-by": "crossref", +"first-page": "86", +"DOI": "10.3847/1538-4357/aaa8e0", +"volume": "854", +"author": "T Eftekhari", +"year": "2018", +"unstructured": "Eftekhari, T., Berger, E., Zauderer, B. A., Margutti, R. & Alexander, K. D. Radio monitoring of the tidal disruption event Swift J164449.3+573451. III. Late-time jet energetics and a deviation from equipartition. Astrophys. J. 854, 86 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR25", +"doi-asserted-by": "crossref", +"first-page": "1258", +"DOI": "10.1093/mnras/stt1645", +"volume": "436", +"author": "A Horesh", +"year": "2013", +"unstructured": "Horesh, A. et al. An early and comprehensive millimetre and centimetre wave and X-ray study of SN 2011dh: a non-equipartition blast wave expanding into a massive stellar wind. Mon. Not. R. Astron. 
Soc. 436, 1258–1267 (2013).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR26", +"doi-asserted-by": "crossref", +"first-page": "78", +"DOI": "10.1088/0004-637X/772/1/78", +"volume": "772", +"author": "R Barniol Duran", +"year": "2013", +"unstructured": "Barniol Duran, R., Nakar, E. & Piran, T. Radius constraints and minimal equipartition energy of relativistically moving synchrotron sources. Astrophys. J. 772, 78 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR27", +"doi-asserted-by": "crossref", +"first-page": "69", +"DOI": "10.1071/AS02053", +"volume": "20", +"author": "AG Polatidis", +"year": "2003", +"unstructured": "Polatidis, A. G. & Conway, J. E. Proper motions in compact symmetric objects. Publ. Astron. Soc. Aust. 20, 69–74 (2003).", +"journal-title": "Publ. Astron. Soc. Aust." +}, +{ +"key": "1295_CR28", +"doi-asserted-by": "crossref", +"first-page": "L25", +"DOI": "10.3847/2041-8205/819/2/L25", +"volume": "819", +"author": "KD Alexander", +"year": "2016", +"unstructured": "Alexander, K. D., Berger, E., Guillochon, J., Zauderer, B. A. & Williams, P. K. G. Discovery of an outflow from radio observations of the tidal disruption event ASASSN-14li. Astrophys. J. Lett. 819, L25 (2016).", +"journal-title": "Astrophys. J. Lett." +}, +{ +"key": "1295_CR29", +"doi-asserted-by": "crossref", +"first-page": "127", +"DOI": "10.3847/0004-637X/827/2/127", +"volume": "827", +"author": "J Krolik", +"year": "2016", +"unstructured": "Krolik, J., Piran, T., Svirski, G. & Cheng, R. M. ASASSN-14li: a model tidal disruption event. Astrophys. J. 827, 127 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR30", +"doi-asserted-by": "crossref", +"first-page": "1", +"DOI": "10.3847/1538-4357/aab361", +"volume": "856", +"author": "DR Pasham", +"year": "2018", +"unstructured": "Pasham, D. R. & van Velzen, S. 
Discovery of a time lag between the soft X-ray and radio emission of the tidal disruption flare ASASSN-14li: evidence for linear disk–jet coupling. Astrophys. J. 856, 1 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR31", +"doi-asserted-by": "crossref", +"first-page": "L9", +"DOI": "10.1051/0004-6361/201834750", +"volume": "622", +"author": "NL Strotjohann", +"year": "2019", +"unstructured": "Strotjohann, N. L., Kowalski, M. & Franckowiak, A. Eddington bias for cosmic neutrino sources. Astron. Astrophys. 622, L9 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR32", +"doi-asserted-by": "crossref", +"first-page": "425", +"DOI": "10.1146/annurev.aa.22.090184.002233", +"volume": "22", +"author": "AM Hillas", +"year": "1984", +"unstructured": "Hillas, A. M. The origin of ultra-high-energy cosmic rays. Annu. Rev. Astron. Astrophys. 22, 425–444 (1984).", +"journal-title": "Annu. Rev. Astron. Astrophys." +}, +{ +"key": "1295_CR33", +"doi-asserted-by": "crossref", +"first-page": "eaat1378", +"DOI": "10.1126/science.aat1378", +"volume": "361", +"author": "IceCube Collaboration", +"year": "2018", +"unstructured": "IceCube Collaboration et al. Multimessenger observations of a flaring blazar coincident with high-energy neutrino IceCube-170922A. Science 361, eaat1378 (2018).", +"journal-title": "Science" +}, +{ +"key": "1295_CR34", +"unstructured": "Blaufuss, E., Kintscher, T., Lu, L. & Tung, C. F. The next generation of IceCube real-time neutrino alerts. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1021 (PoS, 2019)." +}, +{ +"key": "1295_CR35", +"doi-asserted-by": "crossref", +"first-page": "071101", +"DOI": "10.1103/PhysRevLett.116.071101", +"volume": "116", +"author": "K Murase", +"year": "2016", +"unstructured": "Murase, K., Guetta, D. & Ahlers, M. Hidden cosmic-ray accelerators as an origin of TeV–PeV cosmic neutrinos. Phys. Rev. Lett. 116, 071101 (2016).", +"journal-title": "Phys. Rev. Lett." 
+}, +{ +"key": "1295_CR36", +"unstructured": "Stein, R. Search for neutrinos from populations of optical transients. In Proc. 36th International Cosmic Ray Conference (ICRC2019) 1016 (PoS, 2019).", +"DOI": "10.22323/1.358.1016", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR37", +"doi-asserted-by": "crossref", +"first-page": "048001", +"DOI": "10.1088/1538-3873/aaff99", +"volume": "131", +"author": "MW Coughlin", +"year": "2019", +"unstructured": "Coughlin, M. W. et al. 2900 square degree search for the optical counterpart of short gamma-ray burst GRB 180523B with the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 048001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR38", +"unstructured": "Stein, R. IceCube-200107A: IceCube observation of a high-energy neutrino candidate event. GCN Circ. 26655 (2020)." +}, +{ +"key": "1295_CR39", +"doi-asserted-by": "crossref", +"first-page": "018003", +"DOI": "10.1088/1538-3873/aae8ac", +"volume": "131", +"author": "FJ Masci", +"year": "2019", +"unstructured": "Masci, F. J. et al. The Zwicky Transient Facility: data processing, products, and archive. Publ. Astron. Soc. Pac. 131, 018003 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR40", +"doi-asserted-by": "crossref", +"first-page": "018001", +"DOI": "10.1088/1538-3873/aae904", +"volume": "131", +"author": "MT Patterson", +"year": "2019", +"unstructured": "Patterson, M. T. et al. The Zwicky Transient Facility Alert Distribution System. Publ. Astron. Soc. Pac. 131, 018001 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR41", +"unstructured": "Stein, R. & Reusch, S. 
robertdstein/ampel_followup_pipeline: V1.1 Release (Zenodo, 2020); https://doi.org/10.5281/zenodo.4048336", +"DOI": "10.5281/zenodo.4048336", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR42", +"doi-asserted-by": "crossref", +"first-page": "A147", +"DOI": "10.1051/0004-6361/201935634", +"volume": "631", +"author": "J Nordin", +"year": "2019", +"unstructured": "Nordin, J. et al. Transient processing and analysis using AMPEL: alert management, photometry, and evaluation of light curves. Astron. Astrophys. 631, A147 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR43", +"doi-asserted-by": "crossref", +"first-page": "038002", +"DOI": "10.1088/1538-3873/aaf3fa", +"volume": "131", +"author": "A Mahabal", +"year": "2019", +"unstructured": "Mahabal, A. et al. Machine learning for the Zwicky Transient Facility. Publ. Astron. Soc. Pac. 131, 038002 (2019).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR44", +"doi-asserted-by": "crossref", +"first-page": "075002", +"DOI": "10.1088/1538-3873/aac410", +"volume": "130", +"author": "MT Soumagnac", +"year": "2018", +"unstructured": "Soumagnac, M. T. & Ofek, E. O. catsHTM: a tool for fast accessing and cross-matching large astronomical catalogs. Publ. Astron. Soc. Pac. 130, 075002 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR45", +"doi-asserted-by": "crossref", +"first-page": "A1", +"DOI": "10.1051/0004-6361/201833051", +"volume": "616", +"author": "Gaia Collaboration", +"year": "2018", +"unstructured": "Gaia Collaboration et al. Gaia Data Release 2. Summary of the contents and survey properties. Astron. Astrophys. 616, A1 (2018).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR46", +"doi-asserted-by": "crossref", +"first-page": "128001", +"DOI": "10.1088/1538-3873/aae3d9", +"volume": "130", +"author": "Y Tachibana", +"year": "2018", +"unstructured": "Tachibana, Y. & Miller, A. A. 
A morphological classification model to identify unresolved PanSTARRS1 sources: application in the ZTF real-time pipeline. Publ. Astron. Soc. Pac. 130, 128001 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR47", +"unstructured": "Chambers, K. C. et al. The Pan-STARRS1 Surveys. Preprint at https://arxiv.org/abs/1612.05560 (2016)." +}, +{ +"key": "1295_CR48", +"doi-asserted-by": "crossref", +"first-page": "1868", +"DOI": "10.1088/0004-6256/140/6/1868", +"volume": "140", +"author": "EL Wright", +"year": "2010", +"unstructured": "Wright, E. L. et al. The Wide-field Infrared Survey Explorer (WISE): mission description and initial on-orbit performance. Astron. J. 140, 1868–1881 (2010).", +"journal-title": "Astron. J." +}, +{ +"key": "1295_CR49", +"doi-asserted-by": "crossref", +"first-page": "051103", +"DOI": "10.1103/PhysRevLett.124.051103", +"volume": "124", +"author": "MG Aartsen", +"year": "2020", +"unstructured": "Aartsen, M. G. et al. Time-integrated neutrino source searches with 10 years of IceCube data. Phys. Rev. Lett. 124, 051103 (2020).", +"journal-title": "Phys. Rev. Lett." +}, +{ +"key": "1295_CR50", +"unstructured": "Steele, I. A. et al. The Liverpool Telescope: performance and first results. Proc. SPIE 5489, https://doi.org/10.1117/12.551456 (2004).", +"DOI": "10.1117/12.551456", +"doi-asserted-by": "publisher" +}, +{ +"key": "1295_CR51", +"doi-asserted-by": "crossref", +"first-page": "035003", +"DOI": "10.1088/1538-3873/aaa53f", +"volume": "130", +"author": "N Blagorodnova", +"year": "2018", +"unstructured": "Blagorodnova, N. et al. The SED Machine: a robotic spectrograph for fast transient classification. Publ. Astron. Soc. Pac. 130, 035003 (2018).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR52", +"doi-asserted-by": "crossref", +"first-page": "A115", +"DOI": "10.1051/0004-6361/201935344", +"volume": "627", +"author": "M Rigault", +"year": "2019", +"unstructured": "Rigault, M. et al. 
Fully automated integral field spectrograph pipeline for the SEDMachine: pysedm. Astron. Astrophys. 627, A115 (2019).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR53", +"doi-asserted-by": "crossref", +"first-page": "A68", +"DOI": "10.1051/0004-6361/201628275", +"volume": "593", +"author": "C Fremling", +"year": "2016", +"unstructured": "Fremling, C. et al. PTF12os and iPTF13bvn. Two stripped-envelope supernovae from low-mass progenitors in NGC 5806. Astron. Astrophys. 593, A68 (2016).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR54", +"doi-asserted-by": "crossref", +"first-page": "72", +"DOI": "10.3847/1538-4357/aa998e", +"volume": "852", +"author": "S van Velzen", +"year": "2018", +"unstructured": "van Velzen, S. On the mass and luminosity functions of tidal disruption flares: rate suppression due to black hole event horizons. Astrophys. J. 852, 72 (2018).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR55", +"doi-asserted-by": "crossref", +"first-page": "95", +"DOI": "10.1007/s11214-005-5095-4", +"volume": "120", +"author": "PWA Roming", +"year": "2005", +"unstructured": "Roming, P. W. A. et al. The Swift Ultra-Violet/Optical Telescope. Space Sci. Rev. 120, 95–142 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR56", +"doi-asserted-by": "crossref", +"first-page": "1005", +"DOI": "10.1086/422091", +"volume": "611", +"author": "N Gehrels", +"year": "2004", +"unstructured": "Gehrels, N. et al. The Swift Gamma-Ray Burst Mission. Astrophys. J. 611, 1005–1020 (2004).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR57", +"doi-asserted-by": "crossref", +"first-page": "19", +"DOI": "10.3847/0004-637X/829/1/19", +"volume": "829", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S., Mendez, A. J., Krolik, J. H. & Gorjian, V. Discovery of transient infrared emission from dust heated by stellar tidal disruption flares. Astrophys. J. 
829, 19 (2016).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR58", +"doi-asserted-by": "crossref", +"first-page": "575", +"DOI": "10.1093/mnras/stw307", +"volume": "458", +"author": "W Lu", +"year": "2016", +"unstructured": "Lu, W., Kumar, P. & Evans, N. J. Infrared emission from tidal disruption events—probing the pc-scale dust content around galactic nuclei. Mon. Not. R. Astron. Soc. 458, 575–581 (2016).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR59", +"unstructured": "Miller, J. S. & Stone, R. P. S. The Kast Double Spectrograph. Technical Report No. 66 (Lick Observatory, 1993)." +}, +{ +"key": "1295_CR60", +"doi-asserted-by": "crossref", +"first-page": "375", +"DOI": "10.1086/133562", +"volume": "107", +"author": "JB Oke", +"year": "1995", +"unstructured": "Oke, J. B. et al. The Keck Low-Resolution Imaging Spectrometer. Publ. Astron. Soc. Pac. 107, 375–385 (1995).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR61", +"doi-asserted-by": "crossref", +"first-page": "765", +"DOI": "10.1111/j.1365-2966.2005.08957.x", +"volume": "359", +"author": "A Garcia-Rissmann", +"year": "2005", +"unstructured": "Garcia-Rissmann, A. et al. An atlas of calcium triplet spectra of active galaxies. Mon. Not. R. Astron. Soc. 359, 765–780 (2005).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR62", +"doi-asserted-by": "crossref", +"first-page": "165", +"DOI": "10.1007/s11214-005-5097-2", +"volume": "120", +"author": "DN Burrows", +"year": "2005", +"unstructured": "Burrows, D. N. et al. The Swift X-Ray Telescope. Space Sci. Rev. 120, 165–195 (2005).", +"journal-title": "Space Sci. Rev." +}, +{ +"key": "1295_CR63", +"doi-asserted-by": "crossref", +"first-page": "L1", +"DOI": "10.1051/0004-6361:20000036", +"volume": "365", +"author": "F Jansen", +"year": "2001", +"unstructured": "Jansen, F. et al. XMM-Newton Observatory. I. The spacecraft and operations. Astron. Astrophys. 
365, L1–L6 (2001).", +"journal-title": "Astron. Astrophys." +}, +{ +"key": "1295_CR64", +"unstructured": "HI4PI Collaboration et al. HI4PI: a full-sky H i survey based on EBHIS and GASS. Astron. Astrophys. 594, A116 (2016).", +"DOI": "10.1051/0004-6361/201629178", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR65", +"unstructured": "Arnaud, K. A. in Astronomical Data Analysis Software and Systems V (eds Jacoby, G. H. & Barnes, J.) 17 (Astronomical Society of the Pacific, 1996)." +}, +{ +"key": "1295_CR66", +"doi-asserted-by": "crossref", +"first-page": "1545", +"DOI": "10.1111/j.1365-2966.2008.13953.x", +"volume": "391", +"author": "JTL Zwart", +"year": "2008", +"unstructured": "Zwart, J. T. L. et al. The Arcminute Microkelvin Imager. Mon. Not. R. Astron. Soc. 391, 1545–1558 (2008).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR67", +"doi-asserted-by": "crossref", +"first-page": "5677", +"DOI": "10.1093/mnras/sty074", +"volume": "475", +"author": "J Hickish", +"year": "2018", +"unstructured": "Hickish, J. et al. A digital correlator upgrade for the Arcminute MicroKelvin Imager. Mon. Not. R. Astron. Soc. 475, 5677–5687 (2018).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR68", +"doi-asserted-by": "crossref", +"first-page": "1396", +"DOI": "10.1093/mnras/stv1728", +"volume": "453", +"author": "YC Perrott", +"year": "2015", +"unstructured": "Perrott, Y. C. et al. AMI galactic plane survey at 16 GHz—II. Full data release with extended coverage and improved processing. Mon. Not. R. Astron. Soc. 453, 1396–1403 (2015).", +"journal-title": "Mon. Not. R. Astron. Soc." +}, +{ +"key": "1295_CR69", +"unstructured": "McMullin, J. P., Waters, B., Schiebel, D., Young, W. & Golap, K. in Astronomical Data Analysis Software and Systems XVI (eds Shaw, R. A. et al.) 127 (Astronomical Society of the Pacific, 2007)." 
+}, +{ +"key": "1295_CR70", +"doi-asserted-by": "crossref", +"first-page": "1071", +"DOI": "10.1088/0004-637X/697/2/1071", +"volume": "697", +"author": "WB Atwood", +"year": "2009", +"unstructured": "Atwood, W. B. et al. The Large Area Telescope on the Fermi Gamma-ray Space Telescope mission. Astrophys. J. 697, 1071–1102 (2009).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR71", +"unstructured": "Wood, M. et al. Fermipy: an open-source Python package for analysis of Fermi-LAT Data. In Proc. 35th International Cosmic Ray Conference (ICRC2017) 824 (PoS, 2017).", +"DOI": "10.22323/1.301.0824", +"doi-asserted-by": "crossref" +}, +{ +"key": "1295_CR72", +"unstructured": "Garrappa, S. & Buson, S. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR73", +"unstructured": "The Fermi-LAT collaboration. Fermi Large Area Telescope Fourth Source Catalog. Astrophys. J. Suppl. Ser. 247, 33 (2020)." +}, +{ +"key": "1295_CR74", +"doi-asserted-by": "crossref", +"first-page": "14", +"DOI": "10.1088/0004-637X/767/1/14", +"volume": "767", +"author": "T Pursimo", +"year": "2013", +"unstructured": "Pursimo, T. et al. The Micro-Arcsecond Scintillation-Induced Variability (MASIV) survey. III. Optical identifications and new redshifts. Astrophys. J. 767, 14 (2013).", +"journal-title": "Astrophys. J." +}, +{ +"key": "1295_CR75", +"unstructured": "Garrappa, S., Buson, S. & Fermi-LAT Collaboration. Fermi-LAT gamma-ray observations of IceCube-191001A. GCN Circ. 25932 (2019)." +}, +{ +"key": "1295_CR76", +"doi-asserted-by": "crossref", +"first-page": "133", +"DOI": "10.1088/0004-637X/802/2/133", +"volume": "802", +"author": "C Diltz", +"year": "2015", +"unstructured": "Diltz, C., Böttcher, M. & Fossati, G. Time dependent hadronic modeling of flat spectrum radio quasars. Astrophys. J. 802, 133 (2015).", +"journal-title": "Astrophys. J." 
+}, +{ +"key": "1295_CR77", +"doi-asserted-by": "crossref", +"first-page": "88", +"DOI": "10.1038/s41550-018-0610-1", +"volume": "3", +"author": "S Gao", +"year": "2019", +"unstructured": "Gao, S., Fedynitch, A., Winter, W. & Pohl, M. Modelling the coincident observation of a high-energy neutrino and a bright blazar flare. Nat. Astron. 3, 88–92 (2019).", +"journal-title": "Nat. Astron." +}, +{ +"key": "1295_CR78", +"unstructured": "Ayala, H. IceCube-191001A: HAWC follow-up. GCN Circ. 25936 (2019)." +}, +{ +"key": "1295_CR79", +"doi-asserted-by": "crossref", +"first-page": "62", +"DOI": "10.1126/science.aad1182", +"volume": "351", +"author": "S van Velzen", +"year": "2016", +"unstructured": "van Velzen, S. et al. A radio jet from the optical and x-ray bright stellar tidal disruption flare ASASSN-14li. Science 351, 62–65 (2016).", +"journal-title": "Science" +}, +{ +"key": "1295_CR80", +"doi-asserted-by": "crossref", +"first-page": "306", +"DOI": "10.1086/670067", +"volume": "125", +"author": "D Foreman-Mackey", +"year": "2013", +"unstructured": "Foreman-Mackey, D., Hogg, D. W., Lang, D. & Goodman, J. emcee: the MCMC Hammer. Publ. Astron. Soc. Pac. 125, 306 (2013).", +"journal-title": "Publ. Astron. Soc. Pac." +}, +{ +"key": "1295_CR81", +"doi-asserted-by": "crossref", +"first-page": "6", +"DOI": "10.3847/1538-4365/aab761", +"volume": "236", +"author": "J Guillochon", +"year": "2018", +"unstructured": "Guillochon, J. et al. MOSFiT: Modular Open Source Fitter for Transients. Astrophys. J. Suppl. Ser. 236, 6 (2018).", +"journal-title": "Astrophys. J. Suppl. Ser." +}, +{ +"key": "1295_CR82", +"doi-asserted-by": "crossref", +"first-page": "e008", +"DOI": "10.1017/pasa.2013.44", +"volume": "31", +"author": "J Granot", +"year": "2014", +"unstructured": "Granot, J. & van der Horst, A. J. Gamma-ray burst jets and their radio observations. Publ. Astron. Soc. Aust. 31, e008 (2014).", +"journal-title": "Publ. Astron. Soc. Aust." 
+}, +{ +"key": "1295_CR83", +"doi-asserted-by": "crossref", +"first-page": "102", +"DOI": "10.1088/0004-637X/815/2/102", +"volume": "815", +"author": "W Fong", +"year": "2015", +"unstructured": "Fong, W., Berger, E., Margutti, R. & Zauderer, B. A. A decade of short-duration gamma-ray burst broadband afterglows: energetics, circumburst densities, and jet opening angles. Astrophys. J. 815, 102 (2015).", +"journal-title": "Astrophys. J." +} +], +"container-title": [ +"Nature Astronomy" +], +"original-title": [ + +], +"language": "en", +"link": [ +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8", +"content-type": "text/html", +"content-version": "vor", +"intended-application": "text-mining" +}, +{ +"URL": "http://www.nature.com/articles/s41550-020-01295-8.pdf", +"content-type": "application/pdf", +"content-version": "vor", +"intended-application": "similarity-checking" +} +], +"deposited": { +"date-parts": [ +[ +2021, +5, +17 +] +], +"date-time": "2021-05-17T15:08:12Z", +"timestamp": 1621264092000 +}, +"score": 1.0, +"subtitle": [ + +], +"short-title": [ + +], +"issued": { +"date-parts": [ +[ +2021, +2, +22 +] +], + "date-time": "2021-05-17T15:08:12Z" +}, +"references-count": 83, +"journal-issue": { +"published-print": { +"date-parts": [ +[ +2021, +5 +] +] +}, +"issue": "5" +}, +"alternative-id": [ +"1295" +], +"URL": "http://dx.doi.org/10.1038/s41550-020-01295-8", +"relation": { +"cites": [ + +] +}, +"ISSN": [ +"2397-3366" +], +"issn-type": [ +{ +"value": "2397-3366", +"type": "electronic" +} +], +"assertion": [ +{ +"value": "21 July 2020", +"order": 1, +"name": "received", +"label": "Received", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "16 December 2020", +"order": 2, +"name": "accepted", +"label": "Accepted", +"group": { +"name": 
"ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "22 February 2021", +"order": 3, +"name": "first_online", +"label": "First Online", +"group": { +"name": "ArticleHistory", +"label": "Article History" +} +}, +{ +"value": "The authors declare no competing interests.", +"order": 1, +"name": "Ethics", +"group": { +"name": "EthicsHeading", +"label": "Competing interests" +} +} +] +} +} \ No newline at end of file From 59530a14fb747b3c29a2fa36dcd2c5fa0e13eebe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 12:34:35 +0200 Subject: [PATCH 13/70] DoiBoost AccessRigh #4362 - set BestAccessRight with the ususal comparator --- .../doiboost/DoiBoostMappingUtil.scala | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index b0c6e009b..149d8ee5c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -258,17 +258,18 @@ object DoiBoostMappingUtil { i.setHostedby(hb) }) - val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) - if (ar.nonEmpty) { - if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ - val ar = getOpenAccessQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) - } - else { - val ar = getRestrictedQualifier() - publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) - } - } + publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) +// val ar = 
publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) +// if (ar.nonEmpty) { +// if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ +// val ar = getOpenAccessQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// } +// else { +// val ar = getRestrictedQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// } +// } publication } From 662c396354d63732d95dba3055ceaccb0fe6a526 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 12:41:14 +0200 Subject: [PATCH 14/70] duplicate the number of partitions in ConvertCrossrefToOaf --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f845d97f3..34ca58344 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -99,7 +99,7 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 0977baf41d26be303200ffdb8ed7fb224b53db63 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 19 Jul 2021 17:43:52 +0200 Subject: [PATCH 15/70] 
contents mapped from the stores with 'claim' interpretation will not change their identifier along their way towards the graph --- .../raw/AbstractMdRecordToOafMapper.java | 24 +++++++++++++++---- .../raw/GenerateEntitiesApplication.java | 6 +++-- .../dhp/oa/graph/raw/OafToOafMapper.java | 5 ++++ .../dhp/oa/graph/raw/OdfToOafMapper.java | 5 ++++ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 1e80dfd46..cbaadf85c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -71,6 +71,8 @@ public abstract class AbstractMdRecordToOafMapper { private final boolean shouldHashId; + private final boolean forceOriginalId; + protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_4_SLASH = "http://datacite.org/schema/kernel-4/"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; @@ -98,11 +100,20 @@ public abstract class AbstractMdRecordToOafMapper { nsContext.put("datacite", DATACITE_SCHEMA_KERNEL_3); } + protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible, + final boolean shouldHashId, final boolean forceOriginalId) { + this.vocs = vocs; + this.invisible = invisible; + this.shouldHashId = shouldHashId; + this.forceOriginalId = forceOriginalId; + } + protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId) { this.vocs = vocs; this.invisible = invisible; this.shouldHashId = shouldHashId; + this.forceOriginalId = false; } public List 
processMdRecord(final String xml) { @@ -190,10 +201,15 @@ public abstract class AbstractMdRecordToOafMapper { final long lastUpdateTimestamp) { final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); - final String id = IdentifierFactory.createIdentifier(entity, shouldHashId); - if (!id.equals(entity.getId())) { - entity.getOriginalId().add(entity.getId()); - entity.setId(id); + + if (!forceOriginalId) { + final String id = IdentifierFactory.createIdentifier(entity, shouldHashId); + if (!id.equals(entity.getId())) { + final Set originalId = Sets.newHashSet(entity.getOriginalId()); + originalId.add(entity.getId()); + entity.setOriginalId(Lists.newArrayList(originalId)); + entity.setId(id); + } } final List oafs = Lists.newArrayList(entity); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index fcd6f459a..bbfb7429f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -163,11 +163,13 @@ public class GenerateEntitiesApplication { switch (type.toLowerCase()) { case "oaf-store-cleaned": - case "oaf-store-claim": return new OafToOafMapper(vocs, false, shouldHashId).processMdRecord(s); + case "oaf-store-claim": + return new OafToOafMapper(vocs, false, shouldHashId, true).processMdRecord(s); case "odf-store-cleaned": - case "odf-store-claim": return new OdfToOafMapper(vocs, false, shouldHashId).processMdRecord(s); + case "odf-store-claim": + return new OdfToOafMapper(vocs, false, shouldHashId, true).processMdRecord(s); case "oaf-store-intersection": return new OafToOafMapper(vocs, true, shouldHashId).processMdRecord(s); case "odf-store-intersection": diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 06aeab345..d753cddeb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -27,6 +27,11 @@ import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; public class OafToOafMapper extends AbstractMdRecordToOafMapper { + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, + final boolean forceOrginalId) { + super(vocs, invisible, shouldHashId, forceOrginalId); + } + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId) { super(vocs, invisible, shouldHashId); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index b7400873b..7925a7826 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -22,6 +22,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; + public OdfToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, + final boolean forceOrginalId) { + super(vocs, invisible, shouldHashId, forceOrginalId); + } + public OdfToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId) { super(vocs, invisible, shouldHashId); } From 65934888a1ad64226472d70a594d9fe701ee0487 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 19 Jul 2021 17:52:24 +0200 Subject: [PATCH 16/70] 
adding record identifier among the originalIds regardless of what IdentifierFactory produces --- .../dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index cbaadf85c..03c3eeb3c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -202,12 +202,13 @@ public abstract class AbstractMdRecordToOafMapper { final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); + final Set originalId = Sets.newHashSet(entity.getOriginalId()); + originalId.add(entity.getId()); + entity.setOriginalId(Lists.newArrayList(originalId)); + if (!forceOriginalId) { final String id = IdentifierFactory.createIdentifier(entity, shouldHashId); if (!id.equals(entity.getId())) { - final Set originalId = Sets.newHashSet(entity.getOriginalId()); - originalId.add(entity.getId()); - entity.setOriginalId(Lists.newArrayList(originalId)); entity.setId(id); } } From b420b11ed3c35182aaced4c9df378d2eb3ed0ec8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 18:16:23 +0200 Subject: [PATCH 17/70] duplicate the number of partitions in ProcessMag --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 34ca58344..499e0e0bb 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -124,7 +124,7 @@ --executor-memory=${sparkExecutorIntersectionMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} From 54acc5373b6e32071afa837bae209fcfcbd22500 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 18:18:09 +0200 Subject: [PATCH 18/70] changed the name of the workflows --- .../eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 03f7b7566..52f958d4d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory From 83fe31c92e0588de7c1176a004751e0f32cbad2c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 19 Jul 2021 18:19:14 +0200 Subject: [PATCH 19/70] changed the name of the workflows --- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml 
index f845d97f3..8793f339c 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory From 10d7b4f0b4f3a5e9b2eb0ab8b40b4560dc1d4069 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 20 Jul 2021 11:51:33 +0200 Subject: [PATCH 20/70] filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML searialization procedure --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 86bbae99e..a985d2371 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,6 +16,7 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable { .getOriginalId() .stream() .filter(Objects::nonNull) + .filter(id -> !id.matches("^\\d{2}" + IdentifierFactory.ID_PREFIX_SEPARATOR)) .map(s -> XmlSerializationUtils.asXmlElement("originalId", s)) .collect(Collectors.toList())); } From 31d2d6d41ea7dc590e028a5567165bf2612981bc Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 21 Jul 2021 18:09:14 +0200 Subject: [PATCH 21/70] Scholexplorer: introduction of dedup openaire --- .../eu/dnetlib/dhp/oa/merge/AuthorMerger.java | 
1 + .../graph/SparkConvertDatasetToJsonRDD.scala | 41 ++++++ .../dhp/sx/graph/SparkCreateInputGraph.scala | 2 +- .../sx/graph/convert_dataset_json_params.json | 5 + .../oozie_app/config-default.xml | 0 .../extractEntities/oozie_app/workflow.xml | 85 +++++++++++++ .../dhp/sx/graph/step2/oozie_app/workflow.xml | 120 ------------------ 7 files changed, 133 insertions(+), 121 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/{step2 => extractEntities}/oozie_app/config-default.xml (100%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/extractEntities/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index 0b602b774..fe9f94960 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -78,6 +78,7 @@ public class AuthorMerger { a -> a .getPid() .stream() + .filter(Objects::nonNull) .filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p))) .map(p -> new Tuple2<>(p, a))) .collect(Collectors.toList()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala new file mode 100644 index 000000000..0886a1fca --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -0,0 
+1,41 @@ +package eu.dnetlib.dhp.sx.graph + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.compress.GzipCodec +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +object SparkConvertDatasetToJsonRDD { + + + def main(args: Array[String]): Unit = { + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + + val resultObject = List("publication","dataset","software", "otherResearchProduct") + val mapper = new ObjectMapper() + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + + resultObject.foreach{item => + spark.read.load(s"$sourcePath/$item").as[Result].map(r=> mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) + } + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala index a37dd2132..350b00c5e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateInputGraph.scala @@ -70,7 +70,7 @@ object SparkCreateInputGraph { resultObject.foreach { r => log.info(s"Make ${r._1} unique") - makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/dedup/${r._1}",spark, r._2) + makeDatasetUnique(s"$targetPath/extracted/${r._1}",s"$targetPath/preprocess/${r._1}",spark, r._2) } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json new file mode 100644 index 000000000..8bfdde5b0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the raw graph", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/extractEntities/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/config-default.xml rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/extractEntities/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/extractEntities/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/extractEntities/oozie_app/workflow.xml new file mode 100644 index 
000000000..685976ce6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/extractEntities/oozie_app/workflow.xml @@ -0,0 +1,85 @@ + + + + sourcePath + the working dir base path + + + targetPath + the graph Raw base path + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Extract entities in raw graph + eu.dnetlib.dhp.sx.graph.SparkCreateInputGraph + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --sourcePath${sourcePath} + --targetPath${targetPath} + + + + + + + + + + + + + + + + + yarn + cluster + Generate Input Graph for deduplication + eu.dnetlib.dhp.sx.graph.SparkConvertDatasetToJsonRDD + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=3000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --sourcePath${targetPath}/preprocess + --targetPath${targetPath}/dedup + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml deleted 
file mode 100644 index 9d06c42d6..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step2/oozie_app/workflow.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - workingPath - the working path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - yarn-cluster - cluster - Extract DLI Entities (Publication) - eu.dnetlib.dhp.sx.graph.SparkSplitOafTODLIEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=5000 - ${sparkExtraOPT} - - -mt yarn-cluster - --workingPath${workingPath} - -epublication - - - - - - - - ${jobTracker} - ${nameNode} - yarn-cluster - cluster - Extract DLI Entities (Dataset) - eu.dnetlib.dhp.sx.graph.SparkSplitOafTODLIEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=5000 - ${sparkExtraOPT} - - -mt yarn-cluster - --workingPath${workingPath} - -edataset - - - - - - - - ${jobTracker} - ${nameNode} - yarn-cluster - cluster - Extract DLI Entities (Unknown) - eu.dnetlib.dhp.sx.graph.SparkSplitOafTODLIEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=5000 - ${sparkExtraOPT} - - -mt yarn-cluster - --workingPath${workingPath} - -eunknown - - - - - - - - ${jobTracker} - ${nameNode} - yarn-cluster - cluster - Extract DLI Entities (Relation) - eu.dnetlib.dhp.sx.graph.SparkSplitOafTODLIEntities - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} 
- --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=5000 - ${sparkExtraOPT} - - -mt yarn-cluster - --workingPath${workingPath} - -erelation - - - - - - - \ No newline at end of file From d94565862a39e72316d5a0338c2a756b8563fb9f Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 21 Jul 2021 21:23:11 +0200 Subject: [PATCH 22/70] fixed NPE --- .../src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index fe9f94960..f900a276d 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -67,6 +67,7 @@ public class AuthorMerger { a -> a .getPid() .stream() + .filter(Objects::nonNull) .map(p -> new Tuple2<>(pidToComparableString(p), a))) .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); From 1a5b114906107df32c0e2df33dbd2b1e9c34f871 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 22 Jul 2021 12:00:23 +0200 Subject: [PATCH 23/70] DoiBoost AccessRigh #4362 - refactoring --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 149d8ee5c..686a2f1f1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -249,8 +249,8 @@ object DoiBoostMappingUtil { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) } - val ar = getOpenAccessQualifier() - 
publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) +// val ar = getOpenAccessQualifier() +// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) } else { hb = ModelConstants.UNKNOWN_REPOSITORY From 62ae36a3d25d9110437e42391dab227485c056bb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 22 Jul 2021 15:41:38 +0200 Subject: [PATCH 24/70] fixed NPE --- .../main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java | 7 +++++-- .../eu/dnetlib/dhp/actionmanager/datacite/record.json | 2 +- .../dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index f900a276d..7a8e55a6e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -152,7 +152,7 @@ public class AuthorMerger { } private static boolean hasPid(Author a) { - if (a == null || a.getPid() == null || a.getPid().size() == 0) + if (a == null || a.getPid() == null || a.getPid().isEmpty()) return false; return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue())); } @@ -161,7 +161,10 @@ public class AuthorMerger { if (StringUtils.isNotBlank(author.getSurname())) { return new Person(author.getSurname() + ", " + author.getName(), false); } else { - return new Person(author.getFullname(), false); + if (StringUtils.isNotBlank(author.getFullname())) + return new Person(author.getFullname(), false); + else + return new Person("", false); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json index 
3ae10be73..f5aa65940 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/datacite/record.json @@ -1 +1 @@ -{"id":"10.7282/t3-sjyd-1r46","type":"dois","attributes":{"doi":"10.7282/t3-sjyd-1r46","identifiers":[],"creators":[{"name":"Huang, Xu","nameType":"Personal","givenName":"Xu","familyName":"Huang","affiliation":[],"nameIdentifiers":[]},{"name":"Guo, Zhixiong","nameType":"Personal","givenName":"Zhixiong","familyName":"Guo","nameIdentifiers":[{"schemeUri":"https://orcid.org","nameIdentifier":"https://orcid.org/0000-0003-0481-2738","nameIdentifierScheme":"ORCID"}],"affiliation":[]}],"titles":[{"title":"High thermal conductance across c-BN/diamond interface"}],"publisher":"Rutgers University","container":{},"publicationYear":2099,"subjects":[{"subject":"Diamond"},{"subject":"Cubic boron nitride"},{"subject":"Thermal conductivity"},{"subject":"Interface"},{"subject":"Phonon"},{"subject":"Thermal conductance"}],"contributors":[],"dates":[{"date":"2099-12-31","dateType":"Accepted"},{"date":"2099","dateType":"Issued"}],"language":"en","types":{"ris":"RPRT","bibtex":"article","citeproc":"article-journal","schemaOrg":"ScholarlyArticle","resourceType":"Accepted manuscript","resourceTypeGeneral":"Text"},"relatedIdentifiers":[],"sizes":[],"formats":["application/pdf"],"version":null,"rightsList":[{"rights":"Embargo"}],"descriptions":[{"description":"High thermal conductivity electronic components with low interfacial thermal resistance are of technological importance and fundamental interest of research. Diamond, a superhard material with ultrahigh thermal conductivity at room temperature, is desirable for microelectronics thermal management. Cubic polymorph of boron nitride (c-BN) is a promising material due to wide bandgap and diamond like structure and properties. 
To understand the nature in thermal transport of diamond, c-BN and the most commonly used silicon (Si) semiconductor, ab initio phonon Boltzmann transport equations are employed to investigate lattice vibrational properties of these three materials. At 300 K, the predicted thermal conductivity of Si, diamond and c-BN reached 142, 2112, and 736 W/(m��K), respectively. What's more, heat transport phenomena across the interfaces of Si/diamond, c-BN/diamond and Si/c-BN are unfolded. In comparison, the interfacial thermal conductance of c-BN/diamond is ten-fold of Si/diamond; besides, the thermal conductance across Si/c-BN interface is 20.2% larger than that of Si/diamond at 300 K and 18.9% larger at 340 K. These findings provide us new vision and potential solution to heat dissipation of high-local-power density devices, shedding light on future thermal management of c-BN and diamond related electronics.","descriptionType":"Abstract"}],"geoLocations":[],"fundingReferences":[],"url":"https://scholarship.libraries.rutgers.edu/discovery/fulldisplay/alma991031549917804646/01RUT_INST:ResearchRepository","contentUrl":null,"metadataVersion":1,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"viewCount":0,"downloadCount":0,"referenceCount":0,"citationCount":0,"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2020-06-30T21:12:19Z","registered":"2020-07-02T16:45:07Z","published":null,"updated":"2021-01-14T18:24:19Z"},"relationships":{"client":{"data":{"id":"rutgers.lib","type":"clients"}}}} \ No newline at end of file +{"id":"10.5517/ccdc.csd.cc25rpzm","type":"dois","attributes":{"doi":"10.5517/ccdc.csd.cc25rpzm","prefix":"10.5517","suffix":"ccdc.csd.cc25rpzm","identifiers":[{"identifier":"2018781","identifierType":"CCDC"}],"alternateIdentifiers":[{"alternateIdentifierType":"CCDC","alternateIdentifier":"2018781"}],"creators":[{"name":"Ling, 
Irene","affiliation":[],"nameIdentifiers":[]},{"name":"Sobolev, Alexandre N.","affiliation":[],"nameIdentifiers":[]},{"name":"Raston, Colin L.","affiliation":[],"nameIdentifiers":[]}],"titles":[{"title":"CCDC 2018781: Experimental Crystal Structure Determination"}],"publisher":"Cambridge Crystallographic Data Centre","container":{},"publicationYear":2021,"subjects":[{"subject":"Crystal Structure"},{"subject":"Experimental 3D Coordinates"},{"subject":"Crystal System"},{"subject":"Space Group"},{"subject":"Cell Parameters"},{"subject":"Crystallography"},{"subject":"bis[penta-aqua-copper(ii)] bis(mu-5,11,17,23-tetra-sulfonato-25,26,27,28-tetrahydroxycalix(4)arene)-dodeca-aqua-tri-copper(ii) bis(nitrate) heptahydrate"}],"contributors":[],"dates":[],"language":"en","types":{"ris":"DATA","bibtex":"misc","citeproc":"dataset","schemaOrg":"Dataset","resourceTypeGeneral":"Dataset"},"relatedIdentifiers":[{"relationType":"IsSupplementTo","relatedIdentifier":"10.1080/00958972.2020.1849642","relatedIdentifierType":"DOI"}],"sizes":[],"formats":["CIF"],"version":null,"rightsList":[],"descriptions":[{"description":"Related Article: Irene Ling, Alexandre N. Sobolev, Colin L. 
Raston|2021|J.Coord.Chem.|74|40|doi:10.1080/00958972.2020.1849642","descriptionType":"Other"}],"geoLocations":[],"fundingReferences":[],"xml":"PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHJlc291cmNlIHhtbG5zOnhzaT0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEtaW5zdGFuY2UiIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00L21ldGFkYXRhLnhzZCI+CiAgPGlkZW50aWZpZXIgaWRlbnRpZmllclR5cGU9IkRPSSI+MTAuNTUxNy9DQ0RDLkNTRC5DQzI1UlBaTTwvaWRlbnRpZmllcj4KICA8Y3JlYXRvcnM+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lPkxpbmcsIElyZW5lPC9jcmVhdG9yTmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWU+U29ib2xldiwgQWxleGFuZHJlIE4uPC9jcmVhdG9yTmFtZT4KICAgIDwvY3JlYXRvcj4KICAgIDxjcmVhdG9yPgogICAgICA8Y3JlYXRvck5hbWU+UmFzdG9uLCBDb2xpbiBMLjwvY3JlYXRvck5hbWU+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPkNDREMgMjAxODc4MTogRXhwZXJpbWVudGFsIENyeXN0YWwgU3RydWN0dXJlIERldGVybWluYXRpb248L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+Q2FtYnJpZGdlIENyeXN0YWxsb2dyYXBoaWMgRGF0YSBDZW50cmU8L3B1Ymxpc2hlcj4KICA8cHVibGljYXRpb25ZZWFyPjIwMjE8L3B1YmxpY2F0aW9uWWVhcj4KICA8cmVzb3VyY2VUeXBlIHJlc291cmNlVHlwZUdlbmVyYWw9IkRhdGFzZXQiLz4KICA8c3ViamVjdHM+CiAgICA8c3ViamVjdD5DcnlzdGFsIFN0cnVjdHVyZTwvc3ViamVjdD4KICAgIDxzdWJqZWN0PkV4cGVyaW1lbnRhbCAzRCBDb29yZGluYXRlczwvc3ViamVjdD4KICAgIDxzdWJqZWN0PkNyeXN0YWwgU3lzdGVtPC9zdWJqZWN0PgogICAgPHN1YmplY3Q+U3BhY2UgR3JvdXA8L3N1YmplY3Q+CiAgICA8c3ViamVjdD5DZWxsIFBhcmFtZXRlcnM8L3N1YmplY3Q+CiAgICA8c3ViamVjdD5DcnlzdGFsbG9ncmFwaHk8L3N1YmplY3Q+CiAgICA8c3ViamVjdD5iaXNbcGVudGEtYXF1YS1jb3BwZXIoaWkpXSBiaXMobXUtNSwxMSwxNywyMy10ZXRyYS1zdWxmb25hdG8tMjUsMjYsMjcsMjgtdGV0cmFoeWRyb3h5Y2FsaXgoNClhcmVuZSktZG9kZWNhLWFxdWEtdHJpLWNvcHBlcihpaSkgYmlzKG5pdHJhdGUpIGhlcHRhaHlkcmF0ZTwvc3ViamVjdD4KICA8L3N1YmplY3RzPgogIDxsYW5ndWFnZT5lbmc8L2xhbmd1YWdlPgogIDxhbHRlcm5hdGVJZGVudGlmaWVycz4KICAgIDxhbHRlcm5hdGVJZGVudGlmaWVyIGFsdGVybmF0ZUlkZW50aWZpZ
XJUeXBlPSJDQ0RDIj4yMDE4NzgxPC9hbHRlcm5hdGVJZGVudGlmaWVyPgogIDwvYWx0ZXJuYXRlSWRlbnRpZmllcnM+CiAgPHJlbGF0ZWRJZGVudGlmaWVycz4KICAgIDxyZWxhdGVkSWRlbnRpZmllciByZWxhdGVkSWRlbnRpZmllclR5cGU9IkRPSSIgcmVsYXRpb25UeXBlPSJJc1N1cHBsZW1lbnRUbyI+MTAuMTA4MC8wMDk1ODk3Mi4yMDIwLjE4NDk2NDI8L3JlbGF0ZWRJZGVudGlmaWVyPgogIDwvcmVsYXRlZElkZW50aWZpZXJzPgogIDxzaXplcy8+CiAgPGZvcm1hdHM+CiAgICA8Zm9ybWF0PkNJRjwvZm9ybWF0PgogIDwvZm9ybWF0cz4KICA8dmVyc2lvbi8+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9Ik90aGVyIj5SZWxhdGVkIEFydGljbGU6IElyZW5lIExpbmcsICBBbGV4YW5kcmUgTi4gU29ib2xldiwgIENvbGluIEwuIFJhc3RvbnwyMDIxfEouQ29vcmQuQ2hlbS58NzR8NDB8ZG9pOjEwLjEwODAvMDA5NTg5NzIuMjAyMC4xODQ5NjQyPC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KPC9yZXNvdXJjZT4K","url":"http://www.ccdc.cam.ac.uk/services/structure_request?id=doi:10.5517/ccdc.csd.cc25rpzm&sid=DataCite","contentUrl":null,"metadataVersion":3,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"api","isActive":true,"state":"findable","reason":null,"viewCount":0,"viewsOverTime":[],"downloadCount":0,"downloadsOverTime":[],"referenceCount":0,"citationCount":0,"citationsOverTime":[],"partCount":0,"partOfCount":0,"versionCount":0,"versionOfCount":0,"created":"2021-03-09T13:25:35.000Z","registered":"2021-03-09T13:25:36.000Z","published":"2021","updated":"2021-03-31T21:49:56.000Z"},"relationships":{"client":{"data":{"id":"ccdc.csd","type":"clients"}},"provider":{"data":{"id":"ccdc","type":"providers"}},"media":{"data":{"id":"10.5517/ccdc.csd.cc25rpzm","type":"media"}},"references":{"data":[]},"citations":{"data":[]},"parts":{"data":[]},"partOf":{"data":[]},"versions":{"data":[]},"versionOf":{"data":[]}}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala index 0886a1fca..3ee0c7dd6 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertDatasetToJsonRDD.scala @@ -33,6 +33,7 @@ object SparkConvertDatasetToJsonRDD { val mapper = new ObjectMapper() implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + resultObject.foreach{item => spark.read.load(s"$sourcePath/$item").as[Result].map(r=> mapper.writeValueAsString(r))(Encoders.STRING).rdd.saveAsTextFile(s"$targetPath/${item.toLowerCase}", classOf[GzipCodec]) } From 058b636d4d0949e0b76773b280fd3f65987afefb Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 22 Jul 2021 16:08:54 +0200 Subject: [PATCH 25/70] added control to check if the entity exists --- .../dhp/oa/dedup/SparkUpdateEntity.java | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java index 03709c8fe..fdef7f77d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java @@ -23,6 +23,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -77,48 +78,54 @@ public class SparkUpdateEntity extends AbstractSparkAction { (type, clazz) -> { final String outputPath = dedupGraphPath + "/" + type; removeOutputDir(spark, outputPath); + final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); + if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { + JavaRDD 
sourceEntity = sc + .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); - JavaRDD sourceEntity = sc - .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); + if (mergeRelExists(workingPath, type.toString())) { - if (mergeRelExists(workingPath, type.toString())) { + final String mergeRelPath = DedupUtility + .createMergeRelPath(workingPath, "*", type.toString()); + final String dedupRecordPath = DedupUtility + .createDedupRecordPath(workingPath, "*", type.toString()); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, "*", type.toString()); - final String dedupRecordPath = DedupUtility - .createDedupRecordPath(workingPath, "*", type.toString()); + final Dataset rel = spark + .read() + .load(mergeRelPath) + .as(Encoders.bean(Relation.class)); - final Dataset rel = spark.read().load(mergeRelPath).as(Encoders.bean(Relation.class)); + final JavaPairRDD mergedIds = rel + .where("relClass == 'merges'") + .where("source != target") + .select(rel.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); - final JavaPairRDD mergedIds = rel - .where("relClass == 'merges'") - .where("source != target") - .select(rel.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); + JavaPairRDD entitiesWithId = sourceEntity + .mapToPair( + (PairFunction) s -> new Tuple2<>( + MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); + if (type == EntityType.organization) // exclude root records from organizations + entitiesWithId = excludeRootOrgs(entitiesWithId, rel); - JavaPairRDD entitiesWithId = sourceEntity - .mapToPair( - (PairFunction) s -> new Tuple2<>( - MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); - if (type == EntityType.organization) // exclude root records from organizations - entitiesWithId = excludeRootOrgs(entitiesWithId, rel); + JavaRDD map = entitiesWithId + .leftOuterJoin(mergedIds) + 
.map(k -> { + if (k._2()._2().isPresent()) { + return updateDeletedByInference(k._2()._1(), clazz); + } + return k._2()._1(); + }); - JavaRDD map = entitiesWithId - .leftOuterJoin(mergedIds) - .map(k -> { - if (k._2()._2().isPresent()) { - return updateDeletedByInference(k._2()._1(), clazz); - } - return k._2()._1(); - }); + sourceEntity = map.union(sc.textFile(dedupRecordPath)); - sourceEntity = map.union(sc.textFile(dedupRecordPath)); + } + sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); } - - sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); }); } From 43e9380cd351bbc4697548c79c1a48e1bad7ac3b Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 23 Jul 2021 11:25:18 +0200 Subject: [PATCH 26/70] update resolve relation to use the same format of openaire graph --- .../dhp/sx/graph/SparkResolveRelation.scala | 77 +++++++++++++++---- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index 6ee575e2a..11d026f02 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -4,7 +4,12 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf +import org.apache.spark.rdd.RDD import org.apache.spark.sql._ +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString} +import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ @@ -29,23 +34,11 @@ object SparkResolveRelation { val workingPath = parser.get("workingPath") log.info(s"workingPath -> $workingPath") - - 
implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) implicit val relEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) import spark.implicits._ - val entities:Dataset[Result] = spark.read.load(s"$entityPath/*").as[Result] - entities.flatMap(e => e.getPid.asScala - .map(p => - convertPidToDNETIdentifier(p.getValue, p.getQualifier.getClassid)) - .filter(s => s!= null) - .map(s => (s,e.getId)) - ).groupByKey(_._1) - .reduceGroups((x,y) => if (x._2.startsWith("50|doi") || x._2.startsWith("50|pmid")) x else y) - .map(s =>s._2) - .write - .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedPid") + + extractPidResolvedTableFromJsonRDD(spark, entityPath, workingPath) val rPid:Dataset[(String,String)] = spark.read.load(s"$workingPath/resolvedPid").as[(String,String)] @@ -74,11 +67,65 @@ object SparkResolveRelation { }.filter(r => r.getSource.startsWith("50")&& r.getTarget.startsWith("50")) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedRelation") + .save(s"$workingPath/relation") } + private def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: json4s.JValue = parse(input) + val id:String = (json \ "id").extract[String] + val result: List[(String,String)] = for { + JObject(pids) <- json \ "pid" + JField("value", JString(pidValue)) <- pids + JField("qualifier", JObject(qualifier)) <- pids + JField("classname", JString(pidType)) <- qualifier + } yield (pidValue, pidType) + (id,result) + } + private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, entityPath: String, workingPath: String) = { + import spark.implicits._ + + val d: RDD[(String,String)] = spark.sparkContext.textFile(s"$entityPath/*") + .map(i => extractPidsFromRecord(i)) + .filter(s => s != null && s._2!=null && s._2.nonEmpty) + .flatMap{ p => + p._2.map(pid => + (p._1,convertPidToDNETIdentifier(pid._1, pid._2)) + ) + } + + 
spark.createDataset(d) + .groupByKey(_._1) + .reduceGroups((x, y) => if (x._2.startsWith("50|doi") || x._2.startsWith("50|pmid")) x else y) + .map(s => s._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/resolvedPid") + } + + + /* + This method should be used once we finally convert everythings in Kryo dataset + instead of using rdd of json + */ + private def extractPidResolvedTableFromKryo(spark: SparkSession, entityPath: String, workingPath: String) = { + import spark.implicits._ + implicit val oafEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + val entities: Dataset[Result] = spark.read.load(s"$entityPath/*").as[Result] + entities.flatMap(e => e.getPid.asScala + .map(p => + convertPidToDNETIdentifier(p.getValue, p.getQualifier.getClassid)) + .filter(s => s != null) + .map(s => (s, e.getId)) + ).groupByKey(_._1) + .reduceGroups((x, y) => if (x._2.startsWith("50|doi") || x._2.startsWith("50|pmid")) x else y) + .map(s => s._2) + .write + .mode(SaveMode.Overwrite) + .save(s"$workingPath/resolvedPid") + } def convertPidToDNETIdentifier(pid:String, pidType: String):String = { if (pid==null || pid.isEmpty || pidType== null || pidType.isEmpty) From ca74e8dd02617b5c2911f1cc4be53a47f08f2f13 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 23 Jul 2021 11:40:06 +0200 Subject: [PATCH 27/70] create a separate wf for resolving relation --- .../oozie_app/config-default.xml | 0 .../resolverelation/oozie_app/workflow.xml | 52 ++++++++++++++++ .../dhp/sx/graph/step3/oozie_app/workflow.xml | 61 ------------------- 3 files changed, 52 insertions(+), 61 deletions(-) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/{step3 => resolverelation}/oozie_app/config-default.xml (100%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml delete mode 100644 
dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/config-default.xml rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml new file mode 100644 index 000000000..e73b61a74 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml @@ -0,0 +1,52 @@ + + + + entityPath + the path of deduplicate Entities + + + relationPath + the path of relation unresolved + + + targetPath + the path of relation unresolved + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + yarn + cluster + Resolve Relations in raw graph + eu.dnetlib.dhp.sx.graph.SparkResolveRelation + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=3000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn + --relationPath${relationPath} + --workingPath${targetPath} + --entityPath${entityPath} + + + 
+ + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/workflow.xml deleted file mode 100644 index 4d54b2afb..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step3/oozie_app/workflow.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - sourcePath - the source path - - - targetPath - the source path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - entity - the entity to be merged - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - ${jobTracker} - ${nameNode} - yarn-cluster - cluster - Merge ${entity} - eu.dnetlib.dhp.sx.graph.SparkScholexplorerCreateRawGraphJob - dhp-graph-mapper-${projectVersion}.jar - --executor-memory ${sparkExecutorMemory} --driver-memory=${sparkDriverMemory} ${sparkExtraOPT} - -mt yarn-cluster - --sourcePath${sourcePath}/${entity} - --targetPath${targetPath}/${entity} - --entity${entity} - - - - - - - \ No newline at end of file From bc835d2024df229fa0fe132b7d216ce94dee456e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 23 Jul 2021 11:55:55 +0200 Subject: [PATCH 28/70] [cleaning] fixed filtering function for missing titles --- .../eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index a75cc52e6..e5181b111 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -98,7 +98,7 @@ public class GraphCleaningFunctions 
extends CleaningFunctions { Result r = (Result) value; - if (Objects.nonNull(r.getTitle()) && r.getTitle().isEmpty()) { + if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) { return false; } From 4a439c3863de04e1c3c051ed2e2fc18f4731d1a9 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 23 Jul 2021 13:23:25 +0200 Subject: [PATCH 29/70] NPE fixed --- .../java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index 11d026f02..9e7963b2a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -48,7 +48,7 @@ object SparkResolveRelation { m => val sourceResolved = m._2 val currentRelation = m._1._2 - if (sourceResolved!=null && sourceResolved._2.nonEmpty) + if (sourceResolved!=null && sourceResolved._2!=null && sourceResolved._2.nonEmpty) currentRelation.setSource(sourceResolved._2) currentRelation }.write From cfde63a7c3741111b2c4b95573c2e8a166732935 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 23 Jul 2021 14:17:17 +0200 Subject: [PATCH 30/70] fixed resolve relation join --- .../java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index 9e7963b2a..aa09311e3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ 
-44,7 +44,7 @@ object SparkResolveRelation { val relationDs:Dataset[(String,Relation)] = spark.read.load(relationPath).as[Relation].map(r => (r.getSource.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - relationDs.joinWith(rPid, relationDs("_1").equalTo(rPid("_1")), "left").map{ + relationDs.joinWith(rPid, relationDs("_1").equalTo(rPid("_2")), "left").map{ m => val sourceResolved = m._2 val currentRelation = m._1._2 @@ -57,7 +57,7 @@ object SparkResolveRelation { val relationSourceResolved:Dataset[(String,Relation)] = spark.read.load(s"$workingPath/resolvedSource").as[Relation].map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) - relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_1")), "left").map{ + relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left").map{ m => val targetResolved = m._2 val currentRelation = m._1._2 From d9e3b8993739c649b622aaa6c6c401710f8b2a31 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 23 Jul 2021 16:38:32 +0200 Subject: [PATCH 31/70] implemented last part of workflows to generate scholixGraph --- .../sx/graph/SparkConvertRDDtoDataset.scala | 43 ++++++++++++++++ .../dhp/sx/graph/SparkResolveRelation.scala | 22 ++++---- .../oozie_app/config-default.xml | 0 .../oozie_app/workflow.xml | 51 ++++--------------- .../resolverelation/oozie_app/workflow.xml | 12 ++++- 5 files changed, 75 insertions(+), 53 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/{step1 => finalGraph}/oozie_app/config-default.xml (100%) rename dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/{step1 => finalGraph}/oozie_app/workflow.xml (77%) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala new file mode 100644 index 000000000..2cd176dee --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.sx.graph +import com.cloudera.com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} +object SparkConvertRDDtoDataset { + + def main(args: Array[String]): Unit = { + val entities = List( + ("dataset", classOf[OafDataset]), + ("otherresearchproduct", classOf[OtherResearchProduct]), + ("publication", classOf[Publication]), + ("software", classOf[Software]) + ) + + val log: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/convert_dataset_json_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + val targetPath = parser.get("targetPath") + log.info(s"targetPath -> $targetPath") + val mapper = new ObjectMapper() + implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + + entities.foreach{ + e => + val rdd =spark.sparkContext.textFile(s"$sourcePath/${e._1}").map(s => mapper.readValue(s, e._2)) + spark.createDataset(rdd).as[Result].write.mode(SaveMode.Overwrite).save(s"$targetPath/${e._1}") + } + } +} diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index aa09311e3..0d0dc4159 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -30,7 +30,7 @@ object SparkResolveRelation { val relationPath = parser.get("relationPath") log.info(s"sourcePath -> $relationPath") val entityPath = parser.get("entityPath") - log.info(s"targetPath -> $entityPath") + log.info(s"entityPath -> $entityPath") val workingPath = parser.get("workingPath") log.info(s"workingPath -> $workingPath") @@ -48,8 +48,8 @@ object SparkResolveRelation { m => val sourceResolved = m._2 val currentRelation = m._1._2 - if (sourceResolved!=null && sourceResolved._2!=null && sourceResolved._2.nonEmpty) - currentRelation.setSource(sourceResolved._2) + if (sourceResolved!=null && sourceResolved._1!=null && sourceResolved._1.nonEmpty) + currentRelation.setSource(sourceResolved._1) currentRelation }.write .mode(SaveMode.Overwrite) @@ -61,13 +61,13 @@ object SparkResolveRelation { m => val targetResolved = m._2 val currentRelation = m._1._2 - if (targetResolved!=null && targetResolved._2.nonEmpty) - currentRelation.setTarget(targetResolved._2) + if (targetResolved!=null && targetResolved._1.nonEmpty) + currentRelation.setTarget(targetResolved._1) currentRelation }.filter(r => r.getSource.startsWith("50")&& r.getTarget.startsWith("50")) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/relation") + .save(s"$workingPath/relation_resolved") } @@ -89,16 +89,16 @@ object SparkResolveRelation { val d: RDD[(String,String)] = spark.sparkContext.textFile(s"$entityPath/*") .map(i => extractPidsFromRecord(i)) - .filter(s => s != null && s._2!=null && s._2.nonEmpty) + .filter(s => s != null && s._1!= null && s._2!=null 
&& s._2.nonEmpty) .flatMap{ p => p._2.map(pid => - (p._1,convertPidToDNETIdentifier(pid._1, pid._2)) + (p._1, convertPidToDNETIdentifier(pid._1, pid._2)) ) - } + }.filter(r =>r._1 != null || r._2 != null) spark.createDataset(d) - .groupByKey(_._1) - .reduceGroups((x, y) => if (x._2.startsWith("50|doi") || x._2.startsWith("50|pmid")) x else y) + .groupByKey(_._2) + .reduceGroups((x, y) => if (x._1.startsWith("50|doi") || x._1.startsWith("50|pmid")) x else y) .map(s => s._2) .write .mode(SaveMode.Overwrite) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/config-default.xml rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml similarity index 77% rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml rename to dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml index 3ea4e9d30..4e601bff3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/step1/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sourcePath @@ -6,48 +6,22 @@ targetPath - the graph Raw base path + the final graph path - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + yarn cluster - Extract entities in raw graph - 
eu.dnetlib.dhp.sx.graph.SparkCreateInputGraph - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn - --sourcePath${sourcePath} - --targetPath${targetPath} - - - - - - - - - yarn - cluster - Resolve Relations in raw graph - eu.dnetlib.dhp.sx.graph.SparkResolveRelation + Import JSONRDD to Dataset kryo + eu.dnetlib.dhp.sx.graph.SparkConvertRDDtoDataset dhp-graph-mapper-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -60,9 +34,8 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn - --relationPath${targetPath}/extracted/relation - --workingPath${targetPath}/resolved/ - --entityPath${targetPath}/dedup + --sourcePath${sourcePath} + --targetPath${targetPath}/entities @@ -87,7 +60,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --masteryarn - --sourcePath${targetPath}/dedup + --sourcePath${targetPath}/entities --targetPath${targetPath}/provision/summaries @@ -114,7 +87,7 @@ --masteryarn --summaryPath${targetPath}/provision/summaries --targetPath${targetPath}/provision/scholix - --relationPath${targetPath}/resolved/resolvedRelation + --relationPath${sourcePath}/relation_resolved @@ -182,9 +155,5 @@ - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml index e73b61a74..7683ff94c 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/resolverelation/oozie_app/workflow.xml @@ -15,14 +15,24 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + yarn From a0393607a7cadb821ebf25fb76e72d226127440a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 23 Jul 2021 18:14:37 +0200 Subject: [PATCH 32/70] mapping funding relations from Datacite should be done according to the actual result identifier --- .../actionmanager/datacite/DataciteToOAFTransformation.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index 0cdf0accb..045927bed 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -532,11 +532,11 @@ object DataciteToOAFTransformation { JField("awardUri", JString(awardUri)) <- fundingReferences } yield awardUri + result.setId(IdentifierFactory.createIdentifier(result)) var relations: List[Relation] = awardUris.flatMap(a => get_projectRelation(a, result.getId)).filter(r => r != null) - fix_figshare(result) - result.setId(IdentifierFactory.createIdentifier(result)) + if (result.getId == null) return List() From f9fbb0f26193c8959c8b8cc527c734a60cfdd9f7 Mon Sep 17 00:00:00 2001 From: antleb Date: Sat, 24 Jul 2021 16:40:28 +0300 Subject: [PATCH 33/70] added indicators second sprint --- .../step16_7-createIndicatorsTables.sql | 197 +++++++++++++++++- 1 file changed, 196 insertions(+), 1 deletion(-) diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql index 8998cb9fc..a2fc88a39 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql @@ -39,4 +39,199 @@ from publication p join result_instance ri on ri.id = p.id join datasource on datasource.id = ri.hostedby where datasource.id like '%doajarticles%') tmp -on p.id= tmp.id; \ No newline at end of file +on p.id= tmp.id; + +create table indi_project_pubs_count stored as parquet as +select pr.id id, count(p.id) total_pubs from project_results pr +join publication p on p.id=pr.result +group by pr.id + +create table indi_project_datasets_count stored as parquet as +select pr.id id, count(d.id) total_datasets from project_results pr +join dataset d on d.id=pr.result +group by pr.id + +create table indi_project_software_count stored as parquet as +select pr.id id, count(s.id) total_software from project_results pr +join software s on s.id=pr.result +group by pr.id + +create table indi_project_otherresearch_count stored as parquet as +select pr.id id, count(o.id) total_other from project_results pr +join otherresearchproduct o on o.id=pr.result +group by pr.id + +create table indi_pub_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM 
publication p + join result_organization ro on p.id=ro.id + join organization o on o.id=ro.organization + where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + +create table indi_dataset_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM dataset d + join result_organization ro on d.id=ro.id + join organization o on o.id=ro.organization + where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + +create table indi_software_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM software s + join result_organization ro on s.id=ro.id + join SOURCER.organization o on o.id=ro.organization + where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + + +create table indi_other_avg_year_country_oa stored as parquet as +select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, +round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA + from + (SELECT year, country, SUM(CASE + WHEN bestlicence='Open Access' THEN 1 + ELSE 0 + END) AS OpenAccess, SUM(CASE + WHEN bestlicence<>'Open Access' THEN 1 + ELSE 0 + END) AS NonOpenAccess + FROM otherresearchproduct orp + join result_organization ro on orp.id=ro.id + join organization o on o.id=ro.organization + 
where cast(year as int)>=2003 and cast(year as int)<=2021 + group by year, country) tmp + +create table indi_pub_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from publication_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join publication p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, round(no_of_pubs/total*100,3) averageofpubs +from total + +create table indi_dataset_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from dataset_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join dataset p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, round(no_of_pubs/total*100,3) averageofdataset +from total + +create table indi_software_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from software_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join software p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, round(no_of_pubs/total*100,3) averageofsoftware +from total + +create table indi_other_avg_year_context_oa stored as parquet as +with total as +(select count(distinct pc.id) no_of_pubs, year, c.name name, sum(count(distinct pc.id)) over(PARTITION by year) as total from otherresearchproduct_concepts pc +join context c on pc.concept like concat('%',c.id,'%') +join otherresearchproduct p on p.id=pc.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by c.name, year ) +select year, name, 
round(no_of_pubs/total*100,3) averageofother +from total + +create table indi_other_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from otherresearchproduct_datasources pd +join datasource d on datasource=d.id +join otherresearchproduct p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct +from total + +create table indi_software_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from software_datasources pd +join datasource d on datasource=d.id +join software p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfSoftware +from total + +create table indi_dataset_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from dataset_datasources pd +join datasource d on datasource=d.id +join dataset p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfDatasets +from total + +create table indi_pub_avg_year_content_oa stored as parquet as +with total as +(select count(distinct pd.id) no_of_pubs, year, d.type type, sum(count(distinct pd.id)) over(PARTITION by year) as total +from publication_datasources pd +join datasource d on datasource=d.id +join publication p on p.id=pd.id +where cast(year as int)>=2003 and cast(year as int)<=2021 +group by d.type, year) +select year, type, round(no_of_pubs/total*100,3) averageOfPubs +from total + +create table 
indi_pub_has_cc_licence stored as parquet as +select distinct p.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license +from publication p +left outer join (select p.id, license.type as lic from publication p +join publication_licenses as license on license.id = p.id +where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp +on p.id= tmp.id + +create table indi_pub_has_cc_licence_url stored as parquet as +select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url +from publication p +left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host +from publication p +join publication_licenses as license on license.id = p.id +WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp +on p.id= tmp.id + + +create table indi_pub_has_abstract stored as parquet as +select distinct publication.id, coalesce(abstract, 1) has_abstract +from publication \ No newline at end of file From 3920c69bc8e050ada6284f8d40e687e5dd9b1761 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Sun, 25 Jul 2021 09:51:24 +0200 Subject: [PATCH 34/70] change implementation of resolve Relation to generate jsonRdd in output --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 ++- .../dhp/sx/graph/SparkResolveRelation.scala | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 58009bfcf..3f27b9442 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,7 +38,8 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public 
class GroupEntitiesSparkJob { +public class +GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index 0d0dc4159..82bf3c50e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -1,8 +1,10 @@ package eu.dnetlib.dhp.sx.graph +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import org.apache.commons.io.IOUtils +import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql._ @@ -40,7 +42,9 @@ object SparkResolveRelation { extractPidResolvedTableFromJsonRDD(spark, entityPath, workingPath) - val rPid:Dataset[(String,String)] = spark.read.load(s"$workingPath/resolvedPid").as[(String,String)] + val mappper = new ObjectMapper() + + val rPid:Dataset[(String,String)] = spark.read.load(s"$workingPath/relationResolvedPid").as[(String,String)] val relationDs:Dataset[(String,Relation)] = spark.read.load(relationPath).as[Relation].map(r => (r.getSource.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) @@ -68,6 +72,11 @@ object SparkResolveRelation { .write .mode(SaveMode.Overwrite) .save(s"$workingPath/relation_resolved") + + spark.read.load(s"$workingPath/relation_resolved").as[Relation] + .map(r => mappper.writeValueAsString(r)) + .rdd.saveAsTextFile(s"$workingPath/relation", classOf[GzipCodec]) + } @@ -102,7 +111,7 @@ object SparkResolveRelation { .map(s => s._2) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedPid") + 
.save(s"$workingPath/relationResolvedPid") } @@ -124,7 +133,7 @@ object SparkResolveRelation { .map(s => s._2) .write .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedPid") + .save(s"$workingPath/relationResolvedPid") } def convertPidToDNETIdentifier(pid:String, pidType: String):String = { From 8fac10c91e109fd3ea74d4534b68df468618bf1a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Sun, 25 Jul 2021 11:15:37 +0200 Subject: [PATCH 35/70] fixed defintion wf of creation final infospace of scholexplorer --- .../sx/graph/SparkConvertRDDtoDataset.scala | 58 +++++++++++++------ .../sx/graph/SparkCreateSummaryObject.scala | 5 +- .../dhp/sx/graph/SparkResolveRelation.scala | 4 +- .../graph/finalGraph/oozie_app/workflow.xml | 4 +- 4 files changed, 48 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 2cd176dee..cb41d6134 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.sx.graph -import com.cloudera.com.fasterxml.jackson.databind.ObjectMapper + +import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} @@ -9,12 +10,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkConvertRDDtoDataset { def main(args: Array[String]): Unit = { - val 
entities = List( - ("dataset", classOf[OafDataset]), - ("otherresearchproduct", classOf[OtherResearchProduct]), - ("publication", classOf[Publication]), - ("software", classOf[Software]) - ) + val log: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() @@ -29,15 +25,43 @@ object SparkConvertRDDtoDataset { val sourcePath = parser.get("sourcePath") log.info(s"sourcePath -> $sourcePath") - val targetPath = parser.get("targetPath") - log.info(s"targetPath -> $targetPath") - val mapper = new ObjectMapper() - implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + val t = parser.get("targetPath") + log.info(s"targetPath -> $t") + + val entityPath = s"$t/entities" + val relPath = s"$t/relation" + val mapper = new ObjectMapper() + implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) + implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) + implicit val relationEncoder: Encoder[Relation] = Encoders.kryo(classOf[Relation]) + implicit val orpEncoder: Encoder[OtherResearchProduct] = Encoders.kryo(classOf[OtherResearchProduct]) + implicit val softwareEncoder: Encoder[Software] = Encoders.kryo(classOf[Software]) + + + log.info("Converting dataset") + val rddDataset =spark.sparkContext.textFile(s"$sourcePath/dataset").map(s => mapper.readValue(s, classOf[OafDataset])) + spark.createDataset(rddDataset).as[OafDataset].write.mode(SaveMode.Overwrite).save(s"$entityPath/dataset") + + + log.info("Converting publication") + val rddPublication =spark.sparkContext.textFile(s"$sourcePath/publication").map(s => mapper.readValue(s, classOf[Publication])) + spark.createDataset(rddPublication).as[Publication].write.mode(SaveMode.Overwrite).save(s"$entityPath/publication") + + log.info("Converting software") + val rddSoftware =spark.sparkContext.textFile(s"$sourcePath/software").map(s => mapper.readValue(s, classOf[Software])) + 
spark.createDataset(rddSoftware).as[Software].write.mode(SaveMode.Overwrite).save(s"$entityPath/software") + + log.info("Converting otherresearchproduct") + val rddOtherResearchProduct =spark.sparkContext.textFile(s"$sourcePath/otherresearchproduct").map(s => mapper.readValue(s, classOf[OtherResearchProduct])) + spark.createDataset(rddOtherResearchProduct).as[OtherResearchProduct].write.mode(SaveMode.Overwrite).save(s"$entityPath/otherresearchproduct") + + + log.info("Converting Relation") + + + val rddRelation =spark.sparkContext.textFile(s"$sourcePath/relation").map(s => mapper.readValue(s, classOf[Relation])) + spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") + - entities.foreach{ - e => - val rdd =spark.sparkContext.textFile(s"$sourcePath/${e._1}").map(s => mapper.readValue(s, e._2)) - spark.createDataset(rdd).as[Result].write.mode(SaveMode.Overwrite).save(s"$targetPath/${e._1}") - } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index a66da3e6d..ac189b6ba 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.sx.graph import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.schema.oaf.Result +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary import eu.dnetlib.dhp.sx.graph.scholix.ScholixUtils import org.apache.commons.io.IOUtils @@ -29,11 +29,12 @@ object SparkCreateSummaryObject { log.info(s"targetPath -> $targetPath") implicit val resultEncoder:Encoder[Result] = Encoders.kryo[Result] + implicit val oafEncoder:Encoder[Oaf] = Encoders.kryo[Oaf] implicit val 
summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result] + val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Oaf].filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index 82bf3c50e..b2fddec20 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -57,10 +57,10 @@ object SparkResolveRelation { currentRelation }.write .mode(SaveMode.Overwrite) - .save(s"$workingPath/resolvedSource") + .save(s"$workingPath/relationResolvedSource") - val relationSourceResolved:Dataset[(String,Relation)] = spark.read.load(s"$workingPath/resolvedSource").as[Relation].map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) + val relationSourceResolved:Dataset[(String,Relation)] = spark.read.load(s"$workingPath/relationResolvedSource").as[Relation].map(r => (r.getTarget.toLowerCase, r))(Encoders.tuple(Encoders.STRING, relEncoder)) relationSourceResolved.joinWith(rPid, relationSourceResolved("_1").equalTo(rPid("_2")), "left").map{ m => val targetResolved = m._2 diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml index 4e601bff3..d8eb1fc80 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/finalGraph/oozie_app/workflow.xml @@ -35,7 +35,7 @@ --masteryarn --sourcePath${sourcePath} - --targetPath${targetPath}/entities + --targetPath${targetPath} @@ -87,7 +87,7 @@ --masteryarn --summaryPath${targetPath}/provision/summaries --targetPath${targetPath}/provision/scholix - --relationPath${sourcePath}/relation_resolved + --relationPath${targetPath}/relation From 848aabbb6cef47e941f739f475521011dfee6be9 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Sun, 25 Jul 2021 12:06:41 +0200 Subject: [PATCH 36/70] minor fix --- .../main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala | 1 + .../java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index ba483bfb2..0a7fc18fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -42,6 +42,7 @@ object SparkCreateScholix { val relationDS: Dataset[(String, Relation)] = spark.read.load(relationPath).as[Relation] + .filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .map(r => (r.getSource, r))(Encoders.tuple(Encoders.STRING, relEncoder)) val summaryDS: Dataset[(String, ScholixSummary)] = spark.read.load(summaryPath).as[ScholixSummary] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala index ac189b6ba..0970375f5 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkCreateSummaryObject.scala @@ -34,7 +34,7 @@ object SparkCreateSummaryObject { implicit val summaryEncoder:Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] - val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Oaf].filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) + val ds:Dataset[Result] = spark.read.load(s"$sourcePath/*").as[Result].filter(r=>r.getDataInfo== null || r.getDataInfo.getDeletedbyinference== false) ds.repartition(6000).map(r => ScholixUtils.resultToSummary(r)).filter(s => s!= null).write.mode(SaveMode.Overwrite).save(targetPath) From f3b9570354bd170511c56e9995ac6188601add56 Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 26 Jul 2021 13:00:16 +0300 Subject: [PATCH 37/70] properly invalidating metadata --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh index d5aa207d1..fb944f4ff 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh @@ -13,7 +13,7 @@ echo "Getting file from " $SCRIPT_PATH hdfs dfs -copyToLocal $SCRIPT_PATH echo "Creating indicators" -impala-shell -d ${TARGET} -q "invalidate metadata" +impala-shell -q "invalidate metadata" impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f - cat step16_7-createIndicatorsTables.sql | impala-shell -d $TARGET -f - echo "Indicators created" \ No newline at end of file From 
ed185fd7ed479e385904eb4b8edc4fe821844f5c Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 27 Jul 2021 11:42:47 +0300 Subject: [PATCH 38/70] added missing colons --- .../step16_7-createIndicatorsTables.sql | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql index a2fc88a39..f1ebf0d87 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql @@ -44,22 +44,22 @@ on p.id= tmp.id; create table indi_project_pubs_count stored as parquet as select pr.id id, count(p.id) total_pubs from project_results pr join publication p on p.id=pr.result -group by pr.id +group by pr.id; create table indi_project_datasets_count stored as parquet as select pr.id id, count(d.id) total_datasets from project_results pr join dataset d on d.id=pr.result -group by pr.id +group by pr.id; create table indi_project_software_count stored as parquet as select pr.id id, count(s.id) total_software from project_results pr join software s on s.id=pr.result -group by pr.id +group by pr.id; create table indi_project_otherresearch_count stored as parquet as select pr.id id, count(o.id) total_other from project_results pr join otherresearchproduct o on o.id=pr.result -group by pr.id +group by pr.id; create table indi_pub_avg_year_country_oa stored as parquet as select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, @@ -76,7 +76,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on p.id=ro.id join 
organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_dataset_avg_year_country_oa stored as parquet as select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, @@ -93,7 +93,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on d.id=ro.id join organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_software_avg_year_country_oa stored as parquet as select year, country, round(OpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageOA, @@ -110,7 +110,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on s.id=ro.id join SOURCER.organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_other_avg_year_country_oa stored as parquet as @@ -128,7 +128,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA join result_organization ro on orp.id=ro.id join organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 - group by year, country) tmp + group by year, country) tmp; create table indi_pub_avg_year_context_oa stored as parquet as with total as @@ -138,7 +138,7 @@ join publication p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) averageofpubs -from total +from total; create table indi_dataset_avg_year_context_oa stored as parquet as with total as @@ -148,7 +148,7 @@ join dataset p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) 
averageofdataset -from total +from total; create table indi_software_avg_year_context_oa stored as parquet as with total as @@ -158,7 +158,7 @@ join software p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) averageofsoftware -from total +from total; create table indi_other_avg_year_context_oa stored as parquet as with total as @@ -168,7 +168,7 @@ join otherresearchproduct p on p.id=pc.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by c.name, year ) select year, name, round(no_of_pubs/total*100,3) averageofother -from total +from total; create table indi_other_avg_year_content_oa stored as parquet as with total as @@ -179,7 +179,7 @@ join otherresearchproduct p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfOtherresearchproduct -from total +from total; create table indi_software_avg_year_content_oa stored as parquet as with total as @@ -190,7 +190,7 @@ join software p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfSoftware -from total +from total; create table indi_dataset_avg_year_content_oa stored as parquet as with total as @@ -201,7 +201,7 @@ join dataset p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfDatasets -from total +from total; create table indi_pub_avg_year_content_oa stored as parquet as with total as @@ -212,7 +212,7 @@ join publication p on p.id=pd.id where cast(year as int)>=2003 and cast(year as int)<=2021 group by d.type, year) select year, type, round(no_of_pubs/total*100,3) averageOfPubs -from total +from total; create table indi_pub_has_cc_licence stored as parquet as select distinct p.id, (case when lic='' or lic 
is null then 0 else 1 end) as has_cc_license @@ -220,7 +220,7 @@ from publication p left outer join (select p.id, license.type as lic from publication p join publication_licenses as license on license.id = p.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp -on p.id= tmp.id +on p.id= tmp.id; create table indi_pub_has_cc_licence_url stored as parquet as select distinct p.id, (case when lic_host='' or lic_host is null then 0 else 1 end) as has_cc_license_url @@ -229,9 +229,8 @@ left outer join (select p.id, lower(parse_url(license.type, "HOST")) as lic_host from publication p join publication_licenses as license on license.id = p.id WHERE lower(parse_url(license.type, 'HOST')) = 'creativecommons.org') tmp -on p.id= tmp.id - +on p.id= tmp.id; create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract -from publication \ No newline at end of file +from publication; \ No newline at end of file From 1a28a69cac1031bda96929c5a6512f52d8fdda2d Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 27 Jul 2021 15:14:09 +0300 Subject: [PATCH 39/70] changed the citeee in *_citations to cites --- .../graph/stats/oozie_app/scripts/step2.sql | 23 ++----------------- .../graph/stats/oozie_app/scripts/step3.sql | 21 ++--------------- .../graph/stats/oozie_app/scripts/step4.sql | 21 ++--------------- .../graph/stats/oozie_app/scripts/step5.sql | 21 ++--------------- 4 files changed, 8 insertions(+), 78 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql index 75b24b189..bb0d0ac6c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql +++ 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step2.sql @@ -90,27 +90,8 @@ FROM ${openaire_db_name}.publication p where p.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.publication_citations AS -SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result +SELECT substr(p.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.publication p lateral view explode(p.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" - and p.datainfo.deletedbyinference = false; - --- ANALYZE TABLE ${stats_db_name}.publication_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_datasources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_topics COMPUTE 
STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_citations COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + and p.datainfo.deletedbyinference = false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql index 540cc03a5..953eaad6a 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step3.sql @@ -41,7 +41,7 @@ FROM ${openaire_db_name}.dataset d WHERE d.datainfo.deletedbyinference = FALSE; CREATE TABLE ${stats_db_name}.dataset_citations AS -SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS result +SELECT substr(d.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.dataset d LATERAL VIEW explode(d.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" @@ -95,21 +95,4 @@ CREATE TABLE ${stats_db_name}.dataset_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.dataset p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; --- --- ANALYZE TABLE ${stats_db_name}.dataset_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE 
${stats_db_name}.dataset_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_datasources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_topics COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +where p.datainfo.deletedbyinference = false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql index 54345e074..0210dc8cb 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step4.sql @@ -41,7 +41,7 @@ from ${openaire_db_name}.software s where s.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.software_citations AS -SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT +SELECT substr(s.id, 4) as id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.software s LATERAL VIEW explode(s.extrainfo) citations as citation where 
xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" @@ -95,21 +95,4 @@ CREATE TABLE ${stats_db_name}.software_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.software p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; --- --- ANALYZE TABLE ${stats_db_name}.software_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_datasources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_topics COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +where p.datainfo.deletedbyinference = false; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql index 
36ad5d92a..f7b302186 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step5.sql @@ -41,7 +41,7 @@ WHERE o.datainfo.deletedbyinference = FALSE; -- Otherresearchproduct_citations CREATE TABLE ${stats_db_name}.otherresearchproduct_citations AS -SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS RESULT +SELECT substr(o.id, 4) AS id, xpath_string(citation.value, "//citation/id[@type='openaire']/@value") AS cites FROM ${openaire_db_name}.otherresearchproduct o LATERAL VIEW explode(o.extrainfo) citations AS citation WHERE xpath_string(citation.value, "//citation/id[@type='openaire']/@value") != "" and o.datainfo.deletedbyinference = false; @@ -86,21 +86,4 @@ where p.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.otherresearchproduct_topics AS SELECT substr(p.id, 4) AS id, subjects.subject.qualifier.classname AS type, subjects.subject.value AS topic FROM ${openaire_db_name}.otherresearchproduct p LATERAL VIEW explode(p.subject) subjects AS subject -where p.datainfo.deletedbyinference = false; - --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_classifications COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_concepts COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_datasources COMPUTE STATISTICS FOR COLUMNS; --- 
ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_pids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_topics COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +where p.datainfo.deletedbyinference = false; \ No newline at end of file From 5aa7d16d1b1bf2f100081f491152db4fb7ac90a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 27 Jul 2021 15:11:37 +0200 Subject: [PATCH 40/70] updated assertions in eu.dnetlib.dhp.oa.graph.raw.MappersTest --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 58 ++++++++----------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5b229a625..c41a6c68c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,17 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.lenient; - -import 
java.io.IOException; -import java.util.List; -import java.util.Optional; - +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -20,22 +16,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.util.List; +import java.util.Optional; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Author; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.Field; -import eu.dnetlib.dhp.schema.oaf.Instance; -import eu.dnetlib.dhp.schema.oaf.Oaf; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Relation; -import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -74,7 +60,7 @@ public class MappersTest { assertValidId(p.getId()); - assertEquals(1, p.getOriginalId().size()); + assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().contains("10.3897/oneeco.2.e13718")); assertValidId(p.getCollectedfrom().get(0).getKey()); @@ -261,8 +247,8 @@ 
public class MappersTest { final Relation r2 = (Relation) list.get(2); assertValidId(d.getId()); - assertEquals(1, d.getOriginalId().size()); - assertTrue(d.getOriginalId().contains("oai:zenodo.org:3234526")); + assertEquals(2, d.getOriginalId().size()); + assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526"))); assertValidId(d.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(d.getAuthor().size() > 0); @@ -351,8 +337,11 @@ public class MappersTest { final Publication p = (Publication) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); + //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); @@ -413,7 +402,8 @@ public class MappersTest { assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, d.getDataInfo().getProvenanceaction().getSchemename()); assertValidId(d.getId()); - assertTrue(d.getOriginalId().size() == 1); + assertEquals(2, d.getOriginalId().size()); + assertEquals("feabb67c-1fd1-423b-aec6-606d04ce53c6", d.getOriginalId().get(0)); assertValidId(d.getCollectedfrom().get(0).getKey()); @@ -663,8 +653,8 @@ public class MappersTest { final Dataset p = (Dataset) list.get(0); assertValidId(p.getId()); - assertTrue(p.getOriginalId().size() == 1); - assertEquals("df76e73f-0483-49a4-a9bb-63f2f985574a", p.getOriginalId().get(0)); + assertEquals(2, p.getOriginalId().size()); + assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("df76e73f-0483-49a4-a9bb-63f2f985574a"))); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); From 
825d9f02897c07f64880650ee6b58ccb4f3fcf2d Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 27 Jul 2021 16:09:30 +0200 Subject: [PATCH 41/70] fixed datacite workflow starting from Importing delta --- .../datacite/DataciteToOAFTransformation.scala | 2 +- .../actionmanager/datacite/oozie_app/workflow.xml | 2 +- .../datacite/DataciteToOAFTest.scala | 15 ++++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index 045927bed..cfdd98d30 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -367,7 +367,7 @@ object DataciteToOAFTransformation { result.setDateofcollection(ISO8601FORMAT.format(d)) - result.setDateoftransformation(ISO8601FORMAT.format(ts)) + result.setDateoftransformation(ISO8601FORMAT.format(d)) result.setDataInfo(dataInfo) val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 036178b37..021704f54 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -16,7 +16,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala index 0d10c41dc..a795a910d 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTest.scala @@ -3,13 +3,14 @@ package eu.dnetlib.dhp.actionmanager.datacite import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.SerializationFeature - import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest import eu.dnetlib.dhp.schema.oaf.Oaf import org.junit.jupiter.api.extension.ExtendWith import org.junit.jupiter.api.{BeforeEach, Test} import org.mockito.junit.jupiter.MockitoExtension +import java.text.SimpleDateFormat +import java.util.Locale import scala.io.Source @ExtendWith(Array(classOf[MockitoExtension])) @@ -22,6 +23,18 @@ class DataciteToOAFTest extends AbstractVocabularyTest{ super.setUpVocabulary() } + + @Test + def testDateMapping:Unit = { + val inputDate = "2021-07-14T11:52:54+0000" + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + val dt = ISO8601FORMAT.parse(inputDate) + println(dt.getTime) + + + } + + @Test def testMapping() :Unit = { val record =Source.fromInputStream(getClass.getResourceAsStream("record.json")).mkString From 16c91203bd434664fdb8b8a3633fb683880039c5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 28 Jul 2021 10:30:49 +0200 Subject: [PATCH 42/70] implemented workflow of creation action set for scholexplorer --- .../datacite/AbstractRestClient.scala | 13 +-- .../datacite/ImportDatacite.scala | 2 +- .../sx/provision/SparkCreateActionset.scala | 90 +++++++++++++++++++ .../dhp/sx/provision/SparkSaveActionSet.scala | 86 ++++++++++++++++++ .../dhp/sx/actionset/generate_actionset.json | 6 ++ .../sx/actionset/oozie_app/config-default.xml | 23 +++++ .../dhp/sx/actionset/oozie_app/workflow.xml | 76 
++++++++++++++++ .../dhp/sx/actionset/save_actionset.json | 5 ++ 8 files changed, 295 insertions(+), 6 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala create mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 8df203283..823187afe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -1,9 +1,10 @@ package eu.dnetlib.dhp.actionmanager.datacite import org.apache.commons.io.IOUtils +import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest} import org.apache.http.entity.StringEntity -import org.apache.http.impl.client.HttpClients +import org.apache.http.impl.client.{HttpClientBuilder, HttpClients} import java.io.IOException @@ -56,11 +57,15 @@ abstract class AbstractRestClient extends Iterator[String]{ private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={ - val client = HttpClients.createDefault + val timeout = 60; // seconds + val 
config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() var tries = 4 try { while (tries > 0) { - println(s"requesting ${r.getURI}") val response = client.execute(r) println(s"get response with status${response.getStatusLine.getStatusCode}") @@ -80,7 +85,5 @@ abstract class AbstractRestClient extends Iterator[String]{ throw new RuntimeException("Unable to close client ", e) } } - getBufferData() - } \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala index 931ac06f6..2b73d2955 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala @@ -140,7 +140,7 @@ object ImportDatacite { private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = { var from:Long = timestamp * 1000 - val delta:Long = 50000000L + val delta:Long = 100000000L var client: DataciteAPIImporter = null val now :Long =System.currentTimeMillis() var i = 0 diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala new file mode 100644 index 000000000..6f0cdcf8a --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala @@ -0,0 +1,90 @@ +package eu.dnetlib.dhp.sx.provision + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} +import 
org.apache.spark.{SparkConf, sql}
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.io.Source
+
+/**
+ * Spark job that collects, under `workingDirFolder/actionSetOaf`, the relations of the
+ * graph together with the results (publication, dataset, software, other research
+ * product) that are the source or the target of at least one of those relations.
+ */
+object SparkCreateActionset {
+
+  /**
+   * Entry point.
+   *
+   * Steps, all driven by the arguments declared in `generate_actionset.json`:
+   *  1. load the relations from `sourcePath/relation` and keep those that are not
+   *     deleted by inference and whose relClass does not contain "merge";
+   *  2. store the distinct source/target identifiers in `workingDirFolder/id_relation`;
+   *  3. store the kept relations in `workingDirFolder/actionSetOaf` (overwrite);
+   *  4. append to the same folder every result whose id is one of the stored identifiers.
+   *
+   * `targetPath` is parsed and logged but not otherwise used by this job.
+   *
+   * @param args command line arguments (master, sourcePath, targetPath, workingDirFolder)
+   */
+  def main(args: Array[String]): Unit = {
+    val log: Logger = LoggerFactory.getLogger(getClass)
+    val conf: SparkConf = new SparkConf()
+    val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/generate_actionset.json")).mkString)
+    parser.parseArgument(args)
+
+    val spark: SparkSession =
+      SparkSession
+        .builder()
+        .config(conf)
+        .appName(getClass.getSimpleName)
+        .master(parser.get("master")).getOrCreate()
+
+    val sourcePath = parser.get("sourcePath")
+    log.info(s"sourcePath -> $sourcePath")
+
+    val targetPath = parser.get("targetPath")
+    log.info(s"targetPath -> $targetPath")
+
+    val workingDirFolder = parser.get("workingDirFolder")
+    log.info(s"workingDirFolder -> $workingDirFolder")
+
+    implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf]
+    implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result]
+    implicit val relationEncoders: Encoder[Relation] = Encoders.kryo[Relation]
+    // Explicit (id, result) encoder built on the kryo Result encoder and passed by hand,
+    // so the tuple encoding does not depend on what spark.implicits would try to derive.
+    val idResultEncoder: Encoder[(String, Result)] = Encoders.tuple(Encoders.STRING, resultEncoders)
+
+    import spark.implicits._
+
+    val relation = spark.read.load(s"$sourcePath/relation").as[Relation]
+
+    // Single definition of the relation filter, shared by the two jobs below.
+    val isValidRelation: Relation => Boolean = r =>
+      (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")
+
+    log.info("extract source and target Identifier involved in relations")
+
+    // Overwrite, like actionSetOaf below, so that the job can be run again on the same
+    // working dir (the default save mode fails when the folder already exists).
+    relation.filter(isValidRelation)
+      .flatMap(r => List(r.getSource, r.getTarget)).distinct()
+      .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation")
+
+    val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String]
+
+    log.info("save relation filtered")
+
+    relation.filter(isValidRelation)
+      .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf")
+
+    // Appends to actionSetOaf the results stored under sourcePath/<entityName> whose id
+    // is among the identifiers found in the relations.
+    def appendLinkedResults(entityName: String): Unit = {
+      val entities: Dataset[(String, Result)] =
+        spark.read.load(s"$sourcePath/$entityName").as[Result].map((e: Result) => (e.getId, e))(idResultEncoder)
+      entities
+        // The join column must come from the dataset being joined: the original code
+        // reused publication("_1") for dataset, software and otherresearchproduct too.
+        .joinWith(idRelation, entities("_1").equalTo(idRelation("value")))
+        .map(joined => joined._1._2)
+        .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf")
+    }
+
+    log.info("saving publication")
+    appendLinkedResults("publication")
+
+    log.info("saving dataset")
+    appendLinkedResults("dataset")
+
+    log.info("saving software")
+    appendLinkedResults("software")
+
+    log.info("saving Other Research product")
+    appendLinkedResults("otherresearchproduct")
+  }
+
+}
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala
new file mode 100644
index 000000000..d1d0b8424
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala
@@ -0,0 +1,86 @@
+package eu.dnetlib.dhp.sx.provision
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import eu.dnetlib.dhp.application.ArgumentApplicationParser
+import eu.dnetlib.dhp.schema.action.AtomicAction
+import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication,
Relation, Software, Dataset => OafDataset}
+import org.apache.hadoop.io.Text
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.hadoop.mapred.SequenceFileOutputFormat
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.io.Source
+
+/**
+ * Spark job that reads Oaf records from `sourcePath`, wraps each of them in an
+ * [[AtomicAction]] serialized as JSON and stores the result in `targetPath` as a
+ * gzip-compressed Hadoop sequence file of (class name, JSON) pairs.
+ */
+object SparkSaveActionSet {
+
+  // One mapper for the whole object instead of a new instance for every record.
+  private val mapper = new ObjectMapper()
+
+  // Wraps the payload in an AtomicAction declared with the given class; returns the
+  // canonical name of the payload's runtime class and the action serialized as JSON.
+  private def serializeAction[T <: Oaf](payload: T, clazz: Class[T]): (String, String) = {
+    val action: AtomicAction[T] = new AtomicAction[T]
+    action.setClazz(clazz)
+    action.setPayload(payload)
+    (payload.getClass.getCanonicalName, mapper.writeValueAsString(action))
+  }
+
+  /**
+   * Converts an Oaf record into the (key, value) pair stored in the action set.
+   *
+   * @param item the record to convert
+   * @return the canonical class name of the record and the JSON of the AtomicAction
+   *         wrapping it, or `null` when the record is not a Dataset, Publication,
+   *         Software, OtherResearchProduct or Relation
+   */
+  def toActionSet(item: Oaf): (String, String) = {
+    item match {
+      case dataset: OafDataset =>
+        serializeAction(dataset, classOf[OafDataset])
+      case publication: Publication =>
+        serializeAction(publication, classOf[Publication])
+      case software: Software =>
+        serializeAction(software, classOf[Software])
+      case orp: OtherResearchProduct =>
+        serializeAction(orp, classOf[OtherResearchProduct])
+      case relation: Relation =>
+        serializeAction(relation, classOf[Relation])
+      case _ =>
+        null
+    }
+
+  }
+
+  /**
+   * Entry point: loads the Oaf records found in `sourcePath` and saves them in
+   * `targetPath` as a sequence file of Text pairs, skipping the records that
+   * [[toActionSet]] cannot convert.
+   *
+   * @param args command line arguments declared in `save_actionset.json`
+   *             (master, sourcePath, targetPath)
+   */
+  def main(args: Array[String]): Unit = {
+    val log: Logger = LoggerFactory.getLogger(getClass)
+    val conf: SparkConf = new SparkConf()
+    val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/save_actionset.json")).mkString)
+    parser.parseArgument(args)
+
+    val spark: SparkSession =
+      SparkSession
+        .builder()
+        .config(conf)
+        .appName(getClass.getSimpleName)
+        .master(parser.get("master")).getOrCreate()
+
+    val sourcePath = parser.get("sourcePath")
+    log.info(s"sourcePath -> $sourcePath")
+
+    val targetPath = parser.get("targetPath")
+    log.info(s"targetPath -> $targetPath")
+
+    implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf]
+    implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)
+
+    spark.read.load(sourcePath).as[Oaf]
+      // Unsupported records are dropped before they reach the tuple encoder, rather
+      // than encoding a null tuple and filtering it out afterwards.
+      .flatMap((o: Oaf) => Option(toActionSet(o)).toList)
+      .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec])
+
+  }
+
+}
diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json
new file mode 100644
index 000000000..0563808ea
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json
@@ -0,0 +1,6 @@
+[
+  {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
+  {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "source path", "paramRequired": true},
+  {"paramName":"w", "paramLongName":"workingDirFolder","paramDescription": "the working Dir Folder", "paramRequired": true},
+  {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the target path ", "paramRequired": true}
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml
new file mode 100644
index 000000000..dd3c32c62
--- /dev/null
+++
b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml @@ -0,0 +1,23 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml new file mode 100644 index 000000000..ef86a1772 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -0,0 +1,76 @@ + + + + sourcePath + the path of the consistent graph + + + workingDirFolder + the path of working dir ActionSet + + + outputPath + the path of Scholexplorer ActionSet + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn-cluster + cluster + Create Action Set + eu.dnetlib.dhp.sx.provision.SparkCreateActionset + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --targetPath${outputPath} + --workingDirFolder${workingDirFolder} + --masteryarn-cluster + + + + + + + + + yarn-cluster + cluster + Save Action Set + eu.dnetlib.dhp.sx.provision.SparkSaveActionSet + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + 
--driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${workingDirFolder}/actionSetOaf + --targetPath${outputPath} + --masteryarn-cluster + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json new file mode 100644 index 000000000..0264c825f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "source path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the target path ", "paramRequired": true} +] \ No newline at end of file From 43e62fcae92b9c955201d0b2f493d22cc8b32744 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:04:55 +0200 Subject: [PATCH 43/70] DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4193 --- .../dnetlib/doiboost/DoiBoostMappingUtil.scala | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 686a2f1f1..d018948fc 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -179,20 +179,6 @@ object DoiBoostMappingUtil { } - //val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") - - - - // val pub_date = LocalDate.parse(date, formatter) - -// if (((now.toEpochDay - pub_date.toEpochDay)/365.0) > 1){ -// val oaq : AccessRight = getOpenAccessQualifier() -// oaq.setOpenAccessRoute(OpenAccessRoute.hybrid) -// return oaq -// } -// else{ -// return getEmbargoedAccessQualifier() -// } } return getClosedAccessQualifier() @@ -206,12 +192,12 @@ object DoiBoostMappingUtil { } def getRestrictedQualifier():AccessRight = { - OafMapperUtils.accessRight("RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight( "RESTRICTED","Restricted",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } def getUnknownQualifier():AccessRight = { - OafMapperUtils.accessRight("UNKNOWN","not available",ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + OafMapperUtils.accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } From 5fe016dcbccda43276342efd1547276d723ff5db Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:14:28 +0200 Subject: [PATCH 44/70] DoiBoost AccessRigh #4362 - related to https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/126/files#issuecomment-4194 --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index d018948fc..ea65fc747 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -188,7 +188,8 @@ object DoiBoostMappingUtil { def getOpenAccessQualifier():AccessRight = { - OafMapperUtils.accessRight("OPEN","Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) + + OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN,"Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES) } def getRestrictedQualifier():AccessRight = { From 80d5b3b4deb5098ba85dd701a35cfecf82b28ef9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:16:49 +0200 Subject: [PATCH 45/70] DoiBoost AccessRigh #4362 - removing commented code --- .../eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index ea65fc747..e68880433 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -236,8 +236,7 @@ object DoiBoostMappingUtil { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) } -// val ar = getOpenAccessQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) + } else { hb = ModelConstants.UNKNOWN_REPOSITORY @@ -246,17 +245,7 @@ object DoiBoostMappingUtil { }) publication.setBestaccessright(OafMapperUtils.createBestAccessRights(publication.getInstance())) -// val ar = publication.getInstance().asScala.filter(i => i.getInstancetype != null && i.getAccessright!= null && i.getAccessright.getClassid!= null).map(f=> f.getAccessright.getClassid) -// if (ar.nonEmpty) { -// 
if(ar.contains(ModelConstants.ACCESS_RIGHT_OPEN)){ -// val ar = getOpenAccessQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) -// } -// else { -// val ar = getRestrictedQualifier() -// publication.setBestaccessright(OafMapperUtils.qualifier(ar.getClassid, ar.getClassname, ar.getSchemeid, ar.getSchemename)) -// } -// } + publication } From 4a9741825d3afd1792ebd8881ba82dde53962328 Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 12:28:04 +0300 Subject: [PATCH 46/70] added result_orcid, result_project provenance, issn in datasources --- .../graph/stats/oozie_app/scripts/step13.sql | 20 ++++++++++--------- .../graph/stats/oozie_app/scripts/step15.sql | 11 +--------- .../graph/stats/oozie_app/scripts/step6.sql | 2 +- .../graph/stats/oozie_app/scripts/step7.sql | 9 ++------- .../graph/stats/oozie_app/scripts/step8.sql | 19 +++++++----------- 5 files changed, 22 insertions(+), 39 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql index d79396b3b..e4e81175c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step13.sql @@ -57,12 +57,14 @@ UNION ALL SELECT * FROM ${stats_db_name}.software_sources UNION ALL SELECT * FROM ${stats_db_name}.otherresearchproduct_sources; --- --- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_sources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_sources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE 
${stats_db_name}.software_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.software_sources COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_sources COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + + +create table ${stats_db_name}.result_orcid as +select distinct res.id, regexp_replace(res.orcid, 'http://orcid.org/' ,'') as orcid +from ( + SELECT substr(res.id, 4) as id, auth_pid.value as orcid + FROM ${openaire_db_name}.result res + LATERAL VIEW explode(author) a as auth + LATERAL VIEW explode(auth.pid) ap as auth_pid + LATERAL VIEW explode(auth.pid.qualifier.classid) apt as author_pid_type + WHERE res.datainfo.deletedbyinference = FALSE and res.datainfo.invisible = FALSE and author_pid_type = 'orcid') as res \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql index 8f364d747..8e66e05c0 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15.sql @@ -33,13 +33,4 @@ select * from ${stats_db_name}.dataset_refereed union all select * from ${stats_db_name}.software_refereed union all -select * from ${stats_db_name}.otherresearchproduct_refereed; --- --- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.publication_refereed COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.dataset_refereed COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS; --- 
ANALYZE TABLE ${stats_db_name}.software_refereed COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.otherresearchproduct_refereed COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +select * from ${stats_db_name}.otherresearchproduct_refereed; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 5d81e97bb..4cbdba931 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -13,7 +13,7 @@ WHERE r.reltype = 'projectOrganization' and r.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.project_results AS -SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result +SELECT substr(r.target, 4) AS id, substr(r.source, 4) AS result, r.datainfo.provenanceaction.classname as provenance FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' and r.datainfo.deletedbyinference = false; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql index ae540b9b2..b3cbc9b41 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step7.sql @@ -130,12 +130,7 @@ WHERE r.reltype = 'resultOrganization' and r.datainfo.deletedbyinference = false; CREATE TABLE ${stats_db_name}.result_projects AS -select pr.result AS id, pr.id AS 
project, datediff(p.enddate, p.startdate) AS daysfromend +select pr.result AS id, pr.id AS project, datediff(p.enddate, p.startdate) AS daysfromend, pr.provenance as provenance FROM ${stats_db_name}.result r JOIN ${stats_db_name}.project_results pr ON r.id = pr.result - JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; - --- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.result_organization COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.result_projects COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file + JOIN ${stats_db_name}.project_tmp p ON p.id = pr.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index de0fedd7e..5d770dd61 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -17,7 +17,9 @@ CREATE TABLE ${stats_db_name}.datasource_tmp `latitude` STRING, `longitude` STRING, `websiteurl` STRING, - `compatibility` STRING + `compatibility` STRING, + issn_printed STRING, + issn_online STRING ) CLUSTERED BY (id) INTO 100 buckets stored AS orc tblproperties ('transactional' = 'true'); -- Insert statement that takes into account the piwik_id of the openAIRE graph @@ -32,7 +34,9 @@ SELECT substr(d1.id, 4) AS id, d1.latitude.value AS latitude, d1.longitude.value AS longitude, d1.websiteurl.value AS websiteurl, - d1.openairecompatibility.classid AS compatibility + d1.openairecompatibility.classid AS compatibility, + d1.journal.issnprinted AS issn_printed, + d1.journal.issnonline AS issn_online FROM 
${openaire_db_name}.datasource d1 LEFT OUTER JOIN (SELECT id, split(originalidd, '\\:')[1] as piwik_id @@ -97,13 +101,4 @@ where d.datainfo.deletedbyinference = false; CREATE OR REPLACE VIEW ${stats_db_name}.datasource_results AS SELECT datasource AS id, id AS result -FROM ${stats_db_name}.result_datasources; - --- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_tmp COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_languages COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_oids COMPUTE STATISTICS FOR COLUMNS; --- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS; --- ANALYZE TABLE ${stats_db_name}.datasource_organizations COMPUTE STATISTICS FOR COLUMNS; \ No newline at end of file +FROM ${stats_db_name}.result_datasources; \ No newline at end of file From 2fff24df55f2bafd2b0f67837d2f266dcf934fa8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 11:34:19 +0200 Subject: [PATCH 47/70] code formatting --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 28 ++++++++++--------- .../oa/provision/utils/XmlRecordFactory.java | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to 
ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c41a6c68c..63f18a803 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import 
eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +342,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..2c8240290 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,7 +16,6 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { From c806387d4bfa74491375afbc80c39d67547ce9f6 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 20 Jul 2021 19:31:43 +0200 Subject: [PATCH 48/70] tests for enermaps --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 25 +++++++ .../eu/dnetlib/dhp/oa/graph/raw/enermaps.xml | 72 +++++++++++++++++++ 
.../oa/provision/XmlRecordFactoryTest.java | 29 ++++++++ .../eu/dnetlib/dhp/oa/provision/enermaps.json | 1 + 4 files changed, 127 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 63f18a803..fb4a5b5da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -559,6 +559,31 @@ public class MappersTest { assertNotNull(d.getInstance().get(0).getUrl()); } + @Test + void testEnermaps() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + + final Dataset d = (Dataset) list.get(0); + + assertValidId(d.getId()); + assertValidId(d.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); + assertEquals(1, d.getAuthor().size()); + assertEquals(1, d.getInstance().size()); + assertNotNull(d.getInstance().get(0).getUrl()); + assertNotNull(d.getContext()); + assertTrue(StringUtils.isNotBlank(d.getContext().get(0).getId())); + assertEquals("enermaps::selection::tgs00004", d.getContext().get(0).getId()); + } + @Test void testClaimFromCrossref() throws IOException { final String xml = 
IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml new file mode 100644 index 000000000..362b40c85 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/enermaps.xml @@ -0,0 +1,72 @@ + + + + enermaps____::04149ee428d07360314c2cb3ba95d41e + tgs00004 + 2021-07-20T18:43:12.096+02:00 + enermaps____ + + + + https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004 + + + Statistical Office of the European Union (Eurostat) + + + + + Regional GDP + + + Statistical Office of the European Union (Eurostat) + 2020 + + 2020-10-07 + + + + OPEN + Creative Commons Attribution 4.0 International + + + GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy. 
+ + 0021 + 2020-10-07 + OPEN + Creative Commons Attribution 4.0 International + + + + + + + + + https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da..a5a1563aa 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,6 +7,8 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; +import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -131,4 +133,31 @@ public class XmlRecordFactoryTest { System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); } + + @Test + public void testEnermapsRecord() throws IOException, DocumentException { + + String contextmap = "" + + ""+ + ""+ + ""; + + ContextMapper contextMapper = ContextMapper.fromXml(contextmap); + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + otherDsTypeId); + + Dataset d = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + + JoinedEntity je = new JoinedEntity<>(d); + + String xml = xmlRecordFactory.build(je); + + assertNotNull(xml); + + Document doc = new SAXReader().read(new StringReader(xml)); + assertNotNull(doc); + System.out.println(doc.asXML()); + assertEquals("enermaps::selection::tgs00004", doc.valueOf("//concept/@id")); + } } diff --git 
a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json new file mode 100644 index 000000000..dcd4c2ee1 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/enermaps.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626800904248,"id":"50|enermaps____::04149ee428d07360314c2cb3ba95d41e","originalId":["50|enermaps____::04149ee428d07360314c2cb3ba95d41e","tgs00004"],"pid":[],"dateofcollection":"2021-07-20T18:43:12.096+02:00","dateoftransformation":"","extraInfo":[],"oaiprovenance":{"originDescription":{"harvestDate":"2021-07-20T18:43:12.096+02:00","altered":true,"baseURL":"https%3A%2F%2Flab.idiap.ch%2Fenermaps%2Fapi%2Fdatacite","identifier":"","datestamp":"","metadataNamespace":""}},"measures":null,"author":[{"fullname":"Statistical Office of the European Union (Eurostat)","name":"","surname":"","rank":1,"pid":[],"affiliation":[]}],"resulttype":{"classid":"dataset","classname":"dataset","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[],"title":[{"value":"\n Regional GDP\n ","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2020-10-07","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[{"value":"GDP expressed in PPS (purchasing power standards) eliminates differences in price levels between countries. Calculations on a per inhabitant basis allow for the comparison of economies and regions significantly different in absolute size. 
GDP per inhabitant in PPS is the key variable for determining the eligibility of NUTS 2 regions in the framework of the European Unions structural policy.","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Statistical Office of the European Union (Eurostat)","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[{"id":"enermaps::selection::tgs00004","dataInfo":[{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}]}],"externalReference":[],"instance":[{"license":{"value":"Creative Commons Attribution 4.0 
International","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0021","classname":"Dataset","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://ec.europa.eu/eurostat/web/products-datasets/-/tgs00004"],"distributionlocation":null,"collectedfrom":{"key":"10|enermaps____::d77d5e503ad1439f585ac494268b351b","value":"Enermaps","dataInfo":null},"pid":[],"alternateIdentifier":[],"dateofacceptance":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"}}],"storagedate":{"value":"2020-10-07","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk","classname":"sysimport:crosswalk","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"device":null,"size":null,"version":null,"lastmetadataupdate":null,"metadataversionnumber":null,"geolocation":[]} From df8715a1ecc57b2221a960526700f53d1f6cb676 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 28 Jul 2021 11:58:26 +0200 
Subject: [PATCH 49/70] format code after mvn compile --- .../dhp/oa/provision/XmlRecordFactoryTest.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index a5a1563aa..221049f90 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -7,8 +7,6 @@ import java.io.IOException; import java.io.StringReader; import java.util.List; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; -import eu.dnetlib.dhp.schema.oaf.Dataset; import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -23,8 +21,10 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -137,17 +137,18 @@ public class XmlRecordFactoryTest { @Test public void testEnermapsRecord() throws IOException, DocumentException { - String contextmap = "" + - ""+ - ""+ - ""; + String contextmap = "" + + + "" + + "" + + ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + otherDsTypeId); Dataset d = OBJECT_MAPPER - 
.readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); + .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); JoinedEntity je = new JoinedEntity<>(d); From 9b181ffa73c6770a97caa9218c747e4d392d98ec Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 16:31:29 +0300 Subject: [PATCH 50/70] added the h2020 classification scheme for projects --- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 4cbdba931..0c4a767a4 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -18,6 +18,12 @@ FROM ${openaire_db_name}.relation r WHERE r.reltype = 'resultProject' and r.datainfo.deletedbyinference = false; +create table ${stats_db_name}.project_classification as +select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 +from ${openaire_db_name}project p + lateral view explode(p.h2020classification) classifs as class +where p.datainfo.deletedbyinference=false and class.h2020programme is not null; + CREATE TABLE ${stats_db_name}.project_tmp ( id STRING, From 3d8f0f629b3ff2c9d2b7401b7e62381f1547531c Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 28 Jul 2021 16:15:15 +0200 Subject: [PATCH 51/70] implemented workflow of creation action set for scholexplorer --- .../datacite/AbstractRestClient.scala | 32 ++++++++--------- dhp-workflows/dhp-graph-provision/pom.xml | 35 +++++++++++++++++++ .../sx/provision/SparkCreateActionset.scala | 31 ++++------------ 
.../dhp/sx/actionset/oozie_app/workflow.xml | 8 ++--- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 823187afe..92a870e37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -64,26 +64,24 @@ abstract class AbstractRestClient extends Iterator[String]{ .setSocketTimeout(timeout * 1000).build() val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() var tries = 4 - try { - while (tries > 0) { + while (tries > 0) { println(s"requesting ${r.getURI}") - val response = client.execute(r) - println(s"get response with status${response.getStatusLine.getStatusCode}") - if (response.getStatusLine.getStatusCode > 400) { - tries -= 1 + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries-=1 } - else - return IOUtils.toString(response.getEntity.getContent) } "" - } catch { - case e: Throwable => - throw new RuntimeException("Error on executing request ", e) - } finally try client.close() - catch { - case e: IOException => - throw new RuntimeException("Unable to close client ", e) - } - } + } getBufferData() } \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index c279436d7..e402d0600 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ 
b/dhp-workflows/dhp-graph-provision/pom.xml @@ -9,6 +9,41 @@ dhp-graph-provision + + + + net.alchim31.maven + scala-maven-plugin + 4.0.1 + + + scala-compile-first + initialize + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + -Xmax-classfile-name + 200 + + ${scala.version} + + + + + + diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala index 6f0cdcf8a..faf386d25 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala @@ -43,7 +43,7 @@ object SparkCreateActionset { val relation = spark.read.load(s"$sourcePath/relation").as[Relation] relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.save(s"$workingDirFolder/id_relation") + .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] @@ -56,35 +56,18 @@ object SparkCreateActionset { relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") - log.info("saving publication") + log.info("saving entities") - val publication:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/publication").as[Result].map(p => (p.getId, p)) + val entities:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) 
- publication - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) + + entities.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) + entities + .joinWith(idRelation, entities("_1").equalTo(idRelation("value"))) .map(p => p._1._2) .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - log.info("saving dataset") - val dataset:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/dataset").as[Result].map(p => (p.getId, p)) - dataset - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - log.info("saving software") - val software:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/software").as[Result].map(p => (p.getId, p)) - software - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") - - log.info("saving Other Research product") - val orp:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/otherresearchproduct").as[Result].map(p => (p.getId, p)) - orp - .joinWith(idRelation, publication("_1").equalTo(idRelation("value"))) - .map(p => p._1._2) - .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf") } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index ef86a1772..7c4b3dd26 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -14,7 +14,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -26,7 +26,7 @@ cluster Create Action Set eu.dnetlib.dhp.sx.provision.SparkCreateActionset - 
dhp-aggregation-${projectVersion}.jar + dhp-graph-provision-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -42,7 +42,7 @@ --workingDirFolder${workingDirFolder} --masteryarn-cluster - + @@ -53,7 +53,7 @@ cluster Save Action Set eu.dnetlib.dhp.sx.provision.SparkSaveActionSet - dhp-aggregation-${projectVersion}.jar + dhp-graph-provision-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} From 6dddad86ee10543a2d96e3fe7a555bd287492e0c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 16:21:29 +0200 Subject: [PATCH 52/70] [cleaning] title cleaning based on the me.xuender:unidecode library --- dhp-common/pom.xml | 5 ++ .../oaf/utils/GraphCleaningFunctions.java | 19 ++--- .../schema/oaf/utils/OafMapperUtilsTest.java | 25 +++++-- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 26 +++++++ .../eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml | 70 +++++++++++++++++++ pom.xml | 5 ++ 6 files changed, 136 insertions(+), 14 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 74f31cf35..4c7810c47 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -25,6 +25,11 @@ com.github.sisyphsu dateparser + + me.xuender + unidecode + + org.apache.spark spark-core_2.11 diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index e5181b111..1d002ed7e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -7,22 +7,19 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.*; import java.util.function.Function; -import 
java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; -import org.jetbrains.annotations.NotNull; import com.github.sisyphsu.dateparser.DateParserUtils; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import me.xuender.unidecode.Unidecode; public class GraphCleaningFunctions extends CleaningFunctions { @@ -194,11 +191,15 @@ public class GraphCleaningFunctions extends CleaningFunctions { .filter(Objects::nonNull) .filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter( - sp -> sp - .getValue() - .toLowerCase() - .replaceAll(TITLE_FILTER_REGEX, "") - .length() > TITLE_FILTER_RESIDUAL_LENGTH) + sp -> { + final String title = sp + .getValue() + .toLowerCase(); + final String residual = Unidecode + .decode(title) + .replaceAll(TITLE_FILTER_REGEX, ""); + return residual.length() > TITLE_FILTER_RESIDUAL_LENGTH; + }) .map(GraphCleaningFunctions::cleanValue) .collect(Collectors.toList())); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index eefa1e9a3..8d519a93f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -4,12 +4,8 @@ package eu.dnetlib.dhp.schema.oaf.utils; import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import java.util.HashSet; import java.util.List; -import java.util.Locale; -import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; 
@@ -19,13 +15,32 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import me.xuender.unidecode.Unidecode; public class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + @Test + public void testUnidecode() { + + assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ")); + assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛")); + assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼")); + assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい")); + assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի")); + assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики")); + assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ")); + assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης")); + assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية")); + assertEquals("abc def ghi", Unidecode.decode("abc def ghi")); + } + @Test public void testDateValidation() { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 63f18a803..ba4211a3f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.graph.raw; +import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup; +import static 
eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.fixVocabularyNames; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; @@ -640,6 +642,30 @@ public class MappersTest { System.out.println(p.getTitle().get(0).getValue()); } + @Test + void testJairo() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); + + assertNotNull(p.getTitle()); + assertFalse(p.getTitle().isEmpty()); + assertTrue(p.getTitle().size() == 1); + assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + + final Publication p_cleaned = cleanup(fixVocabularyNames(p)); + + assertNotNull(p_cleaned.getTitle()); + assertFalse(p_cleaned.getTitle().isEmpty()); + } + @Test void testOdfFromHdfs() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml new file mode 100644 index 000000000..9ec696256 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_jairo.xml @@ -0,0 +1,70 @@ + + +
+ jairo_______::000012e58ed836576ef2a0d38b0f726f + oai:irdb.nii.ac.jp:01221:0000010198 + + + + + + 2021-05-10T11:31:09.424Z + 2021-06-03T01:45:42.536Z + jairo_______ +
+ + 多項式GCDを用いた復号法に関する研究 + 上原, 剛 + 甲斐, 博 + 野田, 松太郎 + application/pdf + http://hdl.handle.net/2433/25934 + jpn + 京都大学数理解析研究所 + 410 + Departmental Bulletin Paper + 0014 + 2004-10-01 + + openaire____::554c7c2873 + OPEN + + + 2433/25934 + AN00061013 + http://hdl.handle.net/2433/25934 + http://repository.kulib.kyoto-u.ac.jp/dspace/bitstream/2433/25934/1/1395-16.pdf + 数理解析研究所講究録 + + + + + https%3A%2F%2Firdb.nii.ac.jp%2Foai + oai:irdb.nii.ac.jp:01221:0000010198 + 2021-04-13T13:36:29Z + + + http://repository.kulib.kyoto-u.ac.jp/dspace-oai/request + oai:repository.kulib.kyoto-u.ac.jp:2433/25934 + 2012-07-12T14:15:41Z + http://irdb.nii.ac.jp/oai + + + + + false + false + 0.9 + + + + +
\ No newline at end of file diff --git a/pom.xml b/pom.xml index 6e4526e41..fc4a8a21b 100644 --- a/pom.xml +++ b/pom.xml @@ -205,6 +205,11 @@ dateparser 1.0.7
+ + me.xuender + unidecode + 0.0.7 + com.google.guava From 4c5a71ba2f968e4a6eaf993eb2590306f92ad8d5 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 28 Jul 2021 17:11:18 +0200 Subject: [PATCH 53/70] [broker] updated relation descriptors, making use of constant values --- .../EnrichMissingDatasetIsReferencedBy.java | 3 ++- .../EnrichMissingDatasetIsRelatedTo.java | 3 ++- .../EnrichMissingDatasetIsSupplementedBy.java | 3 ++- .../EnrichMissingDatasetIsSupplementedTo.java | 3 ++- .../EnrichMissingDatasetReferences.java | 3 ++- .../EnrichMissingPublicationIsReferencedBy.java | 3 ++- .../EnrichMissingPublicationIsRelatedTo.java | 3 ++- .../EnrichMissingPublicationIsSupplementedBy.java | 3 ++- .../EnrichMissingPublicationIsSupplementedTo.java | 3 ++- .../EnrichMissingPublicationReferences.java | 3 ++- .../eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java | 13 +++++++------ 11 files changed, 27 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java index 21786687e..bcbcf755f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsReferencedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsReferencedBy extends AbstractEnrichMissingDat @Override protected boolean filterByType(final String relType) { - return 
relType.equals("isReferencedBy"); + return relType.equals(ModelConstants.IS_REFERENCED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java index 0f3739434..4125974ce 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsRelatedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsRelatedTo extends AbstractEnrichMissingDatase @Override protected boolean filterByType(final String relType) { - return relType.equals("isRelatedTo"); + return relType.equals(ModelConstants.IS_RELATED_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java index cde227fee..480daf666 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public 
class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedBy extends AbstractEnrichMissingD @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedBy"); + return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java index 750165ff5..97b1eb8bd 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetIsSupplementedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetIsSupplementedTo extends AbstractEnrichMissingD @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedTo"); + return relType.equals(ModelConstants.IS_SUPPLEMENT_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java index b1c0afe16..0978486a3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/EnrichMissingDatasetReferences.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedDatasets; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset { @@ -11,7 +12,7 @@ public class EnrichMissingDatasetReferences extends AbstractEnrichMissingDataset @Override protected boolean filterByType(final String relType) { - return relType.equals("references"); + return relType.equals(ModelConstants.REFERENCES); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java index eebb5c1a6..ff9155c9d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsReferencedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissingPublication { @@ -11,6 +12,6 @@ public class EnrichMissingPublicationIsReferencedBy extends AbstractEnrichMissin @Override protected boolean filterByType(final String relType) { - return relType.equals("isReferencedBy"); + return relType.equals(ModelConstants.IS_REFERENCED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java index a8aa550d4..1051559c9 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsRelatedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationIsRelatedTo extends AbstractEnrichMissingPu @Override protected boolean filterByType(final String relType) { - return relType.equals("isRelatedTo"); + return relType.equals(ModelConstants.IS_RELATED_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java index 762ac942e..d97f46f09 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedBy.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsSupplementedBy extends AbstractEnrichMissingPublication { @@ -11,6 +12,6 @@ public class EnrichMissingPublicationIsSupplementedBy extends 
AbstractEnrichMiss @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedBy"); + return relType.equals(ModelConstants.IS_SUPPLEMENTED_BY); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java index fc7196a01..b33b340e3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationIsSupplementedTo.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationIsSupplementedTo extends AbstractEnrichMiss @Override protected boolean filterByType(final String relType) { - return relType.equals("isSupplementedTo"); + return relType.equals(ModelConstants.IS_SUPPLEMENT_TO); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java index da1994454..fe0f96b6e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/EnrichMissingPublicationReferences.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.relatedPublications; import eu.dnetlib.dhp.broker.model.Topic; +import eu.dnetlib.dhp.schema.common.ModelConstants; public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPublication { @@ -11,7 +12,7 @@ public class EnrichMissingPublicationReferences extends AbstractEnrichMissingPub @Override protected boolean filterByType(final String relType) { - return relType.equals("references"); + return relType.equals(ModelConstants.REFERENCES); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index c7be633a9..f578548fb 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -5,6 +5,7 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -52,15 +53,15 @@ public class ClusterUtils { } public static boolean isDedupRoot(final String id) { - return id.contains("dedup_wf_"); + return id.contains("dedup"); } public static final boolean isValidResultResultClass(final String s) { - return s.equals("isReferencedBy") - || s.equals("isRelatedTo") - || s.equals("references") - || s.equals("isSupplementedBy") - || s.equals("isSupplementedTo"); + return s.equals(ModelConstants.IS_REFERENCED_BY) + || s.equals(ModelConstants.IS_RELATED_TO) + || s.equals(ModelConstants.REFERENCES) + || s.equals(ModelConstants.IS_SUPPLEMENTED_BY) + || 
s.equals(ModelConstants.IS_SUPPLEMENT_TO); } public static T incrementAccumulator(final T o, final LongAccumulator acc) { From 3d1580fa9b81fec1a066d74e91de122d519099da Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 18:50:31 +0300 Subject: [PATCH 54/70] fixed a typo --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql index 0c4a767a4..378e0f17b 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step6.sql @@ -20,7 +20,7 @@ WHERE r.reltype = 'resultProject' create table ${stats_db_name}.project_classification as select substr(p.id, 4) as id, class.h2020programme.code, class.level1, class.level2, class.level3 -from ${openaire_db_name}project p +from ${openaire_db_name}.project p lateral view explode(p.h2020classification) classifs as class where p.datainfo.deletedbyinference=false and class.h2020programme is not null; From 4afa5215a9ad70c86bb4bd0db9ce9fc039e4ed2a Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 28 Jul 2021 21:59:12 +0300 Subject: [PATCH 55/70] fixed a NPE? 
--- .../dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql index 5d770dd61..76d31eb5e 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step8.sql @@ -55,7 +55,7 @@ CREATE TABLE ${stats_db_name}.dual INSERT INTO ${stats_db_name}.dual VALUES ('X'); INSERT INTO ${stats_db_name}.datasource_tmp (`id`, `name`, `type`, `dateofvalidation`, `yearofvalidation`, `harvested`, - `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`) + `piwik_id`, `latitude`, `longitude`, `websiteurl`, `compatibility`, `issn_printed`, `issn_online`) SELECT 'other', 'Other', 'Repository', @@ -66,7 +66,9 @@ SELECT 'other', NULL, NULL, NULL, - 'unknown' + 'unknown', + null, + null FROM ${stats_db_name}.dual WHERE 'other' not in (SELECT id FROM ${stats_db_name}.datasource_tmp WHERE name = 'Unknown Repository'); DROP TABLE ${stats_db_name}.dual; From 3721df7aa6d54bf20c1c709a1dcd3be3d8dc3af4 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 29 Jul 2021 10:45:24 +0200 Subject: [PATCH 56/70] refactoring create actionset of scholexplorer, moved on package dhp-aggregation --- .../scholix}/SparkCreateActionset.scala | 22 +++++++++---------- .../scholix}/SparkSaveActionSet.scala | 14 ++++++------ .../dhp/sx/actionset/generate_actionset.json | 0 .../sx/actionset/oozie_app/config-default.xml | 0 .../dhp/sx/actionset/oozie_app/workflow.xml | 4 ++-- .../dhp/sx/actionset/save_actionset.json | 0 6 files changed, 20 insertions(+), 20 deletions(-) rename dhp-workflows/{dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision => 
dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix}/SparkCreateActionset.scala (63%) rename dhp-workflows/{dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision => dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix}/SparkSaveActionSet.scala (86%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json (100%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml (100%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml (95%) rename dhp-workflows/{dhp-graph-provision => dhp-aggregation}/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json (100%) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala similarity index 63% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala index faf386d25..b78f411ee 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkCreateActionset.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkCreateActionset.scala @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.sx.provision +package eu.dnetlib.dhp.actionmanager.scholix import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} -import org.apache.spark.{SparkConf, sql} -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.SparkConf +import org.apache.spark.sql._ import 
org.slf4j.{Logger, LoggerFactory} import scala.io.Source @@ -34,16 +34,16 @@ object SparkCreateActionset { val workingDirFolder = parser.get("workingDirFolder") log.info(s"workingDirFolder -> $workingDirFolder") - implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val resultEncoders:Encoder[Result] = Encoders.kryo[Result] - implicit val relationEncoders:Encoder[Relation] = Encoders.kryo[Relation] + implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result] + implicit val relationEncoders: Encoder[Relation] = Encoders.kryo[Relation] - import spark.implicits._ + import spark.implicits._ val relation = spark.read.load(s"$sourcePath/relation").as[Relation] - relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) - .flatMap(r => List(r.getSource,r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") + relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + .flatMap(r => List(r.getSource, r.getTarget)).distinct().write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation") val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String] @@ -53,12 +53,12 @@ object SparkCreateActionset { log.info("save relation filtered") - relation.filter(r => (r.getDataInfo== null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) + relation.filter(r => (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")) .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf") log.info("saving entities") - val entities:Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, 
resultEncoders)) + val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders)) entities.filter(r => r.isInstanceOf[Result]).map(r => r.asInstanceOf[Result]) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala similarity index 86% rename from dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala index d1d0b8424..1df7ea3fb 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkSaveActionSet.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/scholix/SparkSaveActionSet.scala @@ -1,9 +1,9 @@ -package eu.dnetlib.dhp.sx.provision +package eu.dnetlib.dhp.actionmanager.scholix import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.schema.action.AtomicAction -import eu.dnetlib.dhp.schema.oaf.{Oaf, OtherResearchProduct, Publication, Relation, Software, Dataset => OafDataset} +import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation} import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.GzipCodec import org.apache.hadoop.mapred.SequenceFileOutputFormat @@ -73,13 +73,13 @@ object SparkSaveActionSet { val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") - implicit val oafEncoders:Encoder[Oaf] = Encoders.kryo[Oaf] - implicit val tEncoder:Encoder[(String,String)] = Encoders.tuple(Encoders.STRING,Encoders.STRING) + implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val tEncoder: 
Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING) spark.read.load(sourcePath).as[Oaf] - .map(o =>toActionSet(o)) - .filter(o => o!= null) - .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text,Text]], classOf[GzipCodec]) + .map(o => toActionSet(o)) + .filter(o => o != null) + .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec]) } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/generate_actionset.json diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml similarity index 95% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml rename to 
dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index 7c4b3dd26..8c045fcfe 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -26,7 +26,7 @@ cluster Create Action Set eu.dnetlib.dhp.sx.provision.SparkCreateActionset - dhp-graph-provision-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} @@ -53,7 +53,7 @@ cluster Save Action Set eu.dnetlib.dhp.sx.provision.SparkSaveActionSet - dhp-graph-provision-${projectVersion}.jar + dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json similarity index 100% rename from dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/save_actionset.json From 908f57a4758b2aa2ff1c93cad41fcf2328c73bbe Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 10:49:39 +0200 Subject: [PATCH 57/70] code formatting --- .../main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index f578548fb..7c4ca1d22 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -5,7 +5,6 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; -import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; @@ -18,6 +17,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class ClusterUtils { From e87e1805c4280e9d8ed9be9f733d331401273118 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 12:13:06 +0200 Subject: [PATCH 58/70] [raw_all] added extra workflow step for patching the identifiers in the relations, given an id mapping dataset --- .../graph/raw/PatchRelationsApplication.java | 115 ++++++++++++++++++ .../graph/raw/common/RelationIdMapping.java | 24 ++++ .../oa/graph/patch_relations_parameters.json | 26 ++++ .../oa/graph/raw_all/oozie_app/workflow.xml | 47 ++++++- 4 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java new file mode 100644 index 000000000..c2bcf69f0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -0,0 
+1,115 @@ +package eu.dnetlib.dhp.oa.graph.raw; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +public class PatchRelationsApplication { + + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional.ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new) + )); + parser.parseArgument(args); + + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); + + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); + + final String idMappingPath = 
parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); + + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } + + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + + final String relationPath = graphBasePath + "/relation"; + + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + + rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)) + .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional.ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + + spark.read().textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", 
"gzip") + .text(relationPath); + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java new file mode 100644 index 000000000..f251da8c3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -0,0 +1,24 @@ +package eu.dnetlib.dhp.oa.graph.raw.common; + +public class RelationIdMapping { + + private String oldId; + + private String newId; + + public String getOldId() { + return oldId; + } + + public void setOldId(final String oldId) { + this.oldId = oldId; + } + + public String getNewId() { + return newId; + } + + public void setNewId(final String newId) { + this.newId = newId; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json new file mode 100644 index 000000000..178c2d69b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json @@ -0,0 +1,26 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "g", + "paramLongName": "graphBasePath", + "paramDescription": "base graph path providing the set of relations to patch", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingDir", + "paramDescription": "intermediate storage location", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "idMappingPath", + "paramDescription": "dataset providing the old -> new identifier mapping", + "paramRequired": true + } +] \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 7f1ecb39f..e7320de3b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -100,6 +100,16 @@ a blacklist of nsprefixes (comma separeted) + + shouldPatchRelations + false + activates the relation patching phase, driven by the content in ${idMappingPath} + + + idMappingPath + + path pointing to the relations identifiers mapping dataset + sparkDriverMemory memory for driver process @@ -538,7 +548,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${workingDir}/graph_raw + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + From 5d08ad86ae45478db3742fef51c7c0ae38f30e34 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:03:16 +0200 Subject: [PATCH 59/70] [raw_all] patching relation identifier phase to be run at the end, i.e. 
includes also claimed relations --- .../oa/graph/raw_all/oozie_app/workflow.xml | 75 +++++++++---------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index e7320de3b..321ca4090 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -548,42 +548,7 @@ - - - - - - ${(shouldPatchRelations eq "true") and - (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} - - - - - - - - yarn - cluster - PatchRelations - eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory ${sparkExecutorMemory} - --executor-cores ${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.sql.shuffle.partitions=7680 - - --graphBasePath${workingDir}/graph_raw - --workingDir${workingDir}/patch_relations - --idMappingPath${idMappingPath} - - - - + @@ -596,7 +561,6 @@ - yarn @@ -805,7 +769,42 @@ - + + + + + + ${(shouldPatchRelations eq "true") and + (fs:exists(concat(concat(wf:conf('nameNode'),'/'),wf:conf('idMappingPath'))) eq "true")} + + + + + + + + yarn + cluster + PatchRelations + eu.dnetlib.dhp.oa.graph.raw.PatchRelationsApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf 
spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --graphBasePath${graphOutputPath} + --workingDir${workingDir}/patch_relations + --idMappingPath${idMappingPath} + + + + \ No newline at end of file From b1b0cc3f157df3998d2fe1392e6b5b7e76f75c12 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 29 Jul 2021 13:54:56 +0200 Subject: [PATCH 60/70] fixed wrong package name --- .../eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml index 8c045fcfe..2d97b5163 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/actionset/oozie_app/workflow.xml @@ -25,7 +25,7 @@ yarn-cluster cluster Create Action Set - eu.dnetlib.dhp.sx.provision.SparkCreateActionset + eu.dnetlib.dhp.actionmanager.scholix.SparkCreateActionset dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} @@ -52,7 +52,7 @@ yarn-cluster cluster Save Action Set - eu.dnetlib.dhp.sx.provision.SparkSaveActionSet + eu.dnetlib.dhp.actionmanager.scholix.SparkSaveActionSet dhp-aggregation-${projectVersion}.jar --executor-memory=${sparkExecutorMemory} From c53d106e80eca2af6456925130dd856bf5d33c49 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 29 Jul 2021 13:56:37 +0200 Subject: [PATCH 61/70] [provision] lowercase relation filter --- .../java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java | 4 +++- 1 file changed, 3 insertions(+), 
1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index 7d53d3554..b3f785492 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -10,6 +10,7 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -81,6 +82,7 @@ public class PrepareRelationsJob { Set relationFilter = Optional .ofNullable(parser.get("relationFilter")) + .map(String::toLowerCase) .map(s -> Sets.newHashSet(Splitter.on(",").split(s))) .orElse(new HashSet<>()); log.info("relationFilter: {}", relationFilter); @@ -130,7 +132,7 @@ public class PrepareRelationsJob { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(rel.getRelClass()) == false); + .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); JavaRDD pruned = pruneRels( pruneRels( From 26af0320d0be2f88187b8a837f357b6634bd315b Mon Sep 17 00:00:00 2001 From: antleb Date: Fri, 30 Jul 2021 00:31:33 +0300 Subject: [PATCH 62/70] added the sprint 2 indicators in monitor db --- .../stats/oozie_app/scripts/step20-createMonitorDB.sql | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 
7442b7c10..5da028304 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -116,6 +116,13 @@ compute stats TARGET.indi_pub_doi_from_crossref; create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); compute stats TARGET.indi_pub_gold_oa; +create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig; +create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig; +create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig; +create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig; +create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig; +create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig; + --denorm alter table TARGET.result rename to TARGET.res_tmp; From 6358f92c3a5301bd25ec00259453490d19e318aa Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 30 Jul 2021 08:54:25 +0200 Subject: [PATCH 63/70] added sleep to solve problem of lost request of creating index --- .../java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index f96a64a27..ffeb0995d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ 
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -71,6 +71,9 @@ public class DropAndCreateESIndex { log.info(STATUS_CODE_TEXT, response.getStatusLine()); } + log.info("Sleeping 60 seconds to avoid to lost the creation of index request"); + Thread.sleep(60000); + try (CloseableHttpClient client = HttpClients.createDefault()) { final String summaryConf = IOUtils From 19620eed46d8f94474ebf27b21444f4c901080f7 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 30 Jul 2021 11:09:32 +0200 Subject: [PATCH 64/70] applying PR#131, Patch the identifiers (source/target) in the relations, refinements --- .../graph/raw/PatchRelationsApplication.java | 180 ++++++++++-------- .../graph/raw/common/RelationIdMapping.java | 29 +-- .../raw/PatchRelationApplicationTest.java | 115 +++++++++++ .../dnetlib/dhp/oa/graph/raw/id_mapping.json | 5 + .../dhp/oa/graph/raw/relations_to_patch.json | 6 + 5 files changed, 237 insertions(+), 98 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index c2bcf69f0..5523863ff 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -1,10 +1,12 @@ + package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; -import eu.dnetlib.dhp.schema.oaf.Relation; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.FileNotFoundException; +import java.util.Objects; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -15,101 +17,111 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.raw.common.RelationIdMapping; +import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; -import java.io.FileNotFoundException; -import java.util.Objects; -import java.util.Optional; - -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; - public class PatchRelationsApplication { - private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); + private static final Logger log = LoggerFactory.getLogger(PatchRelationsApplication.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - Optional.ofNullable( - PatchRelationsApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) - .orElseThrow(FileNotFoundException::new) - )); - parser.parseArgument(args); + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser 
parser = new ArgumentApplicationParser( + IOUtils + .toString( + Optional + .ofNullable( + PatchRelationsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/patch_relations_parameters.json")) + .orElseThrow(FileNotFoundException::new))); + parser.parseArgument(args); - final Boolean isSparkSessionManaged = Optional - .ofNullable(parser.get("isSparkSessionManaged")) - .map(Boolean::valueOf) - .orElse(Boolean.TRUE); - log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + final Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - final String graphBasePath = parser.get("graphBasePath"); - log.info("graphBasePath: {}", graphBasePath); + final String graphBasePath = parser.get("graphBasePath"); + log.info("graphBasePath: {}", graphBasePath); - final String workingDir = parser.get("workingDir"); - log.info("workingDir: {}", workingDir); + final String workingDir = parser.get("workingDir"); + log.info("workingDir: {}", workingDir); - final String idMappingPath = parser.get("idMappingPath"); - log.info("idMappingPath: {}", idMappingPath); + final String idMappingPath = parser.get("idMappingPath"); + log.info("idMappingPath: {}", idMappingPath); - final SparkConf conf = new SparkConf(); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); - } + final SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> patchRelations(spark, graphBasePath, workingDir, idMappingPath)); + } - /** - * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the - * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. 
- * - * @param spark the SparkSession - * @param graphBasePath base graph path providing the set of relations to patch - * @param workingDir intermediate storage location - * @param idMappingPath dataset providing the old -> new identifier mapping - */ - private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, final String idMappingPath) { + /** + * Substitutes the identifiers (source/target) from the set of relations part of the graphBasePath included in the + * mapping provided by the dataset stored on idMappingPath, using workingDir as intermediate storage location. + * + * @param spark the SparkSession + * @param graphBasePath base graph path providing the set of relations to patch + * @param workingDir intermediate storage location + * @param idMappingPath dataset providing the old -> new identifier mapping + */ + private static void patchRelations(final SparkSession spark, final String graphBasePath, final String workingDir, + final String idMappingPath) { - final String relationPath = graphBasePath + "/relation"; + final String relationPath = graphBasePath + "/relation"; - final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); - final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); + final Dataset rels = Utils.readPath(spark, relationPath, Relation.class); + final Dataset idMapping = Utils.readPath(spark, idMappingPath, RelationIdMapping.class); - rels - .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - .map(RelationIdMapping::getNewId) - .ifPresent(r::setSource); - return r; - }, Encoders.bean(Relation.class)) - .joinWith(idMapping, rels.col("target").equalTo(idMapping.col("oldId")), "left") - .map((MapFunction, Relation>) t -> { - final Relation r = t._1(); - Optional.ofNullable(t._2()) - 
.map(RelationIdMapping::getNewId) - .ifPresent(r::setTarget); - return r; - }, Encoders.bean(Relation.class)) - .map( - (MapFunction) OBJECT_MAPPER::writeValueAsString, - Encoders.STRING()) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(workingDir); + log.info("relations: {}", rels.count()); + log.info("idMapping: {}", idMapping.count()); - spark.read().textFile(workingDir) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .text(relationPath); - } + final Dataset bySource = rels + .joinWith(idMapping, rels.col("source").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setSource); + return r; + }, Encoders.bean(Relation.class)); + bySource + .joinWith(idMapping, bySource.col("target").equalTo(idMapping.col("oldId")), "left") + .map((MapFunction, Relation>) t -> { + final Relation r = t._1(); + Optional + .ofNullable(t._2()) + .map(RelationIdMapping::getNewId) + .ifPresent(r::setTarget); + return r; + }, Encoders.bean(Relation.class)) + .map( + (MapFunction) OBJECT_MAPPER::writeValueAsString, + Encoders.STRING()) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(workingDir); + + spark + .read() + .textFile(workingDir) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(relationPath); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java index f251da8c3..d5852ab70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/RelationIdMapping.java @@ -1,24 +1,25 @@ + package 
eu.dnetlib.dhp.oa.graph.raw.common; public class RelationIdMapping { - private String oldId; + private String oldId; - private String newId; + private String newId; - public String getOldId() { - return oldId; - } + public String getOldId() { + return oldId; + } - public void setOldId(final String oldId) { - this.oldId = oldId; - } + public void setOldId(final String oldId) { + this.oldId = oldId; + } - public String getNewId() { - return newId; - } + public String getNewId() { + return newId; + } - public void setNewId(final String newId) { - this.newId = newId; - } + public void setNewId(final String newId) { + this.newId = newId; + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java new file mode 100644 index 000000000..3fd365416 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -0,0 +1,115 @@ + +package eu.dnetlib.dhp.oa.graph.raw; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class PatchRelationApplicationTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static final String ID_MAPPING_PATH = "map/id_mapping.json"; + + private static 
SparkSession spark; + + private static Path workingDir; + + private static final Logger log = LoggerFactory.getLogger(PatchRelationApplicationTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(PatchRelationApplicationTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(PatchRelationApplicationTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(PatchRelationApplicationTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("id_mapping.json"), + workingDir.resolve(ID_MAPPING_PATH).toFile()); + + FileUtils + .copyInputStreamToFile( + PatchRelationApplicationTest.class.getResourceAsStream("relations_to_patch.json"), + workingDir.resolve("graphBasePath/relation/rels.json").toFile()); + + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + public void testPatchRelationApplication() throws Exception { + + final String graphBasePath = workingDir.toString() + "/graphBasePath"; + PatchRelationsApplication.main(new String[] { + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-graphBasePath", graphBasePath, + "-workingDir", workingDir.toString() + "/workingDir", + "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + }); + + final List rels = spark + .read() + .textFile(graphBasePath + "/relation") + .map( + (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), + 
Encoders.bean(Relation.class)) + .collectAsList(); + + assertEquals(6, rels.size()); + + assertEquals(0, getCount(rels, "1a"), "should be patched to 1b"); + assertEquals(0, getCount(rels, "2a"), "should be patched to 2b"); + + assertEquals(2, getCount(rels, "10a"), "not included in patching"); + assertEquals(2, getCount(rels, "20a"), "not included in patching"); + + assertEquals(2, getCount(rels, "15a"), "not included in patching"); + assertEquals(2, getCount(rels, "25a"), "not included in patching"); + + assertEquals(2, getCount(rels, "1b"), "patched from 1a"); + assertEquals(2, getCount(rels, "2b"), "patched from 2a"); + } + + private long getCount(List rels, final String id) { + return rels.stream().filter(r -> r.getSource().equals(id) || r.getTarget().equals(id)).count(); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json new file mode 100644 index 000000000..640d042b1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/id_mapping.json @@ -0,0 +1,5 @@ +{"oldId": "1a", "newId": "1b"} +{"oldId": "2a", "newId": "2b"} +{"oldId": "3a", "newId": "3b"} +{"oldId": "4a", "newId": "4b"} +{"oldId": "5a", "newId": "5b"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json new file mode 100644 index 000000000..31755c53d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/relations_to_patch.json @@ -0,0 +1,6 @@ 
+{"source":"1a","target":"10a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"10a","target":"1a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"2a","target":"20a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} 
+{"source":"20a","target":"2a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"15a","target":"25a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} +{"source":"25a","target":"15a","collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.900","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1626336932282,"relType":"datasourceOrganization","subRelType":"provision","relClass":"provides","validated":false,"validationDate":null,"properties":[]} \ No newline at end of file From e244f73165a6b90a9b36686a0180685128695f8e Mon Sep 17 00:00:00 2001 
From: Claudio Atzori Date: Fri, 30 Jul 2021 11:54:13 +0200 Subject: [PATCH 65/70] Update 'README.md' --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 39d4d98e4..0a0bd82ab 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ # dnet-hadoop -Dnet-hadoop is a tool for \ No newline at end of file +Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning. \ No newline at end of file From 117c3d5c67876c323bf3e14cb4233110a0e6945b Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 2 Aug 2021 12:15:58 +0300 Subject: [PATCH 66/70] fixed a typo --- .../stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql index f1ebf0d87..020787039 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql @@ -108,7 +108,7 @@ round(NonOpenAccess/(OpenAccess+NonOpenAccess)*100,3) as averageNonOA END) AS NonOpenAccess FROM software s join result_organization ro on s.id=ro.id - join SOURCER.organization o on o.id=ro.organization + join organization o on o.id=ro.organization where cast(year as int)>=2003 and cast(year as int)<=2021 group by year, country) tmp; From fd55c77d979dadb051ef37250f0c0f52022de757 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 Aug 2021 13:48:42 +0200 Subject: [PATCH 67/70] updated dependency dhp-schemas:2.7.15 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/pom.xml b/pom.xml index fc4a8a21b..433c88093 100644 --- a/pom.xml +++ b/pom.xml @@ -741,7 +741,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.14] + [2.7.15] [4.0.3] [6.0.5] [3.1.6] From e826aae84811ec1143ddd7134ef11879846f043b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 Aug 2021 14:28:59 +0200 Subject: [PATCH 68/70] using constants from ModelConstants --- .../eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala | 12 ++++++------ .../dhp/sx/graph/bio/pubmed/PubMedToOaf.scala | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala index a19c6fb12..90b65c8f7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/BioDBToOAF.scala @@ -199,7 +199,7 @@ object BioDBToOAF { d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) } val relevant_dates: List[StructuredProperty] = dates.filter(d => !d.date_info.contains("entry version")) - .map(date => OafMapperUtils.structuredProperty(date.date, "UNKNOWN", "UNKNOWN", ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) + .map(date => OafMapperUtils.structuredProperty(date.date, ModelConstants.UNKNOWN, ModelConstants.UNKNOWN, ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) if (relevant_dates != null && relevant_dates.nonEmpty) d.setRelevantdate(relevant_dates.asJava) d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) @@ -218,12 +218,12 @@ object BioDBToOAF { if (references_pmid != null && references_pmid.nonEmpty) { - val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) + val rel 
= createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) rel.getCollectedfrom List(d, rel) } else if (references_doi != null && references_doi.nonEmpty) { - val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) + val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null) List(d, rel) } else @@ -243,7 +243,7 @@ object BioDBToOAF { rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) rel.setDataInfo(DATA_INFO) - rel.setRelType("resultResult") + rel.setRelType(ModelConstants.RESULT_RESULT) rel.setSubRelType(subRelType) rel.setRelClass(relClass) @@ -263,7 +263,7 @@ object BioDBToOAF { def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date:String): Relation = { - createRelation(pid,pidType,sourceId,collectedFrom, "supplement","IsSupplementTo", date) + createRelation(pid,pidType,sourceId,collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date) } @@ -392,6 +392,6 @@ object BioDBToOAF { i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) - List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", GraphCleaningFunctions.cleanDate(input.date))) + List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, GraphCleaningFunctions.cleanDate(input.date))) } } diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index ae4a72062..9a49deebc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -16,7 +16,7 @@ object PubMedToOaf { ) def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { - val result_typologies = getVocabularyTerm("dnet:result_typologies", vocabularies, cobjQualifier.getClassid) + val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) result_typologies.getClassid match { case "dataset" => new Dataset case "publication" => new Publication @@ -68,11 +68,11 @@ object PubMedToOaf { //else We have to find a terms that match the vocabulary otherwise we discard it val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue)) if (ja.isDefined) { - val cojbCategory = getVocabularyTerm("dnet:publication_resource", vocabularies, ja.get.getValue) + val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue) i.setInstancetype(cojbCategory) } else { val i_type = article.getPublicationTypes.asScala - .map(s => getVocabularyTerm("dnet:publication_resource", vocabularies, s.getValue)) + .map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue)) .find(q => q != null) if (i_type.isDefined) i.setInstancetype(i_type.get) @@ -112,7 +112,7 @@ object PubMedToOaf { if (article.getLanguage != null) { - val term = vocabularies.getSynonymAsQualifier("dnet:languages", article.getLanguage) + val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_LANGUAGES, article.getLanguage) if (term != null) 
result.setLanguage(term) } From 3fc820203bcd76bf4aa81baa027cd76bee7c0460 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 4 Aug 2021 10:13:48 +0200 Subject: [PATCH 69/70] fixed wrong test file --- .../eu/dnetlib/dhp/sx/graph/oaf_to_summary | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary index 4d581044a..e1fd758b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/oaf_to_summary @@ -1,20 +1,10 @@ -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","target":"50|doi_________::4af011e641e0ba286660fd24a3f603b7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-01-01","dataInfo":null}]} -{"id":"50|doi_________::0f2129f0a8ddfb099b9fabba3105245f","localIdentifier":[{"identifier":"10.1111/1346-8138.14162","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/1346-8138.14162/fullpdf"}],"typology":"publication","subType":"Article","title":["Guideline of SSc","Diagnostic criteria, severity classification and guidelines of systemic sclerosis"],"author":["Yoshihide Asano","Masatoshi Jinnin","Yasushi Kawaguchi","Masataka Kuwana","Daisuke Goto","Shinichi Sato","Kazuhiko 
Takehara","Masaru Hatano","Manabu Fujimoto","Naoki Mugii","Hironobu Ihn"],"date":["2018-04-23","2018-01-01"],"subject":null,"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","target":"50|doi_________::7e79063f205480e61ee7fdcf7ab03bad","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2004-11-01","dataInfo":null}]} -{"id":"50|doi_________::1b57d5ebe71734c1fa98624d9609971e","localIdentifier":[{"identifier":"10.1002/ajmg.a.30270","schema":"doi","url":"https://onlinelibrary.wiley.com/doi/full/10.1002/ajmg.a.30270"}],"typology":"publication","subType":"Article","title":["Clinical variability in a Noonan syndrome family with a newPTPN11 gene mutation"],"author":["D�bora Romeo Bertola","Alexandre C. Pereira","Paulo S.L. de Oliveira","Chong A. 
Kim","Jos� Eduardo Krieger"],"date":["2004-09-21T23:19:41Z","2004-11-01"],"subject":[{"scheme":"keywords","value":"Genetics(clinical)"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","target":"50|doi_________::edb21431e0271061e0dddc248300708a","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2010-08-16","dataInfo":null}]} -{"id":"50|doi_________::3d6b9e4f51325f7f17b6809513812a43","localIdentifier":[{"identifier":"10.1111/j.1440-1843.2010.01819.x","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/j.1440-1843.2010.01819.x/fullpdf"}],"typology":"publication","subType":"Article","title":["P. aeruginosa: host defence in the lung","Pseudomonas aeruginosa: Host defence in lung diseases"],"author":["Bryan J. WILLIAMS","Joanne DEHNBOSTEL","Timothy S. 
BLACKWELL"],"date":["2010-08-16"],"subject":[{"scheme":"keywords","value":"Pulmonary and Respiratory Medicine"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f441c6243fd6ae381c520b42349b769","target":"50|doi_________::44ebec98169daae57c106eb1a1072aae","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2018-07-12","dataInfo":null}]} -{"id":"50|doi_________::3f441c6243fd6ae381c520b42349b769","localIdentifier":[{"identifier":"10.1007/s11901-018-0414-x","schema":"doi","url":"http://link.springer.com/article/10.1007/s11901-018-0414-x/fulltext.html"}],"typology":"publication","subType":"Article","title":["DILI Associated with Skin Reactions"],"author":["Sahand Rahnama-Moghadam","Hans L. 
Tillmann"],"date":["2018-07-12"],"subject":null,"publisher":["Springer Science and Business Media LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::ff99f7ec03946fa4c8f413d59f75a547","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, 
Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::3f73d349736eb476653a026d14222b12","target":"50|pmid________::f767374d588a8d51de0f129261daa5a7","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2003-01-01","dataInfo":null}]} -{"id":"50|doi_________::3f73d349736eb476653a026d14222b12","localIdentifier":[{"identifier":"10.1002/lsm.10225","schema":"doi","url":"https://dx.doi.org/10.1002/lsm.10225"}],"typology":"publication","subType":"Article","title":["Multicenter study of noninvasive radiofrequency for periorbital tissue tightening"],"author":["Fitzpatrick, Richard","Geronemus, Roy","Goldberg, David","Kaminer, Michael","Kilmer, Suzanne","Ruiz-Esparza, 
Javier"],"date":["2003-01-01","2003-10-17T12:03:53Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","target":"50|doi_________::e2d40a313240d3eb979a3172103a4d7f","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2006-11-17","dataInfo":null}]} -{"id":"50|doi_________::48c200713e34afe5c4dabf77f258f9de","localIdentifier":[{"identifier":"10.1007/s11096-006-9043-5","schema":"doi","url":"https://dx.doi.org/10.1007/s11096-006-9043-5"}],"typology":"publication","subType":"Article","title":["Patients’ attitudes towards and experiences of generic drug substitution in Norway"],"author":["Kjoenniksen, Inge","Lindbaek, Morten","Granas, Anne Gerd"],"date":["2006-11-17"],"subject":null,"publisher":["Springer Science and Business Media 
LLC"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","target":"50|doi_________::fa0760d1427b71b6cb3ffcc739751197","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-09-29","dataInfo":null}]} -{"id":"50|doi_________::507df31b75efc911c50e0d6e25f13d5a","localIdentifier":[{"identifier":"10.1080/10408398509527417","schema":"doi","url":"http://www.tandfonline.com/doi/pdf/10.1080/10408398509527417"}],"typology":"publication","subType":"Article","title":["The genusallium. Part 2"],"author":["Gruffydd R. Fenwick","Anthony B. Hanley","John R. 
Whitaker"],"date":["2009-09-30T13:53:43Z","2009-09-29"],"subject":[{"scheme":"keywords","value":"General Medicine"}],"publisher":["Informa UK Limited"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","target":"50|pmid________::43fb246d61ba89b7f9825d9e02856d17","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2009-01-01","dataInfo":null}]} -{"id":"50|doi_________::6b9dce3b94b3bfe9649c4fb6b9e66681","localIdentifier":[{"identifier":"10.1111/j.1742-481x.2009.00634.x","schema":"doi","url":"https://dx.doi.org/10.1111/j.1742-481x.2009.00634.x"}],"typology":"publication","subType":"Article","title":["Venous leg ulcers: patient concordance with compression therapy and its impact on healing and prevention of recurrence"],"author":["Moffatt, Christine","Kommala, Dheerendra","Dourdin, Nathalie","Choe, 
Yoonhee"],"date":["2009-01-01","2009-11-13T10:40:02Z"],"subject":[{"scheme":"keywords","value":"Surgery"},{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"},{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"}],"abstract":null} -{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":null,"relType":"resultResult","subRelType":"relationship","relClass":"References","source":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","target":"50|doi_________::1d47307b88d6bb6757f71bfc56686b74","validated":false,"validationDate":null,"properties":[{"key":"RelationDate","value":"2015-01-01","dataInfo":null}]} -{"id":"50|doi_________::91c510d2d163e81d55283bb9c2d4d7b7","localIdentifier":[{"identifier":"10.1111/jocd.12148","schema":"doi","url":"http://onlinelibrary.wiley.com/wol1/doi/10.1111/jocd.12148/fullpdf"}],"typology":"publication","subType":"Article","title":["Assessment of efficacy and tolerability of different concentrations of trichloroacetic acid vs\n. carbon dioxide laser in treatment of xanthelasma palpebrarum"],"author":["Basma Mourad","Lamia H. 
Elgarhy","Heba-Alla Ellakkawy","Nageh Elmahdy"],"date":["2015-08-07","2015-01-01"],"subject":[{"scheme":"keywords","value":"Dermatology"}],"publisher":["Wiley"],"relatedPublications":0,"relatedDatasets":0,"relatedUnknown":0,"datasources":[{"datasourceName":"Crossref","datasourceId":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","completionStatus":"complete"},{"datasourceName":"Datacite","datasourceId":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","completionStatus":"complete"}],"abstract":null} \ No newline at end of file +{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1602017035423,"id":"50|doi_________::002d84a55111d3d23a3ef388f8f31ad5","originalId":["10.1007/s10956-019-9769-1","9769","50|doiboost____::002d84a55111d3d23a3ef388f8f31ad5"],"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-07-29T19:20:00+0200","dateoftransformation":"1970-01-19T12:20:43+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yeh, Heng-Yi","name":"Heng-Yi","surname":"Yeh","rank":1,"pid":null,"affiliation":[]},{"fullname":"Tsai, 
Yu-Hsiang","name":"Yu-Hsiang","surname":"Tsai","rank":2,"pid":null,"affiliation":[]},{"fullname":"Tsai, Chin-Chung","name":"Chin-Chung","surname":"Tsai","rank":3,"pid":null,"affiliation":[]},{"fullname":"Chang, Hsin-Yi","name":"Hsin-Yi","surname":"Chang","rank":4,"pid":[{"value":"https://orcid.org/0000-0002-9659-1022","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Education","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Investigating Students’ Conceptions of Technology-Assisted Science Learning: a Drawing Analysis","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-31","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-07-01","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31T13:04:54Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-31","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"publisher":{"value":"Springer Science and Business Media LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository","dataInfo":null},"url":["https://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1007/s10956-019-9769-1.pdf","http://link.springer.com/article/10.1007/s10956-019-9769-1/fulltext.html","http://dx.doi.org/10.1007/s10956-019-9769-1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1007/s10956-019-9769-1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-31","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} 
+{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1596054283851,"id":"50|doi_________::0035704f67a6e839f786b7390c31106e","originalId":["990","10.1186/1471-2377-14-81","50|doiboost____::0035704f67a6e839f786b7390c31106e"],"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-07-29T20:24:43Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Florien W Boele","name":"Florien W","surname":"Boele","rank":1,"pid":null,"affiliation":null},{"fullname":"Irma M Verdonck-de Leeuw","name":"Irma M","surname":"Verdonck-de Leeuw","rank":2,"pid":null,"affiliation":null},{"fullname":"Pim Cuijpers","name":"Pim","surname":"Cuijpers","rank":3,"pid":null,"affiliation":null},{"fullname":"Jaap C Reijneveld","name":"Jaap C","surname":"Reijneveld","rank":4,"pid":null,"affiliation":null},{"fullname":"Jan J Heimans","name":"Jan J","surname":"Heimans","rank":5,"pid":null,"affiliation":null},{"fullname":"Martin Klein","name":"Martin","surname":"Klein","rank":6,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Clinical 
Neurology","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Medicine","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Internet-based guided self-help for glioma patients with depressive symptoms: design of a randomized controlled trial","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2014-04-10T19:01:28Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2014-04-10","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-23","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"publisher":{"value":"Springer Science and Business Media 
LLC","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://www.springer.com/tdm","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://link.springer.com/content/pdf/10.1186/1471-2377-14-81.pdf","http://link.springer.com/article/10.1186/1471-2377-14-81/fulltext.html","http://link.springer.com/content/pdf/10.1186/1471-2377-14-81","http://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository","dataInfo":null},"url":["https://dx.doi.org/10.1186/1471-2377-14-81"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1186/1471-2377-14-81","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2014-04-10","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"BMC Neurology","issnPrinted":null,"issnOnline":"1471-2377","issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":"14","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619631198141,"id":"50|doi_________::003bc91a6b4c1565813dfdd522697b1a","originalId":["10.1039/c8tc05911j","50|doiboost____::003bc91a6b4c1565813dfdd522697b1a"],"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-04-28T17:33:18Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Yuqin Li","name":"Yuqin","surname":"Li","rank":1,"pid":null,"affiliation":null},{"fullname":"Siming Gao","name":"Siming","surname":"Gao","rank":2,"pid":null,"affiliation":null},{"fullname":"Nan Zhang","name":"Nan","surname":"Zhang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xin Huang","name":"Xin","surname":"Huang","rank":4,"pid":null,"affiliation":null},{"fullname":"Jinchang Tian","name":"Jinchang","surname":"Tian","rank":5,"pid":null,"affiliation":null},{"fullname":"Feng Xu","name":"Feng","surname":"Xu","rank":6,"pid":null,"affiliation":null},{"fullname":"Zhizhong Sun","name":"Zhizhong","surname":"Sun","rank":7,"pid":null,"affiliation":null},{"fullname":"Shougen Yin","name":"Shougen","surname":"Yin","rank":8,"pid":null,"affiliation":null},{"fullname":"Xiaoming Wu","name":"Xiaoming","surname":"Wu","rank":9,"pid":null,"affiliation":null},{"fullname":"Wenyi 
Chu","name":"Wenyi","surname":"Chu","rank":10,"pid":[{"value":"http://orcid.org/0000-0002-4926-4475","qualifier":{"classid":"orcid_pending","classname":"orcid_pending","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Materials Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"General Chemistry","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Solution-processable, high luminance deep-blue organic light emitting devices based on novel naphthalene bridged bis-triphenylamine derivatives","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2019-01-29T03:05:29Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-02-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"The optimal device E 
exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 .","dataInfo":null},{"value":"A series of naphthalene bridged bis-triphenylamine derivatives with a twisted structure was designed, synthesized and characterized. The dependence of their thermal, photophysical and electrochemical properties and performance as emitters in OLEDs on their chemical structure was systematically studied by the introduction of aryl groups with electron-donating or electron-withdrawing substituents to the naphthalene bridged bis-triphenylamine core. These compounds exhibited steady blue light emissions and high d values ranging from 468 to 500 °C. Most importantly, the deep-blue OLEDs were successfully fabricated using a solution processed method by blending PVK and PBD to improve OLED performance. The optimal device E exhibited a very high luminance of 10 407 cd m −2 and a maximum current efficiency of 7.80 cd A −1 with CIE coordinates of (0.166, 0.097). These results indicated that these compounds with the twisted naphthalene bridged bis-triphenylamine core could show stable deep-blue electroluminescence properties and introducing the electron-donating group (–OCH 3 ) could enable high luminance and current efficiency for OLEDs.","dataInfo":null}],"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"publisher":{"value":"Royal Society of Chemistry 
(RSC)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"http://rsc.li/journals-terms-of-use","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://pubs.rsc.org/en/content/articlepdf/2019/TC/C8TC05911J","http://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-29T03:05:29Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository","dataInfo":null},"url":["https://dx.doi.org/10.1039/c8tc05911j"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1039/c8tc05911j","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2019-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Materials Chemistry C","issnPrinted":"2050-7526","issnOnline":"2050-7534","issnLinking":null,"ep":"2698","iss":null,"sp":"2686","vol":"7","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1611897051298,"id":"50|doi_________::0048eb8bb3f289cccc9358bf3726a772","originalId":["10.1016/s1441-3582(03)70132-3","10.1016/S1441-3582(03)70132-3","50|doiboost____::0048eb8bb3f289cccc9358bf3726a772"],"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-03-12T11:12:27+0100","dateoftransformation":"1970-01-19T09:00:07+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Ling, Bith-Hong","name":"Bith-Hong","surname":"Ling","rank":1,"pid":null,"affiliation":[]},{"fullname":"Lockshin, Larry","name":"Larry","surname":"Lockshin","rank":2,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Components of Wine Prices for Australian Wine: How Winery Reputation, Wine Quality, Region, Vintage, and Winery Size Contribute to the Price of Varietal Wines","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2003-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2018-12-09","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-07-07T08:52:15Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2021-01-27","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":" Australian wines are identified by the varietal names of the grapes rather than the regions as in France and other traditional wine producing countries. This paper uses the concept of hedonic price theory to investigate a range of extrinsic characteristics’ ability to predict prices for different climate regions (warm and cool) and four major wine varieties of Australian wines, two reds (shiraz and cabernet) and two whites (chardonnay and riesling). The effects of winery reputation (wine company/brand), winery size (production scale), age of the wine, and region of origin (wine grape source) contributing to the relationship between price and quality attributes of Australian wines are investigated, based on 1880 observations of bottled wines. Wine quality rating and winery/brand reputation have major effects on the price, while region and size of winery have differential effects depending on the variety of grape. Vintage has only a minor effect. 
","dataInfo":null}],"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2003-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":{"value":"http://journals.sagepub.com/page/policies/text-and-data-mining-license","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dne
t:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S1441358203701323?httpAccept=text/plain","http://journals.sagepub.com/doi/pdf/10.1016/S1441-3582%2803%2970132-3","http://dx.doi.org/10.1016/s1441-3582(03)70132-3"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s1441-3582(03)70132-3","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-07-07T08:52:15Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1613672628083,"id":"50|doi_________::005ca383dbcbecb839c1c4f525636048","originalId":["10.1080/01431161.2017.1302106","50|doiboost____::005ca383dbcbecb839c1c4f525636048"],"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-02-18T18:23:48Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Jun Wang","name":"Jun","surname":"Wang","rank":1,"pid":null,"affiliation":null},{"fullname":"Jinye 
Peng","name":"Jinye","surname":"Peng","rank":2,"pid":null,"affiliation":null},{"fullname":"Xiaoyue Jiang","name":"Xiaoyue","surname":"Jiang","rank":3,"pid":null,"affiliation":null},{"fullname":"Xiaoyi Feng","name":"Xiaoyi","surname":"Feng","rank":4,"pid":null,"affiliation":null},{"fullname":"Jianhong Zhou","name":"Jianhong","surname":"Zhou","rank":5,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"General Earth and Planetary Sciences","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Remote-sensing image fusion using sparse representation with sub-dictionaries","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-03-24T08:21:49Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"published-online","classname":"published-online","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-18","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-24","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-28","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"publisher":{"value":"
Informa UK Limited","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.tandfonline.com/doi/pdf/10.1080/01431161.2017.1302106","http://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository","dataInfo":null},"url":["https://dx.doi.org/10.1080/01431161.2017.1302106"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1080/01431161.2017.1302106","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-03-24","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"International Journal of Remote Sensing","issnPrinted":"0143-1161","issnOnline":"1366-5901","issnLinking":null,"ep":"3585","iss":null,"sp":"3564","vol":"38","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1619364899333,"id":"50|doi_________::006a8e00482f03066c79b472dfe51ba3","originalId":["10.1515/bot-2019-0045","50|doiboost____::006a8e00482f03066c79b472dfe51ba3"],"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-02-08T18:30:38+0100","dateoftransformation":"1970-01-19T08:13:03+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"den Hartog, Cornelis","name":"Cornelis","surname":"den Hartog","rank":1,"pid":null,"affiliation":[]},{"fullname":"Triest, Ludwig","name":"Ludwig","surname":"Triest","rank":2,"pid":[{"value":"https://orcid.org/0000-0002-4946-9614","qualifier":{"classid":"orcid","classname":"Open Researcher and Contributor 
ID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Plant Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Aquatic Science","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Ecology, Evolution, Behavior and Systematics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"A profound view and discourse on the typification and status of three confused taxa: Ruppia maritima, R. spiralis and R. 
cirrhosa","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2020-01-15","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-01-15T09:02:47Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2020-06-25","qualifier":{"classid":"published-print","classname":"published-print","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractTaxonomic difficulties have persisted within the genus Ruppia for a long time. We first unravel misconceptions as perceived on different continents and subsequently present a revised interpretation of the identity and typification of three European taxa at species level: Ruppia maritima L., Ruppia spiralis L. ex Dumortier, and Ruppia cirrhosa (Petagna) Grande. To do this, historical specimens, illustrations and original descriptions were studied. We supersede a previous choice of the figure of Buccaferrea maritima, foliis minus acutis Micheli (1729) as the lectotype of R. maritima and type species of the genus Ruppia owing to a serious conflict with the protologue. Based on a meticulous interpretation of protologues and figures in a historical context, we reject the recent view of assigning R. cirrhosa and its proposed lectotype (iconotype) as a homotypic synonym of R. maritima. We agree with an earlier lectotypification of R. spiralis, though for another reason than the above-mentioned abused homotypy. Consequently, R. cirrhosa is a synonym of neither R. maritima or R. 
spiralis, based on material from Petagna in the Herbarium of Naples designated as the holotype of R. cirrhosa. We argue for three species to be considered as fully independent taxa: R. maritima, R. spiralis and R. cirrhosa.","dataInfo":null}],"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"publisher":{"value":"Walter de Gruyter GmbH","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown 
Repository","dataInfo":null},"url":["https://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-01-15","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://www.degruyter.com/view/journals/botm/63/3/article-p229.xml","https://www.degruyter.com/document/doi/10.1515/bot-2019-0045/pdf","http://dx.doi.org/10.1515/bot-2019-0045"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1515/bot-2019-0045","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2020-06-25","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} 
+{"collectedfrom":[{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1588058636007,"id":"50|doi_________::00a55b7fae8de31a2ffa4e235e98a7bf","originalId":["10.1109/tns.2004.835620","50|doiboost____::00a55b7fae8de31a2ffa4e235e98a7bf"],"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2020-04-30T20:50:59+0200","dateoftransformation":"1970-01-19T10:11:12+0100","extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Veloso, J.F.C.A.","name":"J.F.C.A.","surname":"Veloso","rank":1,"pid":null,"affiliation":[]},{"fullname":"Amaro, F.","name":"F.","surname":"Amaro","rank":2,"pid":null,"affiliation":[]},{"fullname":"dos Santos, J.M.F.","name":"J.M.F.","surname":"dos Santos","rank":3,"pid":null,"affiliation":[]},{"fullname":"Mir, J.A.","name":"J.A.","surname":"Mir","rank":4,"pid":null,"affiliation":[]},{"fullname":"Derbyshire, G.E.","name":"G.E.","surname":"Derbyshire","rank":5,"pid":null,"affiliation":[]},{"fullname":"Stephenson, R.","name":"R.","surname":"Stephenson","rank":6,"pid":null,"affiliation":[]},{"fullname":"Rhodes, N.J.","name":"N.J.","surname":"Rhodes","rank":7,"pid":null,"affiliation":[]},{"fullname":"Schooneveld, 
E.M.","name":"E.M.","surname":"Schooneveld","rank":8,"pid":null,"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Nuclear and High Energy Physics","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Electrical and Electronic Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null},{"value":"Nuclear Energy and Engineering","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Application of the microhole and strip plate detector for neutron detection","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2004-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-03-14","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2004-10-19T08:20:44Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"publisher":{"value":"Institute of Electrical and Electronics Engineers 
(IEEE)","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/tns.2004.835620"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/23/29603/01344292.pdf?arnumber=1344292","http://dx.doi.org/10.1109/tns.2004.835620"],
"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/tns.2004.835620","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2004-10-19T08:20:44Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":null} +{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1592299453363,"id":"50|doi_________::00ad40afc28e0c860116ac595183a0c0","originalId":["S0377221797004529","10.1016/s0377-2217(97)00452-9","50|doiboost____::00ad40afc28e0c860116ac595183a0c0"],"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-06-16T09:24:13Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Chung-Yee Lee","name":"Chung-Yee","surname":"Lee","rank":1,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"Two-machine flowshop scheduling with availability constraints","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2002-07-25T17:48:22Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"1999-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-04-24","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"publisher":{"value":"Elsevier BV","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":{"value":"https://www.elsevier.com/tdm/userlicense/1.0/","dataInfo":null},"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/xml","https://api.elsevier.com/content/article/PII:S0377221797004529?httpAccept=text/plain","http://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"altern
ateIdentifier":null,"dateofacceptance":{"value":"2002-07-25T17:48:22Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1016/s0377-2217(97)00452-9"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1016/s0377-2217(97)00452-9","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"1999-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"European Journal of Operational Research","issnPrinted":"0377-2217","issnOnline":null,"issnLinking":null,"ep":"429","iss":null,"sp":"420","vol":"114","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1609910911587,"id":"50|doi_________::00d8413c6d6f5c091ab6dda88f0a0ecb","originalId":["10.1175/jpo-d-16-0281.1","50|doiboost____::00d8413c6d6f5c091ab6dda88f0a0ecb"],"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2021-01-06T05:28:31Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"P. B. Smit","name":"P. B.","surname":"Smit","rank":1,"pid":null,"affiliation":null},{"fullname":"T. T. Janssen","name":"T. T.","surname":"Janssen","rank":2,"pid":null,"affiliation":null},{"fullname":"T. H. C. Herbers","name":"T. H. 
C.","surname":"Herbers","rank":3,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[{"value":"Oceanography","qualifier":{"classid":"keywords","classname":"keywords","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":null}],"title":[{"value":"Nonlinear Wave Kinematics near the Ocean Surface","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2017-05-09T19:33:59Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2019-11-16","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[{"value":"AbstractEstimation of second-order, near-surface wave kinematics is important for interpretation of ocean surface remote sensing and surface-following instruments, determining loading on offshore structures, and understanding of upper-ocean transport processes. Unfortunately, conventional wave theories based on Stokes-type expansions do not consider fluid motions at levels above the unperturbed fluid level. The usual practice of extrapolating the fluid kinematics from the unperturbed free surface to higher points in the fluid is generally reasonable for narrowband waves, but for broadband ocean waves this results in dramatic (and nonphysical) overestimation of surface velocities. 
Consequently, practical approximations for random waves are at best empirical and are often only loosely constrained by physical principles. In the present work, the authors formulate the governing equations for water waves in an incompressible and inviscid fluid, using a boundary-fitted coordinate system (i.e., sigma or s coordinates) to derive expressions for near-surface kinematics in nonlinear random waves from first principles. Comparison to a numerical model valid for highly nonlinear waves shows that the new results 1) are consistent with second-order Stokes theory, 2) are similar to extrapolation methods in narrowband waves, and 3) greatly improve estimates of surface kinematics in random seas.","dataInfo":null}],"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"publisher":{"value":"American Meteorological Society","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0001","classname":"0001","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["https://journals.ametsoc.org/view/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://journals.ametsoc.org/jpo/article-pdf/47/7/1657/4810788/jpo-d-16-0281_1.pdf","https://journals.ametsoc.org/downloadpdf/journals/phoc/47/7/jpo-d-16-0281.1.xml","http://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier"
:{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-05-09T19:33:59Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1175/jpo-d-16-0281.1"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1175/jpo-d-16-0281.1","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2017-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"Journal of Physical Oceanography","issnPrinted":"0022-3670","issnOnline":"1520-0485","issnLinking":null,"ep":"1673","iss":null,"sp":"1657","vol":"47","edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} 
+{"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1582142606255,"id":"50|doi_________::00df77889b1ee0af015524c627f0cf47","originalId":["10.1109/isscc.2010.5433999","50|doiboost____::00df77889b1ee0af015524c627f0cf47"],"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"dateofcollection":"2020-02-19T20:03:26Z","dateoftransformation":null,"extraInfo":[],"oaiprovenance":null,"measures":[],"author":[{"fullname":"Sameh A Ibrahim","name":"Sameh A","surname":"Ibrahim","rank":1,"pid":null,"affiliation":null},{"fullname":"Behzad Razavi","name":"Behzad","surname":"Razavi","rank":2,"pid":null,"affiliation":null}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"language":null,"country":[],"subject":[],"title":[{"value":"A 20Gb/s 40mW equalizer in 90nm CMOS technology","qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":null}],"relevantdate":[{"value":"2010-03-24T14:35:14Z","qualifier":{"classid":"created","classname":"created","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2010-01-01","qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null},{"value":"2017-06-19","qualifier":{"classid":"updated","classname":"updated","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":null}],"description":[],"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"publisher":{"value":"IEEE","dataInfo":null},"embargoenddate":null,"source":[{"value":"Crossref","dataInfo":null}],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"0004","classname":"0004","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":null,"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":null,"url":["http://xplorestaging.ieee.org/ielx5/5428240/5433812/05433999.pdf?arnumber=5433999","http://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":null}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-03-24T14:35:14Z","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null},{"license":null,"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0004","classname":"Conference object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"hostedby":{"key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c","value":"Unknown Repository","dataInfo":null},"url":["https://dx.doi.org/10.1109/isscc.2010.5433999"],"distributionlocation":null,"collectedfrom":{"key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254","value":"Datacite","dataInfo":null},"pid":[{"value":"10.1109/isscc.2010.5433999","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":null,"dateofacceptance":{"value":"2010-01-01","dataInfo":null},"processingchargeamount":null,"processingchargecurrency":null,"refereed":null}],"journal":{"name":"2010 IEEE International Solid-State 
Circuits Conference - (ISSCC)","issnPrinted":null,"issnOnline":null,"issnLinking":null,"ep":null,"iss":null,"sp":null,"vol":null,"edition":null,"conferenceplace":null,"conferencedate":null,"dataInfo":null}} \ No newline at end of file From 83c04e5d28472fe5a199f88b50b2b81e3b15c2e9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 4 Aug 2021 10:37:57 +0200 Subject: [PATCH 70/70] mapping test for dataset records adapted to reflect the delegated pid authority (zenodo) --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 60970f4d6..c431b4dd8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -24,6 +24,7 @@ import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -250,7 +251,24 @@ public class MappersTest { final Relation r1 = (Relation) list.get(1); final Relation r2 = (Relation) list.get(2); + assertEquals(d.getId(), r1.getSource()); + assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r1.getTarget()); + assertEquals(ModelConstants.RESULT_PROJECT, r1.getRelType()); + assertEquals(ModelConstants.OUTCOME, r1.getSubRelType()); + assertEquals(ModelConstants.IS_PRODUCED_BY, r1.getRelClass()); + assertTrue(r1.getValidated()); + assertEquals("2020-01-01", r1.getValidationDate()); + + assertEquals(d.getId(), r2.getTarget()); 
+ assertEquals("40|corda_______::e06332dee33bec6c2ba4c98601053229", r2.getSource()); + assertEquals(ModelConstants.RESULT_PROJECT, r2.getRelType()); + assertEquals(ModelConstants.OUTCOME, r2.getSubRelType()); + assertEquals(ModelConstants.PRODUCES, r2.getRelClass()); + assertTrue(r2.getValidated()); + assertEquals("2020-01-01", r2.getValidationDate()); + assertValidId(d.getId()); + assertEquals("50|doi_________::000374d100a9db469bd42b69dbb40b36", d.getId()); assertEquals(2, d.getOriginalId().size()); assertTrue(d.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:zenodo.org:3234526"))); assertValidId(d.getCollectedfrom().get(0).getKey()); @@ -304,10 +322,12 @@ public class MappersTest { }); assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid()); assertNotNull(d.getInstance().get(0).getPid()); - assertTrue(d.getInstance().get(0).getPid().isEmpty()); + assertFalse(d.getInstance().get(0).getPid().isEmpty()); - assertEquals("doi", d.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); - assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); + assertEquals("doi", d.getInstance().get(0).getPid().get(0).getQualifier().getClassid()); + assertEquals("10.5281/zenodo.3234526", d.getInstance().get(0).getPid().get(0).getValue()); + + assertTrue(d.getInstance().get(0).getAlternateIdentifier().isEmpty()); assertValidId(r1.getSource()); assertValidId(r1.getTarget()); @@ -738,12 +758,11 @@ public class MappersTest { } private void assertValidId(final String id) { - System.out.println(id); + // System.out.println(id); assertEquals(49, id.length()); - assertEquals('|', id.charAt(2)); - assertEquals(':', id.charAt(15)); - assertEquals(':', id.charAt(16)); + assertEquals(IdentifierFactory.ID_PREFIX_SEPARATOR, id.substring(2, 3)); + assertEquals(IdentifierFactory.ID_SEPARATOR, id.substring(15, 17)); } private List vocs() throws IOException {