From 2b9d0416eca2b0b1fc5b9a2b7ed19ca0868ddd2c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 19 Oct 2023 16:26:37 +0200 Subject: [PATCH 1/3] [graph raw] URL Validator to accept double slashes --- .../raw/AbstractMdRecordToOafMapper.java | 2 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 14 ++++ .../dnetlib/dhp/oa/graph/raw/idus_sevilla.xml | 65 +++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/idus_sevilla.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index b37e6a755..bca6a2aae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -29,7 +29,7 @@ public abstract class AbstractMdRecordToOafMapper { protected final VocabularyGroup vocs; - protected static final UrlValidator URL_VALIDATOR = UrlValidator.getInstance(); + protected static final UrlValidator URL_VALIDATOR = new UrlValidator(UrlValidator.ALLOW_2_SLASHES); private final boolean invisible; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index b506d3a62..da7a890ee 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -797,6 +797,20 @@ class MappersTest { assertFalse(p_cleaned.getTitle().isEmpty()); } + @Test + void test_instance_url_validation() throws IOException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("idus_sevilla.xml"))); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + final Publication p = (Publication) list.get(0); + + assertNotNull(p.getInstance()); + assertFalse(p.getInstance().isEmpty()); + assertNotNull(p.getInstance().get(0).getUrl()); + assertFalse(p.getInstance().get(0).getUrl().isEmpty()); + assertEquals("https://idus.us.es/handle//11441/118940", p.getInstance().get(0).getUrl().get(0)); + } + @Test void testZenodo() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo.xml"))); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/idus_sevilla.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/idus_sevilla.xml new file mode 100644 index 000000000..1bfa3c7c3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/idus_sevilla.xml @@ -0,0 +1,65 @@ + + +
+ od______3272::6a4d00217a024a46ce9697ce98b13c2a + oai:idus.us.es:11441/118940 + + + + + + 2021-08-20T12:32:32.826Z + 2023-07-04T15:47:55.397Z + od______3272 +
+ + El museo pictorico y escala optica : tomo I : theorica de la pintura en que se describe su origen ... y se aprueban con demonstraciomes mathematicas y filosoficas, sus mas radicales fundamentos + Palomino de Castro y Velasco, Antonio, 1653-1726 + Rovira y Brocandel, Hipólito, 1693-1765 + Palomino de Castro y Velasco, Antonio, 1653-1726 + 2021-08-12T08:59:53Z + 1715 + A 042(a)/063 + application/pdf + https://idus.us.es/handle//11441/118940 + spa + En Madrid : por Lucas Antonio de Bedmar ... : vendese en casa de Don Joseph de Villar y Villanueva, 1715 + info:eu-repo/semantics/book + info:eu-repo/semantics/publishedVersion + 0002 + 1715-01-01 + + opendoar____::3272 + OPEN + + + https://idus.us.es/handle//11441/118940 + + http://creativecommons.org/licenses/by-nc-nd/4.0/ + + + + + http%3A%2F%2Fidus.us.es%2Foai%2Fdriver + oai:idus.us.es:11441/118940 + 2021-08-12T08:59:54Z + http://www.openarchives.org/OAI/2.0/oai_dc/ + + + + false + false + 0.9 + + + + +
\ No newline at end of file From 7fc621cdecaa23f57aa7744f51b3f15c46366dd3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 20 Oct 2023 22:28:12 +0200 Subject: [PATCH 2/3] added defaults to the graph resolution workflow config-default.xml --- .../resolution/oozie_app/config-default.xml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml index 6fb2a1253..86847ed46 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/resolution/oozie_app/config-default.xml @@ -1,4 +1,12 @@ + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + oozie.use.system.libpath true @@ -7,4 +15,28 @@ oozie.action.sharelib.for.spark spark2 + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + \ No newline at end of file From a870aa2b093929e190ae48cbb15cf98d732e2926 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 20 Oct 2023 22:28:39 +0200 Subject: [PATCH 3/3] depending on dhp-schemas:3.17.2 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9cd82a343..f361a266c 100644 --- a/pom.xml +++ b/pom.xml @@ -888,7 +888,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [3.17.1] + [3.17.2] [4.0.3] [6.0.5] [3.1.6]