From c35bf486cc87ce8b6acd9ccbf4238fae776b1c9e Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 4 Aug 2020 12:50:12 +0200 Subject: [PATCH 1/5] added handle among the possible PIDs --- .../test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt | 1 + .../src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 05484c8e5..729296522 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -951,6 +951,7 @@ dnet:countries @=@ ZW @=@ ABW dnet:protocols @=@ oai @=@ OAI-PMH dnet:protocols @=@ oai @=@ OAI_PMH dnet:pid_types @=@ orcid @=@ ORCID12 +dnet:pid_types @=@ handle @=@ hdl dnet:review_levels @=@ 0000 @=@ UNKNOWN dnet:review_levels @=@ 0002 @=@ 80 大阪経大学会「Working Paper」 dnet:review_levels @=@ 0002 @=@ AO diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 59bed7c3a..93cc00eca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1045,6 +1045,7 @@ dnet:pid_types @=@ dnet:pid_types @=@ pmid @=@ pmid dnet:pid_types @=@ dnet:pid_types @=@ urn @=@ urn dnet:pid_types @=@ dnet:pid_types @=@ who @=@ WHO Identifier dnet:pid_types @=@ dnet:pid_types @=@ drks @=@ DRKS Identifier +dnet:pid_types @=@ dnet:pid_types @=@ handle @=@ Handle dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/ACM @=@ An ACM classification term that can be associated to your publications dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/ARXIV @=@ An ARXIV classification term that can be associated to your publications dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/DDC @=@ A Dewey Decimal classification term (DDC) that can be associated to your publications From 09a323d18de0d495fcc4f45a8822197709753180 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 4 Aug 2020 12:50:52 +0200 Subject: [PATCH 2/5] testing a dataset from Nakala --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 23 +++++ .../dnetlib/dhp/oa/graph/raw/odf_nakala.xml | 88 +++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_nakala.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 0a513f633..50f190a61 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -287,6 +287,29 @@ public class MappersTest { System.out.println("***************"); } + @Test + void testNakala() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_nakala.xml")); + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + + final Dataset d = (Dataset) list.get(0); + + assertValidId(d.getId()); + assertValidId(d.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); + assertEquals(1, d.getAuthor().size()); + assertEquals(0, d.getSubject().size()); + assertEquals(1, d.getInstance().size()); + assertEquals(1, d.getPid().size()); + } + private void assertValidId(final String id) { assertEquals(49, id.length()); assertEquals('|', id.charAt(2)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_nakala.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_nakala.xml new file mode 100644 index 000000000..105d0c413 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_nakala.xml @@ -0,0 +1,88 @@ + + + + r3f5b9831893::cca7367159bc3ff90cd2f75bf9dc21c4 + oai:nakala.fr:hdl_11280_847e01df + 2020-08-01T00:16:24.742Z + r3f5b9831893 + oai:nakala.fr:hdl_11280_847e01df + 2020-06-08T01:01:38Z + hdl_11280_2b09fc10 + hdl_11280_c1bc48d0 + hdl_11280_57c8db3a + 2020-08-01T00:31:35.625Z + + + + 277 + http://hdl.handle.net/11280/847e01df + + http://hdl.handle.net/http://hdl.handle.net/11280/847e01df + + http://nakala.fr/data/11280/847e01df + + + DHAAP + + + + CVP_Notice277-1 place du Docteur Antoine Béclère _PHO02.jpg + + + Hôpital Saint-Antoine. Fragment de dalle funéraire trouvée en décembre 1932. Paris (XIIème arr.). Photographie d'Albert Citerne (1876-1970). Plaque de verre, 1932. Département Histoire de l'Architecture et Archéologie de Paris. + Nfa_1146 + Hôpital Saint-Antoine. Fragment de dalle funéraire trouvée en décembre 1932. Paris (XIIème arr.). Photographie d'Albert Citerne (1876-1970). Plaque de verre, 1932. Département Histoire de l'Architecture et Archéologie de Paris. + + Nakala by Huma-Num + + + DHAAP, Pôle Archéologique + + + + 1932 + + StillImage + + + + + http://hdl.handle.net/11280/847e01df + 0025 + + OPEN + und + + + + + + + https%3A%2F%2Fwww.nakala.fr%2Foai_oa%2F11280%2F8892ab4b + oai:nakala.fr:hdl_11280_847e01df + 2020-06-08T01:01:38Z + + + + + false + false + 0.9 + + + + + \ No newline at end of file From b4e4e5f858dd9250da1d5257249b758791295bf6 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 4 Aug 2020 12:52:14 +0200 Subject: [PATCH 3/5] do not duplicate result PIDs --- .../java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 62f8123bb..fa0e5221d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -12,6 +12,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; @@ -366,7 +367,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final List res = new ArrayList<>(); + final Set res = new HashSet(); res .addAll( prepareListStructPropsWithValidQualifier( @@ -382,7 +383,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']", "@alternateIdentifierType", DNET_PID_TYPES, info)); - return res; + return Lists.newArrayList(res); } } From 01db29e208090adf8228829bdcf2391c0f762fc0 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 4 Aug 2020 12:53:48 +0200 Subject: [PATCH 4/5] fixes redmine issue #5846: datacite and its different namespace declarations --- .../dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 5159fa9bb..7a04e0c3d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -24,7 +24,9 @@ public abstract class AbstractMdRecordToOafMapper { private final boolean invisible; protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; + protected static final String DATACITE_SCHEMA_KERNEL_4_SLASH = "http://datacite.org/schema/kernel-4/"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; + protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/"; protected static final Qualifier ORCID_PID_TYPE = qualifier( "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Qualifier MAG_PID_TYPE = qualifier( @@ -55,7 +57,7 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); + .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3).replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3).replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); final KeyValue collectedFrom = getProvenanceDatasource( doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); From a29565ff5723b273055748fb1c21d6e06a283883 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 4 Aug 2020 12:55:27 +0200 Subject: [PATCH 5/5] code formatting --- .../dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java | 6 +++++- .../java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 3 ++- .../test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java | 1 - 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 7a04e0c3d..5b6ae72f1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -57,7 +57,11 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3).replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3).replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); final KeyValue collectedFrom = getProvenanceDatasource( doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index fa0e5221d..6fe7bb971 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -12,11 +12,12 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; +import com.google.common.collect.Lists; + import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Author; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 50f190a61..2c10f8f58 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -276,7 +276,6 @@ public class MappersTest { System.out.println("***************"); } - @Test void testClaimDedup() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_dedup.xml"));