diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 85a5e9889..8b16b2bce 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -24,7 +24,9 @@ public abstract class AbstractMdRecordToOafMapper { private final boolean invisible; protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; + protected static final String DATACITE_SCHEMA_KERNEL_4_SLASH = "http://datacite.org/schema/kernel-4/"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; + protected static final String DATACITE_SCHEMA_KERNEL_3_SLASH = "http://datacite.org/schema/kernel-3/"; protected static final Qualifier ORCID_PID_TYPE = qualifier( "ORCID", "Open Researcher and Contributor ID", DNET_PID_TYPES, DNET_PID_TYPES); protected static final Qualifier MAG_PID_TYPE = qualifier( @@ -55,7 +57,11 @@ public abstract class AbstractMdRecordToOafMapper { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); final Document doc = DocumentHelper - .parseText(xml.replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3)); + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); final KeyValue collectedFrom = getProvenanceDatasource( doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 56ae56d37..e934aceba 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -14,6 +14,8 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Node; +import com.google.common.collect.Lists; + import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Author; @@ -364,7 +366,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final List res = new ArrayList<>(); + final Set res = new HashSet(); res .addAll( prepareListStructPropsWithValidQualifier( @@ -380,7 +382,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']", "@alternateIdentifierType", DNET_PID_TYPES, info)); - return res; + return Lists.newArrayList(res); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index e51297bab..0519e9259 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -286,6 +286,29 @@ public class MappersTest { System.out.println("***************"); } + @Test + void testNakala() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_nakala.xml")); + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + assertEquals(1, list.size()); + assertTrue(list.get(0) instanceof Dataset); + + final Dataset d = (Dataset) list.get(0); + + assertValidId(d.getId()); + assertValidId(d.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); + assertEquals(1, d.getAuthor().size()); + assertEquals(0, d.getSubject().size()); + assertEquals(1, d.getInstance().size()); + assertEquals(1, d.getPid().size()); + } + private void assertValidId(final String id) { assertEquals(49, id.length()); assertEquals('|', id.charAt(2)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 05484c8e5..729296522 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -951,6 +951,7 @@ dnet:countries @=@ ZW @=@ ABW dnet:protocols @=@ oai @=@ OAI-PMH dnet:protocols @=@ oai @=@ OAI_PMH dnet:pid_types @=@ orcid @=@ ORCID12 +dnet:pid_types @=@ handle @=@ hdl dnet:review_levels @=@ 0000 @=@ UNKNOWN dnet:review_levels @=@ 0002 @=@ 80 大阪経大学会「Working Paper」 dnet:review_levels @=@ 0002 @=@ AO diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 59bed7c3a..93cc00eca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1045,6 +1045,7 @@ dnet:pid_types @=@ dnet:pid_types @=@ pmid @=@ pmid dnet:pid_types @=@ dnet:pid_types @=@ urn @=@ urn dnet:pid_types @=@ dnet:pid_types @=@ who @=@ WHO Identifier dnet:pid_types @=@ dnet:pid_types @=@ drks @=@ DRKS Identifier +dnet:pid_types @=@ dnet:pid_types @=@ handle @=@ Handle dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/ACM @=@ An ACM classification term that can be associated to your publications dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/ARXIV @=@ An ARXIV classification term that can be associated to your publications dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/DDC @=@ A Dewey Decimal classification term (DDC) that can be associated to your publications diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_nakala.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_nakala.xml new file mode 100644 index 000000000..105d0c413 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_nakala.xml @@ -0,0 +1,88 @@ + + + + r3f5b9831893::cca7367159bc3ff90cd2f75bf9dc21c4 + oai:nakala.fr:hdl_11280_847e01df + 2020-08-01T00:16:24.742Z + r3f5b9831893 + oai:nakala.fr:hdl_11280_847e01df + 2020-06-08T01:01:38Z + hdl_11280_2b09fc10 + hdl_11280_c1bc48d0 + hdl_11280_57c8db3a + 2020-08-01T00:31:35.625Z + + + + 277 + http://hdl.handle.net/11280/847e01df + + http://hdl.handle.net/http://hdl.handle.net/11280/847e01df + + http://nakala.fr/data/11280/847e01df + + + DHAAP + + + + CVP_Notice277-1 place du Docteur Antoine Béclère _PHO02.jpg + + + Hôpital Saint-Antoine. Fragment de dalle funéraire trouvée en décembre 1932. Paris (XIIème arr.). Photographie d'Albert Citerne (1876-1970). Plaque de verre, 1932. Département Histoire de l'Architecture et Archéologie de Paris. + Nfa_1146 + Hôpital Saint-Antoine. Fragment de dalle funéraire trouvée en décembre 1932. Paris (XIIème arr.). Photographie d'Albert Citerne (1876-1970). Plaque de verre, 1932. Département Histoire de l'Architecture et Archéologie de Paris. + + Nakala by Huma-Num + + + DHAAP, Pôle Archéologique + + + + 1932 + + StillImage + + + + + http://hdl.handle.net/11280/847e01df + 0025 + + OPEN + und + + + + + + + https%3A%2F%2Fwww.nakala.fr%2Foai_oa%2F11280%2F8892ab4b + oai:nakala.fr:hdl_11280_847e01df + 2020-06-08T01:01:38Z + + + + + false + false + 0.9 + + + + + \ No newline at end of file