From 99cfb027facae6727a7dc8f6c5f3e353dce324c4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 23 Apr 2021 17:09:36 +0200 Subject: [PATCH] making ODF record parsing namespace unaware (#6629) --- .../dhp/oa/graph/raw/OdfToOafMapper.java | 81 +++++++++++-------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 683b376309..c2c2cb645b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -40,19 +40,20 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareTitles(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//datacite:title", MAIN_TITLE_QUALIFIER, info); + return prepareListStructProps( + doc, "//*[local-name()='titles']/*[local-name()='title']", MAIN_TITLE_QUALIFIER, info); } @Override protected List prepareAuthors(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); int pos = 1; - for (final Object o : doc.selectNodes("//datacite:creator")) { + for (final Object o : doc.selectNodes("//*[local-name()='creator']")) { final Node n = (Node) o; final Author author = new Author(); - final String fullname = n.valueOf("./datacite:creatorName"); - final String name = n.valueOf("./datacite:givenName"); - final String surname = n.valueOf("./datacite:familyName"); + final String fullname = n.valueOf("./*[local-name()='creatorName']"); + final String name = n.valueOf("./*[local-name()='givenName']"); + final String surname = n.valueOf("./*[local-name()='familyName']"); if (StringUtils.isNotBlank(fullname) || StringUtils.isNotBlank(name) || StringUtils.isNotBlank(surname)) { author.setFullname(fullname); @@ -74,7 
+75,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { author.setFullname(String.format("%s, %s", author.getSurname(), author.getName())); } - author.setAffiliation(prepareListFields(n, "./datacite:affiliation", info)); + author.setAffiliation(prepareListFields(n, "./*[local-name()='affiliation']", info)); author.setPid(preparePids(n, info)); author.setRank(pos++); res.add(author); @@ -85,7 +86,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { private List preparePids(final Node n, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : n.selectNodes("./datacite:nameIdentifier")) { + for (final Object o : n.selectNodes("./*[local-name()='nameIdentifier']")) { final String id = ((Node) o).getText(); final String type = ((Node) o) @@ -128,23 +129,25 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info)); final Set url = new HashSet<>(); - for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) { + for (final Object o : doc + .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) { url.add(((Node) o).getText().trim()); } for (final Object o : doc - .selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='landingPage']")) { + .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) { url.add(((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) { + for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) { url.add(((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='landingPage']")) { + for (final Object o : doc.selectNodes("//*[local-name()='identifier' and 
./@identifierType='landingPage']")) { url.add(((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) { + for (final Object o : doc + .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) { url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } - for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) { + for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='DOI']")) { url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim()); } if (!url.isEmpty()) { @@ -162,7 +165,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareRelevantDates(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("//datacite:date")) { + for (final Object o : doc.selectNodes("//*[local-name()='date']")) { final String dateType = ((Node) o).valueOf("@dateType"); if (StringUtils.isBlank(dateType) || (!dateType.equalsIgnoreCase("Accepted") @@ -192,32 +195,32 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List> prepareContributors(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:contributorName", info); + return prepareListFields(doc, "//*[local-name()='contributorName']", info); } @Override protected List> prepareFormats(final Document doc, final DataInfo info) { - return prepareListFields(doc, "//datacite:format", info); + return prepareListFields(doc, "//*[local-name()='format']", info); } @Override protected Field preparePublisher(final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:publisher", info); + return prepareField(doc, "//*[local-name()='publisher']", info); } @Override protected List> prepareDescriptions(final Document doc, final DataInfo info) { - return
prepareListFields(doc, "//datacite:description[@descriptionType='Abstract']", info); + return prepareListFields(doc, "//*[local-name()='description' and ./@descriptionType='Abstract']", info); } @Override protected List prepareSubjects(final Document doc, final DataInfo info) { - return prepareListStructProps(doc, "//datacite:subject", info); + return prepareListStructProps(doc, "//*[local-name()='subject']", info); } @Override protected Qualifier prepareLanguages(final Document doc) { - return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES); + return prepareQualifier(doc, "//*[local-name()='language']", DNET_LANGUAGES); } @Override @@ -232,7 +235,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Document doc, final DataInfo info) { return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactGroup']/datacite:contributorName", info); + doc, + "//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']", + info); } @Override @@ -240,12 +245,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Document doc, final DataInfo info) { return prepareListFields( - doc, "//datacite:contributor[@contributorType='ContactPerson']/datacite:contributorName", info); + doc, + "//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']", + info); } @Override protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) { - return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages"); + return prepareQualifier(doc, "//*[local-name()='format']", "dnet:programming_languages"); } @Override @@ -267,7 +274,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Document doc, final DataInfo info) { return prepareListFields( - doc, "//datacite:relatedIdentifier[@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", info); + doc, +
"//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", + info); } // DATASETS @@ -276,11 +285,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected List prepareDatasetGeoLocations(final Document doc, final DataInfo info) { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("//datacite:geoLocation")) { + for (final Object o : doc.selectNodes("//*[local-name()='geoLocation']")) { final GeoLocation loc = new GeoLocation(); - loc.setBox(((Node) o).valueOf("./datacite:geoLocationBox")); - loc.setPlace(((Node) o).valueOf("./datacite:geoLocationPlace")); - loc.setPoint(((Node) o).valueOf("./datacite:geoLocationPoint")); + loc.setBox(((Node) o).valueOf("./*[local-name()='geoLocationBox']")); + loc.setPlace(((Node) o).valueOf("./*[local-name()='geoLocationPlace']")); + loc.setPoint(((Node) o).valueOf("./*[local-name()='geoLocationPoint']")); res.add(loc); } return res; @@ -297,17 +306,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected Field prepareDatasetLastMetadataUpdate( final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:date[@dateType='Updated']", info); + return prepareField(doc, "//*[local-name()='date' and ./@dateType='Updated']", info); } @Override protected Field prepareDatasetVersion(final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:version", info); + return prepareField(doc, "//*[local-name()='version']", info); } @Override protected Field prepareDatasetSize(final Document doc, final DataInfo info) { - return prepareField(doc, "//datacite:size", info); + return prepareField(doc, "//*[local-name()='size']", info); } @Override @@ -317,7 +326,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected Field prepareDatasetStorageDate(final Document doc, final DataInfo info) { - return prepareField(doc,
"//datacite:date[@dateType='Issued']", info); + return prepareField(doc, "//*[local-name()='date' and ./@dateType='Issued']", info); } @Override @@ -331,7 +340,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final List res = new ArrayList<>(); - for (final Object o : doc.selectNodes("//datacite:relatedIdentifier[@relatedIdentifierType='OPENAIRE']")) { + for (final Object o : doc + .selectNodes("//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='OPENAIRE']")) { final String originalId = ((Node) o).getText(); @@ -385,13 +395,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { res .addAll( prepareListStructPropsWithValidQualifier( - doc, "//datacite:identifier[@identifierType != 'URL' and @identifierType != 'landingPage']", + doc, + "//*[local-name()='identifier' and ./@identifierType != 'URL' and ./@identifierType != 'landingPage']", "@identifierType", DNET_PID_TYPES, info)); res .addAll( prepareListStructPropsWithValidQualifier( doc, - "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']", + "//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType != 'URL' and ./@alternateIdentifierType != 'landingPage']", "@alternateIdentifierType", DNET_PID_TYPES, info)); return Lists.newArrayList(res); }