From 628ca54dd32e7957caa9b5150cb07e5b3dfd28f3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 17 Nov 2020 12:26:16 +0100 Subject: [PATCH 1/7] disable old maven repository URLs --- pom.xml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pom.xml b/pom.xml index e9b90a7652..d0757f145a 100644 --- a/pom.xml +++ b/pom.xml @@ -70,6 +70,26 @@ false + + dnet45-releases-old + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + + false + + + false + + + + dnet45-snapshots-old + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots + + false + + + false + + From cfc01f136e2dd10edd9ca006ded209de01c736ec Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 17 Nov 2020 12:27:06 +0100 Subject: [PATCH 2/7] PID filtering based on a blacklist --- .../dhp/oa/graph/clean/CleaningFunctions.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java index 4bcce80376..e9f783670d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctions.java @@ -1,8 +1,10 @@ package eu.dnetlib.dhp.oa.graph.clean; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.Objects; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -17,7 +19,13 @@ import eu.dnetlib.dhp.schema.oaf.*; public class CleaningFunctions { public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/"; - public static final String NONE = "none"; + + public static final Set PID_BLACKLIST = new HashSet<>(); + + static { + PID_BLACKLIST.add("none"); + PID_BLACKLIST.add("na"); + } public static T fixVocabularyNames(T value) { if (value instanceof Datasource) { @@ -114,7 +122,7 @@ public class CleaningFunctions { .stream() .filter(Objects::nonNull) .filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue()))) - .filter(sp -> !NONE.equalsIgnoreCase(sp.getValue().trim())) + .filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase())) .filter(sp -> Objects.nonNull(sp.getQualifier())) .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid())) .map(sp -> { From 7e0a76a8ac46640945a5285123d7dc392ab04dd6 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 17 Nov 2020 18:39:25 +0100 Subject: [PATCH 3/7] test fr TextGrid --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 16 +++ .../eu/dnetlib/dhp/oa/graph/raw/textgrid.xml | 113 ++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index af9fadb5ae..663eb10e28 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -340,6 +340,22 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); } + @Test + void testTextGrid() throws IOException { + final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml")); + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Dataset p = (Dataset) list.get(0); + assertValidId(p.getId()); + assertValidId(p.getCollectedfrom().get(0).getKey()); + assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + System.out.println(p.getTitle().get(0).getValue()); + } + private void assertValidId(final String id) { assertEquals(49, id.length()); assertEquals('|', id.charAt(2)); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml new file mode 100644 index 0000000000..d6970ab3ee --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/textgrid.xml @@ -0,0 +1,113 @@ + + + + r3f52792889d::000051aa1f61d77d2c0b340091f8024e + textgrid:q9cv.0 + 2020-11-17T09:34:11.128+01:00 + r3f52792889d + textgrid:q9cv.0 + 2012-01-21T13:35:20Z + 2020-11-17T09:46:21.551+01:00 + + + + hdl:11858/00-1734-0000-0003-7664-F + + + Hoffmann von Fallersleben, August Heinrich + 118552589 + + + + Mailied + August Heinrich Hoffmann von Fallersleben: Unpolitische Lieder von Hoffmann von Fallersleben, 1. + 2. Theil, 1. Theil, Hamburg: Hoffmann und Campe, 1841. + + TextGrid + 2012 + + + tvitt@textgrid.de + + + Digitale Bibliothek + TGPR-372fe6dc-57f2-6cd4-01b5-2c4bbefcfd3c + + + + 2012-01-21T13:35:20Z + 2012-01-21T13:35:20Z + 2012-01-21T13:35:20Z + + + + textgrid:q9cv.0 + http://hdl.handle.net/hdl:11858/00-1734-0000-0003-7664-F + + + hdl:11858/00-1734-0000-0003-7666-B + + + 527 Bytes + + + text/tg.edition+tg.aggregation+xml + + 0 + + Der annotierte Datenbestand der Digitalen Bibliothek inklusive + Metadaten sowie davon einzeln zugängliche Teile sind eine Abwandlung + des Datenbestandes von www.editura.de durch TextGrid und werden + unter der Lizenz Creative Commons Namensnennung 3.0 Deutschland + Lizenz (by-Nennung TextGrid) veröffentlicht. Die Lizenz bezieht sich + nicht auf die der Annotation zu Grunde liegenden allgemeinfreien + Texte (Siehe auch Punkt 2 der Lizenzbestimmungen). + + + + + + + + Hamburg + + + + hdl:11858/00-1734-0000-0003-7664-F + 0021 + 0002 + 2012-01-01 + OPEN + http://creativecommons.org/licenses/by/3.0/de/legalcode + und + + + + + + + https%3A%2F%2Fdev.textgridlab.org%2F1.0%2Ftgoaipmh%2Foai + textgrid:q9cv.0 + 2012-01-21T13:35:20Z + http://schema.datacite.org/oai/oai-1.0/ + + + + false + false + 0.9 + + + + + \ No newline at end of file From 8f87020a5027920578f6bf0aa134ab0eba7cc8fd Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 17 Nov 2020 18:42:09 +0100 Subject: [PATCH 4/7] #56: map relevantDates from aggregated ODF records --- .../eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 7124684d5d..842638f40e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -162,16 +162,23 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//datacite:date")) { final String dateType = ((Node) o).valueOf("@dateType"); if (StringUtils.isBlank(dateType) - && !dateType.equalsIgnoreCase("Accepted") + || ( !dateType.equalsIgnoreCase("Accepted") && !dateType.equalsIgnoreCase("Issued") && !dateType.equalsIgnoreCase("Updated") - && !dateType.equalsIgnoreCase("Available")) { + && !dateType.equalsIgnoreCase("Available"))) { res .add( structuredProperty( ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); } + else{ + res + .add( + structuredProperty( + ((Node) o).getText(), dateType, dateType, DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, + info)); + } } return res; } From 01a6e039892a0a884a9f5d14748b86b774da212c Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 17 Nov 2020 23:26:47 +0200 Subject: [PATCH 5/7] starting from first step... --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 20eec37dc3..d6cc14e255 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -46,7 +46,7 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 33da2e3d6c94796ca875aa30fc962c3c6ea54857 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 18 Nov 2020 09:26:20 +0100 Subject: [PATCH 6/7] xpaths for dateOfCollection and dateOfTransformation --- .../raw/AbstractMdRecordToOafMapper.java | 61 ++++++++++++++++--- .../dhp/oa/graph/raw/OdfToOafMapper.java | 19 +++--- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 4 +- .../dnetlib/dhp/oa/graph/raw/oaf_record.xml | 5 +- 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index da4b5e324a..95dd1e1cad 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -1,10 +1,36 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.NOT_AVAILABLE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.journal; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.keyValue; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.oaiIProvenance; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; @@ -15,7 +41,24 @@ import org.dom4j.Node; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.common.LicenseComparator; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.GeoLocation; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OAIProvenance; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public abstract class AbstractMdRecordToOafMapper { @@ -92,10 +135,10 @@ public abstract class AbstractMdRecordToOafMapper { } protected String getResultType(final Document doc, final List instances) { - String type = doc.valueOf("//dr:CobjCategory/@type"); + final String type = doc.valueOf("//dr:CobjCategory/@type"); if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { - String instanceType = instances + final String instanceType = instances .stream() .map(i -> i.getInstancetype().getClassid()) .findFirst() @@ -256,13 +299,11 @@ public abstract class AbstractMdRecordToOafMapper { r.setDataInfo(info); r.setLastupdatetimestamp(lastUpdateTimestamp); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); - r.setOriginalId(Arrays.asList(findOriginalId(doc))); - r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setPid(prepareResultPids(doc, info)); - r.setDateofcollection(doc.valueOf("//dr:dateOfCollection")); - r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation")); + r.setDateofcollection(doc.valueOf("//dr:dateOfCollection|//dri:dateOfCollection")); + r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation|//dri:dateOfTransformation")); r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES r.setOaiprovenance(prepareOAIprovenance(doc)); r.setAuthor(prepareAuthors(doc, info)); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 842638f40e..b432a51df6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -162,22 +162,21 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { for (final Object o : doc.selectNodes("//datacite:date")) { final String dateType = ((Node) o).valueOf("@dateType"); if (StringUtils.isBlank(dateType) - || ( !dateType.equalsIgnoreCase("Accepted") - && !dateType.equalsIgnoreCase("Issued") - && !dateType.equalsIgnoreCase("Updated") - && !dateType.equalsIgnoreCase("Available"))) { + || (!dateType.equalsIgnoreCase("Accepted") + && !dateType.equalsIgnoreCase("Issued") + && !dateType.equalsIgnoreCase("Updated") + && !dateType.equalsIgnoreCase("Available"))) { res .add( structuredProperty( ((Node) o).getText(), "UNKNOWN", "UNKNOWN", DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, info)); - } - else{ + } else { res - .add( - structuredProperty( - ((Node) o).getText(), dateType, dateType, DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, - info)); + .add( + structuredProperty( + ((Node) o).getText(), dateType, dateType, DNET_DATA_CITE_DATE, DNET_DATA_CITE_DATE, + info)); } } return res; diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 663eb10e28..2d4cccdfbb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -78,6 +78,8 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertFalse(p.getDataInfo().getInvisible()); assertTrue(p.getSource().size() == 1); + assertTrue(StringUtils.isNotBlank(p.getDateofcollection())); + assertTrue(StringUtils.isNotBlank(p.getDateoftransformation())); assertTrue(p.getAuthor().size() > 0); final Optional author = p @@ -329,7 +331,7 @@ public class MappersTest { @Test void testODFRecord() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); - List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println("***************"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml index 3b2658bcf3..2c6c98ebb3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_record.xml @@ -7,13 +7,12 @@
pensoft_____::00ea4a1cd53806a97d62ea6bf268f2a2 10.3897/oneeco.2.e13718 - - 2020-03-23T00:20:51.392Z - 2020-03-23T00:26:59.078Z + 2020-03-23T00:20:51.392Z + 2020-03-23T00:26:59.078Z pensoft_____
From be7b310cef8d729b925397f89b49827f9bd44068 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 18 Nov 2020 10:01:20 +0100 Subject: [PATCH 7/7] rel semantcis ignore case --- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 842638f40e..16fde45179 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -348,7 +348,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { getRelation( otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, lastUpdateTimestamp)); - } else if (type.equals("IsPartOf")) { + } else if (type.equalsIgnoreCase("IsPartOf")) { res .add(