From 8cdce59e0ebd826efd1b8845011ce3f2bf494e71 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:26 +0200 Subject: [PATCH 1/3] [graph raw] let the mapping exceptions propagate --- .../raw/AbstractMdRecordToOafMapper.java | 82 +++++++------------ .../raw/GenerateEntitiesApplication.java | 3 +- .../raw/GenerateEntitiesApplicationTest.java | 6 +- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 0 4 files changed, 36 insertions(+), 55 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 9aa4e4c31..526f45f6e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -28,10 +28,7 @@ import java.util.Optional; import java.util.Set; import org.apache.commons.lang3.StringUtils; -import org.dom4j.Document; -import org.dom4j.DocumentFactory; -import org.dom4j.DocumentHelper; -import org.dom4j.Node; +import org.dom4j.*; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper { this.forceOriginalId = false; } - public List processMdRecord(final String xml) { - try { - DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); + public List processMdRecord(final String xml) throws DocumentException { - final Document doc = DocumentHelper - .parseText( - xml - .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final Document doc = DocumentHelper + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); - if (collectedFrom == null) { - return Lists.newArrayList(); - } + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) - ? collectedFrom - : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - - if (hostedBy == null) { - return Lists.newArrayList(); - } - - final DataInfo info = prepareDataInfo(doc, invisible); - final long lastUpdateTimestamp = new Date().getTime(); - - final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); - - final String type = getResultType(doc, instances); - - return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); - } catch (final Exception e) { - throw new RuntimeException(e); + if (collectedFrom == null) { + return Lists.newArrayList(); } + + final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) + ? collectedFrom + : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); + + if (hostedBy == null) { + return Lists.newArrayList(); + } + + final DataInfo info = prepareDataInfo(doc, invisible); + final long lastUpdateTimestamp = new Date().getTime(); + + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + + final String type = getResultType(doc, instances); + + return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); } protected String getResultType(final Document doc, final List instances) { @@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper { return vocs.getTermAsQualifier(schemeId, classId); } - protected List prepareListStructProps( - final Node node, - final String xpath, - final String xpathClassId, - final String schemeId, - final DataInfo info) { - final List res = new ArrayList<>(); - - for (final Object o : node.selectNodes(xpath)) { - final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId).trim(); - res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info)); - } - return res; - } - protected List prepareListStructPropsWithValidQualifier( final Node node, final String xpath, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 9027b49d7..6bb18c375 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -17,6 +17,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,7 +159,7 @@ public class GenerateEntitiesApplication { final String id, final String s, final boolean shouldHashId, - final VocabularyGroup vocs) { + final VocabularyGroup vocs) throws DocumentException { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 1e974dd69..67490a470 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.List; import org.apache.commons.io.IOUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest { } @Test - void testMergeResult() throws IOException { + void testMergeResult() throws IOException, DocumentException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); @@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest { assertEquals(resultType, merge.getResulttype().getClassid()); } - protected Result getResult(String xmlFileName, Class clazz) throws IOException { + protected Result getResult(String xmlFileName, Class clazz) + throws IOException, DocumentException { final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); return new OdfToOafMapper(vocs, false, true) .processMdRecord(xml) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml new file mode 100644 index 000000000..e69de29bb From 86d940044c067f71ac0bc451885ecd329f117cc6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:56 +0200 Subject: [PATCH 2/3] added test to verify bad records from FWF-E-Book-Library --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 43 ++++++++----- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 61 +++++++++++++++++++ 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 000dbfe25..5228d1d02 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -13,6 +13,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -48,7 +49,7 @@ class MappersTest { } @Test - void testPublication() throws IOException { + void testPublication() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -145,7 +146,7 @@ class MappersTest { } @Test - void testPublication_PubMed() throws IOException { + void testPublication_PubMed() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); @@ -217,7 +218,7 @@ class MappersTest { } @Test - void testPublicationInvisible() throws IOException { + void testPublicationInvisible() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -233,7 +234,17 @@ class MappersTest { } @Test - void testDataset() throws IOException { + void testOdfFwfEBookLibrary() throws IOException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_fwfebooklibrary.xml"))); + + assertThrows( + IllegalArgumentException.class, + () -> new OdfToOafMapper(vocs, false, true).processMdRecord(xml)); + } + + @Test + void testDataset() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -346,7 +357,7 @@ class MappersTest { } @Test - void testOdfBielefeld() throws IOException { + void testOdfBielefeld() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); @@ -394,7 +405,7 @@ class MappersTest { } @Test - void testOpentrial() throws IOException { + void testOpentrial() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); @@ -511,7 +522,7 @@ class MappersTest { } @Test - void testSoftware() throws IOException { + void testSoftware() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -530,7 +541,7 @@ class MappersTest { } @Test - void testClaimDedup() throws IOException { + void testClaimDedup() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -544,7 +555,7 @@ class MappersTest { } @Test - void testNakala() throws IOException { + void testNakala() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -572,7 +583,7 @@ class MappersTest { } @Test - void testEnermaps() throws IOException { + void testEnermaps() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -597,7 +608,7 @@ class MappersTest { } @Test - void testClaimFromCrossref() throws IOException { + void testClaimFromCrossref() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -614,7 +625,7 @@ class MappersTest { } @Test - void testODFRecord() throws IOException { + void testODFRecord() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -628,7 +639,7 @@ class MappersTest { } @Test - void testTextGrid() throws IOException { + void testTextGrid() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -662,7 +673,7 @@ class MappersTest { } @Test - void testBologna() throws IOException { + void testBologna() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -679,7 +690,7 @@ class MappersTest { } @Test - void testJairo() throws IOException { + void testJairo() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -703,7 +714,7 @@ class MappersTest { } @Test - void testOdfFromHdfs() throws IOException { + void testOdfFromHdfs() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml index e69de29bb..cead018d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml @@ -0,0 +1,61 @@ + + +
+ oai:e-book.fwf.ac.at:o:116 + 2020-04-08T13:25:21.742Z + text + 2021-08-12T00:16:55.365Z +
+ + + + 978-3-205-77704-5 + https://e-book.fwf.ac.at/o:116 + + + Although small and not particularly peoples both Chechens and Palestinians became famous for suicide bomber attacks in recent years. This can - partly - be explained by the unrecognised collective traumas of the past. Both Chechens and Palestinians experienced collective traumas in the 1940ties. The entire Chechen population wad deported by Josef Stalin to Kasakhstan, Kirgysia and Sibiria in February 1944 under the pretext of collaboration with the Third Reich. Those who survived were allowed to return in 1957 to Chechenya. Half of the Palestinian Arab population was expelled from Palestine in 1947/48, when fighting erupted between Jews and Arabs. The refugees were never allowed to return. The memory of the deportation/expulsion was kept alive. The founding traumas contributed to the development of Chechen and Palestinian nationalism. Chechens and Palestinians till today suffer from their collective traumas, which stayed unrecognised and therefore create psychological and political problems for the following generations - and for their adverseries. The phenomenon of the "closed circle of violence" created a phobic collective behaviour, which led for example Chechens to the illusionary declaration of independence in 1991. It also led to the individual overreaction of young Chechens or Palestinians, who became living bombs. The collective Trauma, if untreated, poses a threat to any peaceful political solution. + 1. Einleitung Ausgehend von der Fragestellung, warum gerade bei Tschetschenen und Palästinensern der Selbstmordterrorismus in den letzten Jahren so populär geworden ist, analysiert die Autorin die Geschichte dieser beiden Völker. Einer der Gründe ist bisher wenig beachtet worden. Der Einfluss eines kollektiven Traumas, das als solches nicht anerkannt, behandelt und auch nicht einer politischen Lösung zugeführt wurde. 2. Geschichte der Palästinenser und Tschetschenen Im Zuge der Errichtung Israels im Unabhängigkeitskrieg 1948 verlor die Hälfte des palästinensischen Volkes - 750.000 Menschen - ihre Heimat. Unter der Führung von Jassir Arafat kämpften sie in den Jahrzehnten danach - mit Gewalt und am Verhandlungstisch - um einen eigenen Staat. Das Recht auf Rückkehr spielte dabei immer eine besondere Rolle. Die "Nakbah", die als Katastrophe empfundene Vertreibung 1948, wurde dabei Bezugs- und Angelpunkt mehrer Generationen von Flüchtlingen. Die Weigerung Israels, die Mitverantwortung für die Vertreibung der Palästinenser zu übernehmen und das kollektive Trauma der Palästinenser anzuerkennen - aus Angst vor einer Infragestellung des eigenen Staates - ist einer der Gründe, warum der Nahostkonflikt bisher nicht gelöst werden konnte. Auch die Tschetschenen durften jahrzehntelang über die Deportation ihres Volkes nicht einmal sprechen. Hatte Josef Stallin sie erst unter dem Vorwand der Kollaboration mit Nazi-Deutschland deportiert, waren sie zwar nach seinem Tod in die Heimat zurückgekehrt, lebten dort aber jahrzehntelang weiterhin als "unzuverlässiges Volk". Das kollektive Trauma der Deportation konnte nur mündlich überliefert werden. Mit dem Zusammenbruch der Sowjetunion brach der ungelöste Konflikt zwischen Tschetschenien und Russland sofort auf, das Land ging in blutigen Kriegen unter. 3. Zusammenfassung Die kollektive Erinnerung ist in den vergangenen Jahrzehnten zu einem zentralen Forschungsthema geworden. Der vorsichtige Einsatz von in der Individualpsychologie gewonnenen Erkenntnissen in der Behandlung von kollektiven Traumata, um zu einer politischen Lösung zu kommen, ist eine Chance. Das Studium historischer Fakten in Kombination mit den Erkenntnissen der Psychologie und Psychiatrie bietet die Basis für eine politische Lösung. Die vorliegende Arbeit zeigt, dass kollektive Traumata, die nicht behandelt werden, immer wieder, auch Generationen später, zu kollektiven Reaktionen führen können, die auf den ersten Blick irrational erscheinen. Die vielleicht radikalste Form des politischen Widerstandes, das Selbstmordattentat, ist dafür ein Beispiel. + deu/ger + Böhlau + application/pdf + + Trauma und Terror: Zum palästinensischen und tschetschenischen Nationalismus + + + + Szyszkowitz, Tessa + Tessa + Szyszkowitz + + + + Trauma, Terror, Palestinians, Suicide attacks, Recognition, Chechens + ÖFOS 2002, Contemporary history + BIC Standard Subject Categories, Postwar 20th century history, from c 1945 to c 2000 (HBLW3) + ÖFOS 2002, Zeitgeschichte + + + 14.02 MB + + + 2007 + + + literature + 2007-01-01 + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + deu/ger + fwf_________::D 3929 + + + https://fedora.e-book.fwf.ac.at/fedora/get/o:116/bdef:Content/download + +
\ No newline at end of file From dc8b05b39ef225c9898acc458b916331f239c25b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:18:25 +0200 Subject: [PATCH 3/3] Hosted By Map - changed the association with the datasource id for the hostedby element: there is no more the need to compute it. With the new HBM it is already the id in the graph --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 909e0f077..efd5d2497 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -231,9 +231,7 @@ object DoiBoostMappingUtil { var hb = new KeyValue if (item != null) { hb.setValue(item.officialname) - hb.setKey(generateDSId(item.id)) - //TODO replace with the one above as soon as the new HBM will be used - //hb.setKey(item.id) + hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold)