From 5e17247bb658de3d7b5fb0db39d3337cbeaa6fd1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 13 Dec 2021 11:48:40 +0100 Subject: [PATCH 1/2] avoid NPEs merging XMLInstance(s) --- .../oa/provision/utils/XmlRecordFactory.java | 89 +++++++++---------- 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 66827af67..8bf2a4185 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,50 +1,6 @@ package eu.dnetlib.dhp.oa.provision.utils; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; -import static org.apache.commons.lang3.StringUtils.isNotBlank; -import static org.apache.commons.lang3.StringUtils.substringBefore; - -import java.io.IOException; -import java.io.Serializable; -import java.io.StringReader; -import java.io.StringWriter; -import java.net.URL; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; -import java.util.stream.Collector; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.solr.common.util.URLUtil; -import org.apache.spark.util.LongAccumulator; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.OutputFormat; -import org.dom4j.io.SAXReader; -import org.dom4j.io.XMLWriter; - import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.base.Splitter; @@ -53,14 +9,42 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; - import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.model.XmlInstance; import eu.dnetlib.dhp.schema.common.*; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.spark.util.LongAccumulator; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.Element; +import org.dom4j.Node; +import org.dom4j.io.OutputFormat; +import org.dom4j.io.SAXReader; +import org.dom4j.io.XMLWriter; + +import javax.xml.transform.*; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import java.io.IOException; +import java.io.Serializable; +import java.io.StringReader; +import java.io.StringWriter; +import java.net.URL; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; +import static org.apache.commons.lang3.StringUtils.isNotBlank; +import static org.apache.commons.lang3.StringUtils.substringBefore; public class XmlRecordFactory implements Serializable { @@ -1274,6 +1258,7 @@ public class XmlRecordFactory implements Serializable { private Stream groupInstancesByUrl(List instance) { return instance .stream() + .filter(i -> Objects.nonNull(i.getUrl())) .map(i -> { i .setUrl( @@ -1329,18 +1314,24 @@ public class XmlRecordFactory implements Serializable { instance.getCollectedfrom().add(i.getCollectedfrom()); instance.getHostedby().add(i.getHostedby()); instance.getInstancetype().add(i.getInstancetype()); - instance.getLicense().add(i.getLicense().getValue()); - instance.getDistributionlocation().add(i.getDistributionlocation()); instance.getPid().addAll(i.getPid()); instance.getAlternateIdentifier().addAll(i.getAlternateIdentifier()); - instance.getDateofacceptance().add(i.getDateofacceptance().getValue()); + instance.getRefereed().add(i.getRefereed()); instance .setProcessingchargeamount( Optional.ofNullable(i.getProcessingchargeamount()).map(apc -> apc.getValue()).orElse(null)); instance .setProcessingchargecurrency( Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null)); - instance.getRefereed().add(i.getRefereed()); + Optional + .ofNullable(i.getDateofacceptance()) + .ifPresent(d -> instance.getDateofacceptance().add(d.getValue())); + Optional + .ofNullable(i.getLicense()) + .ifPresent(license -> instance.getLicense().add(license.getValue())); + Optional + .ofNullable(i.getDistributionlocation()) + .ifPresent(dl -> instance.getDistributionlocation().add(dl)); }); if (instance.getHostedby().size() > 1 From 98eb292c5971e90cbd67d7e833f720879d9dff5e Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 13 Dec 2021 13:27:20 +0100 Subject: [PATCH 2/2] avoid NPEs merging XMLInstance(s) --- .../oa/provision/utils/XmlRecordFactory.java | 73 ++++++++++--------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 8bf2a4185..af7c7b7c3 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,22 +1,24 @@ package eu.dnetlib.dhp.oa.provision.utils; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Joiner; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import com.mycila.xmltool.XMLDoc; -import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.model.XmlInstance; -import eu.dnetlib.dhp.schema.common.*; -import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; +import static org.apache.commons.lang3.StringUtils.isNotBlank; +import static org.apache.commons.lang3.StringUtils.substringBefore; + +import java.io.IOException; +import java.io.Serializable; +import java.io.StringReader; +import java.io.StringWriter; +import java.net.URL; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.xml.transform.*; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -29,22 +31,23 @@ import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; -import javax.xml.transform.*; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import java.io.IOException; -import java.io.Serializable; -import java.io.StringReader; -import java.io.StringWriter; -import java.net.URL; -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.mycila.xmltool.XMLDoc; +import com.mycila.xmltool.XMLTag; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; -import static org.apache.commons.lang3.StringUtils.isNotBlank; -import static org.apache.commons.lang3.StringUtils.substringBefore; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; +import eu.dnetlib.dhp.oa.provision.model.XmlInstance; +import eu.dnetlib.dhp.schema.common.*; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { @@ -1314,8 +1317,6 @@ public class XmlRecordFactory implements Serializable { instance.getCollectedfrom().add(i.getCollectedfrom()); instance.getHostedby().add(i.getHostedby()); instance.getInstancetype().add(i.getInstancetype()); - instance.getPid().addAll(i.getPid()); - instance.getAlternateIdentifier().addAll(i.getAlternateIdentifier()); instance.getRefereed().add(i.getRefereed()); instance .setProcessingchargeamount( @@ -1323,6 +1324,12 @@ public class XmlRecordFactory implements Serializable { instance .setProcessingchargecurrency( Optional.ofNullable(i.getProcessingchargecurrency()).map(c -> c.getValue()).orElse(null)); + Optional + .ofNullable(i.getPid()) + .ifPresent(pid -> instance.getPid().addAll(pid)); + Optional + .ofNullable(i.getAlternateIdentifier()) + .ifPresent(altId -> instance.getAlternateIdentifier().addAll(altId)); Optional .ofNullable(i.getDateofacceptance()) .ifPresent(d -> instance.getDateofacceptance().add(d.getValue()));