From 10d7b4f0b4f3a5e9b2eb0ab8b40b4560dc1d4069 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Tue, 20 Jul 2021 11:51:33 +0200 Subject: [PATCH] filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML searialization procedure --- .../eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 86bbae99e..a985d2371 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,6 +16,7 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable { .getOriginalId() .stream() .filter(Objects::nonNull) + .filter(id -> !id.matches("^\\d{2}" + IdentifierFactory.ID_PREFIX_SEPARATOR)) .map(s -> XmlSerializationUtils.asXmlElement("originalId", s)) .collect(Collectors.toList())); }