filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML searialization procedure

This commit is contained in:
Claudio Atzori 2021-07-20 11:51:33 +02:00
parent 5947cddafc
commit 77e8c6c7f7
1 changed files with 2 additions and 0 deletions

View File

@ -16,6 +16,7 @@ import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource; import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamResult;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.util.LongAccumulator; import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document; import org.dom4j.Document;
@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable {
.getOriginalId() .getOriginalId()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(id -> !id.matches("^\\d{2}" + IdentifierFactory.ID_PREFIX_SEPARATOR))
.map(s -> XmlSerializationUtils.asXmlElement("originalId", s)) .map(s -> XmlSerializationUtils.asXmlElement("originalId", s))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }