filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML serialization procedure

This commit is contained in:
Claudio Atzori 2021-07-20 11:51:33 +02:00
parent 5947cddafc
commit 77e8c6c7f7
1 changed file with 2 additions and 0 deletions

View File

@ -16,6 +16,7 @@ import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable {
.getOriginalId()
.stream()
.filter(Objects::nonNull)
.filter(id -> !id.matches("^\\d{2}" + IdentifierFactory.ID_PREFIX_SEPARATOR))
.map(s -> XmlSerializationUtils.asXmlElement("originalId", s))
.collect(Collectors.toList()));
}