forked from D-Net/dnet-hadoop
filtering 'old' OpenAIRE ids from the entity.originalId[] array in the OAF -> XML serialization procedure
This commit is contained in:
parent
83fe31c92e
commit
10d7b4f0b4
|
@ -16,6 +16,7 @@ import javax.xml.transform.*;
|
||||||
import javax.xml.transform.dom.DOMSource;
|
import javax.xml.transform.dom.DOMSource;
|
||||||
import javax.xml.transform.stream.StreamResult;
|
import javax.xml.transform.stream.StreamResult;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.util.LongAccumulator;
|
import org.apache.spark.util.LongAccumulator;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
|
@ -183,6 +184,7 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.getOriginalId()
|
.getOriginalId()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
|
.filter(id -> !id.matches("^\\d{2}" + IdentifierFactory.ID_PREFIX_SEPARATOR))
|
||||||
.map(s -> XmlSerializationUtils.asXmlElement("originalId", s))
|
.map(s -> XmlSerializationUtils.asXmlElement("originalId", s))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue