package eu.dnetlib.validator2.engine

import groovy.xml.DOMBuilder
import groovy.xml.FactorySupport
import org.w3c.dom.Document
import org.xml.sax.InputSource

import javax.xml.XMLConstants
import javax.xml.parsers.DocumentBuilder
import javax.xml.parsers.DocumentBuilderFactory
import java.util.function.BiConsumer
import java.util.function.Predicate
import java.util.zip.ZipEntry

/**
 * Static helpers for loading XML, either from files located under
 * {@code src/test/resources} or from the entries of a zip archive.
 */
class XMLHelper {

    /**
     * Reads a file below {@code src/test/resources} as UTF-8 text and parses it
     * with {@link DOMBuilder}.
     *
     * @param resource path appended verbatim to {@code "src/test/resources"}; no
     *                 separator is inserted, so it is expected to begin with one
     * @return the W3C DOM document produced by {@code DOMBuilder.parse}
     */
    static Document parse(String resource) {
        File xmlFile = new File("src/test/resources" + resource)
        return xmlFile.withReader("UTF-8") { Reader input ->
            DOMBuilder.parse(input)
        }
    }

    /**
     * Reads a file below {@code src/test/resources} as UTF-8 text and parses it
     * with a fresh {@code XmlSlurper}.
     *
     * @param resource path appended verbatim to {@code "src/test/resources"}; no
     *                 separator is inserted, so it is expected to begin with one
     * @return whatever {@code XmlSlurper.parse} yields for the file content
     */
    static def groovyParse(String resource) {
        File xmlFile = new File("src/test/resources" + resource)
        return xmlFile.withReader("UTF-8") { Reader input ->
            new XmlSlurper().parse(input)
        }
    }

    /**
     * Parses zip entries into DOM documents and passes each one, together with
     * the entry identifier, to {@code documentConsumer}.
     *
     * The traversal itself is delegated to {@code Helper.forEachZipEntry};
     * presumably it visits the entries of {@code zipFile} accepted by
     * {@code filter} (confirm against {@code Helper}). A single
     * {@link DocumentBuilder} and a single {@link InputSource} are shared by all
     * entries.
     *
     * @param zipFile          archive handed to {@code Helper.forEachZipEntry}
     * @param bufferSize       forwarded unchanged to {@code Helper.forEachZipEntry}
     * @param filter           forwarded unchanged to {@code Helper.forEachZipEntry}
     * @param documentConsumer callback receiving (entry id, parsed document); when
     *                         it is falsy under Groovy truth (e.g. {@code null})
     *                         the method returns without doing anything
     */
    static void forEachZippedXMLDocument(File zipFile,
                                         int bufferSize,
                                         Predicate filter,
                                         BiConsumer<String, Document> documentConsumer) {
        // Nothing to deliver documents to: skip all parser set-up and zip access.
        if (!documentConsumer) {
            return
        }

        // Namespace-aware, non-validating parser with secure processing enabled.
        DocumentBuilderFactory builderFactory = FactorySupport.createDocumentBuilderFactory()
        builderFactory.setNamespaceAware(true)
        builderFactory.setValidating(false)
        builderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
        // The "disallow-doctype-decl" feature is explicitly switched off here.
        builderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false)
        DocumentBuilder parser = builderFactory.newDocumentBuilder()

        // One InputSource is reused for every entry; its encoding is fixed to UTF-8
        // and only the byte stream is swapped per entry.
        InputSource source = new InputSource()
        source.setEncoding("UTF-8")

        Helper.forEachZipEntry(zipFile, bufferSize, filter) { String entryId, InputStream entryStream ->
            source.setByteStream(entryStream)
            Document parsed = parser.parse(source)
            documentConsumer.accept(entryId, parsed)
        }
    }
}