package eu.dnetlib.miscutils.iterators.xml;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;

import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Before;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;

/**
 * Tests for {@link IterableXmlParser}.
 *
 * <p>Each test feeds an XML stream (plain, gzip-compressed or extracted from a zip archive) to the
 * parser, iterates over the XML fragments it yields for a given element name, and checks that every
 * fragment can be parsed by dom4j and contains that element.
 */
public class IterableXmlParserTest {

	private final Resource xmlZip = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoar.zip");
	private final Resource xmlGz = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoar.xml.gz");
	private final Resource xmlZipErr = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoarErr.zip");
	private final Resource xmlSingle = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/singleRepo.xml");

	/** Name of the element iterated over in the opendoar / single-repository fixtures. */
	private final String element = "repository";

	private IterableXmlParser parser;
	private SAXReader reader;

	@Before
	public void setUp() throws Exception {
		reader = new SAXReader();
	}

	@Test
	public void testGz() throws Exception {
		doTest(new GZIPInputStream(xmlGz.getInputStream()), element);
	}

	@Test
	public void test() throws Exception {
		doTest(read(new ZipInputStream(xmlZip.getInputStream())), element);
	}

	// NOTE(review): judging by the fixture name this archive presumably contains faulty input; the test
	// only requires that every fragment the parser yields is parseable XML -- confirm the intent.
	@Test
	public void testErr() throws Exception {
		doTest(read(new ZipInputStream(xmlZipErr.getInputStream())), element);
	}

	@Test
	public void testSingle() throws Exception {
		doTest(xmlSingle.getInputStream(), element);
	}

	@Test
	public void testOaiRecord() throws Exception {
		int count = doTest(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/oaiRecord.xml").getInputStream(), "record");
		assertTrue("expected exactly 1 record, got " + count, count == 1);
	}

	@Test
	public void testWeird() throws Exception {
		int count = doTest(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/weirdRecords.xml").getInputStream(), "record");
		assertTrue("expected exactly 3 records, got " + count, count == 3);
	}

	@Test
	public void testWeirdGz() throws Exception {
		int count = doTest(new GZIPInputStream(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/weirdRecords.xml.gz").getInputStream()),
				"record");
		assertTrue("expected exactly 3 records, got " + count, count == 3);
	}

	/**
	 * Runs the parser over {@code stream} and verifies every fragment it yields.
	 *
	 * <p>Each fragment must be parseable by dom4j and must contain at least one {@code element} node.
	 * The stream is always closed before returning, whether or not the checks succeed.
	 *
	 * @param stream
	 *            XML input to iterate over; closed by this method
	 * @param element
	 *            name of the element the parser should split the input on
	 * @return the number of fragments produced by the parser
	 * @throws DocumentException
	 *             if a fragment is not well-formed XML
	 */
	private int doTest(final InputStream stream, final String element) throws DocumentException {
		try {
			parser = new IterableXmlParser(element, stream);
			int count = 0;
			for (String xml : parser) {
				Document doc = reader.read(new StringReader(xml));
				assertNotNull(doc);
				assertNotNull(doc.selectSingleNode("//" + element));
				count++;
			}
			return count;
		} finally {
			// Closing quietly keeps the method signature free of IOException and never masks a test failure.
			IOUtils.closeQuietly(stream);
		}
	}

	/**
	 * Helper method: extracts the text of every entry of the given zip stream, concatenated in archive
	 * order, and returns it as an in-memory stream.
	 *
	 * <p>Entries are decoded as UTF-8 as a stream (not byte-by-byte, which would corrupt multi-byte
	 * characters) and re-encoded as UTF-8 rather than with the platform default charset. The zip stream
	 * is closed even if reading fails.
	 *
	 * @param zis
	 *            zip stream to drain; closed by this method
	 * @return a stream over the UTF-8 bytes of the concatenated entry contents
	 * @throws IOException
	 *             if the archive cannot be read
	 */
	private InputStream read(final ZipInputStream zis) throws IOException {
		final StringWriter sw = new StringWriter();
		try {
			while (zis.getNextEntry() != null) {
				// ZipInputStream signals end-of-entry with -1, so this copies exactly the current entry.
				IOUtils.copy(zis, sw, "UTF-8");
			}
		} finally {
			zis.close();
		}
		return new ByteArrayInputStream(sw.toString().getBytes("UTF-8"));
	}
}