/*
 * dnet-core/dnet-core-components/src/test/java/eu/dnetlib/miscutils/iterators/xml/IterableXmlParserTest.java
 *
 * 114 lines
 * 3.2 KiB
 * Java
 */

package eu.dnetlib.miscutils.iterators.xml;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;
import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Before;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
/**
 * Tests for {@link IterableXmlParser}.
 *
 * <p>Each test feeds an XML stream (plain, gzip-compressed, or extracted from a zip archive) to the
 * parser, iterates over the fragments it yields for a given element name, and verifies that every
 * fragment is itself a well-formed XML document containing that element.
 */
public class IterableXmlParserTest {

    /** Classpath folder holding the XML fixtures used by these tests. */
    private static final String FIXTURE_DIR = "eu/dnetlib/miscutils/iterators/xml/";

    /** Element name the parser is asked to split on for the OpenDOAR fixtures. */
    private static final String REPOSITORY_ELEMENT = "repository";

    /** Element name the parser is asked to split on for the record-based fixtures. */
    private static final String RECORD_ELEMENT = "record";

    private final Resource xmlZip = fixture("opendoar.zip");
    private final Resource xmlGz = fixture("opendoar.xml.gz");
    private final Resource xmlZipErr = fixture("opendoarErr.zip");
    private final Resource xmlSingle = fixture("singleRepo.xml");

    /** dom4j reader used to re-parse each fragment produced by the parser. */
    private SAXReader reader;

    @Before
    public void setUp() throws Exception {
        reader = new SAXReader();
    }

    /** Parses the gzip-compressed OpenDOAR fixture. */
    @Test
    public void testGz() throws Exception {
        doTest(new GZIPInputStream(xmlGz.getInputStream()), REPOSITORY_ELEMENT);
    }

    /** Parses the zip-compressed OpenDOAR fixture. */
    @Test
    public void test() throws Exception {
        doTest(read(new ZipInputStream(xmlZip.getInputStream())), REPOSITORY_ELEMENT);
    }

    /**
     * Parses the {@code opendoarErr.zip} fixture.
     * NOTE(review): judging by its name the fixture presumably contains problematic content; the test
     * only requires that every fragment the parser yields is well-formed.
     */
    @Test
    public void testErr() throws Exception {
        doTest(read(new ZipInputStream(xmlZipErr.getInputStream())), REPOSITORY_ELEMENT);
    }

    /** Parses the uncompressed single-repository fixture. */
    @Test
    public void testSingle() throws Exception {
        doTest(xmlSingle.getInputStream(), REPOSITORY_ELEMENT);
    }

    /** Expects exactly one {@code record} fragment from the OAI record fixture. */
    @Test
    public void testOaiRecord() throws Exception {
        final int count = doTest(fixture("oaiRecord.xml").getInputStream(), RECORD_ELEMENT);
        assertEquals(1, count);
    }

    /** Expects exactly three {@code record} fragments from the "weird records" fixture. */
    @Test
    public void testWeird() throws Exception {
        final int count = doTest(fixture("weirdRecords.xml").getInputStream(), RECORD_ELEMENT);
        assertEquals(3, count);
    }

    /** Same as {@link #testWeird()}, reading the gzip-compressed variant of the fixture. */
    @Test
    public void testWeirdGz() throws Exception {
        final InputStream gz = new GZIPInputStream(fixture("weirdRecords.xml.gz").getInputStream());
        final int count = doTest(gz, RECORD_ELEMENT);
        assertEquals(3, count);
    }

    /**
     * Runs the parser over {@code stream} and checks every fragment it yields.
     *
     * <p>Each fragment must parse as an XML document and must contain at least one node matching
     * {@code //element}. The stream is closed before returning, whether or not the checks succeed.
     *
     * @param stream  XML input handed to the parser
     * @param element name of the element the parser should extract
     * @return the number of fragments produced by the parser
     * @throws DocumentException if a fragment is not well-formed XML
     */
    private int doTest(final InputStream stream, final String element) throws DocumentException {
        try {
            final IterableXmlParser parser = new IterableXmlParser(element, stream);
            int count = 0;
            for (final String xml : parser) {
                final Document doc = reader.read(new StringReader(xml));
                assertNotNull(doc);
                assertNotNull(doc.selectSingleNode("//" + element));
                count++;
            }
            return count;
        } finally {
            // Closed quietly so that a failure on close cannot mask an assertion failure.
            IOUtils.closeQuietly(stream);
        }
    }

    /**
     * Decompresses every entry of the given zip stream and returns the concatenated content.
     *
     * <p>The bytes are copied verbatim: no charset decoding is applied, so multi-byte characters
     * reach the parser exactly as they are stored in the archive.
     *
     * @param zis zip stream to read; it is closed by this method
     * @return an in-memory stream over the uncompressed bytes of all entries, in archive order
     * @throws IOException if the archive cannot be read
     */
    private InputStream read(final ZipInputStream zis) throws IOException {
        final ByteArrayOutputStream content = new ByteArrayOutputStream();
        try {
            while (zis.getNextEntry() != null) {
                // ZipInputStream reports end-of-stream at the end of the current entry.
                IOUtils.copy(zis, content);
            }
        } finally {
            zis.close();
        }
        return new ByteArrayInputStream(content.toByteArray());
    }

    /** Resolves a fixture file name against {@link #FIXTURE_DIR} on the classpath. */
    private static Resource fixture(final String fileName) {
        return new ClassPathResource(FIXTURE_DIR + fileName);
    }
}