You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
118 lines
3.3 KiB
Java
118 lines
3.3 KiB
Java
package eu.dnetlib.miscutils.iterators.xml;
|
|
|
|
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;

import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
|
|
|
|
@Ignore
|
|
// TODO investigate why it takes so long to run
|
|
// Time elapsed: 303.806 sec - in eu.dnetlib.miscutils.iterators.xml.IterableXmlParserTest
|
|
public class IterableXmlParserTest {
|
|
|
|
private Resource xmlZip = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoar.zip");
|
|
|
|
private Resource xmlGz = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoar.xml.gz");
|
|
|
|
private Resource xmlZipErr = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/opendoarErr.zip");
|
|
|
|
private Resource xmlSingle = new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/singleRepo.xml");
|
|
|
|
private String element = "repository";
|
|
|
|
private IterableXmlParser parser;
|
|
|
|
private SAXReader reader;
|
|
|
|
@Before
|
|
public void setUp() throws Exception {
|
|
reader = new SAXReader();
|
|
}
|
|
|
|
@Test
|
|
public void testGz() throws Exception {
|
|
doTest(new GZIPInputStream(xmlGz.getInputStream()), element);
|
|
}
|
|
|
|
@Test
|
|
public void test() throws Exception {
|
|
doTest(read(new ZipInputStream(xmlZip.getInputStream())), element);
|
|
}
|
|
|
|
@Test
|
|
public void testErr() throws Exception {
|
|
doTest(read(new ZipInputStream(xmlZipErr.getInputStream())), element);
|
|
}
|
|
|
|
@Test
|
|
public void testSingle() throws Exception {
|
|
doTest(xmlSingle.getInputStream(), element);
|
|
}
|
|
|
|
@Test
|
|
public void testOaiRecord() throws Exception {
|
|
int count = doTest(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/oaiRecord.xml").getInputStream(), "record");
|
|
assertTrue(count == 1);
|
|
}
|
|
|
|
@Test
|
|
public void testWeird() throws Exception {
|
|
int count = doTest(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/weirdRecords.xml").getInputStream(), "record");
|
|
assertTrue(count == 3);
|
|
}
|
|
|
|
@Test
|
|
public void testWeirdGz() throws Exception {
|
|
int count = doTest(new GZIPInputStream(new ClassPathResource("eu/dnetlib/miscutils/iterators/xml/weirdRecords.xml.gz").getInputStream()), "record");
|
|
assertTrue(count == 3);
|
|
}
|
|
|
|
private int doTest(final InputStream stream, final String element) throws DocumentException {
|
|
parser = new IterableXmlParser(element, stream);
|
|
int count = 0;
|
|
for (String xml : parser) {
|
|
//System.out.println(xml);
|
|
Document doc = reader.read(new StringReader(xml));
|
|
assertNotNull(doc);
|
|
assertNotNull(doc.selectSingleNode("//" + element));
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// helper method, reads the compressed text out of the xmlZip file
|
|
private InputStream read(final ZipInputStream zis) throws IOException {
|
|
|
|
final StringWriter sw = new StringWriter();
|
|
while (zis.getNextEntry() != null) {
|
|
|
|
byte[] buffer = new byte[1];
|
|
|
|
while (zis.read(buffer) != -1) {
|
|
IOUtils.write(buffer, sw, "UTF-8");
|
|
}
|
|
}
|
|
zis.close();
|
|
sw.close();
|
|
|
|
return new ByteArrayInputStream(sw.toString().getBytes());
|
|
}
|
|
|
|
}
|