forked from D-Net/dnet-hadoop
PED test
This commit is contained in:
parent
83d5e165a7
commit
6bd84678ca
|
@ -0,0 +1,45 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.bmuse.bioschema;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
import org.apache.any23.Any23;
|
||||||
|
import org.apache.any23.extractor.ExtractionException;
|
||||||
|
import org.apache.any23.source.DocumentSource;
|
||||||
|
import org.apache.any23.source.StringDocumentSource;
|
||||||
|
import org.apache.any23.writer.NTriplesWriter;
|
||||||
|
import org.apache.any23.writer.TripleHandler;
|
||||||
|
import org.apache.any23.writer.TripleHandlerException;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.io.output.ByteArrayOutputStream;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
public class Html2TriplesTest {
|
||||||
|
|
||||||
|
static Logger logger = LoggerFactory.getLogger(Html2TriplesTest.class);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// @Disabled
|
||||||
|
void conversionTest() throws Exception {
|
||||||
|
InputStream is = Html2TriplesTest.class.getResourceAsStream("/eu/dnetlib/dhp/bmuse/bioschema/ped.html");
|
||||||
|
String page = IOUtils.toString(is, StandardCharsets.UTF_8.name());
|
||||||
|
DocumentSource source = new StringDocumentSource(page, "https://proteinensemble.org/PED00001");
|
||||||
|
Any23 runner = new Any23();
|
||||||
|
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
|
TripleHandler handler = new NTriplesWriter(out);) {
|
||||||
|
runner.extract(source, handler);
|
||||||
|
logger.info(out.toString("UTF-8"));
|
||||||
|
} catch (ExtractionException e) {
|
||||||
|
logger.error("Cannot extract triples", e);
|
||||||
|
} catch (IOException e1) {
|
||||||
|
logger.error(" IO error whilst extracting triples", e1);
|
||||||
|
} catch (TripleHandlerException e2) {
|
||||||
|
logger.error("TripleHanderException", e2);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue