fixed a problem with xpaths
This commit is contained in:
parent
265bfd364d
commit
dd7350ecf2
|
@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
|
||||
public class BaseCollectorIterator implements Iterator<Element> {
|
||||
public class BaseCollectorIterator implements Iterator<Document> {
|
||||
|
||||
private Object nextElement;
|
||||
|
||||
|
@ -51,13 +51,13 @@ public class BaseCollectorIterator implements Iterator<Element> {
|
|||
|
||||
@Override
|
||||
public synchronized boolean hasNext() {
|
||||
return (this.nextElement != null) && (this.nextElement instanceof Element);
|
||||
return (this.nextElement != null) && (this.nextElement instanceof Document);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Element next() {
|
||||
public synchronized Document next() {
|
||||
try {
|
||||
return this.nextElement instanceof Element ? (Element) this.nextElement : null;
|
||||
return this.nextElement instanceof Document ? (Document) this.nextElement : null;
|
||||
} finally {
|
||||
try {
|
||||
this.nextElement = this.queue.take();
|
||||
|
@ -113,7 +113,9 @@ public class BaseCollectorIterator implements Iterator<Element> {
|
|||
|
||||
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
|
||||
if (o instanceof Element) {
|
||||
this.queue.put(((Element) o).detach());
|
||||
final Element newRoot = (Element) ((Element) o).detach();
|
||||
final Document newDoc = DocumentHelper.createDocument(newRoot);
|
||||
this.queue.put(newDoc);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ import java.util.stream.StreamSupport;
|
|||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.io.OutputFormat;
|
||||
import org.dom4j.io.XMLWriter;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -51,14 +51,14 @@ public class BaseCollectorPlugin implements CollectorPlugin {
|
|||
throw new CollectorException(e);
|
||||
}
|
||||
|
||||
final Iterator<Element> iterator = new BaseCollectorIterator(this.fs, filePath, report);
|
||||
final Spliterator<Element> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
||||
final Iterator<Document> iterator = new BaseCollectorIterator(this.fs, filePath, report);
|
||||
final Spliterator<Document> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
||||
return StreamSupport.stream(spliterator, false)
|
||||
.filter(elem -> filterXml(elem, report))
|
||||
.map(elem -> xmlToString(report, elem));
|
||||
.filter(doc -> filterXml(doc, report))
|
||||
.map(doc -> xmlToString(doc, report));
|
||||
}
|
||||
|
||||
private boolean filterXml(final Element elem, final AggregatorReport report) {
|
||||
private boolean filterXml(final Document doc, final AggregatorReport report) {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
// TODO: apply here the record filters described in the documentation
|
||||
|
@ -66,10 +66,10 @@ public class BaseCollectorPlugin implements CollectorPlugin {
|
|||
return true;
|
||||
}
|
||||
|
||||
private String xmlToString(final AggregatorReport report, final Element elem) {
|
||||
private String xmlToString(final Document doc, final AggregatorReport report) {
|
||||
try (final StringWriter sw = new StringWriter()) {
|
||||
final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint());
|
||||
writer.write(elem);
|
||||
writer.write(doc);
|
||||
return writer.toString();
|
||||
} catch (final IOException e) {
|
||||
report.put(e.getClass().getName(), e.getMessage());
|
||||
|
|
|
@ -4,14 +4,18 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
|||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Attribute;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.Element;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
|
@ -27,7 +31,7 @@ public class BaseCollectorIteratorTest {
|
|||
final Map<String, Map<String, String>> collections = new HashMap<>();
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
final Element record = iterator.next();
|
||||
final Document record = iterator.next();
|
||||
|
||||
count++;
|
||||
|
||||
|
@ -35,7 +39,10 @@ public class BaseCollectorIteratorTest {
|
|||
System.out.println("# Read records: " + count);
|
||||
}
|
||||
|
||||
// System.out.println(record.asXML());
|
||||
|
||||
for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) {
|
||||
|
||||
final Element n = (Element) o;
|
||||
final String collName = n.getText().trim();
|
||||
if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
|
||||
|
@ -46,14 +53,16 @@ public class BaseCollectorIteratorTest {
|
|||
}
|
||||
|
||||
collections.put(collName, collAttrs);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collections.forEach((k, v) -> {
|
||||
System.out.println(k);
|
||||
v.forEach((ak, av) -> System.out.println(" - " + ak + "=" + av));
|
||||
});
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
for (final Entry<String, Map<String, String>> e : collections.entrySet()) {
|
||||
System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue()));
|
||||
|
||||
}
|
||||
|
||||
assertEquals(30000, count);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue