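Fixed a problem with XPaths: the BASE collector iterator now returns each record as a standalone Document instead of a detached Element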

This commit is contained in:
Michele Artini 2024-02-13 08:36:42 +01:00
parent 265bfd364d
commit dd7350ecf2
3 changed files with 29 additions and 18 deletions

View File

@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
public class BaseCollectorIterator implements Iterator<Element> {
public class BaseCollectorIterator implements Iterator<Document> {
private Object nextElement;
@ -51,13 +51,13 @@ public class BaseCollectorIterator implements Iterator<Element> {
@Override
public synchronized boolean hasNext() {
return (this.nextElement != null) && (this.nextElement instanceof Element);
return (this.nextElement != null) && (this.nextElement instanceof Document);
}
@Override
public synchronized Element next() {
public synchronized Document next() {
try {
return this.nextElement instanceof Element ? (Element) this.nextElement : null;
return this.nextElement instanceof Document ? (Document) this.nextElement : null;
} finally {
try {
this.nextElement = this.queue.take();
@ -113,7 +113,9 @@ public class BaseCollectorIterator implements Iterator<Element> {
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
if (o instanceof Element) {
this.queue.put(((Element) o).detach());
final Element newRoot = (Element) ((Element) o).detach();
final Document newDoc = DocumentHelper.createDocument(newRoot);
this.queue.put(newDoc);
count++;
}
}

View File

@ -11,7 +11,7 @@ import java.util.stream.StreamSupport;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.dom4j.Element;
import org.dom4j.Document;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;
import org.slf4j.Logger;
@ -51,14 +51,14 @@ public class BaseCollectorPlugin implements CollectorPlugin {
throw new CollectorException(e);
}
final Iterator<Element> iterator = new BaseCollectorIterator(this.fs, filePath, report);
final Spliterator<Element> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
final Iterator<Document> iterator = new BaseCollectorIterator(this.fs, filePath, report);
final Spliterator<Document> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
return StreamSupport.stream(spliterator, false)
.filter(elem -> filterXml(elem, report))
.map(elem -> xmlToString(report, elem));
.filter(doc -> filterXml(doc, report))
.map(doc -> xmlToString(doc, report));
}
private boolean filterXml(final Element elem, final AggregatorReport report) {
private boolean filterXml(final Document doc, final AggregatorReport report) {
// TODO Auto-generated method stub
// HERE THE FILTERS ACCORDING TO THE DOCUMENTATION
@ -66,10 +66,10 @@ public class BaseCollectorPlugin implements CollectorPlugin {
return true;
}
private String xmlToString(final AggregatorReport report, final Element elem) {
private String xmlToString(final Document doc, final AggregatorReport report) {
try (final StringWriter sw = new StringWriter()) {
final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint());
writer.write(elem);
writer.write(doc);
return writer.toString();
} catch (final IOException e) {
report.put(e.getClass().getName(), e.getMessage());

View File

@ -4,14 +4,18 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
@ExtendWith(MockitoExtension.class)
@ -27,7 +31,7 @@ public class BaseCollectorIteratorTest {
final Map<String, Map<String, String>> collections = new HashMap<>();
while (iterator.hasNext()) {
final Element record = iterator.next();
final Document record = iterator.next();
count++;
@ -35,7 +39,10 @@ public class BaseCollectorIteratorTest {
System.out.println("# Read records: " + count);
}
// System.out.println(record.asXML());
for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) {
final Element n = (Element) o;
final String collName = n.getText().trim();
if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
@ -46,14 +53,16 @@ public class BaseCollectorIteratorTest {
}
collections.put(collName, collAttrs);
}
}
}
collections.forEach((k, v) -> {
System.out.println(k);
v.forEach((ak, av) -> System.out.println(" - " + ak + "=" + av));
});
final ObjectMapper mapper = new ObjectMapper();
for (final Entry<String, Map<String, String>> e : collections.entrySet()) {
System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue()));
}
assertEquals(30000, count);
}