Fixed a problem with XPaths: each collected record is now wrapped in its own Document

This commit is contained in:
Michele Artini 2024-02-13 08:36:42 +01:00
parent 265bfd364d
commit dd7350ecf2
3 changed files with 29 additions and 18 deletions

View File

@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
public class BaseCollectorIterator implements Iterator<Element> { public class BaseCollectorIterator implements Iterator<Document> {
private Object nextElement; private Object nextElement;
@ -51,13 +51,13 @@ public class BaseCollectorIterator implements Iterator<Element> {
@Override @Override
public synchronized boolean hasNext() { public synchronized boolean hasNext() {
return (this.nextElement != null) && (this.nextElement instanceof Element); return (this.nextElement != null) && (this.nextElement instanceof Document);
} }
@Override @Override
public synchronized Element next() { public synchronized Document next() {
try { try {
return this.nextElement instanceof Element ? (Element) this.nextElement : null; return this.nextElement instanceof Document ? (Document) this.nextElement : null;
} finally { } finally {
try { try {
this.nextElement = this.queue.take(); this.nextElement = this.queue.take();
@ -113,7 +113,9 @@ public class BaseCollectorIterator implements Iterator<Element> {
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) { for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
if (o instanceof Element) { if (o instanceof Element) {
this.queue.put(((Element) o).detach()); final Element newRoot = (Element) ((Element) o).detach();
final Document newDoc = DocumentHelper.createDocument(newRoot);
this.queue.put(newDoc);
count++; count++;
} }
} }

View File

@ -11,7 +11,7 @@ import java.util.stream.StreamSupport;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.dom4j.Element; import org.dom4j.Document;
import org.dom4j.io.OutputFormat; import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter; import org.dom4j.io.XMLWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -51,14 +51,14 @@ public class BaseCollectorPlugin implements CollectorPlugin {
throw new CollectorException(e); throw new CollectorException(e);
} }
final Iterator<Element> iterator = new BaseCollectorIterator(this.fs, filePath, report); final Iterator<Document> iterator = new BaseCollectorIterator(this.fs, filePath, report);
final Spliterator<Element> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED); final Spliterator<Document> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
return StreamSupport.stream(spliterator, false) return StreamSupport.stream(spliterator, false)
.filter(elem -> filterXml(elem, report)) .filter(doc -> filterXml(doc, report))
.map(elem -> xmlToString(report, elem)); .map(doc -> xmlToString(doc, report));
} }
private boolean filterXml(final Element elem, final AggregatorReport report) { private boolean filterXml(final Document doc, final AggregatorReport report) {
// TODO Auto-generated method stub // TODO Auto-generated method stub
// HERE THE FILTERS ACCORDING TO THE DOCUMENTATION // HERE THE FILTERS ACCORDING TO THE DOCUMENTATION
@ -66,10 +66,10 @@ public class BaseCollectorPlugin implements CollectorPlugin {
return true; return true;
} }
private String xmlToString(final AggregatorReport report, final Element elem) { private String xmlToString(final Document doc, final AggregatorReport report) {
try (final StringWriter sw = new StringWriter()) { try (final StringWriter sw = new StringWriter()) {
final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint()); final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint());
writer.write(elem); writer.write(doc);
return writer.toString(); return writer.toString();
} catch (final IOException e) { } catch (final IOException e) {
report.put(e.getClass().getName(), e.getMessage()); report.put(e.getClass().getName(), e.getMessage());

View File

@ -4,14 +4,18 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Attribute; import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element; import org.dom4j.Element;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
@ -27,7 +31,7 @@ public class BaseCollectorIteratorTest {
final Map<String, Map<String, String>> collections = new HashMap<>(); final Map<String, Map<String, String>> collections = new HashMap<>();
while (iterator.hasNext()) { while (iterator.hasNext()) {
final Element record = iterator.next(); final Document record = iterator.next();
count++; count++;
@ -35,7 +39,10 @@ public class BaseCollectorIteratorTest {
System.out.println("# Read records: " + count); System.out.println("# Read records: " + count);
} }
// System.out.println(record.asXML());
for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) { for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) {
final Element n = (Element) o; final Element n = (Element) o;
final String collName = n.getText().trim(); final String collName = n.getText().trim();
if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) { if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
@ -46,14 +53,16 @@ public class BaseCollectorIteratorTest {
} }
collections.put(collName, collAttrs); collections.put(collName, collAttrs);
} }
} }
} }
collections.forEach((k, v) -> { final ObjectMapper mapper = new ObjectMapper();
System.out.println(k); for (final Entry<String, Map<String, String>> e : collections.entrySet()) {
v.forEach((ak, av) -> System.out.println(" - " + ak + "=" + av)); System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue()));
});
}
assertEquals(30000, count); assertEquals(30000, count);
} }