Fixed a problem with XPaths: each record is now emitted as a standalone Document instead of a detached Element
This commit is contained in:
parent
265bfd364d
commit
dd7350ecf2
|
@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
|
||||||
public class BaseCollectorIterator implements Iterator<Element> {
|
public class BaseCollectorIterator implements Iterator<Document> {
|
||||||
|
|
||||||
private Object nextElement;
|
private Object nextElement;
|
||||||
|
|
||||||
|
@ -51,13 +51,13 @@ public class BaseCollectorIterator implements Iterator<Element> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized boolean hasNext() {
|
public synchronized boolean hasNext() {
|
||||||
return (this.nextElement != null) && (this.nextElement instanceof Element);
|
return (this.nextElement != null) && (this.nextElement instanceof Document);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized Element next() {
|
public synchronized Document next() {
|
||||||
try {
|
try {
|
||||||
return this.nextElement instanceof Element ? (Element) this.nextElement : null;
|
return this.nextElement instanceof Document ? (Document) this.nextElement : null;
|
||||||
} finally {
|
} finally {
|
||||||
try {
|
try {
|
||||||
this.nextElement = this.queue.take();
|
this.nextElement = this.queue.take();
|
||||||
|
@ -113,7 +113,9 @@ public class BaseCollectorIterator implements Iterator<Element> {
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
|
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
|
||||||
if (o instanceof Element) {
|
if (o instanceof Element) {
|
||||||
this.queue.put(((Element) o).detach());
|
final Element newRoot = (Element) ((Element) o).detach();
|
||||||
|
final Document newDoc = DocumentHelper.createDocument(newRoot);
|
||||||
|
this.queue.put(newDoc);
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.dom4j.Element;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.io.OutputFormat;
|
import org.dom4j.io.OutputFormat;
|
||||||
import org.dom4j.io.XMLWriter;
|
import org.dom4j.io.XMLWriter;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -51,14 +51,14 @@ public class BaseCollectorPlugin implements CollectorPlugin {
|
||||||
throw new CollectorException(e);
|
throw new CollectorException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
final Iterator<Element> iterator = new BaseCollectorIterator(this.fs, filePath, report);
|
final Iterator<Document> iterator = new BaseCollectorIterator(this.fs, filePath, report);
|
||||||
final Spliterator<Element> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
final Spliterator<Document> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
||||||
return StreamSupport.stream(spliterator, false)
|
return StreamSupport.stream(spliterator, false)
|
||||||
.filter(elem -> filterXml(elem, report))
|
.filter(doc -> filterXml(doc, report))
|
||||||
.map(elem -> xmlToString(report, elem));
|
.map(doc -> xmlToString(doc, report));
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean filterXml(final Element elem, final AggregatorReport report) {
|
private boolean filterXml(final Document doc, final AggregatorReport report) {
|
||||||
// TODO Auto-generated method stub
|
// TODO Auto-generated method stub
|
||||||
|
|
||||||
// HERE THE FILTERS ACCORDING TO THE DOCUMENTATION
|
// HERE THE FILTERS ACCORDING TO THE DOCUMENTATION
|
||||||
|
@ -66,10 +66,10 @@ public class BaseCollectorPlugin implements CollectorPlugin {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String xmlToString(final AggregatorReport report, final Element elem) {
|
private String xmlToString(final Document doc, final AggregatorReport report) {
|
||||||
try (final StringWriter sw = new StringWriter()) {
|
try (final StringWriter sw = new StringWriter()) {
|
||||||
final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint());
|
final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint());
|
||||||
writer.write(elem);
|
writer.write(doc);
|
||||||
return writer.toString();
|
return writer.toString();
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
report.put(e.getClass().getName(), e.getMessage());
|
report.put(e.getClass().getName(), e.getMessage());
|
||||||
|
|
|
@ -4,14 +4,18 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Attribute;
|
import org.dom4j.Attribute;
|
||||||
|
import org.dom4j.Document;
|
||||||
import org.dom4j.Element;
|
import org.dom4j.Element;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.junit.jupiter.api.extension.ExtendWith;
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
@ -27,7 +31,7 @@ public class BaseCollectorIteratorTest {
|
||||||
final Map<String, Map<String, String>> collections = new HashMap<>();
|
final Map<String, Map<String, String>> collections = new HashMap<>();
|
||||||
|
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
final Element record = iterator.next();
|
final Document record = iterator.next();
|
||||||
|
|
||||||
count++;
|
count++;
|
||||||
|
|
||||||
|
@ -35,7 +39,10 @@ public class BaseCollectorIteratorTest {
|
||||||
System.out.println("# Read records: " + count);
|
System.out.println("# Read records: " + count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// System.out.println(record.asXML());
|
||||||
|
|
||||||
for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) {
|
for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) {
|
||||||
|
|
||||||
final Element n = (Element) o;
|
final Element n = (Element) o;
|
||||||
final String collName = n.getText().trim();
|
final String collName = n.getText().trim();
|
||||||
if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
|
if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) {
|
||||||
|
@ -46,14 +53,16 @@ public class BaseCollectorIteratorTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
collections.put(collName, collAttrs);
|
collections.put(collName, collAttrs);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
collections.forEach((k, v) -> {
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
System.out.println(k);
|
for (final Entry<String, Map<String, String>> e : collections.entrySet()) {
|
||||||
v.forEach((ak, av) -> System.out.println(" - " + ak + "=" + av));
|
System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue()));
|
||||||
});
|
|
||||||
|
}
|
||||||
|
|
||||||
assertEquals(30000, count);
|
assertEquals(30000, count);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue