134 lines
4.4 KiB
Java
134 lines
4.4 KiB
Java
package eu.dnetlib.data.mdstore.modular;
|
|
|
|
import java.io.StringReader;
|
|
import java.io.StringWriter;
|
|
import java.util.List;
|
|
|
|
import javax.xml.namespace.QName;
|
|
import javax.xml.stream.XMLEventFactory;
|
|
import javax.xml.stream.XMLEventReader;
|
|
import javax.xml.stream.XMLEventWriter;
|
|
import javax.xml.stream.XMLInputFactory;
|
|
import javax.xml.stream.XMLOutputFactory;
|
|
import javax.xml.stream.XMLStreamException;
|
|
import javax.xml.stream.events.Namespace;
|
|
import javax.xml.stream.events.StartElement;
|
|
import javax.xml.stream.events.XMLEvent;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
|
import com.google.common.base.Function;
|
|
import com.google.common.collect.Lists;
|
|
|
|
import eu.dnetlib.miscutils.collections.Pair;
|
|
import eu.dnetlib.miscutils.factory.Factory;
|
|
|
|
public class BulkRecordMapperFactory implements Factory<Function<String, Pair<String, String>>> {
|
|
|
|
private static final Log log = LogFactory.getLog(BulkRecordMapperFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
|
|
|
|
protected static final String MD_RECORD = "mdRecord";
|
|
|
|
protected static final String MD_ID = "mdId";
|
|
|
|
protected static final String RECORD = "record";
|
|
|
|
protected ThreadLocal<XMLInputFactory> inputFactory = new ThreadLocal<XMLInputFactory>() {
|
|
@Override
|
|
protected XMLInputFactory initialValue() {
|
|
return XMLInputFactory.newInstance();
|
|
}
|
|
};
|
|
|
|
protected ThreadLocal<XMLOutputFactory> outputFactory = new ThreadLocal<XMLOutputFactory>() {
|
|
@Override
|
|
protected XMLOutputFactory initialValue() {
|
|
return XMLOutputFactory.newInstance();
|
|
}
|
|
};
|
|
|
|
protected ThreadLocal<XMLEventFactory> eventFactory = new ThreadLocal<XMLEventFactory>() {
|
|
@Override
|
|
protected XMLEventFactory initialValue() {
|
|
return XMLEventFactory.newInstance();
|
|
}
|
|
};
|
|
|
|
@Override
|
|
public Function<String, Pair<String, String>> newInstance() {
|
|
return new Function<String, Pair<String, String>>() {
|
|
private String mdId = null;
|
|
private String record = null;
|
|
@Override
|
|
public Pair<String, String> apply(String embeddedRecord) {
|
|
try {
|
|
final XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(embeddedRecord));
|
|
|
|
while (parser.hasNext()) {
|
|
final XMLEvent event = parser.nextEvent();
|
|
if (event != null && event.isStartElement()) {
|
|
final String localName = event.asStartElement().getName().getLocalPart();
|
|
|
|
if (MD_RECORD.equals(localName)) {
|
|
mdId = event.asStartElement().getAttributeByName(new QName(MD_ID)).getValue();
|
|
|
|
} else if (RECORD.equals(localName)) {
|
|
record = getRecord(embeddedRecord, parser);
|
|
}
|
|
}
|
|
}
|
|
} catch (final XMLStreamException e) {
|
|
log.error("error parsing record: " + embeddedRecord);
|
|
}
|
|
return new Pair<String, String>(mdId, record);
|
|
}
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Copy the /indexRecord/result element and children, preserving namespace declarations etc.
|
|
*
|
|
* @param indexDocument
|
|
* @param results
|
|
* @param parser
|
|
* @throws XMLStreamException
|
|
*/
|
|
protected String getRecord(final String record, final XMLEventReader parser) throws XMLStreamException {
|
|
StringWriter results = new StringWriter();
|
|
final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results);
|
|
|
|
// TODO: newRecord should copy all the namespace prefixes setup in parents
|
|
// fortunately the only parent of the result element is the 'indexrecord', so it should be easy to get
|
|
// the namespaces declared on the root element (and fast)
|
|
|
|
final List<Namespace> namespaces = Lists.newArrayList(
|
|
eventFactory.get().createNamespace("dri", "http://www.driver-repository.eu/namespace/dri"),
|
|
eventFactory.get().createNamespace("dr", "http://www.driver-repository.eu/namespace/dr"),
|
|
eventFactory.get().createNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance"),
|
|
eventFactory.get().createNamespace("dc", "http://purl.org/dc/elements/1.1/"));
|
|
|
|
StartElement newRecord = eventFactory.get().createStartElement("", null, RECORD, null, namespaces.iterator());
|
|
|
|
// new root record
|
|
writer.add(newRecord);
|
|
|
|
// copy the rest as it is
|
|
while (parser.hasNext()) {
|
|
final XMLEvent resultEvent = parser.nextEvent();
|
|
|
|
// TODO: replace with depth tracking instead of close tag tracking.
|
|
if (resultEvent.isEndElement() && resultEvent.asEndElement().getName().getLocalPart().equals(RECORD)) {
|
|
writer.add(resultEvent);
|
|
break;
|
|
}
|
|
|
|
writer.add(resultEvent);
|
|
}
|
|
writer.close();
|
|
|
|
return results.toString();
|
|
}
|
|
|
|
}
|