package eu.dnetlib.data.mdstore.modular; import java.io.StringReader; import java.io.StringWriter; import java.util.List; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventFactory; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLEventWriter; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Namespace; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.google.common.base.Function; import com.google.common.collect.Lists; import eu.dnetlib.miscutils.collections.Pair; import eu.dnetlib.miscutils.factory.Factory; public class BulkRecordMapperFactory implements Factory>> { private static final Log log = LogFactory.getLog(BulkRecordMapperFactory.class); // NOPMD by marko on 11/24/08 5:02 PM protected static final String MD_RECORD = "mdRecord"; protected static final String MD_ID = "mdId"; protected static final String RECORD = "record"; protected ThreadLocal inputFactory = new ThreadLocal() { @Override protected XMLInputFactory initialValue() { return XMLInputFactory.newInstance(); } }; protected ThreadLocal outputFactory = new ThreadLocal() { @Override protected XMLOutputFactory initialValue() { return XMLOutputFactory.newInstance(); } }; protected ThreadLocal eventFactory = new ThreadLocal() { @Override protected XMLEventFactory initialValue() { return XMLEventFactory.newInstance(); } }; @Override public Function> newInstance() { return new Function>() { private String mdId = null; private String record = null; @Override public Pair apply(String embeddedRecord) { try { final XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(embeddedRecord)); while (parser.hasNext()) { final XMLEvent event = parser.nextEvent(); if (event != null && event.isStartElement()) { final String localName = event.asStartElement().getName().getLocalPart(); if (MD_RECORD.equals(localName)) { mdId = event.asStartElement().getAttributeByName(new QName(MD_ID)).getValue(); } else if (RECORD.equals(localName)) { record = getRecord(embeddedRecord, parser); } } } } catch (final XMLStreamException e) { log.error("error parsing record: " + embeddedRecord); } return new Pair(mdId, record); } }; } /** * Copy the /indexRecord/result element and children, preserving namespace declarations etc. * * @param indexDocument * @param results * @param parser * @throws XMLStreamException */ protected String getRecord(final String record, final XMLEventReader parser) throws XMLStreamException { StringWriter results = new StringWriter(); final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); // TODO: newRecord should copy all the namespace prefixes setup in parents // fortunately the only parent of the result element is the 'indexrecord', so it should be easy to get // the namespaces declared on the root element (and fast) final List namespaces = Lists.newArrayList( eventFactory.get().createNamespace("dri", "http://www.driver-repository.eu/namespace/dri"), eventFactory.get().createNamespace("dr", "http://www.driver-repository.eu/namespace/dr"), eventFactory.get().createNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance"), eventFactory.get().createNamespace("dc", "http://purl.org/dc/elements/1.1/")); StartElement newRecord = eventFactory.get().createStartElement("", null, RECORD, null, namespaces.iterator()); // new root record writer.add(newRecord); // copy the rest as it is while (parser.hasNext()) { final XMLEvent resultEvent = parser.nextEvent(); // TODO: replace with depth tracking instead of close tag tracking. if (resultEvent.isEndElement() && resultEvent.asEndElement().getName().getLocalPart().equals(RECORD)) { writer.add(resultEvent); break; } writer.add(resultEvent); } writer.close(); return results.toString(); } }