package eu.dnetlib.data.collector.plugins.ariadneplus.ads;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;

import com.google.common.collect.Lists;
import com.ximpleware.*;
import eu.dnetlib.data.collector.ThreadSafeIterator;
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Iterator over XML records that adds a fixed set of namespace declarations
 * (xsi, dc, dcterms, ads) to the root element of every record it returns.
 *
 * <p>Records that cannot be processed are logged and skipped; see {@link #doNext()}.
 */
public class ADSIterator extends ThreadSafeIterator {

	private static final Log log = LogFactory.getLog(ADSIterator.class);

	/** Attribute text inserted into the root element of each record. */
	private static final String NAMESPACE_DECLARATIONS =
			" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
					+ "         xmlns:dcterms=\"http://purl.org/dc/terms/\"\n"
					+ "         xmlns:ads=\"https://archaeologydataservice.ac.uk/\"";

	/** Source of the raw XML records. */
	private Iterator<String> iterator;

	/**
	 * @param recordIterator iterator supplying the XML records to decorate
	 */
	public ADSIterator(final Iterator<String> recordIterator) {
		this.iterator = recordIterator;
	}

	/**
	 * @return {@code true} if the wrapped iterator has more records
	 */
	@Override
	public boolean doHasNext() {
		return iterator.hasNext();
	}

	/**
	 * Returns the next record with the namespace declarations added.
	 *
	 * <p>If a record cannot be processed it is logged and skipped, and the following
	 * record is tried instead. When the wrapped iterator is exhausted after a skip,
	 * the empty string is returned.
	 *
	 * @return the decorated record, or {@code ""} if the remaining records were all skipped
	 */
	@Override
	public String doNext() {
		// Iterate instead of recursing so that a long run of bad records
		// cannot exhaust the call stack.
		while (true) {
			final String record = iterator.next();
			try {
				return addADSNamespace(record);
			} catch (Exception e) {
				log.warn("Skipping record because of exception "+e);
				log.debug("Skipped record: "+record);
				if (!doHasNext()) {
					return "";
				}
			}
		}
	}

	/**
	 * Inserts the namespace declarations as attributes of the root element of the given XML.
	 *
	 * @param xml the XML record to modify
	 * @return the XML with the namespace declarations added to its root element
	 * @throws CollectorServiceRuntimeException if the XML cannot be parsed or modified
	 */
	protected String addADSNamespace(final String xml) {
		try {
			final VTDGen vg = new VTDGen();
			// Explicit UTF-8 on both sides so the result does not depend on the platform charset.
			vg.setDoc(xml.getBytes(StandardCharsets.UTF_8));
			// false = namespace-unaware parsing, so that all namespace nodes stay addressable using xpath @*
			vg.parse(false);
			final VTDNav vn = vg.getNav();
			final XMLModifier xm = new XMLModifier(vn);
			final byte[] attrBytes = NAMESPACE_DECLARATIONS.getBytes(StandardCharsets.UTF_8);
			vn.toElement(VTDNav.ROOT);
			xm.insertAttribute(attrBytes);
			final ByteArrayOutputStream baos = new ByteArrayOutputStream();
			xm.output(baos);
			return new String(baos.toByteArray(), StandardCharsets.UTF_8);
		} catch (ParseException | ModifyException | NavException | IOException | TranscodeException e) {
			log.error("Cannot add namespace declarations to element: "+xml);
			throw new CollectorServiceRuntimeException("Cannot add namespace declarations to element", e);
		}
	}

	/**
	 * @return the wrapped record iterator
	 */
	public Iterator<String> getIterator() {
		return iterator;
	}

	/**
	 * @param iterator the record iterator to wrap from now on
	 */
	public void setIterator(final Iterator<String> iterator) {
		this.iterator = iterator;
	}
}