package eu.dnetlib.data.mdstore.modular; import java.io.StringReader; import java.util.HashMap; import java.util.Map; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathFactory; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.xml.sax.InputSource; import static eu.dnetlib.data.mdstore.modular.MDStoreConstants.*; /** * Terrible implementation of a record parser. * * @author marko * */ public class SimpleRecordParser implements RecordParser { static final Log log = LogFactory.getLog(SimpleRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM private long ts; @Override public Map parseRecord(String record) { Map props = new HashMap(); props.put(TIMESTAMP, String.valueOf(getTimestamp())); try { // DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); XPath xpath = XPathFactory.newInstance().newXPath(); // Document doc = builder.parse(new InputSource(new StringReader(record))); InputSource doc = new InputSource(new StringReader(record)); props.put(ID, xpath.evaluate("//*[local-name()='objIdentifier']", doc)); props.put("originalId", xpath.evaluate("//*[local-name()='efgEntity']/*/*[local-name()='identifier']", doc)); // String date = xpath.evaluate("//*[local-name()='dateOfCollection'][1]", doc); // props.put("date", new Date(date).getTime()); } catch (Exception e) { log.warn("got exception while parsing document", e); log.warn("record is:"); log.warn(record); log.warn("------------"); } return props; } @Override public void setTimestamp(final long ts) { this.ts = ts; log.debug("RecordParser date set to "+ts); } @Override public long getTimestamp() { return ts; } }