dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/mdstore/modular/SimpleRecordParser.java

67 lines
1.8 KiB
Java

package eu.dnetlib.data.mdstore.modular;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.xml.sax.InputSource;

import static eu.dnetlib.data.mdstore.modular.MDStoreConstants.*;
/**
 * Minimal {@link RecordParser} that pulls a fixed set of properties out of an XML record by
 * means of XPath expressions.
 *
 * <p>The returned map always carries the timestamp previously supplied through
 * {@link #setTimestamp(long)}; the two identifier entries are added only when the record
 * can be parsed as XML.
 *
 * @author marko
 *
 */
public class SimpleRecordParser implements RecordParser {

    static final Log log = LogFactory.getLog(SimpleRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM

    /** Value stored (as a string) under the {@code TIMESTAMP} key of every parsed record. */
    private long ts;

    /**
     * Extracts the properties of a single XML record.
     *
     * <p>The record is parsed exactly once and both XPath expressions are evaluated against
     * the resulting DOM node. Each identifier entry holds the string value of the first
     * matching element, or the empty string when no element matches.
     *
     * @param record
     *            the XML text of the record
     * @return a map holding the {@code TIMESTAMP} entry and, when the record could be parsed,
     *         the {@code ID} and {@code "originalId"} entries; parsing failures are logged at
     *         warn level and are not propagated to the caller
     */
    @Override
    public Map<String, String> parseRecord(String record) {
        Map<String, String> props = new HashMap<String, String>();
        props.put(TIMESTAMP, String.valueOf(getTimestamp()));

        try {
            XPath xpath = XPathFactory.newInstance().newXPath();

            // An InputSource backed by a Reader can only be consumed once: handing the same
            // source to a second evaluate() call fails because the reader is already
            // exhausted. Materialise the document node once and reuse it as the context item.
            Object doc = xpath.evaluate("/", new InputSource(new StringReader(record)), XPathConstants.NODE);

            props.put(ID, xpath.evaluate("//*[local-name()='objIdentifier']", doc));
            props.put("originalId", xpath.evaluate("//*[local-name()='efgEntity']/*/*[local-name()='identifier']", doc));
        } catch (Exception e) {
            // Best effort by design: a broken record still yields the timestamp-only map.
            log.warn("got exception while parsing document", e);
            log.warn("record is:");
            log.warn(record);
            log.warn("------------");
        }

        return props;
    }

    /**
     * Sets the timestamp that subsequent {@link #parseRecord(String)} calls store in their
     * result.
     *
     * @param ts
     *            the timestamp value to record
     */
    @Override
    public void setTimestamp(final long ts) {
        this.ts = ts;
        log.debug("RecordParser date set to "+ts);
    }

    /**
     * Returns the timestamp currently configured on this parser.
     *
     * @return the value last passed to {@link #setTimestamp(long)}, or {@code 0} if it was
     *         never called
     */
    @Override
    public long getTimestamp() {
        return ts;
    }
}