67 lines
1.8 KiB
Java
67 lines
1.8 KiB
Java
package eu.dnetlib.data.mdstore.modular;
|
|
|
|
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;

import static eu.dnetlib.data.mdstore.modular.MDStoreConstants.*;
|
|
|
|
/**
|
|
* Terrible implementation of a record parser.
|
|
*
|
|
* @author marko
|
|
*
|
|
*/
|
|
public class SimpleRecordParser implements RecordParser {
|
|
static final Log log = LogFactory.getLog(SimpleRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM
|
|
|
|
private long ts;
|
|
|
|
@Override
|
|
public Map<String, String> parseRecord(String record) {
|
|
Map<String, String> props = new HashMap<String, String>();
|
|
props.put(TIMESTAMP, String.valueOf(getTimestamp()));
|
|
|
|
try {
|
|
// DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
|
|
XPath xpath = XPathFactory.newInstance().newXPath();
|
|
|
|
// Document doc = builder.parse(new InputSource(new StringReader(record)));
|
|
InputSource doc = new InputSource(new StringReader(record));
|
|
|
|
props.put(ID, xpath.evaluate("//*[local-name()='objIdentifier']", doc));
|
|
props.put("originalId", xpath.evaluate("//*[local-name()='efgEntity']/*/*[local-name()='identifier']", doc));
|
|
|
|
// String date = xpath.evaluate("//*[local-name()='dateOfCollection'][1]", doc);
|
|
// props.put("date", new Date(date).getTime());
|
|
|
|
} catch (Exception e) {
|
|
log.warn("got exception while parsing document", e);
|
|
log.warn("record is:");
|
|
log.warn(record);
|
|
log.warn("------------");
|
|
}
|
|
return props;
|
|
|
|
}
|
|
|
|
@Override
|
|
public void setTimestamp(final long ts) {
|
|
this.ts = ts;
|
|
log.debug("RecordParser date set to "+ts);
|
|
}
|
|
|
|
@Override
|
|
public long getTimestamp() {
|
|
return ts;
|
|
}
|
|
|
|
}
|