63 lines
1.7 KiB
JavaScript
63 lines
1.7 KiB
JavaScript
|
const Entity = require("../model/Entity");
|
||
|
const xml2js = require('xml2js');
|
||
|
|
||
|
|
||
|
/**
 * Converts an arXiv OAI-PMH `GetRecord` response (oai_dc metadata format)
 * into an `Entity`.
 */
class Arxiv {

    /**
     * Parses an OAI-PMH XML document and builds the corresponding entity.
     * Never rejects: any failure is logged and reported as `null`.
     *
     * @param {string} pid - Identifier stored on the resulting entity.
     * @param {string} xml - Raw OAI-PMH `GetRecord` response.
     * @returns {Promise<Entity|null>} The entity, or `null` when parsing failed.
     */
    async parse(pid, xml) {
        try {
            return await this.parseXml(pid, xml);
        } catch (error) {
            console.error('Parsing error:', error);
            return null;
        }
    }

    /**
     * Maps the object produced by xml2js onto an `Entity`.
     *
     * The parser is configured with `explicitArray: false`, so an element that
     * occurs once arrives as a plain value while a repeated element arrives as
     * an array. Fields read here must therefore cope with both shapes.
     *
     * @param {string} pid - Identifier stored on the resulting entity.
     * @param {object} result - Parsed OAI-PMH document.
     * @returns {Entity} The populated entity.
     * @throws {Error} When the response carries no `oai_dc:dc` metadata
     *     (for example an OAI-PMH error response or an empty document).
     */
    parseJson(pid, result) {
        // Walk the envelope defensively so a missing level produces one clear
        // error instead of an opaque "cannot read properties of undefined".
        const envelope = result && result['OAI-PMH'];
        const getRecord = envelope && envelope['GetRecord'];
        const record = getRecord && getRecord['record'];
        const metadata = record && record['metadata'] && record['metadata']['oai_dc:dc'];
        if (!metadata) {
            throw new Error(`No oai_dc:dc metadata in OAI-PMH response for "${pid}"`);
        }

        // Normalises "absent / single value / repeated values" to an array.
        const toList = (value) => {
            if (value === undefined || value === null) {
                return [];
            }
            return Array.isArray(value) ? value : [value];
        };

        const entity = new Entity(pid);
        entity.id = pid;
        entity.type = 'publication';
        entity.title = metadata['dc:title'];

        entity.result.source = "arxiv";
        // NOTE(review): dc:identifier and dc:description are passed through
        // unchanged; if the record repeats them they will be arrays here.
        entity.result.url = metadata['dc:identifier'];
        entity.result.description = metadata['dc:description'];
        entity.result.accessRights = "OPEN";
        entity.result.journal = null;

        // Only the year is kept. When several dates are present the first one
        // is used; a missing or non-string date yields null rather than a crash.
        const firstDate = toList(metadata['dc:date'])[0];
        entity.result.date = typeof firstDate === 'string' ? firstDate.split("-")[0] : null;

        // Always an array; empty when the record lists no creator.
        entity.result.authors = toList(metadata['dc:creator']);

        return entity;
    }

    /**
     * Parses the XML with xml2js and converts the result via `parseJson`.
     *
     * @param {string} pid - Identifier stored on the resulting entity.
     * @param {string} xml - Raw OAI-PMH `GetRecord` response.
     * @returns {Promise<Entity>} Resolves with the populated entity; rejects
     *     when the XML cannot be parsed or contains no usable metadata.
     */
    async parseXml(pid, xml) {
        const parser = new xml2js.Parser({
            trim: true, // Trims whitespace from text nodes
            explicitArray: false, // Prevents wrapping single child nodes in an array
        });

        // The Promise only adapts the callback API; the mapping step runs
        // outside the parser callback so its errors reject this method directly.
        const parsed = await new Promise((resolve, reject) => {
            parser.parseString(xml, (err, result) => {
                if (err) {
                    reject(err);
                } else {
                    resolve(result);
                }
            });
        });

        return this.parseJson(pid, parsed);
    }
}
|
||
|
|
||
|
// Export the Arxiv class as this CommonJS module's sole export.
module.exports = Arxiv;
|