pid-parser-service/parsers/Arxiv.js

63 lines
1.7 KiB
JavaScript
Raw Permalink Normal View History

2024-11-20 10:34:15 +01:00
const Entity = require("../model/Entity");
const xml2js = require('xml2js');
/**
 * Normalises a value that xml2js may deliver as a scalar, an array, or not at all.
 *
 * With `explicitArray: false` a single child element comes back as a bare value
 * while repeated elements come back as an array, so callers cannot assume either.
 *
 * @param {*} value - Value read from the parsed XML tree.
 * @returns {Array} `[]` for null/undefined, the array itself, or a one-element array.
 */
function toList(value) {
  if (value === undefined || value === null) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
}

/**
 * Parser for arXiv OAI-PMH `GetRecord` responses carrying `oai_dc` metadata.
 *
 * Converts the XML payload into an `Entity` (project model; assumed to expose a
 * `result` object right after construction, as the code writes to it directly).
 */
class Arxiv {
  /**
   * Parses an XML response into an entity, never throwing.
   *
   * @param {string} pid - Persistent identifier the response was requested for.
   * @param {string} xml - Raw XML text of the OAI-PMH response.
   * @returns {Promise<Entity|null>} The populated entity, or `null` when the
   *   XML is malformed or does not contain the expected metadata.
   */
  async parse(pid, xml) {
    try {
      return await this.parseXml(pid, xml);
    } catch (error) {
      // Best-effort contract: callers receive null instead of an exception.
      console.error('Parsing error:', error);
      return null;
    }
  }

  /**
   * Maps an already-parsed OAI-PMH document (xml2js output) onto an entity.
   *
   * @param {string} pid - Persistent identifier of the record.
   * @param {object} result - Object tree produced by xml2js for the response.
   * @returns {Entity} Entity populated with title, url, year, abstract and authors.
   * @throws {Error} When the document has no
   *   `OAI-PMH/GetRecord/record/metadata/oai_dc:dc` element.
   */
  parseJson(pid, result) {
    // Walk the expected path step by step so an absent element yields a clear
    // error rather than "Cannot read properties of undefined".
    const path = ['OAI-PMH', 'GetRecord', 'record', 'metadata', 'oai_dc:dc'];
    let metadata = result;
    for (const key of path) {
      metadata = metadata === undefined || metadata === null ? undefined : metadata[key];
    }
    if (metadata === undefined || metadata === null || typeof metadata !== 'object') {
      throw new Error(`No oai_dc metadata found in arXiv response for "${pid}"`);
    }

    const entity = new Entity(pid);
    entity.id = pid;
    entity.type = 'publication';
    entity.title = metadata['dc:title'];

    entity.result.source = "arxiv";
    entity.result.accessRights = "OPEN";
    entity.result.journal = null;
    entity.result.description = metadata['dc:description'];

    // dc:identifier can repeat (e.g. abstract URL plus DOI / journal reference).
    // Prefer the entry that is an actual http(s) URL; otherwise keep the first.
    const identifiers = toList(metadata['dc:identifier']);
    const httpIdentifier = identifiers.find(
      (candidate) => typeof candidate === 'string' && /^https?:\/\//i.test(candidate)
    );
    entity.result.url = httpIdentifier !== undefined ? httpIdentifier : identifiers[0];

    // dc:date can repeat as well; use the first listed date and keep only
    // its year component.
    const firstDate = toList(metadata['dc:date'])[0];
    entity.result.date = typeof firstDate === 'string' ? firstDate.split("-")[0] : null;

    // Always an array; empty when the record lists no creators.
    entity.result.authors = toList(metadata['dc:creator']);

    return entity;
  }

  /**
   * Parses the XML text with xml2js and converts the result via `parseJson`.
   *
   * @param {string} pid - Persistent identifier of the record.
   * @param {string} xml - Raw XML text of the OAI-PMH response.
   * @returns {Promise<Entity>} Resolves with the entity; rejects on XML syntax
   *   errors or when `parseJson` cannot find the expected metadata.
   */
  parseXml(pid, xml) {
    return new Promise((resolve, reject) => {
      const parser = new xml2js.Parser({
        trim: true, // Trims whitespace from text nodes
        explicitArray: false, // Single children are bare values; repeated ones are arrays
      });
      parser.parseString(xml, (err, result) => {
        if (err) {
          reject(err);
          return;
        }
        try {
          resolve(this.parseJson(pid, result));
        } catch (mappingError) {
          // Keep mapping failures inside the promise instead of letting them
          // escape through the parser's callback machinery.
          reject(mappingError);
        }
      });
    });
  }
}
// CommonJS export: the module's value is the Arxiv class itself.
module.exports = Arxiv;