2020-04-29 13:24:29 +02:00
|
|
|
|
2020-04-20 10:02:42 +02:00
|
|
|
package eu.dnetlib.doiboost.orcid.xml;
|
2020-04-17 18:52:39 +02:00
|
|
|
|
2020-04-29 13:24:29 +02:00
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.List;
|
|
|
|
|
2020-11-17 18:23:12 +01:00
|
|
|
import org.mortbay.log.Log;
|
|
|
|
|
2020-04-22 14:41:19 +02:00
|
|
|
import com.ximpleware.AutoPilot;
|
|
|
|
import com.ximpleware.EOFException;
|
|
|
|
import com.ximpleware.EncodingException;
|
|
|
|
import com.ximpleware.EntityException;
|
|
|
|
import com.ximpleware.ParseException;
|
|
|
|
import com.ximpleware.VTDGen;
|
|
|
|
import com.ximpleware.VTDNav;
|
2020-04-29 13:24:29 +02:00
|
|
|
|
2020-04-22 14:41:19 +02:00
|
|
|
import eu.dnetlib.dhp.parser.utility.VtdException;
|
|
|
|
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
|
2020-11-12 17:43:32 +01:00
|
|
|
import eu.dnetlib.dhp.schema.orcid.AuthorData;
|
2020-04-24 15:50:40 +02:00
|
|
|
import eu.dnetlib.doiboost.orcid.model.WorkData;
|
2020-04-17 18:52:39 +02:00
|
|
|
|
2020-04-22 14:41:19 +02:00
|
|
|
public class XMLRecordParser {
|
2020-04-17 18:52:39 +02:00
|
|
|
|
2020-04-29 13:24:29 +02:00
|
|
|
private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
|
|
|
|
private static final String NS_COMMON = "common";
|
|
|
|
private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
|
|
|
|
private static final String NS_PERSON = "person";
|
|
|
|
private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
|
|
|
|
private static final String NS_DETAILS = "personal-details";
|
|
|
|
private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
|
|
|
|
private static final String NS_OTHER = "other-name";
|
|
|
|
private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
|
|
|
|
private static final String NS_RECORD = "record";
|
|
|
|
private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
|
|
|
|
|
|
|
|
private static final String NS_WORK = "work";
|
|
|
|
private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
|
|
|
|
|
|
|
|
private static final String NS_ERROR = "error";
|
|
|
|
|
|
|
|
public static AuthorData VTDParseAuthorData(byte[] bytes)
|
|
|
|
throws VtdException, EncodingException, EOFException, EntityException, ParseException {
|
|
|
|
final VTDGen vg = new VTDGen();
|
|
|
|
vg.setDoc(bytes);
|
|
|
|
vg.parse(true);
|
|
|
|
final VTDNav vn = vg.getNav();
|
|
|
|
final AutoPilot ap = new AutoPilot(vn);
|
|
|
|
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
|
|
|
|
ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
|
|
|
|
ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
|
|
|
|
ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
|
|
|
|
ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
|
|
|
|
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
|
|
|
|
|
|
|
|
AuthorData authorData = new AuthorData();
|
|
|
|
final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
|
|
|
|
if (!errors.isEmpty()) {
|
|
|
|
authorData.setErrorCode(errors.get(0));
|
|
|
|
return authorData;
|
|
|
|
}
|
|
|
|
|
|
|
|
List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
|
|
|
|
.getTextValuesWithAttributes(
|
|
|
|
ap, vn, "//record:record", Arrays.asList("path"));
|
|
|
|
if (!recordNodes.isEmpty()) {
|
|
|
|
final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
|
|
|
|
authorData.setOid(oid);
|
|
|
|
} else {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
final List<String> names = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:given-names");
|
|
|
|
if (!names.isEmpty()) {
|
|
|
|
authorData.setName(names.get(0));
|
|
|
|
}
|
|
|
|
|
|
|
|
final List<String> surnames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:family-name");
|
|
|
|
if (!surnames.isEmpty()) {
|
|
|
|
authorData.setSurname(surnames.get(0));
|
|
|
|
}
|
|
|
|
|
|
|
|
final List<String> creditNames = VtdUtilityParser.getTextValue(ap, vn, "//personal-details:credit-name");
|
|
|
|
if (!creditNames.isEmpty()) {
|
|
|
|
authorData.setCreditName(creditNames.get(0));
|
|
|
|
}
|
2020-11-12 17:43:32 +01:00
|
|
|
|
|
|
|
final List<String> otherNames = VtdUtilityParser.getTextValue(ap, vn, "//other-name:content");
|
|
|
|
if (!otherNames.isEmpty()) {
|
|
|
|
authorData.setOtherNames(otherNames);
|
|
|
|
}
|
|
|
|
|
2020-04-29 13:24:29 +02:00
|
|
|
return authorData;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static WorkData VTDParseWorkData(byte[] bytes)
|
|
|
|
throws VtdException, EncodingException, EOFException, EntityException, ParseException {
|
|
|
|
final VTDGen vg = new VTDGen();
|
|
|
|
vg.setDoc(bytes);
|
|
|
|
vg.parse(true);
|
|
|
|
final VTDNav vn = vg.getNav();
|
|
|
|
final AutoPilot ap = new AutoPilot(vn);
|
|
|
|
ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
|
|
|
|
ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
|
|
|
|
ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);
|
|
|
|
|
|
|
|
WorkData workData = new WorkData();
|
|
|
|
final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
|
|
|
|
if (!errors.isEmpty()) {
|
|
|
|
workData.setErrorCode(errors.get(0));
|
|
|
|
return workData;
|
|
|
|
}
|
|
|
|
|
|
|
|
List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
|
|
|
|
.getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path"));
|
|
|
|
if (!workNodes.isEmpty()) {
|
|
|
|
final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
|
|
|
|
workData.setOid(oid);
|
|
|
|
} else {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
final List<String> dois = VtdUtilityParser
|
|
|
|
.getTextValue(
|
|
|
|
ap, vn, "//common:external-id-type[text()=\"doi\"]/../common:external-id-value");
|
|
|
|
if (!dois.isEmpty()) {
|
|
|
|
workData.setDoi(dois.get(0));
|
|
|
|
workData.setDoiFound(true);
|
|
|
|
}
|
|
|
|
return workData;
|
|
|
|
}
|
2020-11-17 18:23:12 +01:00
|
|
|
|
|
|
|
public static String retrieveOrcidIdFromSummary(byte[] bytes, String defaultValue)
|
|
|
|
throws VtdException, ParseException {
|
|
|
|
return retrieveOrcidId(bytes, defaultValue, NS_RECORD, NS_RECORD_URL, "//record:record", "path").substring(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
public static String retrieveOrcidIdFromActivity(byte[] bytes, String defaultValue)
|
|
|
|
throws VtdException, ParseException {
|
|
|
|
return retrieveOrcidId(bytes, defaultValue, NS_WORK, NS_WORK_URL, "//work:work", "put-code");
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String retrieveOrcidId(byte[] bytes, String defaultValue, String ns, String nsUrl, String xpath,
|
|
|
|
String idAttributeName)
|
|
|
|
throws VtdException, ParseException {
|
|
|
|
final VTDGen vg = new VTDGen();
|
|
|
|
vg.setDoc(bytes);
|
|
|
|
vg.parse(true);
|
|
|
|
final VTDNav vn = vg.getNav();
|
|
|
|
final AutoPilot ap = new AutoPilot(vn);
|
|
|
|
ap.declareXPathNameSpace(ns, nsUrl);
|
|
|
|
List<VtdUtilityParser.Node> recordNodes = VtdUtilityParser
|
|
|
|
.getTextValuesWithAttributes(
|
|
|
|
ap, vn, xpath, Arrays.asList(idAttributeName));
|
|
|
|
if (!recordNodes.isEmpty()) {
|
|
|
|
return (recordNodes.get(0).getAttributes().get(idAttributeName));
|
|
|
|
}
|
|
|
|
Log.info("id not found - default: " + defaultValue);
|
|
|
|
return defaultValue;
|
|
|
|
}
|
2020-04-17 18:52:39 +02:00
|
|
|
}
|