package eu.dnetlib.doiboost.orcidnodoi.xml;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ximpleware.*;

import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;
import eu.dnetlib.doiboost.orcidnodoi.model.ExternalId;
import eu.dnetlib.doiboost.orcidnodoi.model.PublicationDate;
import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;

/**
 * Parses an ORCID "work" XML record with the VTD-XML parser and maps it onto a
 * {@link WorkDataNoDoi}.
 */
public class XMLRecordParserNoDoi {

    private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class);

    private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
    private static final String NS_COMMON = "common";
    private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
    private static final String NS_PERSON = "person";
    private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
    private static final String NS_DETAILS = "personal-details";
    private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
    private static final String NS_OTHER = "other-name";
    private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
    private static final String NS_RECORD = "record";
    private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
    private static final String NS_WORK = "work";
    private static final String NS_WORK_URL = "http://www.orcid.org/ns/work";
    private static final String NS_ERROR = "error";

    /**
     * Parses the given XML bytes into a {@link WorkDataNoDoi}.
     *
     * <p>If the document contains an {@code error:response-code} element, only the error code is
     * set on the returned object. If it contains no {@code work:work} element, {@code null} is
     * returned.
     *
     * @param bytes the raw XML document
     * @return the populated work data, an object carrying only the error code, or {@code null}
     *         when no {@code work:work} element is present
     */
    public static WorkDataNoDoi VTDParseWorkData(byte[] bytes)
        throws VtdException, EncodingException, EOFException, EntityException, ParseException, XPathParseException,
        NavException, XPathEvalException {
        final VTDGen vg = new VTDGen();
        vg.setDoc(bytes);
        // namespace-aware parsing: required by the prefixed XPath expressions and toElementNS below
        vg.parse(true);
        final VTDNav vn = vg.getNav();
        final AutoPilot ap = new AutoPilot(vn);
        ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
        ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL);
        ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);

        final WorkDataNoDoi workData = new WorkDataNoDoi();

        final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
        if (!errors.isEmpty()) {
            workData.setErrorCode(errors.get(0));
            return workData;
        }

        final List<VtdUtilityParser.Node> workNodes = VtdUtilityParser
            .getTextValuesWithAttributes(ap, vn, "//work:work", Arrays.asList("path", "put-code"));
        if (workNodes.isEmpty()) {
            return null;
        }
        // The owner id is the second "/"-separated segment of the path attribute
        // (presumably "/<orcid>/work/<put-code>" -- TODO confirm against the ORCID schema).
        final String oid = (workNodes.get(0).getAttributes().get("path")).split("/")[1];
        workData.setOid(oid);
        final String id = (workNodes.get(0).getAttributes().get("put-code"));
        workData.setId(id);

        final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//common:title");
        if (!titles.isEmpty()) {
            workData.setTitles(titles);
        }

        final List<String> sourceNames = VtdUtilityParser.getTextValue(ap, vn, "//common:source-name");
        if (!sourceNames.isEmpty()) {
            workData.setSourceName(sourceNames.get(0));
        }

        final List<String> types = VtdUtilityParser.getTextValue(ap, vn, "//work:type");
        if (!types.isEmpty()) {
            workData.setType(types.get(0));
        }

        final List<String> urls = VtdUtilityParser.getTextValue(ap, vn, "//common:url");
        if (!urls.isEmpty()) {
            workData.setUrls(urls);
        }

        workData.setPublicationDates(getPublicationDates(vn, ap));
        workData.setExtIds(getExternalIds(vn, ap));
        workData.setContributors(getContributors(vn, ap));
        return workData;
    }

    /**
     * Collects one {@link PublicationDate} per {@code common:publication-date} element that has a
     * year. Month and day are read from the same element, so a date lacking a month or day can no
     * longer shift the values of the following dates.
     */
    private static List<PublicationDate> getPublicationDates(VTDNav vn, AutoPilot ap)
        throws XPathParseException, NavException, XPathEvalException {
        final List<PublicationDate> publicationDates = new ArrayList<>();
        ap.selectXPath("//common:publication-date");
        while (ap.evalXPath() != -1) {
            final String year = commonChildText(vn, "year");
            if (year == null) {
                // as before, a date without a year produces no entry
                continue;
            }
            final PublicationDate publicationDate = new PublicationDate();
            publicationDate.setYear(year);
            final String month = commonChildText(vn, "month");
            if (month != null) {
                publicationDate.setMonth(month);
            }
            final String day = commonChildText(vn, "day");
            if (day != null) {
                publicationDate.setDay(day);
            }
            publicationDates.add(publicationDate);
        }
        return publicationDates;
    }

    /**
     * Collects one {@link ExternalId} per {@code common:external-id} element, reading type, value
     * and relationship from that same element.
     *
     * <p>The original all-or-nothing contract is kept: if any external id carries a type without a
     * value (or a value without a type) an empty list is returned.
     */
    private static List<ExternalId> getExternalIds(VTDNav vn, AutoPilot ap)
        throws XPathParseException, NavException, XPathEvalException {
        final List<ExternalId> extIds = new ArrayList<>();
        boolean consistent = true;
        ap.selectXPath("//common:external-id");
        while (ap.evalXPath() != -1) {
            final String type = commonChildText(vn, "external-id-type");
            final String value = commonChildText(vn, "external-id-value");
            if (type == null && value == null) {
                continue;
            }
            if (type == null || value == null) {
                consistent = false;
                continue;
            }
            final ExternalId extId = new ExternalId();
            extId.setType(type);
            extId.setValue(value);
            final String relationship = commonChildText(vn, "external-id-relationship");
            if (relationship != null) {
                extId.setRelationShip(relationship);
            }
            extIds.add(extId);
        }
        if (consistent) {
            return extIds;
        }
        return new ArrayList<>();
    }

    /**
     * Returns the normalized text of the first child of the current element whose namespace is
     * {@link #NS_COMMON_URL} and whose local name is {@code localName}, or {@code null} when there
     * is no such child or it has no text. The cursor is left on the element it started on.
     */
    private static String commonChildText(VTDNav vn, String localName) throws NavException {
        String text = null;
        if (vn.toElementNS(VTDNav.FIRST_CHILD, NS_COMMON_URL, localName)) {
            final int t = vn.getText();
            if (t >= 0) {
                text = vn.toNormalizedString(t);
            }
            // step back so the enclosing XPath iteration continues from the right node
            vn.toElement(VTDNav.PARENT);
        }
        return text;
    }

    /**
     * Collects one {@link Contributor} per {@code work:contributor} element, filling credit name,
     * sequence and role when the corresponding child elements carry text.
     */
    private static List<Contributor> getContributors(VTDNav vn, AutoPilot ap)
        throws XPathParseException, NavException, XPathEvalException {
        final List<Contributor> contributors = new ArrayList<>();
        ap.selectXPath("//work:contributors/work:contributor");
        while (ap.evalXPath() != -1) {
            final Contributor contributor = new Contributor();
            if (vn.toElement(VTDNav.FIRST_CHILD, "work:credit-name")) {
                final int val = vn.getText();
                if (val != -1) {
                    contributor.setCreditName(vn.toNormalizedString(val));
                }
                vn.toElement(VTDNav.PARENT);
            }
            if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-attributes")) {
                if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-sequence")) {
                    final int val = vn.getText();
                    if (val != -1) {
                        contributor.setSequence(vn.toNormalizedString(val));
                    }
                    vn.toElement(VTDNav.PARENT);
                }
                if (vn.toElement(VTDNav.FIRST_CHILD, "work:contributor-role")) {
                    final int val = vn.getText();
                    if (val != -1) {
                        contributor.setRole(vn.toNormalizedString(val));
                    }
                    vn.toElement(VTDNav.PARENT);
                }
                // back from contributor-attributes to the contributor element
                vn.toElement(VTDNav.PARENT);
            }
            contributors.add(contributor);
        }
        return contributors;
    }
}