Use VTD-XML to parse ORCID XML records; set the Oozie launcher heap space to 4g

2020-04-22 14:41:19 +02:00 · 2020-04-22 14:41:19 +02:00 · 7d759947ae
parent 5d46ec7d5f
commit 7d759947ae
8 changed files with 408 additions and 256 deletions
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java
@ -1,27 +1,22 @@
 package eu.dnetlib.doiboost.orcid;

+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import java.io.IOException;
 import java.net.URI;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import org.mortbay.log.Log;

 public class OrcidDSManager {

-	private static final Logger logger = LoggerFactory.getLogger(OrcidDSManager.class);
-	
    private String hdfsServerUri;
    private String hdfsOrcidDefaultPath;
    private String summariesFileNameTarGz;
    private String outputAuthorsPath;

    public static void main(String[] args) throws IOException, Exception {
-    	logger.info("OrcidDSManager started");
        OrcidDSManager orcidDSManager = new OrcidDSManager();
        orcidDSManager.loadArgs(args);
        orcidDSManager.generateAuthors();
@ -31,8 +26,12 @@ public class OrcidDSManager {
        Configuration conf = initConfigurationObject();
        FileSystem fs = initFileSystemObject(conf);
        String tarGzUri = hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(summariesFileNameTarGz);
-    	logger.info("Started parsing "+tarGzUri);
-    	Path outputPath = new Path(hdfsServerUri.concat(hdfsOrcidDefaultPath).concat(outputAuthorsPath).concat(Long.toString(System.currentTimeMillis())).concat("/authors.seq"));
+        Path outputPath =
+                new Path(
+                        hdfsServerUri
+                                .concat(hdfsOrcidDefaultPath)
+                                .concat(outputAuthorsPath)
+                                .concat("authors.seq"));
        SummariesDecompressor.parseGzSummaries(conf, tarGzUri, outputPath);
    }

@ -48,7 +47,7 @@ public class OrcidDSManager {
    }

    private FileSystem initFileSystemObject(Configuration conf) {
-    	//Get the filesystem - HDFS
+        // Get the filesystem - HDFS
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create(hdfsServerUri.concat(hdfsOrcidDefaultPath)), conf);
@ -60,16 +59,20 @@ public class OrcidDSManager {
    }

    private void loadArgs(String[] args) throws IOException, Exception {
-    	final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(OrcidDSManager.class.getResourceAsStream("/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json")));
+        final ArgumentApplicationParser parser =
+                new ArgumentApplicationParser(
+                        IOUtils.toString(
+                                OrcidDSManager.class.getResourceAsStream(
+                                        "/eu/dnetlib/dhp/doiboost/create_orcid_authors_data.json")));
        parser.parseArgument(args);

-        final String hdfsServerUri = parser.get("hdfsServerUri");
-        logger.info("HDFS URI: "+hdfsServerUri);
-        Path hdfsOrcidDefaultPath = new Path(parser.get("hdfsOrcidDefaultPath"));
-        logger.info("Default Path: "+hdfsOrcidDefaultPath);
-        final String summariesFileNameTarGz = parser.get("summariesFileNameTarGz");
-        logger.info("Summaries File Name: "+summariesFileNameTarGz);
-        final String outputAuthorsPath = parser.get("summariesFileNameTarGz");
-        logger.info("Output Authors Data: "+outputAuthorsPath);
+        hdfsServerUri = parser.get("hdfsServerUri");
+        Log.info("HDFS URI: " + hdfsServerUri);
+        hdfsOrcidDefaultPath = parser.get("hdfsOrcidDefaultPath");
+        Log.info("Default Path: " + hdfsOrcidDefaultPath);
+        summariesFileNameTarGz = parser.get("summariesFileNameTarGz");
+        Log.info("Summaries File Name: " + summariesFileNameTarGz);
+        outputAuthorsPath = parser.get("outputAuthorsPath");
+        Log.info("Output Authors Data: " + outputAuthorsPath);
    }
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java
@ -3,6 +3,11 @@ package eu.dnetlib.doiboost.orcid;
 import eu.dnetlib.doiboost.orcid.json.JsonWriter;
 import eu.dnetlib.doiboost.orcid.model.AuthorData;
 import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URI;
 import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.hadoop.conf.Configuration;
@ -13,23 +18,14 @@ import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionCodecFactory;
-import org.apache.log4j.Logger;
-import org.xml.sax.SAXException;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.URI;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPathExpressionException;
+import org.mortbay.log.Log;

 public class SummariesDecompressor {

-	private static final Logger logger = Logger.getLogger(SummariesDecompressor.class);
+    private static final int MAX_XML_RECORDS_PARSED = -1;

-    public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) throws Exception {
+    public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath)
+            throws Exception {
        String uri = inputUri;
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path inputPath = new Path(uri);
@ -46,12 +42,13 @@ public class SummariesDecompressor {
            parseTarSummaries(fs, conf, gzipInputStream, outputPath);

        } finally {
-        	logger.debug("Closing gzip stream");
+            Log.debug("Closing gzip stream");
            IOUtils.closeStream(gzipInputStream);
        }
    }

-    private static void parseTarSummaries(FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) {
+    private static void parseTarSummaries(
+            FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) {
        int counter = 0;
        int nameFound = 0;
        int surnameFound = 0;
@ -61,33 +58,43 @@ public class SummariesDecompressor {
        try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream)) {
            TarArchiveEntry entry = null;

-    		try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
-	                SequenceFile.Writer.file(outputPath), SequenceFile.Writer.keyClass(Text.class),
+            try (SequenceFile.Writer writer =
+                    SequenceFile.createWriter(
+                            conf,
+                            SequenceFile.Writer.file(outputPath),
+                            SequenceFile.Writer.keyClass(Text.class),
                            SequenceFile.Writer.valueClass(Text.class))) {
-    			
                while ((entry = tais.getNextTarEntry()) != null) {
                    String filename = entry.getName();
+                    try {
                        if (entry.isDirectory()) {
-			        	logger.debug("Directory entry name: "+entry.getName());
+                            Log.debug("Directory entry name: " + entry.getName());
                        } else {
-			        	logger.debug("XML record entry name: "+entry.getName());
+                            Log.debug("XML record entry name: " + entry.getName());
                            counter++;
-			        	BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from tarInput
+                            BufferedReader br =
+                                    new BufferedReader(
+                                            new InputStreamReader(
+                                                    tais)); // Read directly from tarInput
                            String line;
                            StringBuffer buffer = new StringBuffer();
                            while ((line = br.readLine()) != null) {
                                buffer.append(line);
                            }
-			        	try (ByteArrayInputStream bais = new ByteArrayInputStream(buffer.toString().getBytes())) {
-							AuthorData authorData = XMLRecordParser.parse(bais);
-							if (authorData!=null) {
-								if (authorData.getErrorCode()!=null) {
-									errorFromOrcidFound+=1;
-									logger.debug("error from Orcid with code "+authorData.getErrorCode()+" for oid "+entry.getName());
+                            AuthorData authorData =
+                                    XMLRecordParser.VTDParse(buffer.toString().getBytes());
+                            if (authorData != null) {
+                                if (authorData.getErrorCode() != null) {
+                                    errorFromOrcidFound += 1;
+                                    Log.debug(
+                                            "error from Orcid with code "
+                                                    + authorData.getErrorCode()
+                                                    + " for oid "
+                                                    + entry.getName());
                                    continue;
                                }
                                String jsonData = JsonWriter.create(authorData);
-								logger.debug("oid: "+authorData.getOid() + " data: "+jsonData);
+                                Log.debug("oid: " + authorData.getOid() + " data: " + jsonData);

                                final Text key = new Text(authorData.getOid());
                                final Text value = new Text(jsonData);
@ -95,48 +102,59 @@ public class SummariesDecompressor {
                                try {
                                    writer.append(key, value);
                                } catch (IOException e) {
-	                            	logger.error("Writing to sequence file: "+e.getMessage());
-	                            	e.printStackTrace();
+                                    Log.debug("Writing to sequence file: " + e.getMessage());
+                                    Log.debug(e);
                                    throw new RuntimeException(e);
                                }

-					            if (authorData.getName()!=null) {
-					            	nameFound+=1;
+                                if (authorData.getName() != null) {
+                                    nameFound += 1;
                                }
-					            if (authorData.getSurname()!=null) {
-					            	surnameFound+=1;
+                                if (authorData.getSurname() != null) {
+                                    surnameFound += 1;
                                }
-					            if (authorData.getCreditName()!=null) {
-					            	creditNameFound+=1;
+                                if (authorData.getCreditName() != null) {
+                                    creditNameFound += 1;
                                }

+                            } else {
+                                Log.warn(
+                                        "Data not retrievable ["
+                                                + entry.getName()
+                                                + "] "
+                                                + buffer.toString());
+                                xmlParserErrorFound += 1;
                            }
-							else {
-								logger.error("Data not retrievable ["+entry.getName()+"] "+buffer.toString());
-								xmlParserErrorFound+=1;
-							}
-								
-						} catch (XPathExpressionException | ParserConfigurationException | SAXException e) {
-							logger.error("Parsing record from tar archive: "+e.getMessage());
-							e.printStackTrace();
                        }
+                    } catch (Exception e) {
+                        Log.warn(
+                                "Parsing record from tar archive and xml record: "
+                                        + filename
+                                        + "  "
+                                        + e.getMessage());
+                        Log.warn(e);
                    }

                    if ((counter % 100000) == 0) {
-			        	logger.info("Current xml records parsed: "+counter);
+                        Log.info("Current xml records parsed: " + counter);
+                    }
+
+                    if ((MAX_XML_RECORDS_PARSED > -1) && (counter > MAX_XML_RECORDS_PARSED)) {
+                        break;
                    }
                }
            }
        } catch (IOException e) {
-			logger.error("Parsing record from gzip archive: "+e.getMessage());
+            Log.warn("Parsing record from gzip archive: " + e.getMessage());
+            Log.warn(e);
            throw new RuntimeException(e);
        }
-    	logger.info("Summaries parse completed");
-    	logger.info("Total XML records parsed: "+counter);
-    	logger.info("Name found: "+nameFound);
-    	logger.info("Surname found: "+surnameFound);
-    	logger.info("Credit name found: "+creditNameFound);
-    	logger.info("Error from Orcid found: "+errorFromOrcidFound);
-    	logger.info("Error parsing xml record found: "+xmlParserErrorFound);
+        Log.info("Summaries parse completed");
+        Log.info("Total XML records parsed: " + counter);
+        Log.info("Name found: " + nameFound);
+        Log.info("Surname found: " + surnameFound);
+        Log.info("Credit name found: " + creditNameFound);
+        Log.info("Error from Orcid found: " + errorFromOrcidFound);
+        Log.info("Error parsing xml record found: " + xmlParserErrorFound);
    }
 }
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java
+++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java
@ -1,97 +1,80 @@
 package eu.dnetlib.doiboost.orcid.xml;

-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.util.Iterator;
-
-import javax.xml.namespace.NamespaceContext;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
-
+import com.ximpleware.AutoPilot;
+import com.ximpleware.EOFException;
+import com.ximpleware.EncodingException;
+import com.ximpleware.EntityException;
+import com.ximpleware.ParseException;
+import com.ximpleware.VTDGen;
+import com.ximpleware.VTDNav;
+import eu.dnetlib.dhp.parser.utility.VtdException;
+import eu.dnetlib.dhp.parser.utility.VtdUtilityParser;
 import eu.dnetlib.doiboost.orcid.model.AuthorData;
-import org.apache.commons.lang.StringUtils;
-import org.w3c.dom.Document;
-import org.xml.sax.SAXException;
-
+import java.util.Arrays;
+import java.util.List;

 public class XMLRecordParser {

-	public static AuthorData parse(ByteArrayInputStream bytesStream) throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
-		bytesStream.reset();
-		DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
-		builderFactory.setNamespaceAware(true);
-		DocumentBuilder builder = builderFactory.newDocumentBuilder();
+    private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common";
+    private static final String NS_COMMON = "common";
+    private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person";
+    private static final String NS_PERSON = "person";
+    private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details";
+    private static final String NS_DETAILS = "personal-details";
+    private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name";
+    private static final String NS_OTHER = "other-name";
+    private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record";
+    private static final String NS_RECORD = "record";
+    private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error";
+    private static final String NS_ERROR = "error";

-		Document xmlDocument = builder.parse(bytesStream);
-		XPath xPath = XPathFactory.newInstance().newXPath();
-		xPath.setNamespaceContext(new NamespaceContext() {
-		    @Override
-		    public Iterator getPrefixes(String arg0) {
-		        return null;
-		    }
-		    @Override
-		    public String getPrefix(String arg0) {
-		        return null;
-		    }
-		    @Override
-		    public String getNamespaceURI(String arg0) {
-		        if ("common".equals(arg0)) {
-		            return "http://www.orcid.org/ns/common";
-		        }
-		        else if ("person".equals(arg0)) {
-		            return "http://www.orcid.org/ns/person";
-		        }
-		        else if ("personal-details".equals(arg0)) {
-		            return "http://www.orcid.org/ns/personal-details";
-		        }
-		        else if ("other-name".equals(arg0)) {
-		            return "http://www.orcid.org/ns/other-name";
-		        }
-		        else if ("record".equals(arg0)) {
-		            return "http://www.orcid.org/ns/record";
-		        }
-		        else if ("error".equals(arg0)) {
-		            return "http://www.orcid.org/ns/error";
-		        }
-		        return null;
-		    }
-		});
+    public static AuthorData VTDParse(byte[] bytes)
+            throws VtdException, EncodingException, EOFException, EntityException, ParseException {
+        final VTDGen vg = new VTDGen();
+        vg.setDoc(bytes);
+        vg.parse(true);
+        final VTDNav vn = vg.getNav();
+        final AutoPilot ap = new AutoPilot(vn);
+        ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL);
+        ap.declareXPathNameSpace(NS_PERSON, NS_PERSON_URL);
+        ap.declareXPathNameSpace(NS_DETAILS, NS_DETAILS_URL);
+        ap.declareXPathNameSpace(NS_OTHER, NS_OTHER_URL);
+        ap.declareXPathNameSpace(NS_RECORD, NS_RECORD_URL);
+        ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL);

        AuthorData authorData = new AuthorData();
-		String errorPath = "//error:response-code";
-		String error = (String)xPath.compile(errorPath).evaluate(xmlDocument, XPathConstants.STRING);
-		if (!StringUtils.isBlank(error)) {
-			authorData.setErrorCode(error);
+        final List<String> errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code");
+        if (!errors.isEmpty()) {
+            authorData.setErrorCode(errors.get(0));
            return authorData;
        }
-		String oidPath = "//record:record/@path";
-		String oid = (String)xPath.compile(oidPath).evaluate(xmlDocument, XPathConstants.STRING);
-		if (!StringUtils.isBlank(oid)) {
-			oid = oid.substring(1);
+
+        List<VtdUtilityParser.Node> recordNodes =
+                VtdUtilityParser.getTextValuesWithAttributes(
+                        ap, vn, "//record:record", Arrays.asList("path"));
+        if (!recordNodes.isEmpty()) {
+            final String oid = (recordNodes.get(0).getAttributes().get("path")).substring(1);
            authorData.setOid(oid);
-		}
-		else {
+        } else {
            return null;
        }
-		String namePath = "//personal-details:given-names";
-		String name = (String)xPath.compile(namePath).evaluate(xmlDocument, XPathConstants.STRING);
-		if (!StringUtils.isBlank(name)) {
-			authorData.setName(name);
+
+        final List<String> names =
+                VtdUtilityParser.getTextValue(ap, vn, "//personal-details:given-names");
+        if (!names.isEmpty()) {
+            authorData.setName(names.get(0));
        }
-		String surnamePath = "//personal-details:family-name";
-		String surname = (String)xPath.compile(surnamePath).evaluate(xmlDocument, XPathConstants.STRING);
-		if (!StringUtils.isBlank(surname)) {
-			authorData.setSurname(surname);
+
+        final List<String> surnames =
+                VtdUtilityParser.getTextValue(ap, vn, "//personal-details:family-name");
+        if (!surnames.isEmpty()) {
+            authorData.setSurname(surnames.get(0));
        }
-		String creditnamePath = "//personal-details:credit-name";
-		String creditName = (String)xPath.compile(creditnamePath).evaluate(xmlDocument, XPathConstants.STRING);
-		if (!StringUtils.isBlank(creditName)) {
-			authorData.setCreditName(creditName);
+
+        final List<String> creditNames =
+                VtdUtilityParser.getTextValue(ap, vn, "//personal-details:credit-name");
+        if (!creditNames.isEmpty()) {
+            authorData.setCreditName(creditNames.get(0));
        }
        return authorData;
    }
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/config-default.xml
@ -15,4 +15,8 @@
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
+    <property>
+      <name>oozie.launcher.mapreduce.map.java.opts</name>
+      <value>-Xmx4g</value>
+    </property>
 </configuration>
--- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcid/oozie_app/workflow.xml
@ -1,4 +1,4 @@
-<workflow-app name="import Crossref from index into HDFS" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="import Orcid" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>workingPath</name>
--- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java
+++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java
@ -0,0 +1,40 @@
+package eu.dnetlib.doiboost.orcid.xml;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import eu.dnetlib.doiboost.orcid.model.AuthorData;
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+/** Unit tests for {@link XMLRecordParser#VTDParse(byte[])} run against ORCID summary fixtures. */
+public class XMLRecordParserTest {
+
+    /** A regular summary record must yield author data with a given name and a family name. */
+    @Test
+    public void testOrcidXMLRecordParser() throws Exception {
+        final byte[] xml = loadFixture("summary_0000-0001-6828-479X.xml");
+
+        // VTDParse is static: invoke it on the class rather than through a throwaway instance.
+        final AuthorData authorData = XMLRecordParser.VTDParse(xml);
+        assertNotNull(authorData);
+        assertNotNull(authorData.getName());
+        System.out.println("name: " + authorData.getName());
+        assertNotNull(authorData.getSurname());
+        System.out.println("surname: " + authorData.getSurname());
+    }
+
+    /** An ORCID error response must yield author data carrying the error code. */
+    @Test
+    public void testOrcidXMLErrorRecordParser() throws Exception {
+        final byte[] xml = loadFixture("summary_error.xml");
+
+        final AuthorData authorData = XMLRecordParser.VTDParse(xml);
+        assertNotNull(authorData);
+        assertNotNull(authorData.getErrorCode());
+        System.out.println("error: " + authorData.getErrorCode());
+    }
+
+    /**
+     * Reads a fixture, resolved relative to this class on the classpath, as raw bytes.
+     *
+     * <p>The bytes are handed to the parser untouched, so no String decode/encode round trip
+     * through the platform default charset can alter the UTF-8 fixture content.
+     *
+     * @param name resource name relative to this test class
+     * @return the full content of the resource
+     * @throws Exception if the resource cannot be read
+     */
+    private byte[] loadFixture(String name) throws Exception {
+        try (java.io.InputStream in = getClass().getResourceAsStream(name)) {
+            // Fail with an explicit message instead of an opaque NullPointerException.
+            assertNotNull(in, "missing test resource: " + name);
+            return IOUtils.toByteArray(in);
+        }
+    }
+}
--- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_0000-0001-6828-479X.xml
+++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_0000-0001-6828-479X.xml
@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<record:record xmlns:address="http://www.orcid.org/ns/address"
+    xmlns:email="http://www.orcid.org/ns/email"
+    xmlns:history="http://www.orcid.org/ns/history"
+    xmlns:employment="http://www.orcid.org/ns/employment"
+    xmlns:education="http://www.orcid.org/ns/education"
+    xmlns:other-name="http://www.orcid.org/ns/other-name"
+    xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+    xmlns:funding="http://www.orcid.org/ns/funding"
+    xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+    xmlns:service="http://www.orcid.org/ns/service"
+    xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+    xmlns:distinction="http://www.orcid.org/ns/distinction"
+    xmlns:internal="http://www.orcid.org/ns/internal"
+    xmlns:membership="http://www.orcid.org/ns/membership"
+    xmlns:person="http://www.orcid.org/ns/person"
+    xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+    xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+    xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+    xmlns:activities="http://www.orcid.org/ns/activities"
+    xmlns:qualification="http://www.orcid.org/ns/qualification"
+    xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+    xmlns:error="http://www.orcid.org/ns/error"
+    xmlns:preferences="http://www.orcid.org/ns/preferences"
+    xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+    xmlns:work="http://www.orcid.org/ns/work"
+    xmlns:peer-review="http://www.orcid.org/ns/peer-review" path="/0000-0001-6828-479X">
+    <common:orcid-identifier>
+        <common:uri>https://orcid.org/0000-0001-6828-479X</common:uri>
+        <common:path>0000-0001-6828-479X</common:path>
+        <common:host>orcid.org</common:host>
+    </common:orcid-identifier>
+    <preferences:preferences>
+        <preferences:locale>en</preferences:locale>
+    </preferences:preferences>
+    <history:history>
+        <history:creation-method>Member-referred</history:creation-method>
+        <history:submission-date>2017-02-17T06:16:06.428Z</history:submission-date>
+        <common:last-modified-date>2017-10-04T04:38:43.529Z</common:last-modified-date>
+        <history:claimed>true</history:claimed>
+        <history:verified-email>true</history:verified-email>
+        <history:verified-primary-email>true</history:verified-primary-email>
+    </history:history>
+    <person:person path="/0000-0001-6828-479X/person">
+        <person:name visibility="public" path="0000-0001-6828-479X">
+            <common:created-date>2017-02-17T06:16:06.428Z</common:created-date>
+            <common:last-modified-date>2017-02-17T06:16:06.652Z</common:last-modified-date>
+            <personal-details:given-names>Masahide</personal-details:given-names>
+            <personal-details:family-name>Terazima</personal-details:family-name>
+        </person:name>
+        <other-name:other-names path="/0000-0001-6828-479X/other-names"/>
+        <researcher-url:researcher-urls path="/0000-0001-6828-479X/researcher-urls"/>
+        <email:emails path="/0000-0001-6828-479X/email"/>
+        <address:addresses path="/0000-0001-6828-479X/address"/>
+        <keyword:keywords path="/0000-0001-6828-479X/keywords"/>
+        <external-identifier:external-identifiers path="/0000-0001-6828-479X/external-identifiers"/>
+    </person:person>
+    <activities:activities-summary path="/0000-0001-6828-479X/activities">
+        <activities:distinctions path="/0000-0001-6828-479X/distinctions"/>
+        <activities:educations path="/0000-0001-6828-479X/educations"/>
+        <activities:employments path="/0000-0001-6828-479X/employments"/>
+        <activities:fundings path="/0000-0001-6828-479X/fundings"/>
+        <activities:invited-positions path="/0000-0001-6828-479X/invited-positions"/>
+        <activities:memberships path="/0000-0001-6828-479X/memberships"/>
+        <activities:peer-reviews path="/0000-0001-6828-479X/peer-reviews"/>
+        <activities:qualifications path="/0000-0001-6828-479X/qualifications"/>
+        <activities:research-resources path="/0000-0001-6828-479X/research-resources"/>
+        <activities:services path="/0000-0001-6828-479X/services"/>
+        <activities:works path="/0000-0001-6828-479X/works"/>
+    </activities:activities-summary>
+</record:record>
--- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_error.xml
+++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/summary_error.xml
@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<error:error xmlns:address="http://www.orcid.org/ns/address"
+    xmlns:email="http://www.orcid.org/ns/email" xmlns:history="http://www.orcid.org/ns/history"
+    xmlns:employment="http://www.orcid.org/ns/employment"
+    xmlns:education="http://www.orcid.org/ns/education"
+    xmlns:other-name="http://www.orcid.org/ns/other-name"
+    xmlns:deprecated="http://www.orcid.org/ns/deprecated"
+    xmlns:funding="http://www.orcid.org/ns/funding"
+    xmlns:research-resource="http://www.orcid.org/ns/research-resource"
+    xmlns:service="http://www.orcid.org/ns/service"
+    xmlns:researcher-url="http://www.orcid.org/ns/researcher-url"
+    xmlns:distinction="http://www.orcid.org/ns/distinction"
+    xmlns:internal="http://www.orcid.org/ns/internal"
+    xmlns:membership="http://www.orcid.org/ns/membership"
+    xmlns:person="http://www.orcid.org/ns/person"
+    xmlns:personal-details="http://www.orcid.org/ns/personal-details"
+    xmlns:bulk="http://www.orcid.org/ns/bulk" xmlns:common="http://www.orcid.org/ns/common"
+    xmlns:record="http://www.orcid.org/ns/record" xmlns:keyword="http://www.orcid.org/ns/keyword"
+    xmlns:activities="http://www.orcid.org/ns/activities"
+    xmlns:qualification="http://www.orcid.org/ns/qualification"
+    xmlns:external-identifier="http://www.orcid.org/ns/external-identifier"
+    xmlns:error="http://www.orcid.org/ns/error"
+    xmlns:preferences="http://www.orcid.org/ns/preferences"
+    xmlns:invited-position="http://www.orcid.org/ns/invited-position"
+    xmlns:work="http://www.orcid.org/ns/work"
+    xmlns:peer-review="http://www.orcid.org/ns/peer-review">
+    <error:response-code>409</error:response-code>
+    <error:developer-message>409 Conflict: The ORCID record is locked and cannot be edited. ORCID
+        https://orcid.org/0000-0002-9716-679X</error:developer-message>
+    <error:user-message>The ORCID record is locked.</error:user-message>
+    <error:error-code>9018</error:error-code>
+    <error:more-info>https://members.orcid.org/api/resources/troubleshooting</error:more-info>
+</error:error>