forked from D-Net/dnet-hadoop
wf to move the orcid-no-doi dataset to the folder ready for the import
This commit is contained in:
parent
59ec5137e1
commit
27068aacd1
|
@ -0,0 +1,42 @@
|
|||
<workflow-app name="import_orcid_no_doi" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>inputPath</name>
|
||||
<value>/data/orcid_activities_2020/no_doi_dataset</value>
|
||||
<description>path from which to retrieve the already generated action set</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<value>/data/orcid_activities_2020/test_import_orcid_no_doi</value>
|
||||
<description>path where to store the action set</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="importOrcidNoDoi"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<!-- Copies everything matched by ${inputPath}/* into ${outputPath} with the Oozie distcp action. -->
<action name="importOrcidNoDoi">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${inputPath}/*</arg>
|
||||
<arg>${outputPath}</arg>
|
||||
</distcp>
|
||||
<!-- On success the workflow ends; on any error it goes to the Kill node. -->
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -16,19 +16,22 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
|||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
|
||||
import org.apache.commons.compress.utils.Lists;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.orcid.AuthorData;
|
||||
import eu.dnetlib.doiboost.orcid.xml.XMLRecordParserTest;
|
||||
import jdk.nashorn.internal.ir.annotations.Ignore;
|
||||
|
||||
public class OrcidClientTest {
|
||||
final String orcidId = "0000-0001-7291-3210";
|
||||
final int REQ_LIMIT = 24;
|
||||
final int REQ_MAX_TEST = 100;
|
||||
final int RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL = 10;
|
||||
|
@ -41,14 +44,23 @@ public class OrcidClientTest {
|
|||
final String REQUEST_TYPE_WORK = "work/47652866";
|
||||
final String REQUEST_TYPE_WORKS = "works";
|
||||
|
||||
private static Path testPath;
|
||||
|
||||
@BeforeAll
|
||||
private static void setUp() throws IOException {
|
||||
testPath = Files.createTempDirectory(XMLRecordParserTest.class.getName());
|
||||
System.out.println("using test path: " + testPath);
|
||||
}
|
||||
|
||||
// curl -i -H "Accept: application/vnd.orcid+xml"
|
||||
// -H 'Authorization: Bearer 78fdb232-7105-4086-8570-e153f4198e3d'
|
||||
// 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record'
|
||||
|
||||
@Test
|
||||
private void downloadTest(String orcid) throws Exception {
|
||||
public void downloadTest() throws Exception {
|
||||
final String orcid = "0000-0001-7291-3210";
|
||||
String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD);
|
||||
String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml");
|
||||
String filename = testPath + "/downloaded_record_".concat(orcid).concat(".xml");
|
||||
File f = new File(filename);
|
||||
OutputStream outStream = new FileOutputStream(f);
|
||||
IOUtils.write(record.getBytes(), outStream);
|
||||
|
@ -63,9 +75,10 @@ public class OrcidClientTest {
|
|||
CloseableHttpResponse response = client.execute(httpGet);
|
||||
long end = System.currentTimeMillis();
|
||||
if (response.getStatusLine().getStatusCode() != 200) {
|
||||
logToFile("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
||||
logToFile(
|
||||
testPath, "Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
||||
}
|
||||
logToFile(orcidId + " " + dataType + " " + (end - start) / 1000 + " seconds");
|
||||
logToFile(testPath, orcidId + " " + dataType + " " + (end - start) / 1000 + " seconds");
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
} catch (Throwable e) {
|
||||
e.printStackTrace();
|
||||
|
@ -150,12 +163,13 @@ public class OrcidClientTest {
|
|||
final String base64CompressedRecord = IOUtils
|
||||
.toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64"));
|
||||
final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
|
||||
logToFile("\n\ndownloaded \n\n" + recordFromSeqFile);
|
||||
logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile);
|
||||
final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD);
|
||||
assertTrue(recordFromSeqFile.equals(downloadedRecord));
|
||||
}
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
public void lambdaFileReaderTest() throws Exception {
|
||||
String last_update = "2021-01-12 00:00:06.685137";
|
||||
TarArchiveInputStream input = new TarArchiveInputStream(
|
||||
|
@ -198,17 +212,20 @@ public class OrcidClientTest {
|
|||
entry = input.getNextTarEntry();
|
||||
|
||||
}
|
||||
logToFile("modifiedNum : " + modifiedNum + " / " + rowNum);
|
||||
logToFile(testPath, "modifiedNum : " + modifiedNum + " / " + rowNum);
|
||||
}
|
||||
|
||||
public static void logToFile(String log)
|
||||
throws IOException {
|
||||
public static void logToFile(Path basePath, String log) throws IOException {
|
||||
log = log.concat("\n");
|
||||
Path path = Paths.get("/tmp/orcid_log.txt");
|
||||
Path path = basePath.resolve("orcid_log.txt");
|
||||
if (!Files.exists(path)) {
|
||||
Files.createFile(path);
|
||||
}
|
||||
Files.write(path, log.getBytes(), StandardOpenOption.APPEND);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
private void slowedDownDownloadTest() throws Exception {
|
||||
String orcid = "0000-0001-5496-1243";
|
||||
String record = slowedDownDownload(orcid);
|
||||
|
@ -227,16 +244,17 @@ public class OrcidClientTest {
|
|||
CloseableHttpResponse response = client.execute(httpGet);
|
||||
long endReq = System.currentTimeMillis();
|
||||
long reqSessionDuration = endReq - start;
|
||||
logToFile("req time (millisec): " + reqSessionDuration);
|
||||
logToFile(testPath, "req time (millisec): " + reqSessionDuration);
|
||||
if (reqSessionDuration < 1000) {
|
||||
logToFile("wait ....");
|
||||
logToFile(testPath, "wait ....");
|
||||
Thread.sleep(1000 - reqSessionDuration);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
long total = end - start;
|
||||
logToFile("total time (millisec): " + total);
|
||||
logToFile(testPath, "total time (millisec): " + total);
|
||||
if (response.getStatusLine().getStatusCode() != 200) {
|
||||
logToFile("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
||||
logToFile(
|
||||
testPath, "Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
||||
}
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
} catch (Throwable e) {
|
||||
|
@ -246,7 +264,7 @@ public class OrcidClientTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
private void downloadWorkTest() throws Exception {
|
||||
public void downloadWorkTest() throws Exception {
|
||||
String orcid = "0000-0003-0015-1952";
|
||||
String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK);
|
||||
String filename = "/tmp/downloaded_work_".concat(orcid).concat(".xml");
|
||||
|
@ -256,7 +274,7 @@ public class OrcidClientTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
private void downloadRecordTest() throws Exception {
|
||||
public void downloadRecordTest() throws Exception {
|
||||
String orcid = "0000-0001-5004-5918";
|
||||
String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD);
|
||||
String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml");
|
||||
|
@ -266,7 +284,7 @@ public class OrcidClientTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
private void downloadWorksTest() throws Exception {
|
||||
public void downloadWorksTest() throws Exception {
|
||||
String orcid = "0000-0001-5004-5918";
|
||||
String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS);
|
||||
String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml");
|
||||
|
@ -276,7 +294,7 @@ public class OrcidClientTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
private void downloadSingleWorkTest() throws Exception {
|
||||
public void downloadSingleWorkTest() throws Exception {
|
||||
String orcid = "0000-0001-5004-5918";
|
||||
String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK);
|
||||
String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml");
|
||||
|
@ -286,7 +304,7 @@ public class OrcidClientTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
private void cleanAuthorListTest() throws Exception {
|
||||
public void cleanAuthorListTest() throws Exception {
|
||||
AuthorData a1 = new AuthorData();
|
||||
a1.setOid("1");
|
||||
a1.setName("n1");
|
||||
|
@ -315,11 +333,11 @@ public class OrcidClientTest {
|
|||
|
||||
@Test
|
||||
@Ignore
|
||||
private void testUpdatedRecord() throws Exception {
|
||||
public void testUpdatedRecord() throws Exception {
|
||||
final String base64CompressedRecord = IOUtils
|
||||
.toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64"));
|
||||
final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
|
||||
logToFile("\n\nrecord updated \n\n" + record);
|
||||
logToFile(testPath, "\n\nrecord updated \n\n" + record);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -327,6 +345,6 @@ public class OrcidClientTest {
|
|||
private void testUpdatedWork() throws Exception {
|
||||
final String base64CompressedWork = "H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA==";
|
||||
final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork);
|
||||
logToFile("\n\nwork updated \n\n" + work);
|
||||
logToFile(testPath, "\n\nwork updated \n\n" + work);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
|
||||
package eu.dnetlib.doiboost.orcid.xml;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
@ -18,7 +19,6 @@ import eu.dnetlib.dhp.schema.orcid.AuthorSummary;
|
|||
import eu.dnetlib.dhp.schema.orcid.Work;
|
||||
import eu.dnetlib.dhp.schema.orcid.WorkDetail;
|
||||
import eu.dnetlib.doiboost.orcid.OrcidClientTest;
|
||||
import eu.dnetlib.doiboost.orcid.SparkDownloadOrcidWorks;
|
||||
import eu.dnetlib.doiboost.orcid.model.WorkData;
|
||||
import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter;
|
||||
import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi;
|
||||
|
@ -30,8 +30,15 @@ public class XMLRecordParserTest {
|
|||
private static final String NS_COMMON = "common";
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static Path testPath;
|
||||
|
||||
@BeforeAll
|
||||
private static void setUp() throws IOException {
|
||||
testPath = Files.createTempDirectory(XMLRecordParserTest.class.getName());
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testOrcidAuthorDataXMLParser() throws Exception {
|
||||
public void testOrcidAuthorDataXMLParser() throws Exception {
|
||||
|
||||
String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml"));
|
||||
|
||||
|
@ -43,11 +50,11 @@ public class XMLRecordParserTest {
|
|||
System.out.println("name: " + authorData.getName());
|
||||
assertNotNull(authorData.getSurname());
|
||||
System.out.println("surname: " + authorData.getSurname());
|
||||
OrcidClientTest.logToFile(OBJECT_MAPPER.writeValueAsString(authorData));
|
||||
OrcidClientTest.logToFile(testPath, OBJECT_MAPPER.writeValueAsString(authorData));
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testOrcidXMLErrorRecordParser() throws Exception {
|
||||
public void testOrcidXMLErrorRecordParser() throws Exception {
|
||||
|
||||
String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml"));
|
||||
|
||||
|
@ -60,7 +67,7 @@ public class XMLRecordParserTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
private void testOrcidWorkDataXMLParser() throws Exception {
|
||||
public void testOrcidWorkDataXMLParser() throws Exception {
|
||||
|
||||
String xml = IOUtils
|
||||
.toString(
|
||||
|
@ -72,12 +79,11 @@ public class XMLRecordParserTest {
|
|||
assertNotNull(workData);
|
||||
assertNotNull(workData.getOid());
|
||||
System.out.println("oid: " + workData.getOid());
|
||||
assertNotNull(workData.getDoi());
|
||||
System.out.println("doi: " + workData.getDoi());
|
||||
assertNull(workData.getDoi());
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testOrcidOtherNamesXMLParser() throws Exception {
|
||||
public void testOrcidOtherNamesXMLParser() throws Exception {
|
||||
|
||||
String xml = IOUtils
|
||||
.toString(
|
||||
|
@ -114,7 +120,7 @@ public class XMLRecordParserTest {
|
|||
this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml"));
|
||||
AuthorSummary authorSummary = XMLRecordParser.VTDParseAuthorSummary(xml.getBytes());
|
||||
authorSummary.setBase64CompressData(ArgumentApplicationParser.compressArgument(xml));
|
||||
OrcidClientTest.logToFile(JsonWriter.create(authorSummary));
|
||||
OrcidClientTest.logToFile(testPath, JsonWriter.create(authorSummary));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -126,6 +132,6 @@ public class XMLRecordParserTest {
|
|||
Work work = new Work();
|
||||
work.setWorkDetail(workDetail);
|
||||
work.setBase64CompressData(ArgumentApplicationParser.compressArgument(xml));
|
||||
OrcidClientTest.logToFile(JsonWriter.create(work));
|
||||
OrcidClientTest.logToFile(testPath, JsonWriter.create(work));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue