2020-05-15 19:49:26 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.doiboost.orcid;
|
|
|
|
|
|
|
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
|
|
|
2020-07-03 23:30:31 +02:00
|
|
|
import java.io.*;
|
|
|
|
import java.nio.file.Files;
|
2020-11-23 09:49:22 +01:00
|
|
|
import java.nio.file.Path;
|
|
|
|
import java.nio.file.Paths;
|
|
|
|
import java.nio.file.StandardOpenOption;
|
2020-05-15 19:49:26 +02:00
|
|
|
import java.text.ParseException;
|
|
|
|
import java.text.SimpleDateFormat;
|
2020-11-24 17:49:32 +01:00
|
|
|
import java.time.Duration;
|
|
|
|
import java.time.LocalDateTime;
|
|
|
|
import java.time.temporal.TemporalUnit;
|
2020-12-02 23:20:16 +01:00
|
|
|
import java.util.*;
|
|
|
|
import java.util.stream.Collectors;
|
2020-05-15 19:49:26 +02:00
|
|
|
|
2020-11-23 09:49:22 +01:00
|
|
|
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
|
|
|
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
|
|
|
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
|
2020-12-02 23:20:16 +01:00
|
|
|
import org.apache.commons.compress.utils.Lists;
|
2020-05-15 19:49:26 +02:00
|
|
|
import org.apache.commons.io.IOUtils;
|
|
|
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
|
|
import org.apache.http.client.methods.HttpGet;
|
|
|
|
import org.apache.http.impl.client.CloseableHttpClient;
|
|
|
|
import org.apache.http.impl.client.HttpClients;
|
2020-11-23 09:49:22 +01:00
|
|
|
import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull;
|
2020-05-15 19:49:26 +02:00
|
|
|
import org.junit.jupiter.api.Test;
|
2020-11-24 17:49:32 +01:00
|
|
|
import org.mortbay.log.Log;
|
2020-05-15 19:49:26 +02:00
|
|
|
|
|
|
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
2020-12-02 23:20:16 +01:00
|
|
|
import eu.dnetlib.dhp.schema.orcid.AuthorData;
|
2020-07-03 23:30:31 +02:00
|
|
|
import jdk.nashorn.internal.ir.annotations.Ignore;
|
2020-05-15 19:49:26 +02:00
|
|
|
|
|
|
|
public class OrcidClientTest {
|
|
|
|
final String orcidId = "0000-0001-7291-3210";
|
|
|
|
final int REQ_LIMIT = 24;
|
|
|
|
final int REQ_MAX_TEST = 100;
|
|
|
|
final int RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL = 10;
|
|
|
|
final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
|
|
|
|
final String toRetrieveDate = "2020-05-06 23:59:46.031145";
|
|
|
|
String toNotRetrieveDate = "2019-09-29 23:59:59.000000";
|
|
|
|
String lastUpdate = "2019-09-30 00:00:00";
|
|
|
|
String shortDate = "2020-05-06 16:06:11";
|
2020-12-02 23:20:16 +01:00
|
|
|
final String REQUEST_TYPE_RECORD = "record";
|
|
|
|
final String REQUEST_TYPE_WORK = "work/47652866";
|
|
|
|
final String REQUEST_TYPE_WORKS = "works";
|
2020-05-15 19:49:26 +02:00
|
|
|
|
2020-07-03 23:30:31 +02:00
|
|
|
// curl -i -H "Accept: application/vnd.orcid+xml"
|
2020-05-15 19:49:26 +02:00
|
|
|
// -H 'Authorization: Bearer 78fdb232-7105-4086-8570-e153f4198e3d'
|
|
|
|
// 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record'
|
|
|
|
|
2020-07-03 23:30:31 +02:00
|
|
|
@Test
|
2020-11-23 09:49:22 +01:00
|
|
|
private void multipleDownloadTest() throws Exception {
|
2020-11-24 17:49:32 +01:00
|
|
|
int toDownload = 10;
|
2020-11-23 09:49:22 +01:00
|
|
|
long start = System.currentTimeMillis();
|
|
|
|
OrcidDownloader downloader = new OrcidDownloader();
|
|
|
|
TarArchiveInputStream input = new TarArchiveInputStream(
|
|
|
|
new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar")));
|
|
|
|
TarArchiveEntry entry = input.getNextTarEntry();
|
|
|
|
BufferedReader br = null;
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
int rowNum = 0;
|
|
|
|
int entryNum = 0;
|
|
|
|
int modified = 0;
|
|
|
|
while (entry != null) {
|
|
|
|
br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
|
|
|
|
String line;
|
|
|
|
while ((line = br.readLine()) != null) {
|
|
|
|
String[] values = line.toString().split(",");
|
|
|
|
List<String> recordInfo = Arrays.asList(values);
|
|
|
|
String orcidId = recordInfo.get(0);
|
|
|
|
if (downloader.isModified(orcidId, recordInfo.get(3))) {
|
2020-11-24 17:49:32 +01:00
|
|
|
slowedDownDownload(orcidId);
|
2020-11-23 09:49:22 +01:00
|
|
|
modified++;
|
|
|
|
}
|
|
|
|
rowNum++;
|
|
|
|
if (modified > toDownload) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
entryNum++;
|
|
|
|
entry = input.getNextTarEntry();
|
|
|
|
}
|
|
|
|
long end = System.currentTimeMillis();
|
|
|
|
logToFile("start test: " + new Date(start).toString());
|
|
|
|
logToFile("end test: " + new Date(end).toString());
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
private void downloadTest(String orcid) throws Exception {
|
2020-12-02 23:20:16 +01:00
|
|
|
String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD);
|
|
|
|
String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml");
|
2020-11-23 09:49:22 +01:00
|
|
|
File f = new File(filename);
|
2020-07-03 23:30:31 +02:00
|
|
|
OutputStream outStream = new FileOutputStream(f);
|
|
|
|
IOUtils.write(record.getBytes(), outStream);
|
|
|
|
}
|
|
|
|
|
2020-12-02 23:20:16 +01:00
|
|
|
private String testDownloadRecord(String orcidId, String dataType) throws Exception {
|
2020-05-15 19:49:26 +02:00
|
|
|
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
2020-12-02 23:20:16 +01:00
|
|
|
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/" + dataType);
|
2020-05-15 19:49:26 +02:00
|
|
|
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
|
|
|
|
httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d");
|
2020-12-02 23:20:16 +01:00
|
|
|
long start = System.currentTimeMillis();
|
2020-05-15 19:49:26 +02:00
|
|
|
CloseableHttpResponse response = client.execute(httpGet);
|
2020-12-02 23:20:16 +01:00
|
|
|
long end = System.currentTimeMillis();
|
2020-05-15 19:49:26 +02:00
|
|
|
if (response.getStatusLine().getStatusCode() != 200) {
|
2020-12-02 23:20:16 +01:00
|
|
|
logToFile("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
2020-05-15 19:49:26 +02:00
|
|
|
}
|
2020-12-02 23:20:16 +01:00
|
|
|
logToFile(orcidId + " " + dataType + " " + (end - start) / 1000 + " seconds");
|
2020-05-15 19:49:26 +02:00
|
|
|
return IOUtils.toString(response.getEntity().getContent());
|
|
|
|
} catch (Throwable e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
}
|
|
|
|
return new String("");
|
|
|
|
}
|
|
|
|
|
2020-11-23 09:49:22 +01:00
|
|
|
// @Test
|
2020-07-02 18:46:20 +02:00
|
|
|
private void testLambdaFileParser() throws Exception {
|
2020-05-15 19:49:26 +02:00
|
|
|
try (BufferedReader br = new BufferedReader(
|
|
|
|
new InputStreamReader(this.getClass().getResourceAsStream("last_modified.csv")))) {
|
|
|
|
String line;
|
|
|
|
int counter = 0;
|
|
|
|
int nReqTmp = 0;
|
|
|
|
long startDownload = System.currentTimeMillis();
|
|
|
|
long startReqTmp = System.currentTimeMillis();
|
|
|
|
while ((line = br.readLine()) != null) {
|
|
|
|
counter++;
|
|
|
|
// skip headers line
|
|
|
|
if (counter == 1) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
String[] values = line.split(",");
|
|
|
|
List<String> recordInfo = Arrays.asList(values);
|
2020-12-02 23:20:16 +01:00
|
|
|
testDownloadRecord(recordInfo.get(0), REQUEST_TYPE_RECORD);
|
2020-05-15 19:49:26 +02:00
|
|
|
long endReq = System.currentTimeMillis();
|
|
|
|
nReqTmp++;
|
|
|
|
if (nReqTmp == REQ_LIMIT) {
|
|
|
|
long reqSessionDuration = endReq - startReqTmp;
|
|
|
|
if (reqSessionDuration <= 1000) {
|
|
|
|
System.out
|
|
|
|
.println(
|
|
|
|
"\nreqSessionDuration: " + reqSessionDuration + " nReqTmp: " + nReqTmp + " wait ....");
|
|
|
|
Thread.sleep(1000 - reqSessionDuration);
|
|
|
|
} else {
|
|
|
|
nReqTmp = 0;
|
|
|
|
startReqTmp = System.currentTimeMillis();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (counter > REQ_MAX_TEST) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if ((counter % RECORD_DOWNLOADED_COUNTER_LOG_INTERVAL) == 0) {
|
|
|
|
System.out.println("Current record downloaded: " + counter);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
long endDownload = System.currentTimeMillis();
|
|
|
|
long downloadTime = endDownload - startDownload;
|
|
|
|
System.out.println("Download time: " + ((downloadTime / 1000) / 60) + " minutes");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-23 09:49:22 +01:00
|
|
|
// @Test
|
2020-07-03 23:30:31 +02:00
|
|
|
private void getRecordDatestamp() throws ParseException {
|
2020-05-15 19:49:26 +02:00
|
|
|
Date toRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toRetrieveDate);
|
|
|
|
Date toNotRetrieveDateDt = new SimpleDateFormat(DATE_FORMAT).parse(toNotRetrieveDate);
|
|
|
|
Date lastUpdateDt = new SimpleDateFormat(DATE_FORMAT).parse(lastUpdate);
|
|
|
|
assertTrue(toRetrieveDateDt.after(lastUpdateDt));
|
|
|
|
assertTrue(!toNotRetrieveDateDt.after(lastUpdateDt));
|
|
|
|
}
|
|
|
|
|
2020-07-03 23:30:31 +02:00
|
|
|
private void testDate(String value) throws ParseException {
|
2020-05-15 19:49:26 +02:00
|
|
|
System.out.println(value.toString());
|
|
|
|
if (value.length() != 19) {
|
|
|
|
value = value.substring(0, 19);
|
|
|
|
}
|
|
|
|
Date valueDt = new SimpleDateFormat(DATE_FORMAT).parse(value);
|
|
|
|
System.out.println(valueDt.toString());
|
|
|
|
}
|
|
|
|
|
2020-11-23 09:49:22 +01:00
|
|
|
// @Test
|
2020-07-03 23:30:31 +02:00
|
|
|
@Ignore
|
|
|
|
private void testModifiedDate() throws ParseException {
|
2020-05-15 19:49:26 +02:00
|
|
|
testDate(toRetrieveDate);
|
|
|
|
testDate(toNotRetrieveDate);
|
|
|
|
testDate(shortDate);
|
|
|
|
}
|
|
|
|
|
2020-11-23 09:49:22 +01:00
|
|
|
@Test
|
2020-11-24 17:49:32 +01:00
|
|
|
private void testReadBase64CompressedRecord() throws Exception {
|
2020-05-15 19:49:26 +02:00
|
|
|
final String base64CompressedRecord = IOUtils
|
2020-11-23 09:49:22 +01:00
|
|
|
.toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64"));
|
2020-05-15 19:49:26 +02:00
|
|
|
final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord);
|
2020-11-23 09:49:22 +01:00
|
|
|
logToFile("\n\ndownloaded \n\n" + recordFromSeqFile);
|
2020-12-02 23:20:16 +01:00
|
|
|
final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD);
|
2020-05-15 19:49:26 +02:00
|
|
|
assertTrue(recordFromSeqFile.equals(downloadedRecord));
|
|
|
|
}
|
2020-11-23 09:49:22 +01:00
|
|
|
|
|
|
|
@Test
|
|
|
|
private void lambdaFileReaderTest() throws Exception {
|
|
|
|
TarArchiveInputStream input = new TarArchiveInputStream(
|
|
|
|
new GzipCompressorInputStream(new FileInputStream("/develop/last_modified.csv.tar")));
|
|
|
|
TarArchiveEntry entry = input.getNextTarEntry();
|
|
|
|
BufferedReader br = null;
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
int rowNum = 0;
|
|
|
|
int entryNum = 0;
|
|
|
|
while (entry != null) {
|
|
|
|
br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
|
|
|
|
String line;
|
|
|
|
while ((line = br.readLine()) != null) {
|
|
|
|
String[] values = line.toString().split(",");
|
|
|
|
List<String> recordInfo = Arrays.asList(values);
|
|
|
|
assertTrue(recordInfo.size() == 4);
|
|
|
|
|
|
|
|
rowNum++;
|
|
|
|
if (rowNum == 1) {
|
|
|
|
assertTrue(recordInfo.get(3).equals("last_modified"));
|
|
|
|
} else if (rowNum == 2) {
|
|
|
|
assertTrue(recordInfo.get(0).equals("0000-0002-0499-7333"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
entryNum++;
|
|
|
|
assertTrue(entryNum == 1);
|
|
|
|
entry = input.getNextTarEntry();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
private void lambdaFileCounterTest() throws Exception {
|
|
|
|
final String lastUpdate = "2020-09-29 00:00:00";
|
|
|
|
OrcidDownloader downloader = new OrcidDownloader();
|
|
|
|
TarArchiveInputStream input = new TarArchiveInputStream(
|
|
|
|
new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar")));
|
|
|
|
TarArchiveEntry entry = input.getNextTarEntry();
|
|
|
|
BufferedReader br = null;
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
int rowNum = 0;
|
|
|
|
int entryNum = 0;
|
|
|
|
int modified = 0;
|
|
|
|
while (entry != null) {
|
|
|
|
br = new BufferedReader(new InputStreamReader(input)); // Read directly from tarInput
|
|
|
|
String line;
|
|
|
|
while ((line = br.readLine()) != null) {
|
|
|
|
String[] values = line.toString().split(",");
|
|
|
|
List<String> recordInfo = Arrays.asList(values);
|
|
|
|
String orcidId = recordInfo.get(0);
|
|
|
|
if (downloader.isModified(orcidId, recordInfo.get(3))) {
|
|
|
|
modified++;
|
|
|
|
}
|
|
|
|
rowNum++;
|
|
|
|
}
|
|
|
|
entryNum++;
|
|
|
|
entry = input.getNextTarEntry();
|
|
|
|
}
|
|
|
|
logToFile("rowNum: " + rowNum);
|
|
|
|
logToFile("modified: " + modified);
|
|
|
|
}
|
|
|
|
|
2020-12-02 23:20:16 +01:00
|
|
|
public static void logToFile(String log)
|
2020-11-23 09:49:22 +01:00
|
|
|
throws IOException {
|
|
|
|
log = log.concat("\n");
|
|
|
|
Path path = Paths.get("/tmp/orcid_log.txt");
|
|
|
|
Files.write(path, log.getBytes(), StandardOpenOption.APPEND);
|
|
|
|
}
|
2020-11-24 17:49:32 +01:00
|
|
|
|
|
|
|
@Test
|
|
|
|
private void slowedDownDownloadTest() throws Exception {
|
|
|
|
String orcid = "0000-0001-5496-1243";
|
|
|
|
String record = slowedDownDownload(orcid);
|
|
|
|
String filename = "/tmp/downloaded_".concat(orcid).concat(".xml");
|
|
|
|
File f = new File(filename);
|
|
|
|
OutputStream outStream = new FileOutputStream(f);
|
|
|
|
IOUtils.write(record.getBytes(), outStream);
|
|
|
|
}
|
|
|
|
|
|
|
|
private String slowedDownDownload(String orcidId) throws Exception {
|
|
|
|
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
|
|
|
HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record");
|
|
|
|
httpGet.addHeader("Accept", "application/vnd.orcid+xml");
|
|
|
|
httpGet.addHeader("Authorization", "Bearer 78fdb232-7105-4086-8570-e153f4198e3d");
|
|
|
|
long start = System.currentTimeMillis();
|
|
|
|
CloseableHttpResponse response = client.execute(httpGet);
|
|
|
|
long endReq = System.currentTimeMillis();
|
|
|
|
long reqSessionDuration = endReq - start;
|
|
|
|
logToFile("req time (millisec): " + reqSessionDuration);
|
|
|
|
if (reqSessionDuration < 1000) {
|
|
|
|
logToFile("wait ....");
|
|
|
|
Thread.sleep(1000 - reqSessionDuration);
|
|
|
|
}
|
|
|
|
long end = System.currentTimeMillis();
|
|
|
|
long total = end - start;
|
|
|
|
logToFile("total time (millisec): " + total);
|
|
|
|
if (response.getStatusLine().getStatusCode() != 200) {
|
|
|
|
logToFile("Downloading " + orcidId + " status code: " + response.getStatusLine().getStatusCode());
|
|
|
|
}
|
|
|
|
return IOUtils.toString(response.getEntity().getContent());
|
|
|
|
} catch (Throwable e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
}
|
|
|
|
return new String("");
|
|
|
|
}
|
2020-12-02 23:20:16 +01:00
|
|
|
|
|
|
|
@Test
|
|
|
|
private void downloadWorkTest() throws Exception {
|
|
|
|
String orcid = "0000-0003-0015-1952";
|
|
|
|
String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK);
|
|
|
|
String filename = "/tmp/downloaded_work_".concat(orcid).concat(".xml");
|
|
|
|
File f = new File(filename);
|
|
|
|
OutputStream outStream = new FileOutputStream(f);
|
|
|
|
IOUtils.write(record.getBytes(), outStream);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
private void downloadRecordTest() throws Exception {
|
|
|
|
String orcid = "0000-0001-5004-5918";
|
|
|
|
String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD);
|
|
|
|
String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml");
|
|
|
|
File f = new File(filename);
|
|
|
|
OutputStream outStream = new FileOutputStream(f);
|
|
|
|
IOUtils.write(record.getBytes(), outStream);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
private void downloadWorksTest() throws Exception {
|
|
|
|
String orcid = "0000-0001-5004-5918";
|
|
|
|
String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS);
|
|
|
|
String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml");
|
|
|
|
File f = new File(filename);
|
|
|
|
OutputStream outStream = new FileOutputStream(f);
|
|
|
|
IOUtils.write(record.getBytes(), outStream);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
private void downloadSingleWorkTest() throws Exception {
|
|
|
|
String orcid = "0000-0001-5004-5918";
|
|
|
|
String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK);
|
|
|
|
String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml");
|
|
|
|
File f = new File(filename);
|
|
|
|
OutputStream outStream = new FileOutputStream(f);
|
|
|
|
IOUtils.write(record.getBytes(), outStream);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void cleanAuthorListTest() throws Exception {
|
|
|
|
AuthorData a1 = new AuthorData();
|
|
|
|
a1.setOid("1");
|
|
|
|
a1.setName("n1");
|
|
|
|
a1.setSurname("s1");
|
|
|
|
a1.setCreditName("c1");
|
|
|
|
AuthorData a2 = new AuthorData();
|
|
|
|
a2.setOid("1");
|
|
|
|
a2.setName("n1");
|
|
|
|
a2.setSurname("s1");
|
|
|
|
a2.setCreditName("c1");
|
|
|
|
AuthorData a3 = new AuthorData();
|
|
|
|
a3.setOid("3");
|
|
|
|
a3.setName("n3");
|
|
|
|
a3.setSurname("s3");
|
|
|
|
a3.setCreditName("c3");
|
|
|
|
List<AuthorData> list = Lists.newArrayList();
|
|
|
|
list.add(a1);
|
|
|
|
list.add(a2);
|
|
|
|
list.add(a3);
|
|
|
|
|
|
|
|
Set<String> namesAlreadySeen = new HashSet<>();
|
|
|
|
assertTrue(list.size() == 3);
|
|
|
|
list.removeIf(a -> !namesAlreadySeen.add(a.getOid()));
|
|
|
|
assertTrue(list.size() == 2);
|
|
|
|
}
|
2020-05-15 19:49:26 +02:00
|
|
|
}
|