diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index abb9dc148..eca433e9e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -5,13 +5,71 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; +import java.util.Optional; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; public class MakeTarArchive implements Serializable { + private static final Logger log = LoggerFactory.getLogger(MakeTarArchive.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + MakeTarArchive.class + .getResourceAsStream( + "/eu/dnetlib/dhp/common/input_maketar_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + final String outputPath = parser.get("hdfsPath"); + log.info("hdfsPath: {}", outputPath); + + final String hdfsNameNode = parser.get("nameNode"); + log.info("nameNode: {}", hdfsNameNode); + + final String inputPath = parser.get("sourcePath"); + log.info("input path : {}", inputPath); + + final int gBperSplit = Optional + .ofNullable(parser.get("splitSize")) + .map(Integer::valueOf) + .orElse(10); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit); + + } + + public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit) + throws IOException { + + RemoteIterator dirIterator = fileSystem.listLocatedStatus(new Path(inputPath)); + + while (dirIterator.hasNext()) { + LocatedFileStatus fileStatus = dirIterator.next(); + + Path p = fileStatus.getPath(); + String pathString = p.toString(); + String entity = pathString.substring(pathString.lastIndexOf("/") + 1); + + MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit); + } + } + private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { Path hdfsWritePath = new Path(outputPath); if (fileSystem.exists(hdfsWritePath)) { @@ -21,7 +79,7 @@ public class MakeTarArchive implements Serializable { return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream()); } - private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) + private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dirName) throws IOException { Path hdfsWritePath = new Path(outputPath); @@ -37,7 +95,7 @@ public class MakeTarArchive implements Serializable { new Path(inputPath), true); while (iterator.hasNext()) { - writeCurrentFile(fileSystem, dir_name, iterator, ar, 0); + writeCurrentFile(fileSystem, dirName, iterator, ar, 0); } } @@ -59,32 +117,30 @@ public class MakeTarArchive implements Serializable { new Path(inputPath), true); boolean next = fileStatusListIterator.hasNext(); while (next) { - TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar"); + try (TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar")) { - long current_size = 0; - while (next && current_size < bytesPerSplit) { - current_size = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, current_size); - next = fileStatusListIterator.hasNext(); + long currentSize = 0; + while (next && currentSize < bytesPerSplit) { + currentSize = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, currentSize); + next = fileStatusListIterator.hasNext(); + } + + partNum += 1; } - - partNum += 1; - ar.close(); } - } - } - private static long writeCurrentFile(FileSystem fileSystem, String dir_name, + private static long writeCurrentFile(FileSystem fileSystem, String dirName, RemoteIterator fileStatusListIterator, - TarArchiveOutputStream ar, long current_size) throws IOException { + TarArchiveOutputStream ar, long currentSize) throws IOException { LocatedFileStatus fileStatus = fileStatusListIterator.next(); Path p = fileStatus.getPath(); - String p_string = p.toString(); - if (!p_string.endsWith("_SUCCESS")) { - String name = p_string.substring(p_string.lastIndexOf("/") + 1); + String pString = p.toString(); + if (!pString.endsWith("_SUCCESS")) { + String name = pString.substring(pString.lastIndexOf("/") + 1); if (name.startsWith("part-") & name.length() > 10) { String tmp = name.substring(0, 10); if (name.contains(".")) { @@ -92,9 +148,9 @@ public class MakeTarArchive implements Serializable { } name = tmp; } - TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name); + TarArchiveEntry entry = new TarArchiveEntry(dirName + "/" + name); entry.setSize(fileStatus.getLen()); - current_size += fileStatus.getLen(); + currentSize += fileStatus.getLen(); ar.putArchiveEntry(entry); InputStream is = fileSystem.open(fileStatus.getPath()); @@ -110,7 +166,7 @@ public class MakeTarArchive implements Serializable { ar.closeArchiveEntry(); } - return current_size; + return currentSize; } } diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/common/input_maketar_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/common/input_maketar_parameters.json new file mode 100644 index 000000000..a15318865 --- /dev/null +++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/common/input_maketar_parameters.json @@ -0,0 +1,30 @@ +[ + + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequencial file to read", + "paramRequired": true + }, + { + "paramName": "hdp", + "paramLongName": "hdfsPath", + "paramDescription": "the path used to store the output archive", + "paramRequired": true + }, + { + "paramName":"nn", + "paramLongName":"nameNode", + "paramDescription": "the name node", + "paramRequired": true + }, + { + "paramName":"ss", + "paramLongName":"splitSize", + "paramDescription": "the maximum size of the archive", + "paramRequired": false + } +] + + + diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 0569bacfd..6b9c56005 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,6 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -13,6 +15,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.Function; @@ -20,6 +23,7 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Lists; import com.google.gson.JsonElement; import com.google.gson.JsonParser; @@ -42,6 +46,7 @@ public class SparkDownloadOrcidWorks { public static final String ORCID_XML_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public static final DateTimeFormatter ORCID_XML_DATETIMEFORMATTER = DateTimeFormatter .ofPattern(ORCID_XML_DATETIME_FORMAT); + public static final String DOWNLOAD_WORKS_REQUEST_SEPARATOR = ","; public static void main(String[] args) throws Exception { @@ -56,7 +61,6 @@ public class SparkDownloadOrcidWorks { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); logger.info("workingPath: {}", workingPath); final String outputPath = parser.get("outputPath"); @@ -69,32 +73,22 @@ public class SparkDownloadOrcidWorks { isSparkSessionManaged, spark -> { final String lastUpdateValue = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); - logger.info("lastUpdateValue: ", lastUpdateValue); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); LongAccumulator updatedAuthorsAcc = spark.sparkContext().longAccumulator("updated_authors"); LongAccumulator parsedAuthorsAcc = spark.sparkContext().longAccumulator("parsed_authors"); LongAccumulator parsedWorksAcc = spark.sparkContext().longAccumulator("parsed_works"); LongAccumulator modifiedWorksAcc = spark.sparkContext().longAccumulator("modified_works"); - LongAccumulator maxModifiedWorksLimitAcc = spark - .sparkContext() - .longAccumulator("max_modified_works_limit"); LongAccumulator errorCodeFoundAcc = spark.sparkContext().longAccumulator("error_code_found"); - LongAccumulator errorLoadingJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_json_found"); - LongAccumulator errorLoadingXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_xml_found"); LongAccumulator errorParsingXMLFoundAcc = spark .sparkContext() .longAccumulator("error_parsing_xml_found"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); - updatedAuthorsAcc.setValue(updatedAuthorsRDD.count()); + long authorsCount = updatedAuthorsRDD.count(); + updatedAuthorsAcc.setValue(authorsCount); FlatMapFunction, String> retrieveWorkUrlFunction = data -> { String orcidId = data._1().toString(); @@ -106,11 +100,10 @@ public class SparkDownloadOrcidWorks { if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { - errorLoadingJsonFoundAcc.add(1); + } else { String authorSummary = ArgumentApplicationParser.decompressValue(compressedData); if (StringUtils.isEmpty(authorSummary)) { - errorLoadingXMLFoundAcc.add(1); } else { try { workIdLastModifiedDate = XMLRecordParser @@ -125,22 +118,38 @@ public class SparkDownloadOrcidWorks { errorCodeFoundAcc.add(1); } parsedAuthorsAcc.add(1); + workIdLastModifiedDate.forEach((k, v) -> { parsedWorksAcc.add(1); if (isModified(orcidId, v, lastUpdateValue)) { modifiedWorksAcc.add(1); - workIds.add(orcidId.concat("/work/").concat(k)); + workIds.add(k); } }); - if (workIdLastModifiedDate.size() > 50) { - maxModifiedWorksLimitAcc.add(1); + if (workIds.isEmpty()) { + return new ArrayList().iterator(); } - return workIds.iterator(); + List worksDownloadUrls = new ArrayList<>(); + + // Creation of url for reading multiple works (up to 100) with ORCID API + // see this https://github.com/ORCID/ORCID-Source/blob/development/orcid-api-web/tutorial/works.md + + List> partitionedWorks = Lists.partition(workIds, 100); + partitionedWorks.stream().forEach(p -> { + String worksDownloadUrl = orcidId.concat("/works/"); + final StringBuffer buffer = new StringBuffer(worksDownloadUrl); + p.forEach(id -> { + buffer.append(id).append(DOWNLOAD_WORKS_REQUEST_SEPARATOR); + }); + String finalUrl = buffer.substring(0, buffer.lastIndexOf(DOWNLOAD_WORKS_REQUEST_SEPARATOR)); + worksDownloadUrls.add(finalUrl); + }); + return worksDownloadUrls.iterator(); }; - Function> downloadWorkFunction = data -> { - String relativeWorkUrl = data; - String orcidId = relativeWorkUrl.split("/")[0]; + Function> downloadWorksFunction = data -> { + String relativeWorksUrl = data; + String orcidId = relativeWorksUrl.split("/")[0]; final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastUpdateValue); @@ -149,7 +158,7 @@ public class SparkDownloadOrcidWorks { httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); httpConnector.setAuthToken(token); - String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorkUrl; + String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorksUrl; DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); boolean downloadCompleted = false; @@ -167,7 +176,6 @@ public class SparkDownloadOrcidWorks { } else { downloaded.setStatusCode(-4); } - errorsAcc.add(1); } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; @@ -176,7 +184,6 @@ public class SparkDownloadOrcidWorks { } if (downloadCompleted) { downloaded.setStatusCode(200); - downloadedRecordsAcc.add(1); downloaded .setCompressedData( ArgumentApplicationParser @@ -185,24 +192,69 @@ public class SparkDownloadOrcidWorks { return downloaded.toTuple2(); }; + FlatMapFunction, Tuple2> splitWorksFunction = data -> { + List> splittedDownloadedWorks = new ArrayList<>(); + String jsonData = data._2().toString(); + JsonElement jElement = new JsonParser().parse(jsonData); + String orcidId = data._1().toString(); + String statusCode = getJsonValue(jElement, "statusCode"); + String lastModifiedDate = getJsonValue(jElement, "lastModifiedDate"); + String compressedData = getJsonValue(jElement, "compressedData"); + String errorMessage = getJsonValue(jElement, "errorMessage"); + String works = ArgumentApplicationParser.decompressValue(compressedData); + + // split a single xml containing multiple works into multiple xml (a single work for each xml) + List splittedWorks = null; + try { + splittedWorks = XMLRecordParser + .splitWorks(orcidId, works.getBytes(StandardCharsets.UTF_8)); + } catch (Throwable t) { + final DownloadedRecordData errDownloaded = new DownloadedRecordData(); + errDownloaded.setOrcidId(orcidId); + errDownloaded.setLastModifiedDate(lastModifiedDate); + errDownloaded.setStatusCode(-10); + errDownloaded.setErrorMessage(t.getMessage()); + splittedDownloadedWorks.add(errDownloaded.toTuple2()); + errorParsingXMLFoundAcc.add(1); + return splittedDownloadedWorks.iterator(); + } + splittedWorks.forEach(w -> { + final DownloadedRecordData downloaded = new DownloadedRecordData(); + downloaded.setOrcidId(orcidId); + downloaded.setLastModifiedDate(lastModifiedDate); + downloaded.setStatusCode(Integer.parseInt(statusCode)); + downloaded.setErrorMessage(errorMessage); + try { + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(w)); + } catch (Throwable t) { + downloaded.setStatusCode(-11); + downloaded.setErrorMessage(t.getMessage()); + } + splittedDownloadedWorks.add(downloaded.toTuple2()); + downloadedRecordsAcc.add(1); + }); + + return splittedDownloadedWorks.iterator(); + }; + updatedAuthorsRDD .flatMap(retrieveWorkUrlFunction) .repartition(100) - .map(downloadWorkFunction) - .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) + .map(downloadWorksFunction) + .flatMap(splitWorksFunction) + .mapToPair(w -> new Tuple2<>(new Text(w._1()), new Text(w._2()))) .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value()); logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value()); logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); - logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value()); logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); - logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); - logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); - logger.info("errorsAcc: {}", errorsAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java index 5ef6efa26..fc8f62bca 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java @@ -83,8 +83,6 @@ public class MultiAttemptsHttpConnector { throw new CollectorException(msg); } - log.info("Request attempt {} [{}]", retryNumber, requestUrl); - InputStream input = null; try { @@ -104,9 +102,9 @@ public class MultiAttemptsHttpConnector { urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", getAuthToken())); } - if (log.isDebugEnabled()) { - logHeaderFields(urlConn); - } +// if (log.isDebugEnabled()) { +// logHeaderFields(urlConn); +// } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); if (is2xx(urlConn.getResponseCode())) { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index e8745aa96..c1375ee2a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,7 +1,11 @@ package eu.dnetlib.doiboost.orcid.xml; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.*; +import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.mortbay.log.Log; @@ -34,6 +38,33 @@ public class XMLRecordParser { private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; private static final String NS_HISTORY = "history"; private static final String NS_HISTORY_URL = "http://www.orcid.org/ns/history"; + private static final String NS_BULK_URL = "http://www.orcid.org/ns/bulk"; + private static final String NS_BULK = "bulk"; + + private static final String namespaceList = " xmlns:internal=\"http://www.orcid.org/ns/internal\"\n" + + " xmlns:education=\"http://www.orcid.org/ns/education\"\n" + + " xmlns:distinction=\"http://www.orcid.org/ns/distinction\"\n" + + " xmlns:deprecated=\"http://www.orcid.org/ns/deprecated\"\n" + + " xmlns:other-name=\"http://www.orcid.org/ns/other-name\"\n" + + " xmlns:membership=\"http://www.orcid.org/ns/membership\"\n" + + " xmlns:error=\"http://www.orcid.org/ns/error\" xmlns:common=\"http://www.orcid.org/ns/common\"\n" + + " xmlns:record=\"http://www.orcid.org/ns/record\"\n" + + " xmlns:personal-details=\"http://www.orcid.org/ns/personal-details\"\n" + + " xmlns:keyword=\"http://www.orcid.org/ns/keyword\" xmlns:email=\"http://www.orcid.org/ns/email\"\n" + + " xmlns:external-identifier=\"http://www.orcid.org/ns/external-identifier\"\n" + + " xmlns:funding=\"http://www.orcid.org/ns/funding\"\n" + + " xmlns:preferences=\"http://www.orcid.org/ns/preferences\"\n" + + " xmlns:address=\"http://www.orcid.org/ns/address\"\n" + + " xmlns:invited-position=\"http://www.orcid.org/ns/invited-position\"\n" + + " xmlns:work=\"http://www.orcid.org/ns/work\" xmlns:history=\"http://www.orcid.org/ns/history\"\n" + + " xmlns:employment=\"http://www.orcid.org/ns/employment\"\n" + + " xmlns:qualification=\"http://www.orcid.org/ns/qualification\"\n" + + " xmlns:service=\"http://www.orcid.org/ns/service\" xmlns:person=\"http://www.orcid.org/ns/person\"\n" + + " xmlns:activities=\"http://www.orcid.org/ns/activities\"\n" + + " xmlns:researcher-url=\"http://www.orcid.org/ns/researcher-url\"\n" + + " xmlns:peer-review=\"http://www.orcid.org/ns/peer-review\"\n" + + " xmlns:bulk=\"http://www.orcid.org/ns/bulk\"\n" + + " xmlns:research-resource=\"http://www.orcid.org/ns/research-resource\""; private static final String NS_ERROR = "error"; @@ -307,4 +338,65 @@ public class XMLRecordParser { } return authorHistory; } + + public static List splitWorks(String orcidId, byte[] bytes) + throws ParseException, XPathParseException, NavException, XPathEvalException, VtdException, ModifyException, + IOException, TranscodeException { + + final VTDGen vg = new VTDGen(); + vg.setDoc(bytes); + vg.parse(true); + final VTDNav vn = vg.getNav(); + final AutoPilot ap = new AutoPilot(vn); + ap.declareXPathNameSpace(NS_COMMON, NS_COMMON_URL); + ap.declareXPathNameSpace(NS_WORK, NS_WORK_URL); + ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); + ap.declareXPathNameSpace(NS_BULK, NS_BULK_URL); + + List works = new ArrayList<>(); + try { + ap.selectXPath("//work:work"); + while (ap.evalXPath() != -1) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + long l = vn.getElementFragment(); + String xmlHeader = ""; + bos.write(xmlHeader.getBytes(StandardCharsets.UTF_8)); + bos.write(vn.getXML().getBytes(), (int) l, (int) (l >> 32)); + works.add(bos.toString()); + bos.close(); + } + } catch (Exception e) { + throw new VtdException(e); + } + + List vgModifiers = Arrays.asList(new VTDGen()); + List xmModifiers = Arrays.asList(new XMLModifier()); + List buffer = Arrays.asList(new ByteArrayOutputStream()); + List updatedWorks = works.stream().map(work -> { + vgModifiers.get(0).setDoc(work.getBytes()); + try { + vgModifiers.get(0).parse(false); + final VTDNav vnModifier = vgModifiers.get(0).getNav(); + xmModifiers.get(0).bind(vnModifier); + vnModifier.toElement(VTDNav.ROOT); + int attr = vnModifier.getAttrVal("put-code"); + if (attr > -1) { + xmModifiers + .get(0) + .insertAttribute( + " path=\"/" + orcidId + "/work/" + vnModifier.toNormalizedString(attr) + "\"" + + " " + namespaceList); + } + buffer.set(0, new ByteArrayOutputStream()); + xmModifiers.get(0).output(buffer.get(0)); + buffer.get(0).close(); + return buffer.get(0).toString(); + } catch (NavException | ModifyException | IOException | TranscodeException | ParseException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + + return updatedWorks; + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index d4079058e..70bbd066a 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -161,13 +161,11 @@ public class OrcidClientTest { @Test @Disabled - void testReadBase64CompressedRecord() throws Exception { + void testReadBase64CompressedWork() throws Exception { final String base64CompressedRecord = IOUtils - .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); + .toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); - final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); - assertEquals(recordFromSeqFile, downloadedRecord); } @Test @@ -337,7 +335,7 @@ public class OrcidClientTest { @Ignore void testUpdatedRecord() throws Exception { final String base64CompressedRecord = IOUtils - .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); + .toString(getClass().getResourceAsStream("0000-0001-7281-6306.compressed.base64")); final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\nrecord updated \n\n" + record); } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 78760fa96..cb6713009 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -108,4 +108,12 @@ public class XMLRecordParserTest { work.setBase64CompressData(ArgumentApplicationParser.compressArgument(xml)); OrcidClientTest.logToFile(testPath, JsonWriter.create(work)); } + + @Test + void testWorksSplit() throws Exception { + String xml = IOUtils + .toString( + this.getClass().getResourceAsStream("multiple_downloaded_works.xml")); + XMLRecordParser.splitWorks("0000-0001-7291-3210", xml.getBytes()); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-7281-6306.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-7281-6306.compressed.base64 new file mode 100644 index 000000000..07b2d46a2 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0001-7281-6306.compressed.base64 @@ -0,0 +1 @@ +H4sIAAAAAAAAAN1Y23LbNhB971dg+NAnkiKpSJZUS2luTerESSd22pm+QSQkISEBFiAlqxn9exe8CaRExkmc4Uw9Y9rEnrO72MUCC14+votCtCVCUs7mhms7BiLM5wFl67nx4fY3a2IgmWAW4JAzMjf2RBqPF5c7Lj7N1APFONnMjYEDPxb8utaFN3Gt8dAZD5R8MHWHk+nF1DUQAlNMzihLiGA4nBubJIlng8Fut7O58GkAz/WAyUGJMH5CqGSRIPVxkjnZRqsgOi+gMqHM72ZqoBqXxIKAShJ0UCuMzuTJhgiL4Yi0M48YnRmRaAnZ2NC4nXnE1CIkBBcd0VFio8D6PIq6ApLLde0wSS464pDLdUYMLnLIohWQBNNQtnObSF3LJ7LfdRouAOXMSAQaOqKgxLWo3eVrzaIBYQldUdIVw1OwrmuVsrxu2vgFoBYlQVZEQMmRrgAdQToXB4EgsoNXAHQOZVsKi9WKuaTdRdFE6lpUZbczlbTMxwZKi4t9O7gA1HISxSHfRxDirkSWGJ35T4pDSMuXdooaTOdLIrbU7yjaAmDU1viXVnYtZ7DLQFxpV7qPmHoFSoKFrzaNVHQs8TquXpEwJsiWkl2XyxVI5y7TsCPjSnrOV1AkeSq6InoCNVCcJhYcQUA6Hh5bKumShjSBpRSny5D6xiIzqH4u8/1q5guidmIrgOfCczzXcoaWN711vdnQm7mPbGfs/X05OIc+0RVimVgRHIRQ5UeNnuWMLce9dUDdaOY59tgdHjWe4ZzozSd5HD+VWX5IYV3DJlNH6chU0IWKqISQHsOZE6uz2LNG04lnTaaTYeWiIrZqVWf5ooudAVrpGy6TReVNRcqG6/Md3GvCjbCoo3Jx4/M4lchCL0KpFqlo6spQZ9VgCdWrKt7igq6p+uN/fYzPNDrfENxz7IcO7n3m2xqbLIxXXG5SjJ7idL1pV1uPeCMfmiDrGROahC35yUXPOHR/UcwFFnskU9hutziEnjSIOfSFcoaeMFQ0iMoJkEG5rVJJ1KigTFIfxaCDMoLWIeURRoKs4ZBR6pI02FcONly5HJxzMPf6I8xFnfu58C1JBbfeQZsc8vW+4NUhDb5Pk8zbxsRrMivZx2SxpMuE3BU666IuLsQoJYtfMSTGD8nnLGOe416YmTtojj7/8LgezCIEylo9RAdzD3u8Glc+HcwtD9Mo88qdHkyWqnZWvcFLjNdEZhLvYmq53sQ5mDhNNlzkk4BLyN5EtzaCKwl6gxkx0ZP85SlMnoTSRB+Kd56uViQx0Yv8/SUPgwgzE90UZHBpr95e2MXIb1yQDPHWfp2P/IH9T0SY6L19VSgVnFHpq7HC7DWEB6Ztoiu7MHSzoRsTPbOtQu2zDUDwOo1iHGITXeejr6COcBhWc3nJkwSLgCvrL/Oh5xseYkGB86rg8NUqc/BNqRln4XhaRgCyrhzJ2RzeMvT7asJ+Ji7YVxBLqch/ltNPQxzQysO/sICe00Svy4ldc/aRKPHh0Fyg+fpr1tLpsi82AbWcy4Ip1mxZfrWVXu2d2Ymfm6ofqzpKLbKFWmFViWcjp1tTu7pSldbpy/PGNET7pq2B8hoOOK28OBHeS00eadexXWc6HDCScuYPGL9znYuzmhuZ6VLNuIigMf6XBCgRGCo+68ATkRLjKwwetdzPqiBhlgl1n11IEq7Oaq2hzp93rRn5vpQRGjxIyjxLerZjTUbO0L2YjkfjRz8yX/e09n9LFpWSPUyBjbzhaDIeI/jHm4zcH1tcYMxS1h4+RzFsrxZ/2DSdk8rTPRRunwvt1iezzt0G4YCyHRx1xTcjG3CPocjmp0v2ZxzFv6gZMCJ+fz6/fju5fffk/Y3Wb4cnnRZX3coyTbhobtxN+Zlo5hBBAprkbe2x4SiPNE3YCFm3/m8yXzY4vRjXGqp+7B8buF7saw1jP8nXG9RePKg1xL14oDfg/SxCveHvxYPaBaMXD7QLTS/2Ty5QvXihXdh62o70C2IvLugX0n5ycLwA97QSywt3TydyccHvJ/vaB4W+DsTyA0Yv9rUPJj0dx9UHml7s6x+E+jkKyw9Q32P9VFZcFAqBeiz+A4MY5OQYIQAA \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 deleted file mode 100644 index 34de6ba16..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/0000-0003-3028-6161.compressed.base64 +++ /dev/null @@ -1 +0,0 @@ -H4sIAAAAAAAAAO19S4/jRpbuvn9FoIC+0w0U3w+RuuU00plluxouV05llu9gNokQGSnRphgaksqyauX5FzPADDCz7IUXc3txgd4M0LmdX+Ffck8ESYnKFClKKbL0CMNVlSmdiBOMYMT5zonzePXlz+MQ3ZM4CWj0xQtNVl8gEnnUD6LhFy8+3HwtOS9QkuLIxyGNyBcvZiR58eXZ717FxKOx38/+QROcjr54oajwnwR/DMk0TFcy7N4/vEDAIEr6QZSSOMLhFy9GaTrpK8rHjx9lGnuBD38PlShRCoqiBfGnHk75sKqazEmKNn6QpEHk1bcqEc3bkQk8CU6JX9NsTlO0oumIxFKEx6S61YKmaDUm4wHM9yiYVLda0MxnI45pXDMT7OuC1qPjcd0EZN8X1NkSVlNn3xfUExgWhVWSfJLiIEyq2z2mLHr4icw+1jLMCeZPPobWNU/Ovp7T/py9Q1LgkygN7gJSN2dPiYt+7qZRtgOq2uYE81mJyR2JYeOQuglZEBXtsO/HJKlpkxMsdtF9AC+gNKFJUP+SP6YseoCZ/am6Ffu2oBzBNqHxrJo4J1is0ySkszFMZd1iFTRFq3+a4hCmft1OXyIr2iYkvg+8ms2XEyy/u+ve2PnawAkBcxjULemCZrGbEoJjj236aVzz2i7TLUYIv8fkPiAf64Y5JyraDaZhzaqybx+PDzpI6DSum70npC/Ofofgv1fZ+dHnxKXNk31bppjGwRnrPYHuF12vkBKvlFKLJ90w6XJW14oTPGk2okl6Nmc7J+YfZw+i1D7Jq9KGLe/wEqcyRUg9HJIzEr1SVnycM6zt8lW+o4qtV2JUfOPFhG8BaUzSEfXPLgM4ntNXStX3T3tIpoNxkDBxL/kgzM50VbMkTZNU+0Zz+6re10zZ7un/uOjzcYsnEx3iJJXGgBlg7vyiV11lveo661WDXh1Z1+1/nE/5ijYrnjbEwZj4Z2k8JaVnzD99Sg84JuuPC4VHzR59WdN6EgdjHM/qe1kmypd35QK+ys6V/PipwUrFAVR+wbKmDD+g+yAJBkEYpHAmT6aDMPBe5J2twl2LTspLxd+PxSJptqSakmbfqCpbJN2Ue5q6WKQl6pX9rVz6Nb3WLX3poRfIoT8M7knEQVRydhHiqR/QV0/wxRLVmg7v8DgIZ5z27G0Q4mhVf2WixYoopSUpfbzAeSVYmNQtdonshVLqaVkyPBIotT0+Il3qlb+mGZaq7SODU+WWOQQpsEr9QxV4pdxBjucK4FfbvqBZHvtToLYK6dU/1wr6gsl8TbN/8g8Xsr0EBaRkOmbbvnYOFqBg47PSgYOyb2iyrVsNz8rSMEuKTe1clOmWl3rR11y1qp/VOVVlP3PEt+a1m5NVnFw1E6eZNxqcNHbfNGXL6G1w0pTX+O4OztZMeA5jOp0skzYcitpjQ1HdvuXKtqM3H0q5/9K7migryBaTVZrexZs5TSVQ4AHYaYam9XTTeoFgxSchnklB5JOf4YsXzdZCWfSwQvg8HVn5IR5JGjY9qmRoNzpgAaOv9WQQE00kTVeTX+aRwd3VNE/pJA4fq6nLLZ6JiOu63hAl13W1FjmvbKxsNCuP5pCL01y2o0IkryBZvW5Kg4UrGPpkguOUbxne4zX1AhwiHPnoYhqm0xh+OY/SUUwnNKTDGbqcN1gwetxJLceYhkRKgxTUgHP/xymcvOgqpncgJ2k877FEVP9upsC45h0u084ADjA85s658E/WtoM/7F1aNMs+WNvOx7MzVVtMEp6tW681D7NQNYc4Cj7x03n9MPiKnE9TGtExnSboQxRwC2c6Q/QOfQUQiYSA9OajqF7Bcq85tGm2gzxgdvaUE/+4WQd0GqWgQby+XjTOP1q/A9cOdb5gQYLHg2A45Ydvs1lu1r6sTQ9jOEh6qqPKd4t3o1HLTQfBmudnwDfv31yu4lYiWT+Rm85PyaJQR/pKqRXhj0CK0hyl7B7QbI2tyv3vCtA4lgly3d0ez8w7eC6c0VxJdSTdudEspuIatuzou4Yzz5r6Mg8BZ44YzpRwwxxUcDDzfm4NQBLK7ITwzZvLNzcf3qML2A0xQXfwSUH3EohAvR8HET8zeB9voojeZ78GEbqB4QTJGHWCWewWMUsX+ONdkpKQYHTtjSgNGfAopu9/4fHkf6NvaTIJUsyOoBMFIp9TWC7RlYwPq40Y+W1frQWjoKkyhDy+kKvt7AlxVa+Li+La/kpkVT2VbpVquyrTVfW1dFtX29syZVV/T26iGplBF9RV/eb3g7W9FTRVfbAr09oOOMHmRq2trIGPB/cM3MehjQXgw5VVtyVDFpsaPoGrsJ5h6z0YTS20Y22VBeWqG5InUHFjsxVMRu9G1/qq0Vdd2XJ6u8Z5z5nqMg+B844R5/FNUgOyyqPKyF6HsGcmJEQ+QSFGxGOGkIdfMWA4AHxjHCISoZDCqRXTGYOFCcJwUgBd+PCXYeDB7/40xlFKEHRy8e6HN5dSyYZUB/iUdaNddTSsf64SdbNXodRASmcTcgbQNpo/wZNvN+7zHodTcqZqqin1NMdY2XNGs3HXEV+i4BPxUQqLkATco4bdQr9Yw3DRcnOu0zic7/AJjQGYymzK+D4v5Kjy5vr6e6V+DKyfjZnHJMwAAACUM2ZYlejdyt6XCNdv6bWvzSrKKrSavdnsbfkRZoPRwkADLyTFS1/5Is2POi6SMjy6kTKkqw2UocWB2YhN9jzFo2Q79lsaB59AbQJEdA4InxnAiwMhf8hl+hVY/YlIr8HnawxYewReGp9lGx1l2dnj02CLg6n6UNJUWetZtqbEmMj3mitr2jbH06ZHU1O2TQ6pdQcUTBk/mJryrD2U1h5ICQm3PY2anUQNT6E1cFmzerZjNYHLBWVbcFl1b3S1rzt905AtS8BlAZf3Gi5/BxA5jegwxncMJMN3FI1IHOMxO98wQ9EBt4N6wcNfIuRTDz4mMrqKgzGJGYzGCYPMBE0jhrHvSZIGw5w6oYOYoJRb/iiaIeoF9LCR9HbyqqrLncqsKiZdya0q/q3JriqGu5JfvP/GaLpiNPuksYHocyRTd/TOXq9aht1obPVjOF6NbS5bs4lim+9nucn+q5ySz6kEbuvFs7hIq7kRK7djbju6tYEXz/YKZ6Fl0iGTvK9/nhAmU0HohkLfrF6g/dM3HU3TFLfnuLpjmrpq6668lU1sG+HdmPeuNc/GjE9C/bQNw3XNRrc1OWVrtzUOCzwx1L6pyj1TqJ9C/dxr9fO3X/7lAicEJenUnyGjv3BQfYmuJziIfvvlX4XG2KbUqeLUqeSpGkS70qeK63EokIPdK5ClGTc7e9nWM93VG8aCwOEYCX2A8/xVY5OorOd/wkpk7UZsokkOKP1J8kZ4AmPZXzVy8UwbqJFqN2okiNB39yROMwdLkJdfog8AIGOeuCaIhtyX9S2O8JD98iEe4IUz6zcx/ZiO0IDMKBBdEZC9k8xt7QcaTscE6X3E5fM1yGdQE09bMd3Ci8vQVdNophdklG16cRl9zYT5kB3HEXqB0Av2WS84R/DS0Oz2KY2pP/X4jVIfBWM8JBGaIcLtZSTyAsw8ukhY3DO1py6s2P9l0pKUZEiCX4zRKAwiwl+1q+yE92miZF8RP0ixcnX9mv2rG/LEvzt00bkDcXZJEEwRonEQwuHRbMEPSCjpGncP0PqWJeuq+3mEkmZoDY1VGWWbQkmHmejrIJR2bqx61lSXeQihJIQSF0p3sO14NhfEEDcqwW44r9CfpkmK0hFBXxGA2hGg7S8P23TVlg1B6zm24bqO3qUNYQ3T1m0Ia/ifjA2hmCQPDzIzAoMxyYhO+A8VM3W6cKh0xvTR6589noSKR6t6zFeZZRbi+j3ByTQmCY9UjfE9yXJ65Hr+QSGkk79PVh1V0Uxbs23HkdmbJmtWT1PdXleXypsNYNub5W7veXe35CTwd7rkupTosio5lmrrhmFomtXFOjfkehiLu5tLfNXWDKdRyGVB2UAvUrfyIc9yeal9y5Rd91gu8b2QvYCN4ehq3SjrZL44umQBUJAc1zGeqSKt6qljFanBBK1Sk649OpkmSEKvw4SFpsfNNaVylxiQRcwRCo0DUGCEHps1Vracn8p5rdNtVxN3quV+ReBXdlXk8TQpfVSouHCseWSMfyrulJIJ/M7CaP2pV2RJST9SFJFgOBrQeESpzwHp0wwcB6kOt+XJsVO8V8Wue8xXNZJj8K7YDgNWdblLHFjFoyMsWMX+GBa9FXOYZvZUzTBVVGy+dhZd4jVSni59bZdL3bL2HCqfOWopzWXp8w37GpMkwUOQhDiKaIrmY0WLFIiIPzdiE9+vmail7uonqf69zmbpNOxwuRku4eBRhi+VIPqIYz8veSMD3ZdwzH3x9HDgKbzYU0YkfnP5xdvvnZt35++vS9DuUANIdmCvm3vWEBZTORkJp5kt7idNVe81u5/MKFt2prdYFQfT2HmKS+E0sw963fHcT14EUx/7JEGzecz1lLn70T76239/iDBi4ieLzGa7IgEIAEfGICQt3lM29JkpboVoljiSy6MkSAmrPHeHp2Gq3AUh/Mb3jmopb3gyJ3KbP+fvdTV70o2daGLCwkePR/5UZ95kKvtS3k0hlVbMZp3XTE9vlGGkoGzXlVN1+oYqpJKQSnsvlUJ8z7OMxhhNSJTguOS7l3vyvWW+kKB8PfxnRPbGgROHA5xMs5crP5A9OJAVEEOOoxA4M7NnkEgkhVgaEhbMDQephyUfvh3nzySVH+l0HRleL615iFE2Xw+/sglDMGFovPwSCPG0Yk5r8sW6PVdtli82o2zXqVM1+pYmO6ouxJMQT/ssnrbxN98P+dR6gAE/j32JyaPjkUNX59fvrl+i9wTOvyy/2c08P9kVTuMAuofXs6iGdVBySNdY2nTDZIejbmufSU3StaZqEqdsObhAY5PR23kixmdNdZmHkENCDjGyN3mcGzvS+mjJ7fc8Qq/vaXjPHC+uRiSiY/gTtSeGDvYyVcQWiNgCEVvQ3FYsYgs+C0Cyes0Ku+SULduR1b7Vk3WhqAuAtN8A6TuMvCnLLs3uL0E3e/g1YabDPtPMs8OBKXHzL5g+F+JMZ095la08gzXh1V/G9D7gKa7h54QXQWaOGgJRPUVUkmNKruM4kqubkt0hqnJMxha4rmbaOqpaw/94UdWx456bx2fCMKQDHMosAVIyIewswQny4uxuImHWv0NMNXFQgZQtRtWZPcOFfzuOqqvjehhRdbtb3LaiZHXNsh1Ds4uABde1VbOLdd58AIex5DsMpKwvS14OpNS6CaTsudqRqDgikPJxYxFIub7rvVFJTyWQspweqGQwm4zoIAC19Rz9icFIMkM3I8B4wxH6Gn5Hl8TjPrwlz81Lck9COmG1IV6iK3jMqAjB/I56OEQXNPLgtD5w/bXN8MqdAYUqdt2DhaqRHEOkXcvhlc9QCKp4dKQUVLE/hkVvxX6V7TndRMXmE+GVq8MrayZKhFd2E16ZHw4ivHLpeVaHVy6hoBI+Ega66qXaTxuO6tq2azl6jopsS1WtTm04zQdwGDac3S15ezbZnuFopt3rJKNdQ66Hsbg7NNA1KktQUDYw0OnPM9CxHM0i05kw0FVMqTDQCQNd2wY6nwx5PZw+wrlPCHMTIWPuCTeZ3w0TYWdrHcxVsese0FWN5BhMLu3a2Z4D8qp4dAT0qtgfw6K3k8asZ/Z6tmmjYvMJO1tFGrPqiRJ2tk7sbMXhIOxsS8+zbDL7U/Ybu4W8niYpDiI8CMmOff4XzCs0VVtzdceyG9QqWlDu3pVEcyXVZQF/aq9v6X3DYGUvhaZaqalqzF3YlDRXe66muqqnw9BUL2KaJDG5e9ydUHKEkrOW3f4oOVX1tzcbwQEV4X4GEtl8jg4XYZTbjXlFZG0BZMcblEhetGqzRHInaKbTFA9G39RkTbP2+85wS1WzNhxpEcG7Wg3a8e3RbqK7t45Bek5k97HcVtm603MaprbKKNuMmNX6ltpXLdntme2kFNlua5d5iIhZETG7w/j+Q9YD2k8zspUUqur/1POMHArOb5pkRNkI6x9lvrWL86/eHBTCFl55TxVXq8fSkXxGr7yaARyG49bulrzVSGmr1ysdDJ145dVyPYzF3Z1XXs9t6pXHKLsIm9VcV9x1CK+81VMqvPKEV96utdZyvRL2xTSC8w2Bfhqw19MjvMzoiKDzKB3FdEI9EpE+wh4rVzqF4zwaopSMJzRmBt+FBnzYemyH91nbY70qdt3jvaqRHIP/VuvBsVtjwCoeHeHAKvbHsOjCaU847TV4u4/Waa84HITT3tLzHO01914k6jVNtZE6XlC2WYZU68P/hitbR+N6KNQ73vj4rh2vMauc4j3J0zuN0CSmcCKNcZ5xE8EL7DFI0KK34QYFSEGr9GgEKmPCT9+sCGn+MHAY56mEPSxNI6l4Eil7Emn+JJKmqr2es5FEosC5i/yqTgM5VG435p5c6iKOZtzctctY4LQ2XbtuRoTlv5ovmpByKyetRspZltmw2HZG2bKUc/qqKttl9xch5YSU2z8pV/arDxI0ANEx5glp0Bj/COpukCSgPt7BTx4/JhD2WEwAN10ymbEfIo+7ehAY5XTM3zZW+87HCpMVitpTSvZTCURdTD/CIxYiLxeBJy/pdLUTSfd/mEcOeg0IBV40D33N1kxIu5UTV6fT6VajxCcFZZvSjt2v9jVH7lmiiLeQdoci7VjgWC4JCjXusSTYNyWuiRhjepuquycvzUrMutTb0GmLssZbYaPLptZ8BHVLMy3lx6wEMbuydWTNllVTW8lo9y6Cm/HfRYROKbJvM+YnEaKjWY5hNqr4UFC2hatU90ZX+4bbNw3ZOpqKDwJX8cZHjateojErQpfiuIjLnxVlp+5YxQAZfWDV6wbwbiEWvxNzVBP4Ldaj78KzoSXPpp1KqCpunUupqoG0L6mqOB+Hc03SgnON7VpST9O7e+FqGe7q7ZrQOMWhzKaMv2UxyU5E5c319fdK/RiOt2LiIz2U7cKf5Y024n46rrSpeO5Aibw6v353LaP3BNBkillp15tMYILovMJpHED3IOwvpiG7xA2Film9cvuX6IHVndVsR3ckgJmdZITnNV85S9PdbQb4hvVma3mfhCJpG6pj280M9Bllm7ke9L6h9zVWHV0okkKR3GtF8iKY+phld0M/YI/7K5FEZhbWO9gRrMj5NB7AQHkxY5JMGEVSKpZ+4LXj2qx9/jwZVMWifTlUxbl1WVTF+CAzPUxoKIUkkmGClDAMBqDvKB7faz65n+805cv7L1TT09w7rJFeT98I1neb9qEJpN8FNKfh3/4fTBx67QfZHJ02+t7GV0HXjYZQKKNsEwoZfd1kaa9UWxdQSEChfYZCb5ht3J96mQc6RiFG3hwd3ZfQ0flkGqUkQQmFY72wtaPZEzs8nGJfYRhhSCMscJLASV3ipAOxeHYElRhGkrwRnqR743ezA6x0seJ06rM7vsa62ykjq0Oza7pwXNimqVpb1SLe4oxcw7BVm+Ya3idi0zRVp5FzTEHZrtOxqffVHsu6IYC8APL7DOTfkwTErjfiUTQ378+/RxfwdZCmhCAJLbkkB2MQiynPGTShCeyekMAPsHF45qAJHAUECei+e3FU1X37IqmKc+tiqYrxQZo32QyRaUxBzQjzH+Af5T0ZXuIUKySd+gDZr28+XGYxYbbuao6pvLl6990tfPoH9uEfsw9vX38vT/y7jfB8TJg/yfEg+ddsBgmO0BXMZ4BZUZ2X6Co7hy4J0+XYR/w8u4bN4HG3BH5qXdARSZgfPKcOhLl0c5TV07RmVQJyynYDmS2T1bTWVBHaJVDWXqOsLF3HJ2Z1QPMUF4iwqmABO60C2BQswitlNlH4nGViTFISfcII53ZRYRitNow6kms6titZjtRZUkVmmHQ4W8v5TKUC1vA/XsPovrtr7gDlnEc4+om8RNcTwu5HiIAqKyetrqCRpjYtaMQp2y1opNt91ZR7Rk9AFQFV9hqqMG0RzauZJgBA/Di4J3GWZyV38+b6FOzPOEhpzO5vs6Se6awP2OUuZOVNQTQgmqWRHpN0RH0a0iEchtCW1z9NWA5FzKBOZoAKeOqyIfx+8I5yLYS66IZjS47VU7vCN/UMuwl1qR/D8eKbR0akHAMGP9JknOapXEEeyZPRRMk+VHL8o9wH5KNiGHbpKD3UmJcdgKg3EZ9q1hKOnXLqVp5HEESjh97iCA95aeblimwCbq2Y3roUd46rN0xxxynbtQxpet9wZMs0BNwScGuf4dYNgXc9AhWPe57cB/hHkqAZSlbmd20PF22Yzg5WiYQEZ9lbg5Qkik/uMGBD5S4I4Te+XVRLWTzd73WVRPDXD/wJ4YcZ/Cmy2P7+wvj9Vwb7+mYas1/OL/nz3r7L+Zz6nUc+DyC3RpRyCVau4PItTSYBwCg4S09bbjV+3zdSA9ouCWe5PU3VXGcbZL8xqm/G9ZRKwtmGDnKsYQwAp2w3BsDgJeFsa9elrwV0EdBlt9BlxC6qguieqVI0YlLJi/EA3QWwsVmWXmY7Cimz+IxIDAJqSBZ2pRSOJm4kYnaf70FYjwi8hugrdksWHrb9p6VsO6ahm+zWB7az0QPk4XSZZqch852VkOrSMlMxoC1Xv93SYc/BClU8OsILVewPzqdr88pKxdztorLSkYZnMITDhMa3haTg/qg7NcYteFdiQV70t1E8aEHZfnlgQ+7px1KpQZQHftxY2XiCVkFDUR5YlAcWyH7NswlkL5C9QPYC2T8d9P4ge4Hol0Yhrikq/vsM1xS22jMNV+v4mqKW62FcU+xucVuoMqE5hqUrJA6xl8iaamhbOdBvgTsaMj6MJd7NTRS3KTiNrQ9OF9YHU1atY/FZFtaHx42F9WF918L60LX1oajVAWdNiFhEXEqQT+5JSCdzP02fTAhOcJQGeW2rIELfMcCJzscAONv0lTpgS8NupH0Vm+4kftUIjqFSRrtmhueg+CoeHSH5KvbHsOitxMZqTs81e5aKAGSZMP8tLbpE4hhU+ydLX9vlUresPUfBZ46qluyPi8837GtMkgQPQcjhKKIpmo81D1MCeBkj/tyITXy/ZqKWuqufpPr3OpulAzKedmpHKw6HXdjRjjacZZ75hBVjIR/ZtcwS5ImyLCc4DgYDRnadAsITWU42VcNtvWc7DUOHM8p2Q4dhPjRDNhz1SNRwodbxxkfoEDrP7nzDhR37RUaXrFZ0ApBgRiKfIhKiYIxBs4RjirJ8rCFGPqsOjW547tUQeTjhX9w8/DUinw5bj2un7J1jsbPB0POH70iDK7juHMBvUPuudgynFhBMEibgaQKLlO08BrVkHCtXl18rP+g95XvVVO71XmRi1dw4hup4YVQ+bayAHYl5LoLgPou/y08wgZhWTmNNCI1t9KxmITQZZbvRv6yomCY7lsgLJxDTISAmXnu6j7wpzhFSUScDTvYYs4MJQBIeAn7iBThDOLt45jgmnmmSl9AgIm3KCsOnaeuSbVj/0BVKqmfYUdqU2jGcDEpiEwWiimBe2gEPvIil0fMnLMMQN0oB5OgpAJF0SykXgZeyfSgRkPzZxxLfh/C3FGIp24eSTyTYh1JpH0rFPlQEzmKny1s29+gDn/zTRlSND9rNfFxaLQvsOKYO8t2Q3C4cgniZH87SMFYzbLkscC3vEymhobqW2bAsMKdsuSywy0to6CIOXoD4vQbxr8PCbpAbMMdTBuNjlidRRn/77/fTIMKIsmJTCQ0DP0/rw02fCWLWUTp++BV+9PB4QhIghk8RbKCHXwFq4OTLQ8f17dXDe56QqmLRvqCq4ty6sKpifKihGhOc0IRGYRARPkesHB4NiZfXweNfS6z4L1Y005Ki6ZjEFETAMrhnv/FNK/FNC3/DJpSoVNqxDP0C4scZyic+9kkiLTYsk3cB360bYf9uyxL3usH9V+fX765f8ktqls2e3eTML4aucBoH0D0c3xdFKtnT1g22KlpsmQ2rcGSULScs6vUNQ3Z0UbRYALW9BmpLRYvl0qlUHOkZZnuJuA0WNmmMH/6LFvfRIfKpl9IY/Yn8NAtDaPY2ltG3M58IfCbwWZf47EDsr8cB0TrOt9MRRutAZz1lUNemwbe9CF/DVF3bUbeqLb99XEAt18MI/9zd4rYR4Qv/KXiMI1nTXavXxeI25HoYi7sb6zqL2G3mVFxQdhHba5jHYl0Xsb2PGysbT9AqHU7E9orY3h2q4O/SEZyhJEnQeZSOYjrhRaEAvKEbyiKE0JsBQ//zgJZlqlLi+8PWudsK732+qK/i0ZG4r2J/DDGe7Qb2Pge8V/HoCMBXsT+GRW8nsNcyHc00DMSgRU93W1r0IwjsrZ4oEdjbSWBvcTgcZmBvR9a4lXAnSU/bhrZVGIplNg1D4ZQth6E4fVOT3aPJnyV0Nt74+C5G392DXpjnb56hsi+8jEoF6oYhHTD/dqDxaJTCZqVJlga6WezJEmMGKkK+F3gLBDMxnIIszncziV4sDatUm3Myykb2DR8OwBMYYcIJspTU+dhKQ3rEa5faYsPCebngxGSS3RWyisfBPU0Un3rTMQ8fUPKierdBdAfggtzSxePfzm6XVmVv6+F1JDBFPbz2RKitag1FKKdsV4QaBs994R5LAQwhQnnj4xOh32EURHg65PVM6f/8c35Xjn775d8ucIK5/9A1DX/75d/Zhfs3jAqDpGL37z+S8STkGS98knARFnwqOmGn/v/8834VfyWhH7DkHTLz5MBwzk6jGVboJIiYkqqUpiGSPHh2KaGhNORPLC0/YXSrGfotvHmmq5nyKB2HG0m1iHxMJnhC4g41QruBgCu3gz+wlbRFworsg7XtfDw70xYOROzXtoQpCS+L9TwocanrN5rTh/8NTbbLJZI6FZeg3jUUl5yyXXFpmqzsuWPu2hX3WVNd5iHEpRCXjOxDhNGQRCSrcc5kX6HfPPzfQeC1GfNUJe0WooOyS8pjkyVmJ7IEYJAfxMRL8YmHdTR+Hze6hGrNSUw3dVNTYg8HMnsxZQ20Ll3WDasjd7HN+O8iFhwmkUuIjZmfRjC47Viu3gjY5JQtAhvN6Jtq39ABeogYIwFs9hrY5FGNzEY+wTH26Y/wM3zy8CtX4g88R1NLfks7lT5V3DqXQFUDaV8KVXE+DoeaNjKqaoYh2Zbb3QtXy7CjZKq1YzisMLWKAe3TOwab2pDULlPR1TLsKBVd7RgO6x17nt+U/7O80WHfODYx5sUA9tZcoS6M2BuYK3S1E3NFkaPi4s3lu6+Q/3fndyRO0JuIry+7oMDhYdnE9+EKWXdMtZkXVk7ZbvkEw2JWa9cUyYCF6rjXquM5S0/BfT8ffsUv5zmAZ+xaGMcxDUMerO1NCaiSD3+J+uVMwdDTNApSXlSB3TYP4LU7bF2zFRCm6rBzLau72Jh6ht2AsPoxnAwI4xOVQQUm2hMZzzdcgGXAD8o/TbEPnSZK7k+gMGSlaKaxESrr3k29CTLbAVr6+3x6uGvL372J4BhKpym6YD4gD//B4FNpQk/8/mertF623TStF6dsN60X8yUApcRxBW4SuGmfcdNFnoKGV3SBnzzsUxCACI537oI3iemYFvlrFogJSILMsz3Mc+BckjDNPPWuMOzjh18HGP3hqxgnQfjHQ8dSref4AoTRFaYq5djS2jWgrs3xVcH/ZDBVRY6vqwxj+DTP70Wz/F5X16/Zv7eafXtJ7gLQV4J7ekt4/tONYxo6TszVEcYqsnK9L51OM/QNK0ABctHL83C9Z3m6BMBaOZ81AKunmk4zgJVRtuys6fZVV9Z6mgBYAmDtM8B6iycZLCoOpyzy7hRcNK0Gp365XX7nsbC0bHDnUSo73uadxzmA3CBGb/EQfwJ5jc4vX393/v3Nu2tk2Orf/iqkyso5rJEqhqM2DAHIKNuUKmpfA7XdlXVXqO1Cquy1VPkO8yKGWShYkVyWooc/p1Hg0azoYQLK+8N/etxzruxFx6XR51HXNwyWg2WigyLvccn6DL8lJA7IrU9uSx8r0AcdxvguwMnSzyS6zfTglN7eU48ktzjPoHw7u4X9nf+yzxrUlrLUbiJLdyAXX0cohZeNT3Ef8Tl+iYp5BR2sPMlCRK6YzhqPAE3VmlUWyylbtGxrbh/+BxFp6o4QkUJE7rOIvCBxSj7lqdW94peSEbufi1BWu2IGItObMkMRbU/6HezFP3dENGHP5w/fjaV6zvVzu2BWjuFkLNVPAVkDNBbRFABWsOQPqETarabZt8V+vCW3i80JHYRFqp7b+d4EiDbfm8q27f5QZEj6I0DJUHq977nztoR7Vkdw73u2tGh5aTM/UAHvVk5frQWkYaxgTtmuBUTX+6Yl26qwgAh4t9fw7vLhzx72MQL5jaYRRsSfFhd8LHaQuSSUXD8Tlh8uCgYh6WdAg2WJy6UXGgAVrz2fuzKwjHJxtOwWWhTvSUCisB88GiEmf0r3ing8YPCF1QB6HS4Kn0Evk5D8SIsRssOSZTZ6i4PJw19/++XfXzK21zhKAzzkjC5GQXjw/qft+EzYak/VbUs39a5Q6HqmrftLrOF/Mig0n6MwGMQ0kWHTj+XxzwrLWBlE0yArWRZPJzyEWJpJZEziYZZ9U1qaRYn3sHnKrD00+e0Ay12U5+8lKiaQqcSLCZTRd1x5TgJA0SEnRdN4gHnpsgidjx/+zJIwAxX0hU8bBDY+iTc6iNutX2Y6LNG4oWnaNufqxpp9M66nVuKq1+gmtKBsv8SVIff0Y0m/LUpcPW6sbDxBq9QCUeJKlLjaoVb3dUzHmYMmSibYIyilKEvjnWYf9NE56GjpiF/uTkYzlr7aJ/ckpBOWCRwFEUpHpHSp/T64J3F+3c0uuD8F4R9fXhLQ5CZMGcuqtiI8/31+UU77iCXaI2l2j/zwa3ZdvhxPWLpJPzq/9zZLLj0Pb1Tx6AhzVLE/jgwxLSjuLIaShVAizYbJ193Vtymi5FK/ZqJEyaXtLQaNSy4tDofDLLm0QI+fLZYVPwpl7cAWsPQd6yJ/O5Y+X/y4YPAqfw/62T9nv/v/DSZhtnENAgA= \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/multiple_downloaded_works.xml b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/multiple_downloaded_works.xml new file mode 100644 index 000000000..b155b16e3 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/orcid/xml/multiple_downloaded_works.xml @@ -0,0 +1,57 @@ + + + + 2015-05-23T18:56:52.486Z + 2017-02-28T08:22:12.454Z + + + https://orcid.org/0000-0001-7291-3210 + 0000-0001-7291-3210 + orcid.org + + Paolo Manghi + + + The Query Language TQL + + 5th International Workshop on Web and Data Bases (WebDB02) in conjunction with ACM SIGMOD 2002 + + bibtex + @inproceedings{Conforti2002, Author= {Giovanni Conforti and Giorgio Ghelli and Antonio Albano and Dario Colazzo and Paolo Manghi and Carlo Sartiani}, Bibsource= {DBLP, http://dblp.uni-trier.de}, Booktitle= {5th International Workshop on Web and Data Bases (WebDB02) in conjunction with ACM SIGMOD 2002}, Ee= {http://www.db.ucsd.edu/webdb2002/papers/43.pdf}, Pages= {13-18}, Title= {The Query Language TQL}, Year= {2002}} + + + + conference-paper + + 2002 + + + + + 2015-05-23T18:58:18.492Z + 2017-02-28T08:22:12.455Z + + + https://orcid.org/0000-0001-7291-3210 + 0000-0001-7291-3210 + orcid.org + + Paolo Manghi + + + The Query Language TQL - Demo Presentation + + X Convegno nazionale su Sistemi Evoluti per Basi di Dati (SEBD) + + bibtex + @inproceedings{Conforti2002Demo, Address= {Portoferraio, Italy}, Author= {Giovanni Conforti and Giorgio Ghelli and Antonio Albano and Dario Colazzo and Paolo Manghi and Carlo Sartiani}, Bibsource= {DBLP, http://dblp.uni-trier.de}, Booktitle= {X Convegno nazionale su Sistemi Evoluti per Basi di Dati (SEBD)}, Month= {June}, Pages= {427-431}, Title= {The Query Language TQL - Demo Presentation}, Year= {2002}} + + + + conference-paper + + 2002 + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties index 20f56e38d..520bf1a18 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties +++ b/dhp-workflows/dhp-doiboost/src/test/resources/log4j.properties @@ -7,5 +7,6 @@ log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. log4j.logger.org = ERROR log4j.logger.eu.dnetlib = DEBUG +log4j.logger.eu.dnetlib.doiboost.orcid = INFO log4j.appender.A1.layout=org.apache.log4j.PatternLayout log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/create_scholix_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/create_scholix_params.json index 336181e0a..c56e130c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/create_scholix_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/create_scholix_params.json @@ -2,5 +2,6 @@ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"r", "paramLongName":"relationPath", "paramDescription": "the relation resolved Path", "paramRequired": true}, {"paramName":"s", "paramLongName":"summaryPath", "paramDescription": "the summary Path", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target base path of the scholix", "paramRequired": true} + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the target base path of the scholix", "paramRequired": true}, + {"paramName":"dc", "paramLongName":"dumpCitations", "paramDescription": "should dump citation relations", "paramRequired": false} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/dumpScholix/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/dumpScholix/oozie_app/workflow.xml index a37d85ad4..d47ebb0be 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/dumpScholix/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/dumpScholix/oozie_app/workflow.xml @@ -16,7 +16,11 @@ maxNumberOfPid filter relation with at least #maxNumberOfPid - + + dumpCitations + false + should dump citation relations + @@ -98,6 +102,7 @@ --summaryPath${targetPath}/provision/summaries --targetPath${targetPath}/provision/scholix --relationPath${targetPath}/relation + --dumpCitations${dumpCitations} @@ -135,11 +140,21 @@ --objectTypescholix --maxPidNumberFiltermaxNumberOfPid + + + + + + + eu.dnetlib.dhp.common.MakeTarArchive + --nameNode${nameNode} + --hdfsPath${targetPath}/tar + --sourcePath${targetPath}/json + - - + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala index 0073afff5..556106180 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkConvertRDDtoDataset.scala @@ -1,7 +1,10 @@ package eu.dnetlib.dhp.sx.graph import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Relation, Result, Software, Dataset => OafDataset} import org.apache.commons.io.IOUtils import org.apache.commons.lang3.StringUtils @@ -9,7 +12,8 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.{Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} -import scala.collection.JavaConverters._ +import scala.reflect.ClassTag +import scala.util.Try object SparkConvertRDDtoDataset { @@ -36,11 +40,12 @@ object SparkConvertRDDtoDataset { val t = parser.get("targetPath") log.info(s"targetPath -> $t") - val filterRelation = parser.get("filterRelation") - log.info(s"filterRelation -> $filterRelation") + val subRelTypeFilter = parser.get("filterRelation") + log.info(s"filterRelation -> $subRelTypeFilter") val entityPath = s"$t/entities" val relPath = s"$t/relation" + val mapper = new ObjectMapper() implicit val datasetEncoder: Encoder[OafDataset] = Encoders.kryo(classOf[OafDataset]) implicit val publicationEncoder: Encoder[Publication] = Encoders.kryo(classOf[Publication]) @@ -99,44 +104,66 @@ object SparkConvertRDDtoDataset { log.info("Converting Relation") - if (filterRelation != null && StringUtils.isNoneBlank(filterRelation)) { + val relClassFilter = List( + ModelConstants.MERGES, + ModelConstants.IS_MERGED_IN, + ModelConstants.HAS_AMONG_TOP_N_SIMILAR_DOCS, + ModelConstants.IS_AMONG_TOP_N_SIMILAR_DOCS + ) - val rddRelation = spark.sparkContext - .textFile(s"$sourcePath/relation") - .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) - .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) - //filter OpenCitations relations - .filter(r => - r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k => - "opencitations".equalsIgnoreCase(k.getValue) - ) - ) - .filter(r => r.getSubRelType != null && r.getSubRelType.equalsIgnoreCase(filterRelation)) - spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") - } else { - - val relationSemanticFilter = List( - "merges", - "ismergedin", - "HasAmongTopNSimilarDocuments", - "IsAmongTopNSimilarDocuments" + val rddRelation = spark.sparkContext + .textFile(s"$sourcePath/relation") + .map(s => mapper.readValue(s, classOf[Relation])) + .filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference) + .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) + .filter(r => filterRelations(subRelTypeFilter, relClassFilter, r)) + //filter OpenCitations relations + .filter(r => + r.getDataInfo.getProvenanceaction != null && + !"sysimport:crosswalk:opencitations".equals(r.getDataInfo.getProvenanceaction.getClassid) ) - val rddRelation = spark.sparkContext - .textFile(s"$sourcePath/relation") - .map(s => mapper.readValue(s, classOf[Relation])) - .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) - .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) - //filter OpenCitations relations - .filter(r => - r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k => - "opencitations".equalsIgnoreCase(k.getValue) - ) - ) - .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) - spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") - } - + spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") } + + private def filterRelations(subRelTypeFilter: String, relClassFilter: List[String], r: Relation): Boolean = { + if (StringUtils.isNotBlank(subRelTypeFilter)) { + subRelTypeFilter.equalsIgnoreCase(r.getSubRelType) + } else { + !relClassFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)) + } + } + + /* + //TODO: finalise implementation + private def processResult[T<: Result]( + implicit ct: ClassTag[T], + log: Logger, + spark: SparkSession, + sourcePath: String, + entityPath: String, + clazz: Class[T] + ): Unit = { + val entityType = clazz.getSimpleName.toLowerCase + + log.info(s"Converting $entityType") + + val mapper = new ObjectMapper() with ScalaObjectMapper + mapper.registerModule(DefaultScalaModule) + + val rdd = spark.sparkContext + .textFile(s"$sourcePath/$entityType") + .map(s => mapper.readValue(s, clazz)) + .filter(r => r.getDataInfo != null && !r.getDataInfo.getDeletedbyinference); + + implicit val encoder: Encoder[T] = Encoders.kryo(clazz) + spark + .createDataset(rdd) + .as[T] + .write + .mode(SaveMode.Overwrite) + .save(s"$entityPath/$entityType") + } + */ + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala index af19b9698..fd06e7dea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholix.scala @@ -12,6 +12,8 @@ import org.apache.spark.sql.functions.count import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} +import scala.util.Try + object SparkCreateScholix { def main(args: Array[String]): Unit = { @@ -37,6 +39,8 @@ object SparkCreateScholix { log.info(s"summaryPath -> $summaryPath") val targetPath = parser.get("targetPath") log.info(s"targetPath -> $targetPath") + val dumpCitations = Try(parser.get("dumpCitations").toBoolean).getOrElse(false) + log.info(s"dumpCitations -> $dumpCitations") implicit val relEncoder: Encoder[Relation] = Encoders.kryo[Relation] implicit val summaryEncoder: Encoder[ScholixSummary] = Encoders.kryo[ScholixSummary] @@ -138,7 +142,7 @@ object SparkCreateScholix { val relatedEntitiesDS: Dataset[RelatedEntities] = spark.read .load(s"$targetPath/related_entities") .as[RelatedEntities] - .filter(r => r.relatedPublication > 0 || r.relatedDataset > 0) + .filter(r => dumpCitations || r.relatedPublication > 0 || r.relatedDataset > 0) relatedEntitiesDS .joinWith(summaryDS, relatedEntitiesDS("id").equalTo(summaryDS("_1")), "inner") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index b75f626d6..b4ee7e740 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -926,6 +926,24 @@ class MappersTest { // assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); } + @Test + void testROHub2() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub-modified.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + } + + @Test + void testRiunet() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("riunet.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + } + private void assertValidId(final String id) { // System.out.println(id); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 59311d5a7..33c32a9c7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1049,6 +1049,7 @@ dnet:pid_types @=@ dnet:pid_types @=@ who @=@ WHO Identifier dnet:pid_types @=@ dnet:pid_types @=@ drks @=@ DRKS Identifier dnet:pid_types @=@ dnet:pid_types @=@ handle @=@ Handle dnet:pid_types @=@ dnet:pid_types @=@ data.europa.eu @=@ EU Persistent URL +dnet:pid_types @=@ dnet:pid_types @=@ w3id @=@ w3id.org dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/ACM @=@ An ACM classification term that can be associated to your publications dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/ARXIV @=@ An ARXIV classification term that can be associated to your publications dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/SUBJECT/DDC @=@ A Dewey Decimal classification term (DDC) that can be associated to your publications diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/riunet.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/riunet.xml new file mode 100644 index 000000000..57c1c51b5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/riunet.xml @@ -0,0 +1,93 @@ + + +
+ oai:riunet.upv.es:10251/178464 + 2022-05-10T09:12:14Z + com_10251_3822 + col_10251_169956 + 2022-09-01T07:51:12.657Z + od______1560::8f7a139735f493882bb0f4abceb6e200 + od______1560::8f7a139735f493882bb0f4abceb6e200 + 2019-03-27T15:15:22.22Z + riunet________ +
+ + + 10.4995/ijpme.2020.12944 + + 10251/148537 + + + journal article + VoR + + http://creativecommons.org/licenses/by-nc-nd/4.0/ + open access + + + Production planning in 3D printing factories + + + [EN] Production planning in 3D printing factories brings new challenges among which the scheduling of parts to be produced stands out. A main issue is to increase the efficiency of the plant and 3D printers productivity. Planning, scheduling, and nesting in 3D printing are recurrent problems in the search for new techniques to promote the development of this technology. In this work, we address the problem for the suppliers that have to schedule their daily production. This problem is part of the LONJA3D model, a managed 3D printing market where the parts ordered by the customers are reorganized into new batches so that suppliers can optimize their production capacity. In this paper, we propose a method derived from the design of combinatorial auctions to solve the nesting problem in 3D printing. First, we propose the use of a heuristic to create potential manufacturing batches. Then, we compute the expected return for each batch. The selected batch should generate the highest income. Several experiments have been tested to validate the process. This method is a first approach to the planning problem in 3D printing and further research is proposed to improve the procedure. + This research has been partially financed by the project: “Lonja de Impresión 3D para la Industria 4.0 y la Empresa Digital (LONJA3D)” funded by the Regional Government of Castile and Leon and the European Regional Development Fund (ERDF, FEDER) with grant VA049P17. + + eng + Universitat Politècnica de València + + application/pdf + 716912 + + + + Junta de Castilla y León + http://dx.doi.org/10.13039/501100014180 + VA049P17 + + + + + De Antón, J. + + + Senovilla, J. + + + González, J.M. + + + Acebes, F. + + + Pajares, J. + + + + + 2020-07-18 + + + Additive manufacturing + Production planning + Packing problem + Optimization + Nesting + + + 10.4995/ijpme.2020.12944 + 10251/148537 + 0038 + 2020-07-18 + OPEN + eng + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub-modified.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub-modified.xml new file mode 100644 index 000000000..95d65ac8d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub-modified.xml @@ -0,0 +1,85 @@ + + +
+ https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + 2022-09-02T09:55:35Z + rohub_data + ro-crate_data + fsh_____4119::afc7592914ae190a50570db90f55f9c2 + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + 2019-03-27T15:15:22.22Z + fsh_____4119 + 2019-04-17T16:04:20.586Z +
+ + + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + + + + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce + + + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d + + + RO-crate + + open access + + + Using biological effects tools to define Good Environmental Status under the European Union Marine Strategy Framework Directive + + + The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved. + + Poznań Supercomputing and Networking Center + + + + Generation Service + + + + + + + CNR-ISMAR + + + + + 2018-06-20T11:21:46Z + + + The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved. + + 2018 + + open access + + + 3.866 KB + + + Ecology + EOSC::RO-crate + + + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + other research product + + OPEN + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml index ca3ebe6c2..c85b55786 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/rohub.xml @@ -1,103 +1,85 @@ - + xmlns:oai="http://www.openarchives.org/OAI/2.0/" + xmlns:dri="http://www.driver-repository.eu/namespace/dri" + xmlns:xs="http://www.w3.org/2001/XMLSchema" + xmlns:dc="http://purl.org/dc/elements/1.1/">
- eosca5322f5f::4dd1aaf93ae136b65dc9ee4e6f76eac9 - 53aa90bf-c593-4e6d-923f-d4711ac4b0e1 - 2022-05-25T15:35:48.262Z - eosca5322f5f - 53aa90bf-c593-4e6d-923f-d4711ac4b0e1 - 2022-05-25T15:35:38Z + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + 2022-09-02T09:55:35Z rohub_data ro-crate_data - 2022-05-25T15:36:11.094Z + fsh_____4119::afc7592914ae190a50570db90f55f9c2 + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + 2019-03-27T15:15:22.22Z + fsh_____4119 + 2019-04-17T16:04:20.586Z
- - https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1 - - http://api.rohub.org/api/ros/53aa90bf-c593-4e6d-923f-d4711ac4b0e1/ - + + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + - https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/24fae96f-f986-46e1-bfd0-a21ca20ff0ce - https://github.com/NordicESMhub/RELIANCE/blob/main/content/science/notebooks/air_quality_lockdown.ipynb - https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca/resources/6d3427a8-352e-49f4-9796-f618c44dc16d - https://nordicesmhub.github.io/RELIANCE/science/notebooks/air_quality_lockdown.html + RO-crate + + open access + + + Using biological effects tools to define Good Environmental Status under the European Union Marine Strategy Framework Directive + + + The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved. + + Poznań Supercomputing and Networking Center + + + + Generation Service + + + - Anne Fouilloux + CNR-ISMAR - 2021-12-19T21:18:33Z + 2018-06-20T11:21:46Z - The COVID-19 pandemic has led to significant reductions in economic activity, especially during lockdowns. Several studies has shown that the concentration of nitrogen dioxyde and particulate matter levels have reduced during lockdown events. Reductions in transportation sector emissions are most likely largely responsible for the NO2 anomalies. In this study, we analyze the impact of lockdown events on the air quality using data from Copernicus Atmosphere Monitoring Service over Europe and at selected locations. + The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that "Concentrations of contaminants are at levels not giving rise to pollution effects" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved. - - - European Commission - 10.13039/501100000781 - 101017502 - Research Lifecycle Management for Earth Science Communities and Copernicus Users - - - MIT License - University of Oslo - 2021 - RO-crate + 2018 open access - 11.971 MB + 3.866 KB - Applied sciences - Meteorology + Ecology EOSC::RO-crate - - Impact of the Covid-19 Lockdown on Air quality over Europe - - - https://w3id.org/ro-id/53aa90bf-c593-4e6d-923f-d4711ac4b0e1 - 0048 + + https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca + other research product OPEN - https://opensource.org/licenses/MIT - und - - + + + - - - - https%3A%2F%2Fapi.rohub.org%2Fapi%2Foai2d%2F - 53aa90bf-c593-4e6d-923f-d4711ac4b0e1 - 2022-05-25T15:35:38Z - - - - - false - false - 0.9 - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 86a155292..be5dda411 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -81,6 +81,21 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + public void testRiunet() throws IOException, TransformerException { + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final Publication p = load("riunet.json", Publication.class); + + final JoinedEntity je = new JoinedEntity<>(p); + final String record = xmlRecordFactory.build(je); + assertNotNull(record); + testRecordTransformation(record); + } + + @Test public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException { final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml")); @@ -114,6 +129,8 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + + @Test void testDoiUrlNormalization() throws MalformedURLException { diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/riunet.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/riunet.json new file mode 100644 index 000000000..62f00a503 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/riunet.json @@ -0,0 +1,470 @@ +{ + "collectedfrom": [ + { + "key": "10|opendoar____::3a20f62a0af1aa152670bab3c602feed", + "value": "RiuNet", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": 1662543204165, + "id": "50|od______1560::8f7a139735f493882bb0f4abceb6e200", + "originalId": [ + "50|od______1560::8f7a139735f493882bb0f4abceb6e200", + "oai:riunet.upv.es:10251/178464" + ], + "pid": [ + { + "value": "10251/178464", + "qualifier": { + "classid": "handle", + "classname": "Handle", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofcollection": "2019-03-27T15:15:22.22Z", + "dateoftransformation": "2022-09-01T07:51:12.657Z", + "extraInfo": [], + "oaiprovenance": null, + "processingchargeamount": null, + "processingchargecurrency": null, + "measures": null, + "author": [ + { + "fullname": "Nieto Nieto, Justo", + "name": "Justo", + "surname": "Nieto Nieto", + "rank": 1, + "pid": [], + "affiliation": [] + } + ], + "resulttype": { + "classid": "publication", + "classname": "publication", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "language": { + "classid": "spa", + "classname": "Spanish; Castilian", + "schemeid": "dnet:languages", + "schemename": "dnet:languages" + }, + "country": [], + "subject": [ + { + "value": "Justo Nieto Nieto (Discursos)", + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Universitat Politècnica de València (UPV)", + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Presentación inaugural", + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Curso académico 1990-91", + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Discurso inaugural", + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Inaugural speech", + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Inaugural presentation", + "qualifier": { + "classid": "", + "classname": "", + "schemeid": "", + "schemename": "" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "title": [ + { + "value": "Discurso de inauguración del curso academico 1990-1991 de la Universitat Politècnica de València", + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "relevantdate": [ + { + "value": "1991", + "qualifier": { + "classid": "Issued", + "classname": "Issued", + "schemeid": "dnet:dataCite_date", + "schemename": "dnet:dataCite_date" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "description": [ + { + "value": "[ES] Discurso de Justo Nieto en el acto de inauguración del curso académico 1990-1991", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "[EN] Inaugural speech by Justo Nieto at the opening ceremony of the 1990-1991 academic year", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "1991-01-01", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "publisher": null, + "embargoenddate": null, + "source": [], + "fulltext": [], + "format": [ + { + "value": "application/pdf", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "5055377", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "contributor": [], + "resourcetype": { + "classid": "lecture", + "classname": "lecture", + "schemeid": "dnet:dataCite_resource", + "schemename": "dnet:dataCite_resource" + }, + "coverage": [], + "bestaccessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes" + }, + "context": [], + "externalReference": [], + "instance": [ + { + "license": null, + "accessright": { + "classid": "OPEN", + "classname": "Open Access", + "schemeid": "dnet:access_modes", + "schemename": "dnet:access_modes", + "openAccessRoute": null + }, + "instancetype": { + "classid": "0038", + "classname": "Other literature type", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "hostedby": { + "key": "10|opendoar____::3a20f62a0af1aa152670bab3c602feed", + "value": "RiuNet", + "dataInfo": null + }, + "url": null, + "distributionlocation": null, + "collectedfrom": { + "key": "10|opendoar____::3a20f62a0af1aa152670bab3c602feed", + "value": "RiuNet", + "dataInfo": null + }, + "pid": [ + { + "value": "10251/178464", + "qualifier": { + "classid": "handle", + "classname": "Handle", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "alternateIdentifier": [], + "dateofacceptance": { + "value": "1991-01-01", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:crosswalk:repository", + "classname": "sysimport:crosswalk:repository", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "processingchargeamount": null, + "processingchargecurrency": null, + "refereed": { + "classid": "UNKNOWN", + "classname": "Unknown", + "schemeid": "dnet:review_levels", + "schemename": "dnet:review_levels" + }, + "measures": null + } +] +} \ No newline at end of file