diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeEntitiesComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeEntitiesComparator.java index 5f212c242..8e50a0e69 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeEntitiesComparator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeEntitiesComparator.java @@ -1,4 +1,3 @@ - package eu.dnetlib.dhp.schema.oaf.utils; import java.util.*; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index f63bfcb48..67e07ba59 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -46,11 +46,11 @@ public class CollectorWorker extends ReportingJob { private final HttpClientParams clientParams; public CollectorWorker( - final ApiDescriptor api, - final FileSystem fileSystem, - final MDStoreVersion mdStoreVersion, - final HttpClientParams clientParams, - final AggregatorReport report) { + final ApiDescriptor api, + final FileSystem fileSystem, + final MDStoreVersion mdStoreVersion, + final HttpClientParams clientParams, + final AggregatorReport report) { super(report); this.api = api; this.fileSystem = fileSystem; @@ -69,25 +69,22 @@ public class CollectorWorker extends ReportingJob { scheduleReport(counter); try (SequenceFile.Writer writer = SequenceFile - .createWriter( - this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer - .keyClass(IntWritable.class), - SequenceFile.Writer - .valueClass(Text.class), - SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) { + .createWriter(this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer + .keyClass(IntWritable.class), SequenceFile.Writer + .valueClass(Text.class), SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) { final IntWritable key = new IntWritable(counter.get()); final Text value = new Text(); plugin - .collect(this.api, this.report) - .forEach(content -> { - key.set(counter.getAndIncrement()); - value.set(content); - try { - writer.append(key, value); - } catch (final Throwable e) { - throw new RuntimeException(e); - } - }); + .collect(this.api, this.report) + .forEach(content -> { + key.set(counter.getAndIncrement()); + value.set(content); + try { + writer.append(key, value); + } catch (final Throwable e) { + throw new RuntimeException(e); + } + }); } catch (final Throwable e) { this.report.put(e.getClass().getName(), e.getMessage()); throw new CollectorException(e); @@ -115,36 +112,36 @@ public class CollectorWorker extends ReportingJob { private CollectorPlugin getCollectorPlugin() throws UnknownCollectorPluginException { switch (CollectorPlugin.NAME.valueOf(this.api.getProtocol())) { - case oai: - return new OaiCollectorPlugin(this.clientParams); - case rest_json2xml: - return new RestCollectorPlugin(this.clientParams); - case file: - return new FileCollectorPlugin(this.fileSystem); - case fileGzip: - return new FileGZipCollectorPlugin(this.fileSystem); - case baseDump: - return new BaseCollectorPlugin(this.fileSystem); - case gtr2Publications: - return new Gtr2PublicationsCollectorPlugin(this.clientParams); - case osfPreprints: - return new OsfPreprintsCollectorPlugin(this.clientParams); - case other: - final CollectorPlugin.NAME.OTHER_NAME plugin = Optional + case oai: + return new OaiCollectorPlugin(this.clientParams); + case rest_json2xml: + return new RestCollectorPlugin(this.clientParams); + case file: + return new FileCollectorPlugin(this.fileSystem); + case fileGzip: + return new FileGZipCollectorPlugin(this.fileSystem); + case baseDump: + return new BaseCollectorPlugin(this.fileSystem); + case gtr2Publications: + return new Gtr2PublicationsCollectorPlugin(this.clientParams); + case osfPreprints: + return new OsfPreprintsCollectorPlugin(this.clientParams); + case other: + final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(this.api.getParams().get("other_plugin_type")) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf) .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type")); - switch (plugin) { - case mdstore_mongodb_dump: - return new MongoDbDumpCollectorPlugin(this.fileSystem); - case mdstore_mongodb: - return new MDStoreCollectorPlugin(); - default: - throw new UnknownCollectorPluginException("plugin is not managed: " + plugin); - } + switch (plugin) { + case mdstore_mongodb_dump: + return new MongoDbDumpCollectorPlugin(this.fileSystem); + case mdstore_mongodb: + return new MDStoreCollectorPlugin(); default: - throw new UnknownCollectorPluginException("protocol is not managed: " + this.api.getProtocol()); + throw new UnknownCollectorPluginException("plugin is not managed: " + plugin); + } + default: + throw new UnknownCollectorPluginException("protocol is not managed: " + this.api.getProtocol()); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java index b0787eb45..fdc9df06f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java @@ -31,19 +31,17 @@ public class OsfPreprintsCollectorPlugin implements CollectorPlugin { final String baseUrl = api.getBaseUrl(); final int pageSize = Optional - .ofNullable(api.getParams().get("pageSize")) - .filter(StringUtils::isNotBlank) - .map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT)) - .orElse(PAGE_SIZE_VALUE_DEFAULT); + .ofNullable(api.getParams().get("pageSize")) + .filter(StringUtils::isNotBlank) + .map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT)) + .orElse(PAGE_SIZE_VALUE_DEFAULT); - if (StringUtils.isBlank(baseUrl)) { - throw new CollectorException("Param 'baseUrl' is null or empty"); - } + if (StringUtils.isBlank(baseUrl)) { throw new CollectorException("Param 'baseUrl' is null or empty"); } final OsfPreprintsIterator it = new OsfPreprintsIterator(baseUrl, pageSize, getClientParams()); return StreamSupport - .stream(Spliterators.spliteratorUnknownSize(it, Spliterator.ORDERED), false); + .stream(Spliterators.spliteratorUnknownSize(it, Spliterator.ORDERED), false); } public HttpClientParams getClientParams() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java index 76adba1a8..de18ef37f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java @@ -34,9 +34,9 @@ public class OsfPreprintsIterator implements Iterator { private final Queue recordQueue = new PriorityBlockingQueue<>(); public OsfPreprintsIterator( - final String baseUrl, - final int pageSize, - final HttpClientParams clientParams) { + final String baseUrl, + final int pageSize, + final HttpClientParams clientParams) { this.clientParams = clientParams; this.baseUrl = baseUrl; @@ -54,8 +54,7 @@ public class OsfPreprintsIterator implements Iterator { @Override public boolean hasNext() { synchronized (this.recordQueue) { - while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl) - && this.currentUrl.startsWith("http")) { + while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl) && this.currentUrl.startsWith("http")) { try { this.currentUrl = downloadPage(this.currentUrl); } catch (final CollectorException e) { @@ -64,9 +63,7 @@ public class OsfPreprintsIterator implements Iterator { } } - if (!this.recordQueue.isEmpty()) { - return true; - } + if (!this.recordQueue.isEmpty()) { return true; } return false; } @@ -115,9 +112,7 @@ public class OsfPreprintsIterator implements Iterator { } private Document downloadUrl(final String url, final int attempt) throws CollectorException { - if (attempt > MAX_ATTEMPTS) { - throw new CollectorException("Max Number of attempts reached, url:" + url); - } + if (attempt > MAX_ATTEMPTS) { throw new CollectorException("Max Number of attempts reached, url:" + url); } if (attempt > 0) { final int delay = (attempt * 5000); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json index 685d2e50e..4d85cf26b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json @@ -35,18 +35,6 @@ "paramRequired": true } , - { - "paramName": "wip", - "paramLongName": "webCrawlInputPath", - "paramDescription": "the path to get the input data from Web Crawl", - "paramRequired": true - }, - { - "paramName": "pip", - "paramLongName": "publisherInputPath", - "paramDescription": "the path to get the input data from publishers", - "paramRequired": true - }, { "paramName": "o", "paramLongName": "outputPath", diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java index 664b84d5a..fe2274c89 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java @@ -50,10 +50,9 @@ public class OsfPreprintsCollectorPluginTest { @Test @Disabled void test_one() throws CollectorException { - this.plugin - .collect(this.api, new AggregatorReport()) - .limit(1) - .forEach(log::info); + this.plugin.collect(this.api, new AggregatorReport()) + .limit(1) + .forEach(log::info); } @Test @@ -96,8 +95,7 @@ public class OsfPreprintsCollectorPluginTest { final HttpConnector2 connector = new HttpConnector2(); try { - final String res = connector - .getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json"); + final String res = connector.getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json"); System.out.println(res); fail(); } catch (final Throwable e) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 281ed0ff0..18c9ce18d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -29,7 +29,7 @@ class JsonPathTest { Assertions.assertNotNull(row); Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); - System.out.println("row = " + row.getAs("country")); + System.out.println("row = " + row.getAs("countrytitle")); } @Test