code formatting

This commit is contained in:
Claudio Atzori 2024-09-30 15:13:23 +02:00
parent 3854fcc5e0
commit 6e0b6a886f
7 changed files with 135 additions and 125 deletions

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.schema.oaf.utils; package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.*; import java.util.*;

View File

@ -130,7 +130,8 @@ public class PrepareAffiliationRelations implements Serializable {
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class); outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
} }
private static JavaPairRDD<Text, Text> prepareAffiliationRelationFromPublisherNewModel(SparkSession spark, String inputPath, private static JavaPairRDD<Text, Text> prepareAffiliationRelationFromPublisherNewModel(SparkSession spark,
String inputPath,
List<KeyValue> collectedfrom) { List<KeyValue> collectedfrom) {
Dataset<Row> df = spark Dataset<Row> df = spark
@ -153,7 +154,6 @@ public class PrepareAffiliationRelations implements Serializable {
.json(inputPath) .json(inputPath)
.where("DOI is not null"); .where("DOI is not null");
return getTextTextJavaPairRDD(collectedfrom, df.selectExpr("DOI", "Organizations as Matchings")); return getTextTextJavaPairRDD(collectedfrom, df.selectExpr("DOI", "Organizations as Matchings"));
} }

View File

@ -69,9 +69,12 @@ public class CollectorWorker extends ReportingJob {
scheduleReport(counter); scheduleReport(counter);
try (SequenceFile.Writer writer = SequenceFile try (SequenceFile.Writer writer = SequenceFile
.createWriter(this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer .createWriter(
.keyClass(IntWritable.class), SequenceFile.Writer this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
.valueClass(Text.class), SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) { .keyClass(IntWritable.class),
SequenceFile.Writer
.valueClass(Text.class),
SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
final IntWritable key = new IntWritable(counter.get()); final IntWritable key = new IntWritable(counter.get());
final Text value = new Text(); final Text value = new Text();
plugin plugin

View File

@ -36,7 +36,9 @@ public class OsfPreprintsCollectorPlugin implements CollectorPlugin {
.map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT)) .map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT))
.orElse(PAGE_SIZE_VALUE_DEFAULT); .orElse(PAGE_SIZE_VALUE_DEFAULT);
if (StringUtils.isBlank(baseUrl)) { throw new CollectorException("Param 'baseUrl' is null or empty"); } if (StringUtils.isBlank(baseUrl)) {
throw new CollectorException("Param 'baseUrl' is null or empty");
}
final OsfPreprintsIterator it = new OsfPreprintsIterator(baseUrl, pageSize, getClientParams()); final OsfPreprintsIterator it = new OsfPreprintsIterator(baseUrl, pageSize, getClientParams());

View File

@ -54,7 +54,8 @@ public class OsfPreprintsIterator implements Iterator<String> {
@Override @Override
public boolean hasNext() { public boolean hasNext() {
synchronized (this.recordQueue) { synchronized (this.recordQueue) {
while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl) && this.currentUrl.startsWith("http")) { while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl)
&& this.currentUrl.startsWith("http")) {
try { try {
this.currentUrl = downloadPage(this.currentUrl); this.currentUrl = downloadPage(this.currentUrl);
} catch (final CollectorException e) { } catch (final CollectorException e) {
@ -63,7 +64,9 @@ public class OsfPreprintsIterator implements Iterator<String> {
} }
} }
if (!this.recordQueue.isEmpty()) { return true; } if (!this.recordQueue.isEmpty()) {
return true;
}
return false; return false;
} }
@ -112,7 +115,9 @@ public class OsfPreprintsIterator implements Iterator<String> {
} }
private Document downloadUrl(final String url, final int attempt) throws CollectorException { private Document downloadUrl(final String url, final int attempt) throws CollectorException {
if (attempt > MAX_ATTEMPTS) { throw new CollectorException("Max Number of attempts reached, url:" + url); } if (attempt > MAX_ATTEMPTS) {
throw new CollectorException("Max Number of attempts reached, url:" + url);
}
if (attempt > 0) { if (attempt > 0) {
final int delay = (attempt * 5000); final int delay = (attempt * 5000);

View File

@ -112,7 +112,6 @@ public class PrepareAffiliationRelationsTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Relation) aa.getPayload())); .map(aa -> ((Relation) aa.getPayload()));
// count the number of relations // count the number of relations
assertEquals(150, tmp.count());// 18 + 24 *3 + 30 * 2 = assertEquals(150, tmp.count());// 18 + 24 *3 + 30 * 2 =
@ -173,7 +172,6 @@ public class PrepareAffiliationRelationsTest {
+ IdentifierFactory.md5("https://ror.org/03265fv13") + "'") + IdentifierFactory.md5("https://ror.org/03265fv13") + "'")
.count()); .count());
Assertions Assertions
.assertEquals( .assertEquals(
3, execVerification 3, execVerification
@ -185,6 +183,5 @@ public class PrepareAffiliationRelationsTest {
+ IdentifierFactory.md5("https://ror.org/00a0n9e72") + "'") + IdentifierFactory.md5("https://ror.org/00a0n9e72") + "'")
.count()); .count());
} }
} }

View File

@ -50,7 +50,8 @@ public class OsfPreprintsCollectorPluginTest {
@Test @Test
@Disabled @Disabled
void test_one() throws CollectorException { void test_one() throws CollectorException {
this.plugin.collect(this.api, new AggregatorReport()) this.plugin
.collect(this.api, new AggregatorReport())
.limit(1) .limit(1)
.forEach(log::info); .forEach(log::info);
} }
@ -95,7 +96,8 @@ public class OsfPreprintsCollectorPluginTest {
final HttpConnector2 connector = new HttpConnector2(); final HttpConnector2 connector = new HttpConnector2();
try { try {
final String res = connector.getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json"); final String res = connector
.getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json");
System.out.println(res); System.out.println(res);
fail(); fail();
} catch (final Throwable e) { } catch (final Throwable e) {