From 9cbe966b4adcc497798af69f1a01c47aab3b5926 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Sat, 29 Jun 2024 18:35:49 +0200 Subject: [PATCH] [AffiliationIngestion]refactoring --- .../bipaffiliations/PrepareAffiliationRelations.java | 6 +++--- .../webcrawl/CreateActionSetFromWebEntries.java | 5 ++--- .../bipaffiliations/PrepareAffiliationRelationsTest.java | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java index b0b757005..8f911e980 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java @@ -106,15 +106,15 @@ public class PrepareAffiliationRelations implements Serializable { spark, dataciteInputPath, collectedFromDatacite); List collectedFromWebCrawl = OafMapperUtils - .listKeyValues(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME); + .listKeyValues(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME); JavaPairRDD webCrawlRelations = prepareAffiliationRelations( - spark, webcrawlInputPath, collectedFromWebCrawl); + spark, webcrawlInputPath, collectedFromWebCrawl); crossrefRelations .union(pubmedRelations) .union(openAPCRelations) .union(dataciteRelations) - .union(webCrawlRelations) + .union(webCrawlRelations) .saveAsHadoopFile( outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java index f4b0cbc6f..533b90cd8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java @@ -1,15 +1,12 @@ package eu.dnetlib.dhp.actionmanager.webcrawl; - import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.actionmanager.Constants; -import io.netty.util.Constant; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; @@ -24,6 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.actionmanager.Constants; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; @@ -32,6 +30,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import io.netty.util.Constant; import scala.Tuple2; /** diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java index bb0188e43..bffe41ac7 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java @@ -88,7 +88,7 @@ public class PrepareAffiliationRelationsTest { "-pubmedInputPath", crossrefAffiliationRelationPath, "-openapcInputPath", crossrefAffiliationRelationPath, "-dataciteInputPath", crossrefAffiliationRelationPath, - "-webCrawlInputPath", crossrefAffiliationRelationPath, + "-webCrawlInputPath", crossrefAffiliationRelationPath, "-outputPath", outputPath });