[AffiliationFromPublisher] Refactoring after compilation

This commit is contained in:
Miriam Baglioni 2024-08-07 11:17:56 +02:00
parent 907eeadce8
commit 42531afc3e
5 changed files with 59 additions and 54 deletions

View File

@ -89,11 +89,15 @@ public class PrepareAffiliationRelations implements Serializable {
isSparkSessionManaged,
spark -> {
Constants.removeOutputDir(spark, outputPath);
createActionSet(spark, crossrefInputPath, pubmedInputPath, openapcInputPath, dataciteInputPath, webcrawlInputPath, publisherInputPath, outputPath);
createActionSet(
spark, crossrefInputPath, pubmedInputPath, openapcInputPath, dataciteInputPath, webcrawlInputPath,
publisherInputPath, outputPath);
});
}
private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath, String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath, String outputPath) {
private static void createActionSet(SparkSession spark, String crossrefInputPath, String pubmedInputPath,
String openapcInputPath, String dataciteInputPath, String webcrawlInputPath, String publisherlInputPath,
String outputPath) {
List<KeyValue> collectedFromCrossref = OafMapperUtils
.listKeyValues(ModelConstants.CROSSREF_ID, "Crossref");
JavaPairRDD<Text, Text> crossrefRelations = prepareAffiliationRelations(
@ -137,7 +141,6 @@ public class PrepareAffiliationRelations implements Serializable {
private static JavaPairRDD<Text, Text> prepareAffiliationRelationFromPublisher(SparkSession spark, String inputPath,
List<KeyValue> collectedfrom) {
Dataset<Row> df = spark
.read()
.schema("`DOI` STRING, `Organizations` ARRAY<STRUCT<`RORid`:STRING,`Confidence`:DOUBLE>>")
@ -146,7 +149,6 @@ public class PrepareAffiliationRelations implements Serializable {
return getTextTextJavaPairRDD(collectedfrom, df.selectExpr("DOI", "Organizations as Matchings"));
}
private static <I extends Result> JavaPairRDD<Text, Text> prepareAffiliationRelations(SparkSession spark,

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.broker.oa.matchers.simple;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -16,7 +17,8 @@ class EnrichMissingAuthorOrcidTest {
final EnrichMissingAuthorOrcid matcher = new EnrichMissingAuthorOrcid();
@BeforeEach
void setUp() throws Exception {}
void setUp() throws Exception {
}
@Test
void testFindDifferences_1() {

View File

@ -23,7 +23,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class ConversionUtilsTest {
@BeforeEach
public void setUp() throws Exception {}
public void setUp() throws Exception {
}
@Test
public void testAllResultPids() {

View File

@ -20,7 +20,6 @@ import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import eu.dnetlib.dhp.oa.provision.model.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
@ -42,6 +41,7 @@ import com.google.common.collect.Sets;
import com.mycila.xmltool.XMLDoc;
import com.mycila.xmltool.XMLTag;
import eu.dnetlib.dhp.oa.provision.model.*;
import eu.dnetlib.dhp.schema.common.*;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Result;