Indentation fixes

Serafeim Chatzopoulos 2023-07-17 16:04:21 +03:00
parent bc1a4611aa
commit be320ba3c1
7 changed files with 235 additions and 215 deletions

View File

@@ -3,7 +3,6 @@ package eu.dnetlib.dhp.actionmanager;
import java.util.Optional;
import eu.dnetlib.dhp.common.HdfsSupport;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@@ -12,6 +11,7 @@ import org.apache.spark.sql.SparkSession;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
@@ -94,6 +94,7 @@ public class Constants {
return s;
}
public static void removeOutputDir(SparkSession spark, String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
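
For context, this helper is typically invoked before a job writes its results, so re-runs start from a clean directory. A minimal, self-contained sketch of the calling pattern, assuming a hypothetical job name and output path:

import org.apache.spark.sql.SparkSession;
import eu.dnetlib.dhp.actionmanager.Constants;

public class CleanOutputSketch {
    public static void main(String[] args) {
        // hypothetical job name and output path, for illustration only
        SparkSession spark = SparkSession.builder().master("local[*]").appName("ExampleJob").getOrCreate();
        String outputPath = "/tmp/example-output";
        // clear leftovers from a previous run before writing new results;
        // delegates to HdfsSupport.remove(path, hadoopConfiguration)
        Constants.removeOutputDir(spark, outputPath);
        spark.stop();
    }
}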

View File

@@ -8,12 +8,6 @@ import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.actionmanager.Constants;
import eu.dnetlib.dhp.actionmanager.bipaffiliations.model.*;
import eu.dnetlib.dhp.actionmanager.ror.GenerateRorActionSetJob;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@@ -22,17 +16,23 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.spark.sql.Dataset;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.Constants;
import eu.dnetlib.dhp.actionmanager.bipaffiliations.model.*;
import eu.dnetlib.dhp.actionmanager.ror.GenerateRorActionSetJob;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.Tuple2;
@@ -79,7 +79,8 @@ public class PrepareAffiliationRelations implements Serializable {
});
}
private static <I extends Result> void prepareAffiliationRelations(SparkSession spark, String inputPath, String outputPath) {
private static <I extends Result> void prepareAffiliationRelations(SparkSession spark, String inputPath,
String outputPath) {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@@ -89,15 +90,23 @@ public class PrepareAffiliationRelations implements Serializable {
.map(item -> OBJECT_MAPPER.readValue(item, AffiliationRelationDeserializer.class));
// convert affiliation to an internal representation
Dataset<AffiliationRelationModel> affiliationRelations =
spark.createDataset(
affiliationRelationsDeserializeRDD.flatMap(entry ->
entry.getMatchings().stream().flatMap(matching ->
matching.getRorId().stream().map( rorId -> new AffiliationRelationModel(
Dataset<AffiliationRelationModel> affiliationRelations = spark
.createDataset(
affiliationRelationsDeserializeRDD
.flatMap(
entry -> entry
.getMatchings()
.stream()
.flatMap(
matching -> matching
.getRorId()
.stream()
.map(
rorId -> new AffiliationRelationModel(
entry.getDoi(),
rorId,
matching.getConfidence()
))).collect(Collectors.toList())
matching.getConfidence())))
.collect(Collectors.toList())
.iterator())
.rdd(),
Encoders.bean(AffiliationRelationModel.class));
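
The reindented block above is Spark's standard bean-encoder pattern: flatMap the deserialized records into flat model beans, then bridge the JavaRDD to a typed Dataset via .rdd(). A self-contained sketch of that conversion, with an illustrative Pair bean standing in for AffiliationRelationModel:

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class BeanDatasetSketch {
    // illustrative bean; Encoders.bean needs a public class with a no-arg constructor and getters/setters
    public static class Pair implements java.io.Serializable {
        private String doi;
        private String rorId;
        public Pair() {}
        public Pair(String doi, String rorId) { this.doi = doi; this.rorId = rorId; }
        public String getDoi() { return doi; }
        public void setDoi(String v) { doi = v; }
        public String getRorId() { return rorId; }
        public void setRorId(String v) { rorId = v; }
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate();
        JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
        JavaRDD<Pair> javaRdd = sc.parallelize(Arrays.asList(new Pair("10.1234/abc", "https://ror.org/example")));
        // createDataset expects a Scala RDD, hence the .rdd() bridge seen in the diff above
        Dataset<Pair> ds = spark.createDataset(javaRdd.rdd(), Encoders.bean(Pair.class));
        ds.show();
        spark.stop();
    }
}
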
@@ -107,19 +116,22 @@ public class PrepareAffiliationRelations implements Serializable {
.flatMap((FlatMapFunction<AffiliationRelationModel, Relation>) affRel -> {
// DOI to OpenAIRE id
final String paperId = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", affRel.getDoi()));
final String paperId = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", affRel.getDoi()));
// ROR id to OpenAIRE id
final String affId = GenerateRorActionSetJob.calculateOpenaireId(affRel.getRorId());
Qualifier qualifier = OafMapperUtils.qualifier(
Qualifier qualifier = OafMapperUtils
.qualifier(
BIP_AFFILIATIONS_CLASSID,
BIP_AFFILIATIONS_CLASSNAME,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS);
// format data info; setting `confidence` into relation's `trust`
DataInfo dataInfo = OafMapperUtils.dataInfo(
DataInfo dataInfo = OafMapperUtils
.dataInfo(
false,
BIP_INFERENCE_PROVENANCE,
true,
@@ -141,8 +153,10 @@ public class PrepareAffiliationRelations implements Serializable {
}
private static List<Relation> getAffiliationRelationPair(String paperId, String affId, DataInfo dataInfo) {
return Arrays.asList(
OafMapperUtils.getRelation(
return Arrays
.asList(
OafMapperUtils
.getRelation(
paperId,
affId,
ModelConstants.RESULT_ORGANIZATION,
@@ -151,7 +165,8 @@ public class PrepareAffiliationRelations implements Serializable {
null,
dataInfo,
null),
OafMapperUtils.getRelation(
OafMapperUtils
.getRelation(
affId,
paperId,
ModelConstants.RESULT_ORGANIZATION,
@@ -159,7 +174,6 @@ public class PrepareAffiliationRelations implements Serializable {
ModelConstants.IS_AUTHOR_INSTITUTION_OF,
null,
dataInfo,
null)
);
null));
}
}
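
The helper above emits each affiliation twice, once per direction, which is what the test below counts as bi-directional pairs. A schematic sketch of that symmetry (Rel is a stand-in for the project's Relation type, and the two relClass literals are assumed values of ModelConstants.HAS_AUTHOR_INSTITUTION and ModelConstants.IS_AUTHOR_INSTITUTION_OF):

public class RelPairSketch {
    // schematic stand-in for eu.dnetlib.dhp.schema.oaf.Relation
    static class Rel {
        final String source, target, relClass;
        Rel(String source, String target, String relClass) {
            this.source = source; this.target = target; this.relClass = relClass;
        }
    }

    // mirrors getAffiliationRelationPair: one affiliation yields two inverse relations
    static java.util.List<Rel> affiliationPair(String paperId, String affId) {
        return java.util.Arrays.asList(
            new Rel(paperId, affId, "hasAuthorInstitution"),   // result -> organization
            new Rel(affId, paperId, "isAuthorInstitutionOf")); // organization -> result
    }
}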

View File

@@ -1,11 +1,13 @@
package eu.dnetlib.dhp.actionmanager.bipaffiliations.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
package eu.dnetlib.dhp.actionmanager.bipaffiliations.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
@Data
public class AffiliationRelationDeserializer implements Serializable {
@JsonProperty("DOI")
@@ -23,4 +25,3 @@ public class AffiliationRelationDeserializer implements Serializable {
}
}

View File

@@ -1,11 +1,12 @@
package eu.dnetlib.dhp.actionmanager.bipaffiliations.model;
import java.io.Serializable;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.io.Serializable;
@Data
@AllArgsConstructor
public class AffiliationRelationModel implements Serializable {

View File

@@ -28,15 +28,8 @@ oozie.use.system.libpath=true
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
# I think this should be the oozie workflow directory
# oozieWorkflowPath=/user/ilias.kanellos/workflow_example/
# The workflow application path
wfAppPath=${oozieTopWfApplicationPath}
# The following is needed as a property of a workflow
oozie.wf.application.path=${oozieTopWfApplicationPath}
inputPath=/user/schatz/affiliations/data-v3.json
outputPath=/tmp/crossref-affiliations-output-v3
inputPath=/user/schatz/affiliations/data-v3.1.json
outputPath=/tmp/crossref-affiliations-output-v3.1
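
These properties parameterize the Oozie launcher. Once the workflow and this file are deployed under oozieTopWfApplicationPath, the job would typically be submitted with the standard Oozie CLI, e.g. oozie job -config job.properties -run, with OOZIE_URL (or an explicit -oozie flag) pointing at the cluster's Oozie server; the properties file name used here is an assumption.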

View File

@@ -1,4 +1,4 @@
<workflow-app name="BipFinderScore" xmlns="uri:oozie:workflow:0.5">
<workflow-app name="BipAffiliations" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
@@ -84,7 +84,7 @@
<master>yarn</master>
<mode>cluster</mode>
<name>Produces the atomic action with the inferred by BIP! affiliation relations from Crossref</name>
<class>eu.dnetlib.dhp.actionmanager.bipaffiliations.PrepareAffiliationRelations/class>
<class>eu.dnetlib.dhp.actionmanager.bipaffiliations.PrepareAffiliationRelations</class>
<jar>dhp-aggregation-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.actionmanager.bipaffiliations;
import static org.junit.jupiter.api.Assertions.*;
@@ -6,10 +7,6 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
@@ -29,6 +26,10 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
public class PrepareAffiliationRelationsTest {
@@ -105,16 +106,21 @@ public class PrepareAffiliationRelationsTest {
Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
dataset.createOrReplaceTempView("result");
Dataset<Row> execVerification = spark.sql("select r.relType, r.relClass, r.source, r.target, r.dataInfo.trust from result r");
Dataset<Row> execVerification = spark
.sql("select r.relType, r.relClass, r.source, r.target, r.dataInfo.trust from result r");
// verify that we have equal number of bi-directional relations
Assertions.assertEquals(8, execVerification
Assertions
.assertEquals(
8, execVerification
.filter(
"relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
.collectAsList()
.size());
Assertions.assertEquals(8, execVerification
Assertions
.assertEquals(
8, execVerification
.filter(
"relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
.collectAsList()
@@ -123,13 +129,17 @@ public class PrepareAffiliationRelationsTest {
// check confidence value of a specific relation
String sourceDOI = "10.1105/tpc.8.3.343";
final String sourceOpenaireId = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", sourceDOI));
final String sourceOpenaireId = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", sourceDOI));
Assertions.assertEquals("0.7071067812", execVerification
Assertions
.assertEquals(
"0.7071067812", execVerification
.filter(
"source='" + sourceOpenaireId + "'")
.collectAsList().get(0).getString(4));
.collectAsList()
.get(0)
.getString(4));
}
}
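
The verification pattern in this test — register the relations as a temp view, query it with Spark SQL, then assert on the result — can be reduced to a minimal sketch reusing the test's own names (tmp, spark, Relation, ModelConstants); the equality check is illustrative:

// minimal sketch of the temp-view verification pattern used above
Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
dataset.createOrReplaceTempView("result");
long forward = spark
    .sql("select * from result where relClass = '" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
    .count();
long backward = spark
    .sql("select * from result where relClass = '" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
    .count();
Assertions.assertEquals(forward, backward); // relations must come in bidirectional pairs

Using count() rather than collectAsList().size() keeps the tally on the executors instead of materializing every row on the driver.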