forked from D-Net/dnet-hadoop
[AFFILIATION PROPAGATION] Applied some SonarLint suggestions
This commit is contained in:
parent
4c70201412
commit
c7c0c3187b
|
@ -30,25 +30,17 @@ import scala.Tuple2;
|
||||||
|
|
||||||
public class PrepareInfo implements Serializable {
|
public class PrepareInfo implements Serializable {
|
||||||
|
|
||||||
// leggo le relazioni e seleziono quelle fra result ed organizzazioni
|
|
||||||
// raggruppo per result e salvo
|
|
||||||
// r => {o1, o2, o3}
|
|
||||||
|
|
||||||
// leggo le relazioni fra le organizzazioni e creo la gerarchia delle parentele:
|
|
||||||
// hashMap key organizzazione -> value tutti i suoi padri
|
|
||||||
// o => {p1, p2}
|
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class);
|
private static final Logger log = LoggerFactory.getLogger(PrepareInfo.class);
|
||||||
|
|
||||||
// associate orgs with all their parent
|
// associate orgs with all their parent
|
||||||
private static final String relOrgQuery = "SELECT target key, collect_set(source) as valueSet " +
|
private static final String ORGANIZATION_ORGANIZATION_QUERY = "SELECT target key, collect_set(source) as valueSet " +
|
||||||
"FROM relation " +
|
"FROM relation " +
|
||||||
"WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() +
|
"WHERE lower(relclass) = '" + ModelConstants.IS_PARENT_OF.toLowerCase() +
|
||||||
"' and datainfo.deletedbyinference = false " +
|
"' and datainfo.deletedbyinference = false " +
|
||||||
"GROUP BY target";
|
"GROUP BY target";
|
||||||
|
|
||||||
private static final String relResQuery = "SELECT source key, collect_set(target) as valueSet " +
|
//associates results with all the orgs they are affiliated to
|
||||||
|
private static final String RESULT_ORGANIZATION_QUERY = "SELECT source key, collect_set(target) as valueSet " +
|
||||||
"FROM relation " +
|
"FROM relation " +
|
||||||
"WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() +
|
"WHERE lower(relclass) = '" + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase() +
|
||||||
"' and datainfo.deletedbyinference = false " +
|
"' and datainfo.deletedbyinference = false " +
|
||||||
|
@ -101,7 +93,7 @@ public class PrepareInfo implements Serializable {
|
||||||
relation.createOrReplaceTempView("relation");
|
relation.createOrReplaceTempView("relation");
|
||||||
|
|
||||||
spark
|
spark
|
||||||
.sql(relOrgQuery)
|
.sql(ORGANIZATION_ORGANIZATION_QUERY)
|
||||||
.as(Encoders.bean(KeyValueSet.class))
|
.as(Encoders.bean(KeyValueSet.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
@ -109,7 +101,7 @@ public class PrepareInfo implements Serializable {
|
||||||
.json(childParentOrganizationPath);
|
.json(childParentOrganizationPath);
|
||||||
|
|
||||||
spark
|
spark
|
||||||
.sql(relResQuery)
|
.sql(RESULT_ORGANIZATION_QUERY)
|
||||||
.as(Encoders.bean(KeyValueSet.class))
|
.as(Encoders.bean(KeyValueSet.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
@ -130,7 +122,7 @@ public class PrepareInfo implements Serializable {
|
||||||
"' and datainfo.deletedbyinference = false")
|
"' and datainfo.deletedbyinference = false")
|
||||||
.as(Encoders.STRING());
|
.as(Encoders.STRING());
|
||||||
|
|
||||||
// prendo dalla join i risultati che hanno solo il lato sinistro: sono foglie
|
// takes from the join the entities having only the left hand side: the leaves. Saves them
|
||||||
children
|
children
|
||||||
.joinWith(parent, children.col("child").equalTo(parent.col("parent")), "left")
|
.joinWith(parent, children.col("child").equalTo(parent.col("parent")), "left")
|
||||||
.map((MapFunction<Tuple2<String, String>, String>) value -> {
|
.map((MapFunction<Tuple2<String, String>, String>) value -> {
|
||||||
|
|
|
@ -32,6 +32,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
public class SparkResultToOrganizationFromSemRel implements Serializable {
|
public class SparkResultToOrganizationFromSemRel implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class);
|
||||||
private static final int MAX_ITERATION = 5;
|
private static final int MAX_ITERATION = 5;
|
||||||
|
public static final String NEW_RELATION_PATH = "/newRelation";
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
@ -120,11 +121,11 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
|
||||||
iteration++;
|
iteration++;
|
||||||
StepActions
|
StepActions
|
||||||
.execStep(
|
.execStep(
|
||||||
spark, graphPath, workingPath + "/newRelation",
|
spark, graphPath, workingPath + NEW_RELATION_PATH,
|
||||||
leavesPath, childParentPath, resultOrganizationPath);
|
leavesPath, childParentPath, resultOrganizationPath);
|
||||||
StepActions
|
StepActions
|
||||||
.prepareForNextStep(
|
.prepareForNextStep(
|
||||||
spark, workingPath + "/newRelation", resultOrganizationPath, leavesPath,
|
spark, workingPath + NEW_RELATION_PATH, resultOrganizationPath, leavesPath,
|
||||||
childParentPath, workingPath + "/leaves", workingPath + "/resOrg");
|
childParentPath, workingPath + "/leaves", workingPath + "/resOrg");
|
||||||
moveOutput(spark, workingPath, leavesPath, resultOrganizationPath);
|
moveOutput(spark, workingPath, leavesPath, resultOrganizationPath);
|
||||||
leavesCount = readPath(spark, leavesPath, Leaves.class).count();
|
leavesCount = readPath(spark, leavesPath, Leaves.class).count();
|
||||||
|
@ -154,7 +155,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
|
||||||
propagationCounter.getNotReachedFirstParent().add(1);
|
propagationCounter.getNotReachedFirstParent().add(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
addNewRelations(spark, workingPath + "/newRelation", outputPath);
|
addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void moveOutput(SparkSession spark, String workingPath, String leavesPath,
|
private static void moveOutput(SparkSession spark, String workingPath, String leavesPath,
|
||||||
|
|
|
@ -27,7 +27,6 @@ import scala.Tuple2;
|
||||||
public class StepActions implements Serializable {
|
public class StepActions implements Serializable {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
private static final Logger log = LoggerFactory.getLogger(StepActions.class);
|
|
||||||
|
|
||||||
public static void execStep(SparkSession spark,
|
public static void execStep(SparkSession spark,
|
||||||
String graphPath, String newRelationPath,
|
String graphPath, String newRelationPath,
|
||||||
|
@ -185,10 +184,9 @@ public class StepActions implements Serializable {
|
||||||
"GROUP BY resId")
|
"GROUP BY resId")
|
||||||
.as(Encoders.bean(KeyValueSet.class));
|
.as(Encoders.bean(KeyValueSet.class));
|
||||||
|
|
||||||
// resultParent.foreach((ForeachFunction<KeyValueSet>)kv ->
|
|
||||||
// System.out.println(OBJECT_MAPPER.writeValueAsString(kv)));
|
|
||||||
// create new relations from result to organization for each result linked to a leaf
|
// create new relations from result to organization for each result linked to a leaf
|
||||||
Dataset<Relation> tmp = resultParent
|
return resultParent
|
||||||
.flatMap(
|
.flatMap(
|
||||||
(FlatMapFunction<KeyValueSet, Relation>) v -> v
|
(FlatMapFunction<KeyValueSet, Relation>) v -> v
|
||||||
.getValueSet()
|
.getValueSet()
|
||||||
|
@ -206,8 +204,8 @@ public class StepActions implements Serializable {
|
||||||
.collect(Collectors.toList())
|
.collect(Collectors.toList())
|
||||||
.iterator(),
|
.iterator(),
|
||||||
Encoders.bean(Relation.class));
|
Encoders.bean(Relation.class));
|
||||||
tmp.foreach((ForeachFunction<Relation>) r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
|
||||||
return tmp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue