forked from D-Net/dnet-hadoop
Add basic tests for affiliation relations
parent bc7b00bcd1
commit c2998a14e8
@@ -5,8 +5,11 @@ import static org.junit.jupiter.api.Assertions.*;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
@@ -26,7 +29,6 @@ import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Result;

public class PrepareAffiliationRelationsTest {
@@ -35,6 +37,7 @@ public class PrepareAffiliationRelationsTest {

	private static SparkSession spark;

	private static Path workingDir;
+	private static final String ID_PREFIX = "50|doi_________::";
	private static final Logger log = LoggerFactory
		.getLogger(PrepareAffiliationRelationsTest.class);
@@ -69,71 +72,64 @@ public class PrepareAffiliationRelationsTest {

	@Test
	void testMatch() throws Exception {

		String affiliationRelationsPath = getClass()
			.getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
			.getPath();

+		String outputPath = workingDir.toString() + "/actionSet";

		PrepareAffiliationRelations
			.main(
				new String[] {
-					"-isSparkSessionManaged",
-					Boolean.FALSE.toString(),
-					"-inputPath",
-					affiliationRelationsPath,
-					"-outputPath",
-					workingDir.toString() + "/actionSet"
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-inputPath", affiliationRelationsPath,
+					"-outputPath", outputPath
				});

		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

//		JavaRDD<Result> tmp = sc
//			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
//			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
//			.map(aa -> ((Result) aa.getPayload()));
//
//		assertEquals(4, tmp.count());
//
//		Dataset<Result> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Result.class));
//		verificationDataset.createOrReplaceTempView("result");
//
//		Dataset<Row> execVerification = spark
//			.sql(
//				"Select p.id oaid, mes.id, mUnit.value from result p " +
//					"lateral view explode(measures) m as mes " +
//					"lateral view explode(mes.unit) u as mUnit ");
//
//		Assertions.assertEquals(12, execVerification.count());
//		Assertions
//			.assertEquals(
//				"6.63451994567e-09", execVerification
//					.filter(
//						"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
//							"and id = 'influence'")
//					.select("value")
//					.collectAsList()
//					.get(0)
//					.getString(0));
//		Assertions
//			.assertEquals(
//				"0.348694533145", execVerification
//					.filter(
//						"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
//							"and id = 'popularity_alt'")
//					.select("value")
//					.collectAsList()
//					.get(0)
//					.getString(0));
//		Assertions
//			.assertEquals(
//				"2.16094680115e-09", execVerification
//					.filter(
//						"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
//							"and id = 'popularity'")
//					.select("value")
//					.collectAsList()
//					.get(0)
//					.getString(0));
//
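		// read the serialized action set back from the sequence file and map each AtomicAction payload onto a Relation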
		JavaRDD<Relation> tmp = sc
			.sequenceFile(outputPath, Text.class, Text.class)
			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
			.map(aa -> ((Relation) aa.getPayload()));

		for (Relation r : tmp.collect()) {
			System.out
				.println(
					r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t"
						+ r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t"
						+ r.getDataInfo().getInferred());
		}

		// count the total number of relations
		assertEquals(16, tmp.count());

		Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
		dataset.createOrReplaceTempView("result");

		Dataset<Row> execVerification = spark
			.sql("select r.relType, r.relClass, r.source, r.target, r.dataInfo.trust from result r");

		// verify that we have an equal number of relations in each direction
		Assertions
			.assertEquals(
				8, execVerification
					.filter("relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
					.collectAsList()
					.size());

		Assertions
			.assertEquals(
				8, execVerification
					.filter("relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
					.collectAsList()
					.size());

		// check the trust (confidence) value of a specific relation
		String sourceDOI = "10.1105/tpc.8.3.343";

		final String sourceOpenaireId = ID_PREFIX
			+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", sourceDOI));

		Assertions
			.assertEquals(
				"0.7071067812", execVerification
					.filter("source='" + sourceOpenaireId + "'")
					.collectAsList()
					.get(0)
					.getString(4));
	}
}
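
Note: the expected source id in the last assertion is derived from the DOI with the same helpers the test already imports (IdentifierFactory and CleaningFunctions). A minimal standalone sketch of that derivation, assuming the same dnet-hadoop classpath; the class name and main method are only for illustration:

import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

public class DoiToOpenaireId {

	// prefix used for OpenAIRE result ids minted from DOIs, as in the test
	private static final String ID_PREFIX = "50|doi_________::";

	// normalize the DOI, md5-hash it, and prepend the type prefix,
	// mirroring the expression used in the assertion above
	static String openaireId(String doi) {
		return ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", doi));
	}

	public static void main(String[] args) {
		// prints the id the test expects for the DOI it checks
		System.out.println(openaireId("10.1105/tpc.8.3.343"));
	}
}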