forked from D-Net/dnet-hadoop
Add basic tests for affiliation relations
parent bc7b00bcd1
commit c2998a14e8
@@ -5,8 +5,11 @@ import static org.junit.jupiter.api.Assertions.*;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.List;

+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
+import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.spark.SparkConf;
@@ -26,7 +29,6 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.oaf.Result;

 public class PrepareAffiliationRelationsTest {

@@ -35,6 +37,7 @@ public class PrepareAffiliationRelationsTest {
     private static SparkSession spark;

     private static Path workingDir;
+    private static final String ID_PREFIX = "50|doi_________::";
     private static final Logger log = LoggerFactory
         .getLogger(PrepareAffiliationRelationsTest.class);

@@ -69,71 +72,64 @@ public class PrepareAffiliationRelationsTest {

     @Test
     void testMatch() throws Exception {

         String affiliationRelationsPath = getClass()
             .getResource("/eu/dnetlib/dhp/actionmanager/bipaffiliations/doi_to_ror.json")
             .getPath();

+        String outputPath = workingDir.toString() + "/actionSet";

         PrepareAffiliationRelations
             .main(
                 new String[] {
-                    "-isSparkSessionManaged",
-                    Boolean.FALSE.toString(),
-                    "-inputPath",
-                    affiliationRelationsPath,
-                    "-outputPath",
-                    workingDir.toString() + "/actionSet"
+                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
+                    "-inputPath", affiliationRelationsPath,
+                    "-outputPath", outputPath
                 });

         final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

-        // JavaRDD<Result> tmp = sc
-        // .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
-        // .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
-        // .map(aa -> ((Result) aa.getPayload()));
-        //
-        // assertEquals(4, tmp.count());
-        //
-        // Dataset<Result> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Result.class));
-        // verificationDataset.createOrReplaceTempView("result");
-        //
-        // Dataset<Row> execVerification = spark
-        // .sql(
-        // "Select p.id oaid, mes.id, mUnit.value from result p " +
-        // "lateral view explode(measures) m as mes " +
-        // "lateral view explode(mes.unit) u as mUnit ");
-        //
-        // Assertions.assertEquals(12, execVerification.count());
-        // Assertions
-        // .assertEquals(
-        // "6.63451994567e-09", execVerification
-        // .filter(
-        // "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
-        // "and id = 'influence'")
-        // .select("value")
-        // .collectAsList()
-        // .get(0)
-        // .getString(0));
-        // Assertions
-        // .assertEquals(
-        // "0.348694533145", execVerification
-        // .filter(
-        // "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
-        // "and id = 'popularity_alt'")
-        // .select("value")
-        // .collectAsList()
-        // .get(0)
-        // .getString(0));
-        // Assertions
-        // .assertEquals(
-        // "2.16094680115e-09", execVerification
-        // .filter(
-        // "oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
-        // "and id = 'popularity'")
-        // .select("value")
-        // .collectAsList()
-        // .get(0)
-        // .getString(0));
-        //
+        JavaRDD<Relation> tmp = sc
+            .sequenceFile(outputPath, Text.class, Text.class)
+            .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
+            .map(aa -> ((Relation) aa.getPayload()));
+
+        for (Relation r : tmp.collect()) {
+            System.out.println(
+                r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred()
+            );
+        }
+
+        // count the number of relations
+        assertEquals(16, tmp.count());
+
+        Dataset<Relation> dataset = spark.createDataset(tmp.rdd(), Encoders.bean(Relation.class));
+        dataset.createOrReplaceTempView("result");
+
+        Dataset<Row> execVerification = spark.sql("select r.relType, r.relClass, r.source, r.target, r.dataInfo.trust from result r");
+
+        // verify that we have equal number of bi-directional relations
+        Assertions.assertEquals(8, execVerification
+            .filter(
+                "relClass='" + ModelConstants.HAS_AUTHOR_INSTITUTION + "'")
+            .collectAsList()
+            .size());
+
+        Assertions.assertEquals(8, execVerification
+            .filter(
+                "relClass='" + ModelConstants.IS_AUTHOR_INSTITUTION_OF + "'")
+            .collectAsList()
+            .size());
+
+        // check confidence value of a specific relation
+        String sourceDOI = "10.1105/tpc.8.3.343";
+        final String sourceOpenaireId = ID_PREFIX + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", sourceDOI));
+
+        Assertions.assertEquals("0.7071067812", execVerification
+            .filter(
+                "source='" + sourceOpenaireId + "'")
+            .collectAsList().get(0).getString(4));
     }
 }

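Note on the last assertion: it filters the verification view on an OpenAIRE identifier derived from a DOI. Below is a minimal sketch of that derivation, reusing the dhp-schemas utilities the test already imports (IdentifierFactory, CleaningFunctions). It assumes dhp-schemas is on the classpath; the class name DoiToOpenaireId is made up for illustration and is not part of this commit.

import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

// Illustrative helper (not part of the commit): builds the OpenAIRE result id
// the test expects for a DOI, i.e. "50|doi_________::" + md5(normalized DOI value).
public class DoiToOpenaireId {

    // same prefix the test declares as ID_PREFIX
    private static final String ID_PREFIX = "50|doi_________::";

    public static void main(String[] args) {
        String doi = "10.1105/tpc.8.3.343"; // the DOI checked in the test

        // normalize the pid value, hash it, and prepend the doi namespace prefix
        String openaireId = ID_PREFIX
            + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", doi));

        System.out.println(openaireId);
    }
}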