|
|
|
@ -28,6 +28,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
|
|
|
|
|
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
|
|
|
|
|
|
|
|
public class SparkAtomicActionScoreJobTest {
|
|
|
|
|
|
|
|
|
@ -69,13 +70,9 @@ public class SparkAtomicActionScoreJobTest {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
void matchOne() throws Exception {
|
|
|
|
|
void testMatch() throws Exception {
|
|
|
|
|
String bipScoresPath = getClass()
|
|
|
|
|
.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
|
|
|
|
|
.getPath();
|
|
|
|
|
String inputPath = getClass()
|
|
|
|
|
.getResource(
|
|
|
|
|
"/eu/dnetlib/dhp/actionmanager/bipfinder/publication.json")
|
|
|
|
|
.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores_oid.json")
|
|
|
|
|
.getPath();
|
|
|
|
|
|
|
|
|
|
SparkAtomicActionScoreJob
|
|
|
|
@ -84,234 +81,57 @@ public class SparkAtomicActionScoreJobTest {
|
|
|
|
|
"-isSparkSessionManaged",
|
|
|
|
|
Boolean.FALSE.toString(),
|
|
|
|
|
"-inputPath",
|
|
|
|
|
inputPath,
|
|
|
|
|
"-bipScorePath",
|
|
|
|
|
bipScoresPath,
|
|
|
|
|
"-resultTableName",
|
|
|
|
|
"eu.dnetlib.dhp.schema.oaf.Publication",
|
|
|
|
|
"-outputPath",
|
|
|
|
|
workingDir.toString() + "/actionSet"
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
|
|
|
|
|
|
|
|
|
JavaRDD<Publication> tmp = sc
|
|
|
|
|
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
|
|
|
|
|
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
|
|
|
|
.map(aa -> ((Publication) aa.getPayload()));
|
|
|
|
|
|
|
|
|
|
assertEquals(1, tmp.count());
|
|
|
|
|
|
|
|
|
|
Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
|
|
|
|
|
verificationDataset.createOrReplaceTempView("publication");
|
|
|
|
|
|
|
|
|
|
Dataset<Row> execVerification = spark
|
|
|
|
|
.sql(
|
|
|
|
|
"Select p.id oaid, mes.id, mUnit.value from publication p " +
|
|
|
|
|
"lateral view explode(measures) m as mes " +
|
|
|
|
|
"lateral view explode(mes.unit) u as mUnit ");
|
|
|
|
|
|
|
|
|
|
Assertions.assertEquals(2, execVerification.count());
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb",
|
|
|
|
|
execVerification.select("oaid").collectAsList().get(0).getString(0));
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"1.47565045883e-08",
|
|
|
|
|
execVerification.filter("id = 'influence'").select("value").collectAsList().get(0).getString(0));
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"0.227515392",
|
|
|
|
|
execVerification.filter("id = 'popularity'").select("value").collectAsList().get(0).getString(0));
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
void matchOneWithTwo() throws Exception {
|
|
|
|
|
String bipScoresPath = getClass()
|
|
|
|
|
.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
|
|
|
|
|
.getPath();
|
|
|
|
|
String inputPath = getClass()
|
|
|
|
|
.getResource(
|
|
|
|
|
"/eu/dnetlib/dhp/actionmanager/bipfinder/publication_2.json")
|
|
|
|
|
.getPath();
|
|
|
|
|
|
|
|
|
|
SparkAtomicActionScoreJob
|
|
|
|
|
.main(
|
|
|
|
|
new String[] {
|
|
|
|
|
"-isSparkSessionManaged",
|
|
|
|
|
Boolean.FALSE.toString(),
|
|
|
|
|
"-inputPath",
|
|
|
|
|
inputPath,
|
|
|
|
|
"-bipScorePath",
|
|
|
|
|
bipScoresPath,
|
|
|
|
|
"-resultTableName",
|
|
|
|
|
"eu.dnetlib.dhp.schema.oaf.Publication",
|
|
|
|
|
"-outputPath",
|
|
|
|
|
workingDir.toString() + "/actionSet"
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
|
|
|
|
|
|
|
|
|
JavaRDD<Publication> tmp = sc
|
|
|
|
|
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
|
|
|
|
|
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
|
|
|
|
.map(aa -> ((Publication) aa.getPayload()));
|
|
|
|
|
|
|
|
|
|
assertEquals(1, tmp.count());
|
|
|
|
|
|
|
|
|
|
Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
|
|
|
|
|
verificationDataset.createOrReplaceTempView("publication");
|
|
|
|
|
|
|
|
|
|
Dataset<Row> execVerification = spark
|
|
|
|
|
.sql(
|
|
|
|
|
"Select p.id oaid, mes.id, mUnit.value from publication p " +
|
|
|
|
|
"lateral view explode(measures) m as mes " +
|
|
|
|
|
"lateral view explode(mes.unit) u as mUnit ");
|
|
|
|
|
|
|
|
|
|
Assertions.assertEquals(4, execVerification.count());
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb",
|
|
|
|
|
execVerification.select("oaid").collectAsList().get(0).getString(0));
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
2,
|
|
|
|
|
execVerification.filter("id = 'influence'").count());
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
2,
|
|
|
|
|
execVerification.filter("id = 'popularity'").count());
|
|
|
|
|
|
|
|
|
|
List<Row> tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList();
|
|
|
|
|
String tmp_influence = tmp_ds.get(0).getString(0);
|
|
|
|
|
assertTrue(
|
|
|
|
|
"1.47565045883e-08".equals(tmp_influence) ||
|
|
|
|
|
"1.98956540239e-08".equals(tmp_influence));
|
|
|
|
|
|
|
|
|
|
tmp_influence = tmp_ds.get(1).getString(0);
|
|
|
|
|
assertTrue(
|
|
|
|
|
"1.47565045883e-08".equals(tmp_influence) ||
|
|
|
|
|
"1.98956540239e-08".equals(tmp_influence));
|
|
|
|
|
|
|
|
|
|
assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0));
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
void matchTwo() throws Exception {
|
|
|
|
|
String bipScoresPath = getClass()
|
|
|
|
|
.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
|
|
|
|
|
.getPath();
|
|
|
|
|
String inputPath = getClass()
|
|
|
|
|
.getResource(
|
|
|
|
|
"/eu/dnetlib/dhp/actionmanager/bipfinder/publication_3.json")
|
|
|
|
|
.getPath();
|
|
|
|
|
|
|
|
|
|
SparkAtomicActionScoreJob
|
|
|
|
|
.main(
|
|
|
|
|
new String[] {
|
|
|
|
|
"-isSparkSessionManaged",
|
|
|
|
|
Boolean.FALSE.toString(),
|
|
|
|
|
"-inputPath",
|
|
|
|
|
inputPath,
|
|
|
|
|
"-bipScorePath",
|
|
|
|
|
bipScoresPath,
|
|
|
|
|
"-resultTableName",
|
|
|
|
|
"eu.dnetlib.dhp.schema.oaf.Publication",
|
|
|
|
|
"-outputPath",
|
|
|
|
|
workingDir.toString() + "/actionSet"
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
|
|
|
|
|
|
|
|
|
JavaRDD<Publication> tmp = sc
|
|
|
|
|
JavaRDD<Result> tmp = sc
|
|
|
|
|
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
|
|
|
|
|
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
|
|
|
|
.map(aa -> ((Publication) aa.getPayload()));
|
|
|
|
|
.map(aa -> ((Result) aa.getPayload()));
|
|
|
|
|
|
|
|
|
|
assertEquals(2, tmp.count());
|
|
|
|
|
assertEquals(4, tmp.count());
|
|
|
|
|
|
|
|
|
|
Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
|
|
|
|
|
verificationDataset.createOrReplaceTempView("publication");
|
|
|
|
|
Dataset<Result> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Result.class));
|
|
|
|
|
verificationDataset.createOrReplaceTempView("result");
|
|
|
|
|
|
|
|
|
|
Dataset<Row> execVerification = spark
|
|
|
|
|
.sql(
|
|
|
|
|
"Select p.id oaid, mes.id, mUnit.value from publication p " +
|
|
|
|
|
"Select p.id oaid, mes.id, mUnit.value from result p " +
|
|
|
|
|
"lateral view explode(measures) m as mes " +
|
|
|
|
|
"lateral view explode(mes.unit) u as mUnit ");
|
|
|
|
|
|
|
|
|
|
Assertions.assertEquals(4, execVerification.count());
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
2,
|
|
|
|
|
execVerification.filter("oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb'").count());
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
2,
|
|
|
|
|
execVerification.filter("oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09'").count());
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
2,
|
|
|
|
|
execVerification.filter("id = 'influence'").count());
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
2,
|
|
|
|
|
execVerification.filter("id = 'popularity'").count());
|
|
|
|
|
|
|
|
|
|
Assertions.assertEquals(12, execVerification.count());
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"1.47565045883e-08",
|
|
|
|
|
execVerification
|
|
|
|
|
"6.63451994567e-09", execVerification
|
|
|
|
|
.filter(
|
|
|
|
|
"oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " +
|
|
|
|
|
"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
|
|
|
|
|
"and id = 'influence'")
|
|
|
|
|
.select("value")
|
|
|
|
|
.collectAsList()
|
|
|
|
|
.get(0)
|
|
|
|
|
.getString(0));
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"1.98956540239e-08",
|
|
|
|
|
execVerification
|
|
|
|
|
"0.348694533145", execVerification
|
|
|
|
|
.filter(
|
|
|
|
|
"oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " +
|
|
|
|
|
"and id = 'influence'")
|
|
|
|
|
"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
|
|
|
|
|
"and id = 'popularity_alt'")
|
|
|
|
|
.select("value")
|
|
|
|
|
.collectAsList()
|
|
|
|
|
.get(0)
|
|
|
|
|
.getString(0));
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"0.282046161584",
|
|
|
|
|
execVerification
|
|
|
|
|
.filter(
|
|
|
|
|
"oaid = '50|acm_________::faed5b7a1bd8f51118d13ed29cfaee09' " +
|
|
|
|
|
"and id = 'popularity'")
|
|
|
|
|
.select("value")
|
|
|
|
|
.collectAsList()
|
|
|
|
|
.get(0)
|
|
|
|
|
.getString(0));
|
|
|
|
|
|
|
|
|
|
Assertions
|
|
|
|
|
.assertEquals(
|
|
|
|
|
"0.227515392",
|
|
|
|
|
execVerification
|
|
|
|
|
"2.16094680115e-09", execVerification
|
|
|
|
|
.filter(
|
|
|
|
|
"oaid = '50|355e65625b88::ffa5bad14f4adc0c9a15c00efbbccddb' " +
|
|
|
|
|
"oaid='50|arXiv_dedup_::4a2d5fd8d71daec016c176ec71d957b1' " +
|
|
|
|
|
"and id = 'popularity'")
|
|
|
|
|
.select("value")
|
|
|
|
|
.collectAsList()
|
|
|
|
|