forked from D-Net/dnet-hadoop
[FOS]fixed doi value in test
This commit is contained in:
parent
6116fc5d40
commit
10579c0dd0
|
@ -65,35 +65,35 @@ public class PrepareFOSSparkJob implements Serializable {
|
||||||
private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
|
private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
|
||||||
Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
|
Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
|
||||||
|
|
||||||
fosDataset.groupByKey((MapFunction<FOSDataModel,String>)v->v.getDoi(), Encoders.STRING())
|
fosDataset
|
||||||
.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>)(k,it)->{
|
.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getDoi(), Encoders.STRING())
|
||||||
Result r = new Result();
|
.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
|
||||||
FOSDataModel first = it.next();
|
Result r = new Result();
|
||||||
r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI));
|
FOSDataModel first = it.next();
|
||||||
HashSet<String> level1 = new HashSet<>();
|
r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI));
|
||||||
HashSet<String> level2 = new HashSet<>();
|
HashSet<String> level1 = new HashSet<>();
|
||||||
HashSet<String> level3 = new HashSet<>();
|
HashSet<String> level2 = new HashSet<>();
|
||||||
addLevels(level1, level2, level3, first);
|
HashSet<String> level3 = new HashSet<>();
|
||||||
it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
|
addLevels(level1, level2, level3, first);
|
||||||
List<StructuredProperty>sbjs = new ArrayList<>();
|
it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
|
||||||
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
List<StructuredProperty> sbjs = new ArrayList<>();
|
||||||
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
||||||
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
||||||
r.setSubject(sbjs);
|
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
||||||
return r;
|
r.setSubject(sbjs);
|
||||||
}, Encoders.bean(Result.class))
|
return r;
|
||||||
|
}, Encoders.bean(Result.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath + "/fos");
|
.json(outputPath + "/fos");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3, FOSDataModel first) {
|
private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3,
|
||||||
|
FOSDataModel first) {
|
||||||
level1.add(first.getLevel1());
|
level1.add(first.getLevel1());
|
||||||
level2.add(first.getLevel2());
|
level2.add(first.getLevel2());
|
||||||
level3.add(first.getLevel3());
|
level3.add(first.getLevel3());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,29 +67,29 @@ public class ProduceTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void produceTestSubjects()throws Exception{
|
void produceTestSubjects() throws Exception {
|
||||||
|
|
||||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp
|
List<StructuredProperty> sbjs = tmp
|
||||||
.filter(row -> row.getSubject()!= null && row.getSubject().size()>0)
|
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid()));
|
sbjs.forEach(sbj -> Assertions.assertEquals("FOS", sbj.getQualifier().getClassid()));
|
||||||
sbjs
|
sbjs
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
"Fields of Science and Technology classification", sbj.getQualifier().getClassname()));
|
"Fields of Science and Technology classification", sbj.getQualifier().getClassname()));
|
||||||
sbjs
|
sbjs
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid()));
|
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid()));
|
||||||
sbjs
|
sbjs
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename()));
|
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename()));
|
||||||
|
|
||||||
sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
|
sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
|
||||||
sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
|
sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
|
||||||
|
@ -97,38 +97,36 @@ public class ProduceTest {
|
||||||
sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
|
sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
|
||||||
sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
|
sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
|
||||||
sbjs
|
sbjs
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid()));
|
sbj -> Assertions.assertEquals("subject:fos", sbj.getDataInfo().getProvenanceaction().getClassid()));
|
||||||
sbjs
|
sbjs
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname()));
|
.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname()));
|
||||||
sbjs
|
sbjs
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
|
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
|
||||||
sbjs
|
sbjs
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
sbj.getDataInfo().getProvenanceaction().getSchemename()));
|
sbj.getDataInfo().getProvenanceaction().getSchemename()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void produceTestMeasuress()throws Exception{
|
void produceTestMeasuress() throws Exception {
|
||||||
|
|
||||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||||
|
|
||||||
List<KeyValue> mes = tmp
|
List<KeyValue> mes = tmp
|
||||||
.filter(row -> row.getInstance()!= null && row.getInstance().size()>0)
|
.filter(row -> row.getInstance() != null && row.getInstance().size() > 0)
|
||||||
.flatMap(row -> row.getInstance().iterator())
|
.flatMap(row -> row.getInstance().iterator())
|
||||||
.flatMap(i->i.getMeasures().iterator())
|
.flatMap(i -> i.getMeasures().iterator())
|
||||||
.flatMap(m ->m.getUnit().iterator())
|
.flatMap(m -> m.getUnit().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
|
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
|
||||||
mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
|
mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
|
||||||
|
@ -136,119 +134,118 @@ public class ProduceTest {
|
||||||
mes.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
|
mes.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
|
||||||
mes.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
|
mes.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
|
||||||
mes
|
mes
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid()));
|
sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid()));
|
||||||
mes
|
mes
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname()));
|
.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname()));
|
||||||
mes
|
mes
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
|
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
|
||||||
mes
|
mes
|
||||||
.forEach(
|
.forEach(
|
||||||
sbj -> Assertions
|
sbj -> Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
sbj.getDataInfo().getProvenanceaction().getSchemename()));
|
sbj.getDataInfo().getProvenanceaction().getSchemename()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void produceTest6Subjects() throws Exception{
|
void produceTest6Subjects() throws Exception {
|
||||||
final String doi = "unresolved::10.3390/s18072310::doi";
|
final String doi = "unresolved::10.3390/s18072310::doi";
|
||||||
|
|
||||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
6, tmp
|
6, tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp
|
List<StructuredProperty> sbjs = tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences")));
|
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences")));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology")));
|
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0404 agricultural biotechnology")));
|
||||||
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science")));
|
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("040502 food science")));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences")));
|
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("03 medical and health sciences")));
|
||||||
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences")));
|
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0303 health sciences")));
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics")));
|
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics")));
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void produceTest3Measures()throws Exception{
|
void produceTest3Measures() throws Exception {
|
||||||
final String doi = "unresolved::10.3390/s18072310::doi";
|
final String doi = "unresolved::10.3390/s18072310::doi";
|
||||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3, tmp
|
3, tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getInstance()
|
.getInstance()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getMeasures()
|
.getMeasures()
|
||||||
.size());
|
.size());
|
||||||
|
|
||||||
|
|
||||||
List<Measure> measures = tmp
|
List<Measure> measures = tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.flatMap(row -> row.getInstance().iterator())
|
.flatMap(row -> row.getInstance().iterator())
|
||||||
.flatMap(inst -> inst.getMeasures().iterator())
|
.flatMap(inst -> inst.getMeasures().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
"7.5597134689e-09", measures
|
"7.5597134689e-09", measures
|
||||||
.stream()
|
.stream()
|
||||||
.filter(mes -> mes.getId().equals("influence"))
|
.filter(mes -> mes.getId().equals("influence"))
|
||||||
.collect(Collectors.toList())
|
.collect(Collectors.toList())
|
||||||
.get(0)
|
.get(0)
|
||||||
.getUnit()
|
.getUnit()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getValue());
|
.getValue());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
"4.903880192", measures
|
"4.903880192", measures
|
||||||
.stream()
|
.stream()
|
||||||
.filter(mes -> mes.getId().equals("popularity_alt"))
|
.filter(mes -> mes.getId().equals("popularity_alt"))
|
||||||
.collect(Collectors.toList())
|
.collect(Collectors.toList())
|
||||||
.get(0)
|
.get(0)
|
||||||
.getUnit()
|
.getUnit()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getValue());
|
.getValue());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
"1.17977512835e-08", measures
|
"1.17977512835e-08", measures
|
||||||
.stream()
|
.stream()
|
||||||
.filter(mes -> mes.getId().equals("popularity"))
|
.filter(mes -> mes.getId().equals("popularity"))
|
||||||
.collect(Collectors.toList())
|
.collect(Collectors.toList())
|
||||||
.get(0)
|
.get(0)
|
||||||
.getUnit()
|
.getUnit()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getValue());
|
.getValue());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void produceTestSomeNumbers() throws Exception {
|
void produceTestSomeNumbers() throws Exception {
|
||||||
|
|
||||||
|
@ -316,44 +313,39 @@ public class ProduceTest {
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void prepareTest5Subjects()throws Exception{
|
void prepareTest5Subjects() throws Exception {
|
||||||
final String doi = "unresolved::10.3390/s18072310::doi";
|
final String doi = "unresolved::10.1063/5.0032658::doi";
|
||||||
|
|
||||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||||
|
|
||||||
Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count());
|
Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
5, tmp
|
5, tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp
|
List<StructuredProperty> sbjs = tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences")));
|
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences")));
|
||||||
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0103 physical sciences")));
|
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0103 physical sciences")));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics")));
|
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010304 chemical physics")));
|
||||||
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0104 chemical sciences")));
|
Assertions.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("0104 chemical sciences")));
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry")));
|
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("010402 general chemistry")));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue