FOS #175

Merged
miriam.baglioni merged 5 commits from FOS into beta 2021-12-23 09:06:57 +01:00
2 changed files with 146 additions and 154 deletions
Showing only changes of commit 10579c0dd0 - Show all commits

View File

@@ -65,7 +65,8 @@ public class PrepareFOSSparkJob implements Serializable {
private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
fosDataset.groupByKey((MapFunction<FOSDataModel,String>)v->v.getDoi(), Encoders.STRING())
fosDataset
.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getDoi(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
Result r = new Result();
FOSDataModel first = it.next();
@@ -88,12 +89,11 @@ public class PrepareFOSSparkJob implements Serializable {
.json(outputPath + "/fos");
}
private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3, FOSDataModel first) {
private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3,
FOSDataModel first) {
level1.add(first.getLevel1());
level2.add(first.getLevel2());
level3.add(first.getLevel3());
}
}

View File

@@ -128,8 +128,6 @@ public class ProduceTest {
.flatMap(m -> m.getUnit().iterator())
.collect();
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible()));
@@ -154,6 +152,7 @@ public class ProduceTest {
ModelConstants.DNET_PROVENANCE_ACTIONS,
sbj.getDataInfo().getProvenanceaction().getSchemename()));
}
@Test
void produceTest6Subjects() throws Exception {
final String doi = "unresolved::10.3390/s18072310::doi";
@@ -174,7 +173,6 @@ public class ProduceTest {
.flatMap(row -> row.getSubject().iterator())
.collect();
Assertions
.assertEquals(
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences")));
@@ -190,7 +188,6 @@ public class ProduceTest {
Assertions
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics")));
}
@Test
@@ -209,7 +206,6 @@ public class ProduceTest {
.getMeasures()
.size());
List<Measure> measures = tmp
.filter(row -> row.getId().equals(doi))
.flatMap(row -> row.getInstance().iterator())
@@ -249,6 +245,7 @@ public class ProduceTest {
.getValue());
}
@Test
void produceTestSomeNumbers() throws Exception {
@@ -316,10 +313,10 @@ public class ProduceTest {
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
}
@Test
void prepareTest5Subjects() throws Exception {
final String doi = "unresolved::10.3390/s18072310::doi";
final String doi = "unresolved::10.1063/5.0032658::doi";
JavaRDD<Result> tmp = getResultJavaRDD();
Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count());
@@ -333,16 +330,11 @@ public class ProduceTest {
.getSubject()
.size());
List<StructuredProperty> sbjs = tmp
.filter(row -> row.getId().equals(doi))
.flatMap(row -> row.getSubject().iterator())
.collect();
Assertions
.assertEquals(
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences")));