FOS #175
|
@ -65,8 +65,9 @@ public class PrepareFOSSparkJob implements Serializable {
|
|||
private static void distributeFOSdois(SparkSession spark, String sourcePath, String outputPath) {
|
||||
Dataset<FOSDataModel> fosDataset = readPath(spark, sourcePath, FOSDataModel.class);
|
||||
|
||||
fosDataset.groupByKey((MapFunction<FOSDataModel,String>)v->v.getDoi(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>)(k,it)->{
|
||||
fosDataset
|
||||
.groupByKey((MapFunction<FOSDataModel, String>) v -> v.getDoi(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
|
||||
Result r = new Result();
|
||||
FOSDataModel first = it.next();
|
||||
r.setId(DHPUtils.generateUnresolvedIdentifier(first.getDoi(), DOI));
|
||||
|
@ -75,7 +76,7 @@ public class PrepareFOSSparkJob implements Serializable {
|
|||
HashSet<String> level3 = new HashSet<>();
|
||||
addLevels(level1, level2, level3, first);
|
||||
it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
|
||||
List<StructuredProperty>sbjs = new ArrayList<>();
|
||||
List<StructuredProperty> sbjs = new ArrayList<>();
|
||||
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
||||
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
||||
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME)));
|
||||
|
@ -88,12 +89,11 @@ public class PrepareFOSSparkJob implements Serializable {
|
|||
.json(outputPath + "/fos");
|
||||
}
|
||||
|
||||
private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3, FOSDataModel first) {
|
||||
private static void addLevels(HashSet<String> level1, HashSet<String> level2, HashSet<String> level3,
|
||||
FOSDataModel first) {
|
||||
level1.add(first.getLevel1());
|
||||
level2.add(first.getLevel2());
|
||||
level3.add(first.getLevel3());
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -67,12 +67,12 @@ public class ProduceTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void produceTestSubjects()throws Exception{
|
||||
void produceTestSubjects() throws Exception {
|
||||
|
||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||
|
||||
List<StructuredProperty> sbjs = tmp
|
||||
.filter(row -> row.getSubject()!= null && row.getSubject().size()>0)
|
||||
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
||||
.flatMap(row -> row.getSubject().iterator())
|
||||
.collect();
|
||||
|
||||
|
@ -117,19 +117,17 @@ public class ProduceTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void produceTestMeasuress()throws Exception{
|
||||
void produceTestMeasuress() throws Exception {
|
||||
|
||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||
|
||||
List<KeyValue> mes = tmp
|
||||
.filter(row -> row.getInstance()!= null && row.getInstance().size()>0)
|
||||
.filter(row -> row.getInstance() != null && row.getInstance().size() > 0)
|
||||
.flatMap(row -> row.getInstance().iterator())
|
||||
.flatMap(i->i.getMeasures().iterator())
|
||||
.flatMap(m ->m.getUnit().iterator())
|
||||
.flatMap(i -> i.getMeasures().iterator())
|
||||
.flatMap(m -> m.getUnit().iterator())
|
||||
.collect();
|
||||
|
||||
|
||||
|
||||
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
|
||||
mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
|
||||
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible()));
|
||||
|
@ -154,8 +152,9 @@ public class ProduceTest {
|
|||
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||
sbj.getDataInfo().getProvenanceaction().getSchemename()));
|
||||
}
|
||||
|
||||
@Test
|
||||
void produceTest6Subjects() throws Exception{
|
||||
void produceTest6Subjects() throws Exception {
|
||||
final String doi = "unresolved::10.3390/s18072310::doi";
|
||||
|
||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||
|
@ -174,7 +173,6 @@ public class ProduceTest {
|
|||
.flatMap(row -> row.getSubject().iterator())
|
||||
.collect();
|
||||
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("04 agricultural and veterinary sciences")));
|
||||
|
@ -190,11 +188,10 @@ public class ProduceTest {
|
|||
Assertions
|
||||
.assertEquals(true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("030309 nutrition & dietetics")));
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void produceTest3Measures()throws Exception{
|
||||
void produceTest3Measures() throws Exception {
|
||||
final String doi = "unresolved::10.3390/s18072310::doi";
|
||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||
|
||||
|
@ -209,7 +206,6 @@ public class ProduceTest {
|
|||
.getMeasures()
|
||||
.size());
|
||||
|
||||
|
||||
List<Measure> measures = tmp
|
||||
.filter(row -> row.getId().equals(doi))
|
||||
.flatMap(row -> row.getInstance().iterator())
|
||||
|
@ -249,6 +245,7 @@ public class ProduceTest {
|
|||
.getValue());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void produceTestSomeNumbers() throws Exception {
|
||||
|
||||
|
@ -316,10 +313,10 @@ public class ProduceTest {
|
|||
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void prepareTest5Subjects()throws Exception{
|
||||
final String doi = "unresolved::10.3390/s18072310::doi";
|
||||
void prepareTest5Subjects() throws Exception {
|
||||
final String doi = "unresolved::10.1063/5.0032658::doi";
|
||||
|
||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||
|
||||
Assertions.assertEquals(1, tmp.filter(row -> row.getId().equals(doi)).count());
|
||||
|
@ -333,16 +330,11 @@ public class ProduceTest {
|
|||
.getSubject()
|
||||
.size());
|
||||
|
||||
|
||||
|
||||
|
||||
List<StructuredProperty> sbjs = tmp
|
||||
.filter(row -> row.getId().equals(doi))
|
||||
.flatMap(row -> row.getSubject().iterator())
|
||||
.collect();
|
||||
|
||||
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
true, sbjs.stream().anyMatch(sbj -> sbj.getValue().equals("01 natural sciences")));
|
||||
|
|
Loading…
Reference in New Issue