1
0
Fork 0
This commit is contained in:
Miriam Baglioni 2022-07-01 12:38:03 +02:00
commit c09fcdb40b
7 changed files with 260 additions and 18 deletions

View File

@ -75,9 +75,14 @@ public class DHPUtils {
final HttpGet req = new HttpGet(url);
log.info("MDStoreManager request: {}", req);
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
final String json = IOUtils.toString(response.getEntity().getContent());
log.info("MDStoreManager response: {}", json);
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
return Arrays
.stream(mdstores)

View File

@ -172,6 +172,61 @@ public class PromoteActionPayloadForGraphTableJobTest {
}
}
@Test
void shouldPromoteActionPayload_custom() throws Exception {
Class<? extends Oaf> rowClazz = Publication.class;
Class<? extends Oaf> actionPayloadClazz = Result.class;
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.MERGE_FROM_AND_GET;
// given
Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
Path inputActionPayloadDir = createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
Path outputGraphTableDir = outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
// when
PromoteActionPayloadForGraphTableJob
.main(
new String[] {
"-isSparkSessionManaged",
Boolean.FALSE.toString(),
"-inputGraphTablePath",
inputGraphTableDir.toString(),
"-graphTableClassName",
rowClazz.getCanonicalName(),
"-inputActionPayloadPath",
inputActionPayloadDir.toString(),
"-actionPayloadClassName",
actionPayloadClazz.getCanonicalName(),
"-outputGraphTablePath",
outputGraphTableDir.toString(),
"-mergeAndGetStrategy",
strategy.name(),
"--shouldGroupById",
"true"
});
// then
assertTrue(Files.exists(outputGraphTableDir));
List<? extends Oaf> actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
.collectAsList()
.stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
Publication p = actualOutputRows
.stream()
.map(o -> (Publication) o)
.filter(o -> "50|4ScienceCRIS::6a67ed3daba1c380bf9de3c13ed9c879".equals(o.getId()))
.findFirst()
.get();
assertNotNull(p.getMeasures());
assertTrue(p.getMeasures().size() > 0);
}
public static Stream<Arguments> promoteJobTestParams() {
return Stream
.of(

View File

@ -18,3 +18,4 @@
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018243405,"id":"50|CSC_________::00019460865d6cc381b36076131a5bc1","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"Computer Science::Networking and Internet Architecture","qualifier":{"classid":"arxiv","classname":"arxiv","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7416","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018240982,"id":"50|CSC_________::0001d663c95c4132355e1765375a5275","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"animal diseases","qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7461","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018240982,"id":"50|CSC_________::0001d663c95c4132355e1765375a5275","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"animal diseases","qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7461","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
{"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"id":"50|4ScienceCRIS::6a67ed3daba1c380bf9de3c13ed9c879","originalId":null,"pid":null,"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":[{"id":"influence","unit":[{"key":"score","value":"1.64385446761e-08","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity_alt","unit":[{"key":"score","value":"18.9590813696","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity","unit":[{"key":"score","value":"6.00577981643e-08","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]}],"author":null,"resulttype":null,"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null}

View File

@ -65,11 +65,19 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String mdstoreManagerUrl = parser.get("mdstoreManagerUrl");
log.info("mdstoreManagerUrl: {}", mdstoreManagerUrl);
final String mdFormat = parser.get("mdFormat");
log.info("mdFormat: {}", mdFormat);
final String mdLayout = parser.get("mdLayout");
log.info("mdLayout: {}", mdLayout);
final String mdInterpretation = parser.get("mdInterpretation");
log.info("mdInterpretation: {}", mdInterpretation);
final String hdfsPath = parser.get("hdfsPath");
log.info("hdfsPath: {}", hdfsPath);
final Set<String> paths = mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation);
@ -95,6 +103,8 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
.filter(p -> HdfsSupport.exists(p, sc.hadoopConfiguration()))
.toArray(size -> new String[size]);
log.info("Processing existing paths {}", Arrays.asList(validPaths));
if (validPaths.length > 0) {
spark
.read()

View File

@ -999,17 +999,11 @@ public class XmlRecordFactory implements Serializable {
private List<String> measuresAsXml(List<Measure> measures) {
return measures
.stream()
.flatMap(
m -> m
.getUnit()
.stream()
.map(
u -> Lists
.newArrayList(
new Tuple2<>("id", m.getId()),
new Tuple2<>("key", u.getKey()),
new Tuple2<>("value", u.getValue())))
.map(l -> XmlSerializationUtils.asXmlElement("measure", l)))
.map(m -> {
List<Tuple2<String, String>> l = Lists.newArrayList(new Tuple2<>("id", m.getId()));
m.getUnit().forEach(kv -> l.add(new Tuple2<>(kv.getKey(), kv.getValue())));
return XmlSerializationUtils.asXmlElement("measure", l);
})
.collect(Collectors.toList());
}

View File

@ -73,11 +73,20 @@ public class XmlRecordFactoryTest {
assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()"));
assertEquals(
"1.00889953098e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@value"));
"5.06690394631e-09", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@score"));
assertEquals(
"30.6576853333", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@value"));
"C", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@class"));
assertEquals(
"4.62970429725e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@value"));
"0.0", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@score"));
assertEquals(
"C", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@class"));
assertEquals(
"3.11855618382e-09", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@score"));
assertEquals(
"C", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@class"));
}
@Test

View File

@ -5,7 +5,37 @@
"unit": [
{
"key": "score",
"value": "1.00889953098e-08"
"value": "5.06690394631e-09",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
},
{
"key": "class",
"value": "C",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
}
]
},
@ -14,7 +44,37 @@
"unit": [
{
"key": "score",
"value": "30.6576853333"
"value": "0.0",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
},
{
"key": "class",
"value": "C",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
}
]
},
@ -23,7 +83,115 @@
"unit": [
{
"key": "score",
"value": "4.62970429725e-08"
"value": "3.11855618382e-09",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
},
{
"key": "class",
"value": "C",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
}
]
},
{
"id": "influence_alt",
"unit": [
{
"key": "score",
"value": "0.0",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
},
{
"key": "class",
"value": "C",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
}
]
},
{
"id": "impulse",
"unit": [
{
"key": "score",
"value": "0.0",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
},
{
"key": "class",
"value": "C",
"dataInfo": {
"invisible": false,
"inferred": true,
"deletedbyinference": false,
"trust": "",
"inferenceprovenance": "update",
"provenanceaction": {
"classid": "measure:bip",
"classname": "Inferred by OpenAIRE",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
}
]
}