Merge branch 'beta' of https://code-repo.d4science.org/D-Net/dnet-hadoop into beta
This commit is contained in:
commit
c09fcdb40b
|
@ -75,9 +75,14 @@ public class DHPUtils {
|
|||
|
||||
final HttpGet req = new HttpGet(url);
|
||||
|
||||
log.info("MDStoreManager request: {}", req);
|
||||
|
||||
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||
final String json = IOUtils.toString(response.getEntity().getContent());
|
||||
|
||||
log.info("MDStoreManager response: {}", json);
|
||||
|
||||
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
|
||||
return Arrays
|
||||
.stream(mdstores)
|
||||
|
|
|
@ -172,6 +172,61 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldPromoteActionPayload_custom() throws Exception {
|
||||
|
||||
Class<? extends Oaf> rowClazz = Publication.class;
|
||||
Class<? extends Oaf> actionPayloadClazz = Result.class;
|
||||
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.MERGE_FROM_AND_GET;
|
||||
|
||||
// given
|
||||
Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
|
||||
Path inputActionPayloadDir = createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
|
||||
Path outputGraphTableDir = outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
|
||||
|
||||
// when
|
||||
PromoteActionPayloadForGraphTableJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged",
|
||||
Boolean.FALSE.toString(),
|
||||
"-inputGraphTablePath",
|
||||
inputGraphTableDir.toString(),
|
||||
"-graphTableClassName",
|
||||
rowClazz.getCanonicalName(),
|
||||
"-inputActionPayloadPath",
|
||||
inputActionPayloadDir.toString(),
|
||||
"-actionPayloadClassName",
|
||||
actionPayloadClazz.getCanonicalName(),
|
||||
"-outputGraphTablePath",
|
||||
outputGraphTableDir.toString(),
|
||||
"-mergeAndGetStrategy",
|
||||
strategy.name(),
|
||||
"--shouldGroupById",
|
||||
"true"
|
||||
});
|
||||
|
||||
// then
|
||||
assertTrue(Files.exists(outputGraphTableDir));
|
||||
|
||||
List<? extends Oaf> actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
|
||||
.collectAsList()
|
||||
.stream()
|
||||
.sorted(Comparator.comparingInt(Object::hashCode))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Publication p = actualOutputRows
|
||||
.stream()
|
||||
.map(o -> (Publication) o)
|
||||
.filter(o -> "50|4ScienceCRIS::6a67ed3daba1c380bf9de3c13ed9c879".equals(o.getId()))
|
||||
.findFirst()
|
||||
.get();
|
||||
|
||||
assertNotNull(p.getMeasures());
|
||||
assertTrue(p.getMeasures().size() > 0);
|
||||
|
||||
}
|
||||
|
||||
public static Stream<Arguments> promoteJobTestParams() {
|
||||
return Stream
|
||||
.of(
|
||||
|
|
|
@ -18,3 +18,4 @@
|
|||
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018243405,"id":"50|CSC_________::00019460865d6cc381b36076131a5bc1","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"Computer Science::Networking and Internet Architecture","qualifier":{"classid":"arxiv","classname":"arxiv","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7416","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
|
||||
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018240982,"id":"50|CSC_________::0001d663c95c4132355e1765375a5275","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"animal diseases","qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7461","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
|
||||
{"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}},"lastupdatetimestamp":1572018240982,"id":"50|CSC_________::0001d663c95c4132355e1765375a5275","originalId":[],"collectedfrom":[],"pid":[],"dateofcollection":"","dateoftransformation":"","extraInfo":[],"oaiprovenance":null,"author":[],"resulttype":{"classid":"","classname":"","schemeid":"","schemename":""},"language":{"classid":"","classname":"","schemeid":"","schemename":""},"country":[],"subject":[{"value":"animal diseases","qualifier":{"classid":"mesheuropmc","classname":"mesheuropmc","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"0.7461","inferenceprovenance":"iis::document_classes","provenanceaction":{"classid":"iis","classname":"iis","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[],"relevantdate":[],"description":[],"dateofacceptance":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"publisher":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"embargoenddate":{"value":"","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"","inferenceprovenance":"","provenanceaction":{"classid":"","classname":"","schemeid":"","schemename":""}}},"source":[],"fulltext":[],"format":[],"contributor":[],"resourcetype":{"classid":"","classname":"","schemeid":"","schemename":""},"coverage":[],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":null,"instance":[]}
|
||||
{"collectedfrom":null,"dataInfo":null,"lastupdatetimestamp":null,"id":"50|4ScienceCRIS::6a67ed3daba1c380bf9de3c13ed9c879","originalId":null,"pid":null,"dateofcollection":null,"dateoftransformation":null,"extraInfo":null,"oaiprovenance":null,"processingchargeamount":null,"processingchargecurrency":null,"measures":[{"id":"influence","unit":[{"key":"score","value":"1.64385446761e-08","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity_alt","unit":[{"key":"score","value":"18.9590813696","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]},{"id":"popularity","unit":[{"key":"score","value":"6.00577981643e-08","dataInfo":{"invisible":false,"inferred":true,"deletedbyinference":false,"trust":"","inferenceprovenance":"update","provenanceaction":{"classid":"measure:bip","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}]}],"author":null,"resulttype":null,"language":null,"country":null,"subject":null,"title":null,"relevantdate":null,"description":null,"dateofacceptance":null,"publisher":null,"embargoenddate":null,"source":null,"fulltext":null,"format":null,"contributor":null,"resourcetype":null,"coverage":null,"bestaccessright":null,"context":null,"externalReference":null,"instance":null}
|
|
@ -65,11 +65,19 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
|
|||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String mdstoreManagerUrl = parser.get("mdstoreManagerUrl");
|
||||
log.info("mdstoreManagerUrl: {}", mdstoreManagerUrl);
|
||||
|
||||
final String mdFormat = parser.get("mdFormat");
|
||||
log.info("mdFormat: {}", mdFormat);
|
||||
|
||||
final String mdLayout = parser.get("mdLayout");
|
||||
log.info("mdLayout: {}", mdLayout);
|
||||
|
||||
final String mdInterpretation = parser.get("mdInterpretation");
|
||||
log.info("mdInterpretation: {}", mdInterpretation);
|
||||
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
log.info("hdfsPath: {}", hdfsPath);
|
||||
|
||||
final Set<String> paths = mdstorePaths(mdstoreManagerUrl, mdFormat, mdLayout, mdInterpretation);
|
||||
|
||||
|
@ -95,6 +103,8 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication
|
|||
.filter(p -> HdfsSupport.exists(p, sc.hadoopConfiguration()))
|
||||
.toArray(size -> new String[size]);
|
||||
|
||||
log.info("Processing existing paths {}", Arrays.asList(validPaths));
|
||||
|
||||
if (validPaths.length > 0) {
|
||||
spark
|
||||
.read()
|
||||
|
|
|
@ -999,17 +999,11 @@ public class XmlRecordFactory implements Serializable {
|
|||
private List<String> measuresAsXml(List<Measure> measures) {
|
||||
return measures
|
||||
.stream()
|
||||
.flatMap(
|
||||
m -> m
|
||||
.getUnit()
|
||||
.stream()
|
||||
.map(
|
||||
u -> Lists
|
||||
.newArrayList(
|
||||
new Tuple2<>("id", m.getId()),
|
||||
new Tuple2<>("key", u.getKey()),
|
||||
new Tuple2<>("value", u.getValue())))
|
||||
.map(l -> XmlSerializationUtils.asXmlElement("measure", l)))
|
||||
.map(m -> {
|
||||
List<Tuple2<String, String>> l = Lists.newArrayList(new Tuple2<>("id", m.getId()));
|
||||
m.getUnit().forEach(kv -> l.add(new Tuple2<>(kv.getKey(), kv.getValue())));
|
||||
return XmlSerializationUtils.asXmlElement("measure", l);
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
|
|
@ -73,11 +73,20 @@ public class XmlRecordFactoryTest {
|
|||
assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()"));
|
||||
|
||||
assertEquals(
|
||||
"1.00889953098e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@value"));
|
||||
"5.06690394631e-09", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@score"));
|
||||
assertEquals(
|
||||
"30.6576853333", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@value"));
|
||||
"C", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@class"));
|
||||
|
||||
assertEquals(
|
||||
"4.62970429725e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@value"));
|
||||
"0.0", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@score"));
|
||||
assertEquals(
|
||||
"C", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@class"));
|
||||
|
||||
assertEquals(
|
||||
"3.11855618382e-09", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@score"));
|
||||
assertEquals(
|
||||
"C", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@class"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -5,7 +5,37 @@
|
|||
"unit": [
|
||||
{
|
||||
"key": "score",
|
||||
"value": "1.00889953098e-08"
|
||||
"value": "5.06690394631e-09",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "class",
|
||||
"value": "C",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -14,7 +44,37 @@
|
|||
"unit": [
|
||||
{
|
||||
"key": "score",
|
||||
"value": "30.6576853333"
|
||||
"value": "0.0",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "class",
|
||||
"value": "C",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -23,7 +83,115 @@
|
|||
"unit": [
|
||||
{
|
||||
"key": "score",
|
||||
"value": "4.62970429725e-08"
|
||||
"value": "3.11855618382e-09",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "class",
|
||||
"value": "C",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "influence_alt",
|
||||
"unit": [
|
||||
{
|
||||
"key": "score",
|
||||
"value": "0.0",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "class",
|
||||
"value": "C",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "impulse",
|
||||
"unit": [
|
||||
{
|
||||
"key": "score",
|
||||
"value": "0.0",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"key": "class",
|
||||
"value": "C",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": true,
|
||||
"deletedbyinference": false,
|
||||
"trust": "",
|
||||
"inferenceprovenance": "update",
|
||||
"provenanceaction": {
|
||||
"classid": "measure:bip",
|
||||
"classname": "Inferred by OpenAIRE",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue