[graph provision] align serialisation of the usage count measures to the agreed specifications

This commit is contained in:
Claudio Atzori 2024-06-05 16:34:40 +02:00
commit 91b49366c6
4 changed files with 68 additions and 31 deletions

View File

@ -172,7 +172,7 @@ public class SparkBulkTagJob {
.option("compression", "gzip")
.json(outputPath + "project");
readPath(spark, outputPath + "project", Datasource.class)
readPath(spark, outputPath + "project", Project.class)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")

View File

@ -170,18 +170,19 @@ public class XmlSerializationUtils {
return sb.toString();
}
// <measure views="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" />
// <measure downloads="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" />
// <measure id="views" count="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" />
// <measure id="downloads" count="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" />
public static String usageMeasureAsXmlElement(String name, Measure measure) {
StringBuilder sb = new StringBuilder();
for (KeyValue kv : measure.getUnit()) {
sb
.append("<")
.append(name)
.append(" ")
.append(attr(measure.getId(), kv.getValue()))
.append(attr("datasource", kv.getKey()))
.append(" />");
.append("<")
.append(name)
.append(" ")
.append(attr("id", measure.getId()))
.append(attr("count", kv.getValue()))
.append(attr("datasource", kv.getKey()))
.append(" />");
}
return sb.toString();
}

View File

@ -9,6 +9,7 @@ import java.io.StringReader;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.math3.optim.AbstractOptimizationProblem;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
@ -42,6 +43,7 @@ public class XmlRecordFactoryTest {
final Publication p = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
assertNotNull(xml);
@ -51,7 +53,12 @@ public class XmlRecordFactoryTest {
assertNotNull(doc);
// System.out.println(doc.asXML());
System.out.println(doc.asXML());
assertEquals("10", doc.valueOf("//measure[@id = 'downloads']/@count"));
assertEquals("fakeds", doc.valueOf("//measure[@id = 'downloads']/@datasource"));
assertEquals(0, doc.selectNodes("//measure[@id = 'views']").size());
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));

View File

@ -8,6 +8,26 @@
}
],
"measures": [
{
"id": "downloads",
"unit": [
{
"key": "fakeds",
"value": "10",
"dataInfo": null
}
]
},
{
"id": "views",
"unit": [
{
"key": "fakedss",
"value": "0",
"dataInfo": null
}
]
},
{
"id": "influence",
"unit": [
@ -505,13 +525,13 @@
"extraInfo": [],
"format": [],
"fulltext": [
{ "value" : "https://osf.io/preprints/socarxiv/7vgtu/download" },
{ "value" : "https://osf.io/preprints/socarxiv/7vgtu/download2" }
{"value": "https://osf.io/preprints/socarxiv/7vgtu/download"},
{"value": "https://osf.io/preprints/socarxiv/7vgtu/download2"}
],
"id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
"instance": [
{
"fulltext" : "https://osf.io/preprints/socarxiv/7vgtu/download",
"fulltext": "https://osf.io/preprints/socarxiv/7vgtu/download",
"measures": [
{
"id": "influence",
@ -539,6 +559,24 @@
"value": "4.62970429725e-08"
}
]
},
{
"id": "downloads",
"unit": [
{
"key": "fakeds",
"value": "10"
}
]
},
{
"id": "views",
"unit": [
{
"key": "fakeds",
"value": "10"
}
]
}
],
"pid": [
@ -674,9 +712,7 @@
},
"value": ""
},
"url": [
"http://dx.doi.org/10.1109/TED.2018.2853550"
]
"url": ["http://dx.doi.org/10.1109/TED.2018.2853550"]
},
{
"pid": [
@ -812,9 +848,7 @@
},
"value": "CC-BY"
},
"url": [
"http://dx.doi.org/10.1109/TED.2018.2853550"
]
"url": ["http://dx.doi.org/10.1109/TED.2018.2853550"]
},
{
"pid": [
@ -950,9 +984,7 @@
},
"value": ""
},
"url": [
"http://dx.doi.org/10.1109/TED.2018.2853551"
]
"url": ["http://dx.doi.org/10.1109/TED.2018.2853551"]
},
{
"pid": [
@ -1221,8 +1253,7 @@
},
"value": ""
},
"url": [
]
"url": []
},
{
"pid": [
@ -1352,9 +1383,7 @@
},
"value": ""
},
"url": [
""
]
"url": [""]
},
{
"pid": [
@ -1484,9 +1513,7 @@
},
"value": ""
},
"url": [
"asdasd://not a URL"
]
"url": ["asdasd://not a URL"]
}
],
"journal": {
@ -1580,7 +1607,9 @@
"schemeid": "dnet:result_typologies",
"schemename": "dnet:result_typologies"
},
"source": [ { "value" : "Bulletin of the National Technical University \"KhPI\" A series of \"Information and Modeling\"; № 1 (3) (2020):" } ],
"source": [
{"value": "Bulletin of the National Technical University \"KhPI\" A series of \"Information and Modeling\"; № 1 (3) (2020):"}
],
"subject": [
{
"dataInfo": {