[graph provision] obtain context info from the context API instead of from the ISLookUp service

This commit is contained in:
Claudio Atzori 2024-01-22 15:53:17 +01:00
parent 2655eea5bc
commit 1c6db320f4
9 changed files with 213 additions and 6 deletions

View File

@ -0,0 +1,39 @@
package eu.dnetlib.dhp.common.api.context;
/**
 * Summary of a single category: its identifier, its label and a flag telling whether
 * concepts are available for it.
 * <p>
 * Setters are fluent (they return {@code this}) so that instances can be populated in a chain.
 */
public class CategorySummary {

    /** Category identifier. */
    private String id;

    /** Human readable label of the category. */
    private String label;

    /** Flag reported together with the category; defaults to {@code false}. */
    private boolean hasConcept;

    // --- id ---------------------------------------------------------------

    /**
     * @return the category identifier, or {@code null} when it was never set
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the category identifier
     * @return this instance, for chaining
     */
    public CategorySummary setId(final String id) {
        this.id = id;
        return this;
    }

    // --- label ------------------------------------------------------------

    /**
     * @return the category label, or {@code null} when it was never set
     */
    public String getLabel() {
        return this.label;
    }

    /**
     * @param label the category label
     * @return this instance, for chaining
     */
    public CategorySummary setLabel(final String label) {
        this.label = label;
        return this;
    }

    // --- hasConcept -------------------------------------------------------

    /**
     * @return the value of the {@code hasConcept} flag
     */
    public boolean isHasConcept() {
        return this.hasConcept;
    }

    /**
     * @param hasConcept the new value of the flag
     * @return this instance, for chaining
     */
    public CategorySummary setHasConcept(final boolean hasConcept) {
        this.hasConcept = hasConcept;
        return this;
    }
}

View File

@ -0,0 +1,7 @@
package eu.dnetlib.dhp.common.api.context;
import java.util.ArrayList;
/**
 * Concrete, non-generic list of {@link CategorySummary} items.
 * <p>
 * It adds no behaviour on top of {@link ArrayList}; {@code ContextMapper.fromAPI} passes
 * {@code CategorySummaryList.class} to the REST client as the expected response type.
 */
public class CategorySummaryList extends ArrayList<CategorySummary> {
}

View File

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.common.api.context;
import java.util.List;
/**
 * Summary of a concept: identifier, label, a flag telling whether sub-concepts exist and,
 * optionally, the nested sub-concepts themselves (the structure is recursive).
 * <p>
 * Setters are fluent (they return {@code this}) so that instances can be populated in a chain.
 */
public class ConceptSummary {

    /** Concept identifier. */
    private String id;

    /** Human readable label of the concept. */
    private String label;

    /** Flag telling whether this concept declares sub-concepts; exposed only through its accessors. */
    private boolean hasSubConcept;

    /** Nested sub-concepts; stays {@code null} until explicitly set. */
    private List<ConceptSummary> concepts;

    /**
     * @return the concept identifier, or {@code null} when it was never set
     */
    public String getId() {
        return id;
    }

    /**
     * @param id the concept identifier
     * @return this instance, for chaining
     */
    public ConceptSummary setId(final String id) {
        this.id = id;
        return this;
    }

    /**
     * @return the concept label, or {@code null} when it was never set
     */
    public String getLabel() {
        return label;
    }

    /**
     * @param label the concept label
     * @return this instance, for chaining
     */
    public ConceptSummary setLabel(final String label) {
        this.label = label;
        return this;
    }

    /**
     * @return the value of the {@code hasSubConcept} flag
     */
    public boolean isHasSubConcept() {
        return hasSubConcept;
    }

    /**
     * @param hasSubConcept the new value of the flag
     * @return this instance, for chaining
     */
    public ConceptSummary setHasSubConcept(final boolean hasSubConcept) {
        this.hasSubConcept = hasSubConcept;
        return this;
    }

    /**
     * @return the nested sub-concepts, or {@code null} when none were set; callers must
     *         null-check before iterating
     */
    public List<ConceptSummary> getConcepts() {
        return concepts;
    }

    /**
     * Sets the nested sub-concepts. The name mirrors {@link #getConcepts()} so that getter and
     * setter describe the same bean property ({@code concepts}).
     *
     * @param concepts the nested sub-concepts, may be {@code null}
     * @return this instance, for chaining
     */
    public ConceptSummary setConcepts(final List<ConceptSummary> concepts) {
        this.concepts = concepts;
        return this;
    }

    /**
     * Legacy, singular-named variant of {@link #setConcepts(List)}, kept for backward compatibility.
     *
     * @param concepts the nested sub-concepts, may be {@code null}
     * @return this instance, for chaining
     * @deprecated use {@link #setConcepts(List)}, whose name matches {@link #getConcepts()}
     */
    @Deprecated
    public ConceptSummary setConcept(final List<ConceptSummary> concepts) {
        return setConcepts(concepts);
    }
}

View File

@ -0,0 +1,7 @@
package eu.dnetlib.dhp.common.api.context;
import java.util.ArrayList;
/**
 * Concrete, non-generic list of {@link ConceptSummary} items.
 * <p>
 * It adds no behaviour on top of {@link ArrayList}; {@code ContextMapper.fromAPI} passes
 * {@code ConceptSummaryList.class} to the REST client as the expected response type.
 */
public class ConceptSummaryList extends ArrayList<ConceptSummary> {
}

View File

@ -0,0 +1,50 @@
package eu.dnetlib.dhp.common.api.context;
/**
 * Summary of a single context: identifier, label, type and status.
 * <p>
 * Setters are fluent (they return {@code this}) so that instances can be populated in a chain.
 */
public class ContextSummary {

    /** Context identifier. */
    private String id;

    /** Human readable label of the context. */
    private String label;

    /** Context type, kept as a free-form string. */
    private String type;

    /** Context status, kept as a free-form string. */
    private String status;

    // --- id ---------------------------------------------------------------

    /**
     * @return the context identifier, or {@code null} when it was never set
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the context identifier
     * @return this instance, for chaining
     */
    public ContextSummary setId(final String id) {
        this.id = id;
        return this;
    }

    // --- label ------------------------------------------------------------

    /**
     * @return the context label, or {@code null} when it was never set
     */
    public String getLabel() {
        return this.label;
    }

    /**
     * @param label the context label
     * @return this instance, for chaining
     */
    public ContextSummary setLabel(final String label) {
        this.label = label;
        return this;
    }

    // --- type -------------------------------------------------------------

    /**
     * @return the context type, or {@code null} when it was never set
     */
    public String getType() {
        return this.type;
    }

    /**
     * @param type the context type
     * @return this instance, for chaining
     */
    public ContextSummary setType(final String type) {
        this.type = type;
        return this;
    }

    // --- status -----------------------------------------------------------

    /**
     * @return the context status, or {@code null} when it was never set
     */
    public String getStatus() {
        return this.status;
    }

    /**
     * @param status the context status
     * @return this instance, for chaining
     */
    public ContextSummary setStatus(final String status) {
        this.status = status;
        return this;
    }
}

View File

@ -0,0 +1,7 @@
package eu.dnetlib.dhp.common.api.context;
import java.util.ArrayList;
/**
 * Concrete, non-generic list of {@link ContextSummary} items.
 * <p>
 * It adds no behaviour on top of {@link ArrayList}; {@code ContextMapper.fromAPI} passes
 * {@code ContextSummaryList.class} to the REST client as the expected response type.
 */
public class ContextSummaryList extends ArrayList<ContextSummary> {
}

View File

@ -62,8 +62,8 @@ public class XmlConverterJob {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final String contextApiBaseUrl = parser.get("contextApiBaseUrl");
log.info("contextApiBaseUrl: {}", contextApiBaseUrl);
final SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
@ -71,7 +71,7 @@ public class XmlConverterJob {
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
removeOutputDir(spark, outputPath);
convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl));
convertToXml(spark, inputPath, outputPath, ContextMapper.fromAPI(contextApiBaseUrl));
});
}

View File

@ -1,18 +1,22 @@
package eu.dnetlib.dhp.oa.provision.utils;
import java.io.Serializable;
import java.io.StringReader;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner;
import eu.dnetlib.dhp.common.api.context.*;
import eu.dnetlib.dhp.common.rest.DNetRestClient;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -23,6 +27,42 @@ public class ContextMapper extends HashMap<String, ContextDef> implements Serial
/**
 * XQuery selecting, within the profiles whose resource type is {@code ContextDSResourceType}, every
 * element named {@code context}, {@code category} or {@code concept} (at any depth), and rendering
 * each of them as an {@code <entry>} element with the attributes {@code id}, {@code label} (taken
 * from the {@code @label} / {@code @name} attributes), {@code name} (the element name) and
 * {@code type}.
 * NOTE(review): presumably evaluated by the IS-based loader ({@code fromIS}) - confirm against its body.
 */
private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return <entry id=\"{$x/@id}\" label=\"{$x/@label|$x/@name}\" name=\"{$x/name()}\" type=\"{$x/@type}\"/>";
/**
 * Builds a {@link ContextMapper} from the context REST API.
 * <p>
 * The method issues one GET on {@code baseURL + "/contexts"}, then one GET per context on
 * {@code baseURL + "/context/{contextId}"} to list its categories and, for each category whose
 * {@code hasConcept} flag is set, one GET on {@code baseURL + "/context/category/{categoryId}"}
 * to list its concepts. Every context, category and concept is registered under its own id;
 * sub-concepts nested inside a concept are registered recursively, at any depth.
 *
 * @param baseURL base URL of the context API, without trailing slash
 * @return the mapper populated with one {@link ContextDef} per context, category and concept
 * @throws Exception whatever the REST client raises while fetching or decoding a response
 */
public static ContextMapper fromAPI(final String baseURL) throws Exception {
    final ContextMapper contextMapper = new ContextMapper();

    for (ContextSummary ctx : DNetRestClient.doGET(baseURL + "/contexts", ContextSummaryList.class)) {
        contextMapper.put(ctx.getId(), new ContextDef(ctx.getId(), ctx.getLabel(), "context", ctx.getType()));

        for (CategorySummary cat : DNetRestClient
            .doGET(baseURL + "/context/" + ctx.getId(), CategorySummaryList.class)) {
            contextMapper.put(cat.getId(), new ContextDef(cat.getId(), cat.getLabel(), "category", ""));

            // concepts are fetched only for the categories that declare them
            if (cat.isHasConcept()) {
                addConcepts(
                    contextMapper,
                    DNetRestClient.doGET(baseURL + "/context/category/" + cat.getId(), ConceptSummaryList.class));
            }
        }
    }
    return contextMapper;
}

/**
 * Registers the given concepts and, depth-first, all their nested sub-concepts.
 * <p>
 * Recursion replaces a fixed two-level unrolling, so concepts nested deeper than that are no
 * longer dropped.
 *
 * @param contextMapper the mapper to populate
 * @param concepts the concepts to register, must not be {@code null}
 */
private static void addConcepts(final ContextMapper contextMapper, final Iterable<ConceptSummary> concepts) {
    for (ConceptSummary concept : concepts) {
        contextMapper.put(concept.getId(), new ContextDef(concept.getId(), concept.getLabel(), "concept", ""));

        // the flag may be set while the nested list is missing: guard against the null list
        if (concept.isHasSubConcept() && concept.getConcepts() != null) {
            addConcepts(contextMapper, concept.getConcepts());
        }
    }
}
@Deprecated
public static ContextMapper fromIS(final String isLookupUrl)
throws DocumentException, ISLookUpException, SAXException {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
@ -32,6 +72,7 @@ public class ContextMapper extends HashMap<String, ContextDef> implements Serial
return fromXml(sb.toString());
}
@Deprecated
public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException {
final ContextMapper contextMapper = new ContextMapper();

View File

@ -9,6 +9,10 @@
<name>isLookupUrl</name>
<description>URL for the isLookup service</description>
</property>
<property>
<name>contextApiBaseUrl</name>
<description>base URL of the context API</description>
</property>
<property>
<name>relPartitions</name>
<description>number of partitions for the relations Dataset</description>
@ -589,7 +593,7 @@
</spark-opts>
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
<arg>--outputPath</arg><arg>${workingDir}/xml</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--contextApiBaseUrl</arg><arg>${contextApiBaseUrl}</arg>
</spark>
<ok to="should_index"/>
<error to="Kill"/>