[graph provision] expand the context info for each entity type

This commit is contained in:
Claudio Atzori 2024-07-16 11:43:48 +02:00
parent 5aa7847ea6
commit beb93cdfe9
6 changed files with 91 additions and 49 deletions

View File

@ -5,7 +5,6 @@ import java.io.StringReader;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.solr.ExternalReference;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
import eu.dnetlib.dhp.schema.solr.Country; import eu.dnetlib.dhp.schema.solr.Country;
import eu.dnetlib.dhp.schema.solr.Datasource; import eu.dnetlib.dhp.schema.solr.Datasource;
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines; import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
import eu.dnetlib.dhp.schema.solr.ExternalReference;
import eu.dnetlib.dhp.schema.solr.Instance; import eu.dnetlib.dhp.schema.solr.Instance;
import eu.dnetlib.dhp.schema.solr.Journal; import eu.dnetlib.dhp.schema.solr.Journal;
import eu.dnetlib.dhp.schema.solr.Measure; import eu.dnetlib.dhp.schema.solr.Measure;
@ -562,10 +562,16 @@ public class ProvisionModelSupport {
.orElse(null); .orElse(null);
} }
private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) { private static List<ExternalReference> mapExternalReference(
return Optional.ofNullable(externalReference) List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
.map(ext -> ext.stream() return Optional
.map(e -> ExternalReference.newInstance( .ofNullable(externalReference)
.map(
ext -> ext
.stream()
.map(
e -> ExternalReference
.newInstance(
e.getSitename(), e.getSitename(),
e.getLabel(), e.getLabel(),
e.getAlternateLabel(), e.getAlternateLabel(),
@ -573,8 +579,8 @@ public class ProvisionModelSupport {
mapCodeLabel(e.getQualifier()), mapCodeLabel(e.getQualifier()),
e.getRefidentifier(), e.getRefidentifier(),
e.getQuery())) e.getQuery()))
.collect(Collectors.toList())) .collect(Collectors.toList()))
.orElse(Lists.newArrayList()); .orElse(Lists.newArrayList());
} }
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList, private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,

View File

@ -219,6 +219,13 @@ public class XmlRecordFactory implements Serializable {
if (entity.getMeasures() != null) { if (entity.getMeasures() != null) {
metadata.addAll(measuresAsXml(entity.getMeasures())); metadata.addAll(measuresAsXml(entity.getMeasures()));
} }
if (entity.getContext() != null) {
contexts.addAll(entity.getContext().stream().map(Context::getId).collect(Collectors.toList()));
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
if (contexts.contains("dh-ch::subcommunity::2")) {
contexts.add("clarin");
}
}
if (ModelSupport.isResult(type)) { if (ModelSupport.isResult(type)) {
final Result r = (Result) entity; final Result r = (Result) entity;
@ -245,14 +252,6 @@ public class XmlRecordFactory implements Serializable {
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (r.getContext() != null) {
contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
if (contexts.contains("dh-ch::subcommunity::2")) {
contexts.add("clarin");
}
}
if (r.getTitle() != null) { if (r.getTitle() != null) {
metadata metadata
.addAll( .addAll(
@ -1603,9 +1602,7 @@ public class XmlRecordFactory implements Serializable {
private List<String> buildContexts(final String type, final Set<String> contexts) { private List<String> buildContexts(final String type, final Set<String> contexts) {
final List<String> res = Lists.newArrayList(); final List<String> res = Lists.newArrayList();
if (contextMapper != null if (contextMapper != null && !contextMapper.isEmpty()) {
&& !contextMapper.isEmpty()
&& MainEntityType.result.toString().equals(type)) {
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot"); XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");

View File

@ -1,12 +1,13 @@
package eu.dnetlib.dhp.oa.provision; package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URI; import java.net.URI;
import java.nio.file.Path; import java.nio.file.Path;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.Mockito; import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.junit.jupiter.api.Assertions.assertEquals; import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class SolrConfigExploreTest { public class SolrConfigExploreTest {
@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
conf.registerKryoClasses(new Class[] { conf.registerKryoClasses(new Class[] {
SerializableSolrInputDocument.class SerializableSolrInputDocument.class
}); });
conf.setMaster("local[1]"); conf.setMaster("local[1]");
@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
spark = SparkSession spark = SparkSession
.builder() .builder()
.appName(SolrConfigExploreTest.class.getSimpleName()) .appName(SolrConfigExploreTest.class.getSimpleName())
.config(conf) .config(conf)
.getOrCreate(); .getOrCreate();
// random unassigned HTTP port // random unassigned HTTP port
final int jettyPort = 0; final int jettyPort = 0;
@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
log log
.info( .info(
CollectionAdminRequest.ClusterStatus CollectionAdminRequest.ClusterStatus
.getClusterStatus() .getClusterStatus()
.process(miniCluster.getSolrClient()) .process(miniCluster.getSolrClient())
.toString()); .toString());
NamedList<Object> res = createCollection( NamedList<Object> res = createCollection(
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME); miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
res.forEach(o -> log.info(o.toString())); res.forEach(o -> log.info(o.toString()));
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION); // miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
res = createCollection( res = createCollection(
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME); miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
res.forEach(o -> log.info(o.toString())); res.forEach(o -> log.info(o.toString()));
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress()); admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
CollectionAdminResponse rsp = (CollectionAdminResponse) admin CollectionAdminResponse rsp = (CollectionAdminResponse) admin
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION); .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
assertEquals(0, rsp.getStatus()); assertEquals(0, rsp.getStatus());
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION); rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
assertEquals(0, rsp.getStatus()); assertEquals(0, rsp.getStatus());
log log
.info( .info(
CollectionAdminRequest.ClusterStatus CollectionAdminRequest.ClusterStatus
.getClusterStatus() .getClusterStatus()
.process(miniCluster.getSolrClient()) .process(miniCluster.getSolrClient())
.toString()); .toString());
} }
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
.run(isLookupClient); .run(isLookupClient);
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); Assertions
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
String[] queryStrings = { String[] queryStrings = {
"cancer", "cancer",
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
// System.out.println(rsp.getExplainMap()); // System.out.println(rsp.getExplainMap());
for (SolrDocument doc : rsp.getResults()) { for (SolrDocument doc : rsp.getResults()) {
log.info( log
.info(
doc.get("score") + "\t" + doc.get("score") + "\t" +
doc.get("__indexrecordidentifier") + "\t" + doc.get("__indexrecordidentifier") + "\t" +
doc.get("resultidentifier") + "\t" + doc.get("resultidentifier") + "\t" +
@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
} }
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards, protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
int replicationFactor, int maxShardsPerNode, String configName) throws Exception { int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
ModifiableSolrParams modParams = new ModifiableSolrParams(); ModifiableSolrParams modParams = new ModifiableSolrParams();
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name()); modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
modParams.set("name", name); modParams.set("name", name);

View File

@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
.run(isLookupClient); .run(isLookupClient);
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); Assertions
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
String[] queryStrings = { String[] queryStrings = {
"cancer", "cancer",

View File

@ -1,8 +1,7 @@
package eu.dnetlib.dhp.oa.provision; package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
@ -22,6 +21,7 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.oa.provision.utils.ContextDef;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -51,7 +51,7 @@ public class XmlRecordFactoryTest {
assertNotNull(doc); assertNotNull(doc);
// System.out.println(doc.asXML()); System.out.println(doc.asXML());
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
@ -267,4 +267,39 @@ public class XmlRecordFactoryTest {
} }
/**
 * Checks that the context declared on a {@link Project} is rendered in the XML record
 * as a nested {@code context / category / concept} structure.
 *
 * The project is read from the {@code project_aka.json} test resource, which is expected
 * to carry exactly one context with id {@code dh-ch::projects::2}.
 *
 * @throws DocumentException when the produced XML cannot be parsed
 * @throws IOException when the JSON resource cannot be read or deserialized
 */
@Test
public void test_AKA_project() throws DocumentException, IOException {
	// Register the three levels that the XPath assertions below look for.
	final ContextMapper mapper = new ContextMapper();
	mapper.put("dh-ch", new ContextDef("dh-ch", "Digital Humanities and Cultural Heritage", "context", "community"));
	mapper.put("dh-ch::projects", new ContextDef("dh-ch::projects", "DH-CH Projects", "category", ""));
	mapper.put("dh-ch::projects::2", new ContextDef("dh-ch::projects::2", "ARIADNE", "concept", "community"));

	final XmlRecordFactory factory = new XmlRecordFactory(
		mapper,
		false,
		PayloadConverterJob.schemaLocation);

	// Load the fixture and check its precondition before exercising the factory.
	final String json = IOUtils.toString(getClass().getResourceAsStream("project_aka.json"));
	final Project project = OBJECT_MAPPER.readValue(json, Project.class);

	assertNotNull(project.getContext());
	assertEquals(1, project.getContext().size());
	assertEquals("dh-ch::projects::2", project.getContext().get(0).getId());

	// Serialize the project and parse the result back for XPath inspection.
	final String record = factory.build(new JoinedEntity(project));
	assertNotNull(record);

	final Document parsed = new SAXReader().read(new StringReader(record));
	assertNotNull(parsed);

	assertEquals("dh-ch", parsed.valueOf("//context/@id"));
	assertEquals("dh-ch::projects", parsed.valueOf("//context/category/@id"));
	assertEquals("dh-ch::projects::2", parsed.valueOf("//context/category/concept/@id"));
}
} }

File diff suppressed because one or more lines are too long