[graph provision] expand the context info for each entity type
This commit is contained in:
parent
5aa7847ea6
commit
beb93cdfe9
|
@ -5,7 +5,6 @@ import java.io.StringReader;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
|
@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
|
||||||
import eu.dnetlib.dhp.schema.solr.Country;
|
import eu.dnetlib.dhp.schema.solr.Country;
|
||||||
import eu.dnetlib.dhp.schema.solr.Datasource;
|
import eu.dnetlib.dhp.schema.solr.Datasource;
|
||||||
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
|
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
||||||
import eu.dnetlib.dhp.schema.solr.Instance;
|
import eu.dnetlib.dhp.schema.solr.Instance;
|
||||||
import eu.dnetlib.dhp.schema.solr.Journal;
|
import eu.dnetlib.dhp.schema.solr.Journal;
|
||||||
import eu.dnetlib.dhp.schema.solr.Measure;
|
import eu.dnetlib.dhp.schema.solr.Measure;
|
||||||
|
@ -562,10 +562,16 @@ public class ProvisionModelSupport {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
private static List<ExternalReference> mapExternalReference(
|
||||||
return Optional.ofNullable(externalReference)
|
List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
||||||
.map(ext -> ext.stream()
|
return Optional
|
||||||
.map(e -> ExternalReference.newInstance(
|
.ofNullable(externalReference)
|
||||||
|
.map(
|
||||||
|
ext -> ext
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
e -> ExternalReference
|
||||||
|
.newInstance(
|
||||||
e.getSitename(),
|
e.getSitename(),
|
||||||
e.getLabel(),
|
e.getLabel(),
|
||||||
e.getAlternateLabel(),
|
e.getAlternateLabel(),
|
||||||
|
@ -573,8 +579,8 @@ public class ProvisionModelSupport {
|
||||||
mapCodeLabel(e.getQualifier()),
|
mapCodeLabel(e.getQualifier()),
|
||||||
e.getRefidentifier(),
|
e.getRefidentifier(),
|
||||||
e.getQuery()))
|
e.getQuery()))
|
||||||
.collect(Collectors.toList()))
|
.collect(Collectors.toList()))
|
||||||
.orElse(Lists.newArrayList());
|
.orElse(Lists.newArrayList());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
|
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
|
||||||
|
|
|
@ -219,6 +219,13 @@ public class XmlRecordFactory implements Serializable {
|
||||||
if (entity.getMeasures() != null) {
|
if (entity.getMeasures() != null) {
|
||||||
metadata.addAll(measuresAsXml(entity.getMeasures()));
|
metadata.addAll(measuresAsXml(entity.getMeasures()));
|
||||||
}
|
}
|
||||||
|
if (entity.getContext() != null) {
|
||||||
|
contexts.addAll(entity.getContext().stream().map(Context::getId).collect(Collectors.toList()));
|
||||||
|
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
||||||
|
if (contexts.contains("dh-ch::subcommunity::2")) {
|
||||||
|
contexts.add("clarin");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ModelSupport.isResult(type)) {
|
if (ModelSupport.isResult(type)) {
|
||||||
final Result r = (Result) entity;
|
final Result r = (Result) entity;
|
||||||
|
@ -245,14 +252,6 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r.getContext() != null) {
|
|
||||||
contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
|
|
||||||
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
|
||||||
if (contexts.contains("dh-ch::subcommunity::2")) {
|
|
||||||
contexts.add("clarin");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r.getTitle() != null) {
|
if (r.getTitle() != null) {
|
||||||
metadata
|
metadata
|
||||||
.addAll(
|
.addAll(
|
||||||
|
@ -1603,9 +1602,7 @@ public class XmlRecordFactory implements Serializable {
|
||||||
private List<String> buildContexts(final String type, final Set<String> contexts) {
|
private List<String> buildContexts(final String type, final Set<String> contexts) {
|
||||||
final List<String> res = Lists.newArrayList();
|
final List<String> res = Lists.newArrayList();
|
||||||
|
|
||||||
if (contextMapper != null
|
if (contextMapper != null && !contextMapper.isEmpty()) {
|
||||||
&& !contextMapper.isEmpty()
|
|
||||||
&& MainEntityType.result.toString().equals(type)) {
|
|
||||||
|
|
||||||
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
|
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
import org.apache.solr.client.solrj.SolrQuery;
|
||||||
|
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class SolrConfigExploreTest {
|
public class SolrConfigExploreTest {
|
||||||
|
@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
||||||
conf.registerKryoClasses(new Class[] {
|
conf.registerKryoClasses(new Class[] {
|
||||||
SerializableSolrInputDocument.class
|
SerializableSolrInputDocument.class
|
||||||
});
|
});
|
||||||
|
|
||||||
conf.setMaster("local[1]");
|
conf.setMaster("local[1]");
|
||||||
|
@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
|
||||||
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
|
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
|
||||||
|
|
||||||
spark = SparkSession
|
spark = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.appName(SolrConfigExploreTest.class.getSimpleName())
|
.appName(SolrConfigExploreTest.class.getSimpleName())
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
|
|
||||||
// random unassigned HTTP port
|
// random unassigned HTTP port
|
||||||
final int jettyPort = 0;
|
final int jettyPort = 0;
|
||||||
|
@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
|
||||||
|
|
||||||
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
|
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
|
||||||
log
|
log
|
||||||
.info(
|
.info(
|
||||||
CollectionAdminRequest.ClusterStatus
|
CollectionAdminRequest.ClusterStatus
|
||||||
.getClusterStatus()
|
.getClusterStatus()
|
||||||
.process(miniCluster.getSolrClient())
|
.process(miniCluster.getSolrClient())
|
||||||
.toString());
|
.toString());
|
||||||
|
|
||||||
NamedList<Object> res = createCollection(
|
NamedList<Object> res = createCollection(
|
||||||
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||||
res.forEach(o -> log.info(o.toString()));
|
res.forEach(o -> log.info(o.toString()));
|
||||||
|
|
||||||
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
|
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
|
||||||
|
|
||||||
res = createCollection(
|
res = createCollection(
|
||||||
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||||
res.forEach(o -> log.info(o.toString()));
|
res.forEach(o -> log.info(o.toString()));
|
||||||
|
|
||||||
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
|
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
|
||||||
CollectionAdminResponse rsp = (CollectionAdminResponse) admin
|
CollectionAdminResponse rsp = (CollectionAdminResponse) admin
|
||||||
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
||||||
assertEquals(0, rsp.getStatus());
|
assertEquals(0, rsp.getStatus());
|
||||||
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
|
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
|
||||||
assertEquals(0, rsp.getStatus());
|
assertEquals(0, rsp.getStatus());
|
||||||
|
|
||||||
log
|
log
|
||||||
.info(
|
.info(
|
||||||
CollectionAdminRequest.ClusterStatus
|
CollectionAdminRequest.ClusterStatus
|
||||||
.getClusterStatus()
|
.getClusterStatus()
|
||||||
.process(miniCluster.getSolrClient())
|
.process(miniCluster.getSolrClient())
|
||||||
.toString());
|
.toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
|
||||||
|
|
||||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||||
.run(isLookupClient);
|
.run(isLookupClient);
|
||||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
Assertions
|
||||||
|
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||||
|
|
||||||
String[] queryStrings = {
|
String[] queryStrings = {
|
||||||
"cancer",
|
"cancer",
|
||||||
|
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
|
||||||
// System.out.println(rsp.getExplainMap());
|
// System.out.println(rsp.getExplainMap());
|
||||||
|
|
||||||
for (SolrDocument doc : rsp.getResults()) {
|
for (SolrDocument doc : rsp.getResults()) {
|
||||||
log.info(
|
log
|
||||||
|
.info(
|
||||||
doc.get("score") + "\t" +
|
doc.get("score") + "\t" +
|
||||||
doc.get("__indexrecordidentifier") + "\t" +
|
doc.get("__indexrecordidentifier") + "\t" +
|
||||||
doc.get("resultidentifier") + "\t" +
|
doc.get("resultidentifier") + "\t" +
|
||||||
|
@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
||||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||||
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
||||||
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
||||||
modParams.set("name", name);
|
modParams.set("name", name);
|
||||||
|
|
|
@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
|
||||||
|
|
||||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||||
.run(isLookupClient);
|
.run(isLookupClient);
|
||||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
Assertions
|
||||||
|
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||||
|
|
||||||
String[] queryStrings = {
|
String[] queryStrings = {
|
||||||
"cancer",
|
"cancer",
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
@ -22,6 +21,7 @@ import com.google.common.collect.Lists;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ContextDef;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
@ -51,7 +51,7 @@ public class XmlRecordFactoryTest {
|
||||||
|
|
||||||
assertNotNull(doc);
|
assertNotNull(doc);
|
||||||
|
|
||||||
// System.out.println(doc.asXML());
|
System.out.println(doc.asXML());
|
||||||
|
|
||||||
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
|
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
|
||||||
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
|
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
|
||||||
|
@ -267,4 +267,39 @@ public class XmlRecordFactoryTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_AKA_project() throws DocumentException, IOException {
|
||||||
|
final ContextMapper contextMapper = new ContextMapper();
|
||||||
|
|
||||||
|
contextMapper
|
||||||
|
.put("dh-ch", new ContextDef("dh-ch", "Digital Humanities and Cultural Heritage", "context", "community"));
|
||||||
|
contextMapper.put("dh-ch::projects", new ContextDef("dh-ch::projects", "DH-CH Projects", "category", ""));
|
||||||
|
contextMapper
|
||||||
|
.put("dh-ch::projects::2", new ContextDef("dh-ch::projects::2", "ARIADNE", "concept", "community"));
|
||||||
|
|
||||||
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||||
|
PayloadConverterJob.schemaLocation);
|
||||||
|
|
||||||
|
final Project p = OBJECT_MAPPER
|
||||||
|
.readValue(
|
||||||
|
IOUtils.toString(getClass().getResourceAsStream("project_aka.json")),
|
||||||
|
Project.class);
|
||||||
|
|
||||||
|
assertNotNull(p.getContext());
|
||||||
|
assertEquals(1, p.getContext().size());
|
||||||
|
assertEquals("dh-ch::projects::2", p.getContext().get(0).getId());
|
||||||
|
|
||||||
|
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
|
||||||
|
|
||||||
|
assertNotNull(xml);
|
||||||
|
|
||||||
|
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||||
|
|
||||||
|
assertNotNull(doc);
|
||||||
|
|
||||||
|
assertEquals("dh-ch", doc.valueOf("//context/@id"));
|
||||||
|
assertEquals("dh-ch::projects", doc.valueOf("//context/category/@id"));
|
||||||
|
assertEquals("dh-ch::projects::2", doc.valueOf("//context/category/concept/@id"));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue