[graph provision] expand the context info for each entity type
This commit is contained in:
parent
5aa7847ea6
commit
beb93cdfe9
|
@ -5,7 +5,6 @@ import java.io.StringReader;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
|
@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
|
|||
import eu.dnetlib.dhp.schema.solr.Country;
|
||||
import eu.dnetlib.dhp.schema.solr.Datasource;
|
||||
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
|
||||
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
||||
import eu.dnetlib.dhp.schema.solr.Instance;
|
||||
import eu.dnetlib.dhp.schema.solr.Journal;
|
||||
import eu.dnetlib.dhp.schema.solr.Measure;
|
||||
|
@ -562,10 +562,16 @@ public class ProvisionModelSupport {
|
|||
.orElse(null);
|
||||
}
|
||||
|
||||
private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
||||
return Optional.ofNullable(externalReference)
|
||||
.map(ext -> ext.stream()
|
||||
.map(e -> ExternalReference.newInstance(
|
||||
private static List<ExternalReference> mapExternalReference(
|
||||
List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
||||
return Optional
|
||||
.ofNullable(externalReference)
|
||||
.map(
|
||||
ext -> ext
|
||||
.stream()
|
||||
.map(
|
||||
e -> ExternalReference
|
||||
.newInstance(
|
||||
e.getSitename(),
|
||||
e.getLabel(),
|
||||
e.getAlternateLabel(),
|
||||
|
@ -573,8 +579,8 @@ public class ProvisionModelSupport {
|
|||
mapCodeLabel(e.getQualifier()),
|
||||
e.getRefidentifier(),
|
||||
e.getQuery()))
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(Lists.newArrayList());
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(Lists.newArrayList());
|
||||
}
|
||||
|
||||
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
|
||||
|
|
|
@ -219,6 +219,13 @@ public class XmlRecordFactory implements Serializable {
|
|||
if (entity.getMeasures() != null) {
|
||||
metadata.addAll(measuresAsXml(entity.getMeasures()));
|
||||
}
|
||||
if (entity.getContext() != null) {
|
||||
contexts.addAll(entity.getContext().stream().map(Context::getId).collect(Collectors.toList()));
|
||||
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
||||
if (contexts.contains("dh-ch::subcommunity::2")) {
|
||||
contexts.add("clarin");
|
||||
}
|
||||
}
|
||||
|
||||
if (ModelSupport.isResult(type)) {
|
||||
final Result r = (Result) entity;
|
||||
|
@ -245,14 +252,6 @@ public class XmlRecordFactory implements Serializable {
|
|||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
if (r.getContext() != null) {
|
||||
contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
|
||||
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
||||
if (contexts.contains("dh-ch::subcommunity::2")) {
|
||||
contexts.add("clarin");
|
||||
}
|
||||
}
|
||||
|
||||
if (r.getTitle() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
|
@ -1603,9 +1602,7 @@ public class XmlRecordFactory implements Serializable {
|
|||
private List<String> buildContexts(final String type, final Set<String> contexts) {
|
||||
final List<String> res = Lists.newArrayList();
|
||||
|
||||
if (contextMapper != null
|
||||
&& !contextMapper.isEmpty()
|
||||
&& MainEntityType.result.toString().equals(type)) {
|
||||
if (contextMapper != null && !contextMapper.isEmpty()) {
|
||||
|
||||
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
|
||||
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
|
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
|
|||
import org.mockito.Mock;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
public class SolrConfigExploreTest {
|
||||
|
@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
|
|||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
||||
conf.registerKryoClasses(new Class[] {
|
||||
SerializableSolrInputDocument.class
|
||||
SerializableSolrInputDocument.class
|
||||
});
|
||||
|
||||
conf.setMaster("local[1]");
|
||||
|
@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
|
|||
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(SolrConfigExploreTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
.builder()
|
||||
.appName(SolrConfigExploreTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
|
||||
// random unassigned HTTP port
|
||||
final int jettyPort = 0;
|
||||
|
@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
|
|||
|
||||
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
|
||||
log
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
|
||||
NamedList<Object> res = createCollection(
|
||||
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
res.forEach(o -> log.info(o.toString()));
|
||||
|
||||
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
|
||||
|
||||
res = createCollection(
|
||||
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
res.forEach(o -> log.info(o.toString()));
|
||||
|
||||
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
|
||||
CollectionAdminResponse rsp = (CollectionAdminResponse) admin
|
||||
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
||||
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
||||
assertEquals(0, rsp.getStatus());
|
||||
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
|
||||
assertEquals(0, rsp.getStatus());
|
||||
|
||||
log
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
|
||||
}
|
||||
|
||||
|
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
|
|||
|
||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||
.run(isLookupClient);
|
||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
Assertions
|
||||
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
|
||||
String[] queryStrings = {
|
||||
"cancer",
|
||||
|
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
|
|||
// System.out.println(rsp.getExplainMap());
|
||||
|
||||
for (SolrDocument doc : rsp.getResults()) {
|
||||
log.info(
|
||||
log
|
||||
.info(
|
||||
doc.get("score") + "\t" +
|
||||
doc.get("__indexrecordidentifier") + "\t" +
|
||||
doc.get("resultidentifier") + "\t" +
|
||||
|
@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
|
|||
}
|
||||
|
||||
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
||||
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
||||
modParams.set("name", name);
|
||||
|
|
|
@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
|
|||
|
||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||
.run(isLookupClient);
|
||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
Assertions
|
||||
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
|
||||
String[] queryStrings = {
|
||||
"cancer",
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
@ -22,6 +21,7 @@ import com.google.common.collect.Lists;
|
|||
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
|
||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ContextDef;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
@ -51,7 +51,7 @@ public class XmlRecordFactoryTest {
|
|||
|
||||
assertNotNull(doc);
|
||||
|
||||
// System.out.println(doc.asXML());
|
||||
System.out.println(doc.asXML());
|
||||
|
||||
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
|
||||
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));
|
||||
|
@ -267,4 +267,39 @@ public class XmlRecordFactoryTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_AKA_project() throws DocumentException, IOException {
|
||||
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
contextMapper
|
||||
.put("dh-ch", new ContextDef("dh-ch", "Digital Humanities and Cultural Heritage", "context", "community"));
|
||||
contextMapper.put("dh-ch::projects", new ContextDef("dh-ch::projects", "DH-CH Projects", "category", ""));
|
||||
contextMapper
|
||||
.put("dh-ch::projects::2", new ContextDef("dh-ch::projects::2", "ARIADNE", "concept", "community"));
|
||||
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
PayloadConverterJob.schemaLocation);
|
||||
|
||||
final Project p = OBJECT_MAPPER
|
||||
.readValue(
|
||||
IOUtils.toString(getClass().getResourceAsStream("project_aka.json")),
|
||||
Project.class);
|
||||
|
||||
assertNotNull(p.getContext());
|
||||
assertEquals(1, p.getContext().size());
|
||||
assertEquals("dh-ch::projects::2", p.getContext().get(0).getId());
|
||||
|
||||
final String xml = xmlRecordFactory.build(new JoinedEntity(p));
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||
|
||||
assertNotNull(doc);
|
||||
|
||||
assertEquals("dh-ch", doc.valueOf("//context/@id"));
|
||||
assertEquals("dh-ch::projects", doc.valueOf("//context/category/@id"));
|
||||
assertEquals("dh-ch::projects::2", doc.valueOf("//context/category/concept/@id"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue