solr index refactoring
This commit is contained in:
parent
0538c16484
commit
ab1d2c35e4
|
@ -11,6 +11,9 @@ import eu.dnetlib.common.index.solr.SolrService;
|
|||
@SpringBootApplication
|
||||
public class IndexManagerApplication extends AbstractDnetApp {
|
||||
|
||||
@Value("${solr.cloud}")
|
||||
private boolean solrCloud;
|
||||
|
||||
@Value("${solr.urls}")
|
||||
private String[] solrUrls;
|
||||
|
||||
|
@ -20,6 +23,6 @@ public class IndexManagerApplication extends AbstractDnetApp {
|
|||
|
||||
@Bean
|
||||
public SolrService solrService() {
|
||||
return new SolrService(solrUrls);
|
||||
return new SolrService(solrCloud, solrUrls);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,6 +45,9 @@ public class WfExecutorApplication extends AbstractDnetApp {
|
|||
@Value("${mdstores.data.datasource.password}")
|
||||
private String databasePassword;
|
||||
|
||||
@Value("${solr.cloud}")
|
||||
private boolean solrCloud;
|
||||
|
||||
@Value("${solr.urls}")
|
||||
private String[] solrUrls;
|
||||
|
||||
|
@ -70,7 +73,7 @@ public class WfExecutorApplication extends AbstractDnetApp {
|
|||
|
||||
@Bean
|
||||
public SolrService solrService() {
|
||||
return new SolrService(solrUrls);
|
||||
return new SolrService(solrCloud, solrUrls);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -4,8 +4,6 @@ import java.io.Serializable;
|
|||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.EnumType;
|
||||
import jakarta.persistence.Enumerated;
|
||||
import jakarta.persistence.Id;
|
||||
import jakarta.persistence.IdClass;
|
||||
import jakarta.persistence.Table;
|
||||
|
@ -31,50 +29,9 @@ public class IndexField implements Serializable {
|
|||
@Column(name = "xpath")
|
||||
private String xpath;
|
||||
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "type")
|
||||
private IndexFieldType type;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "indexable")
|
||||
private boolean indexable;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "result")
|
||||
private boolean result;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "header")
|
||||
private boolean header;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "stat")
|
||||
private boolean stat;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "tokenizable")
|
||||
private boolean tokenizable;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "multivalued")
|
||||
private boolean multiValued;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "stored")
|
||||
private boolean stored;
|
||||
|
||||
// TODO (HIGH PRIORITY): DELETE ??
|
||||
@Deprecated
|
||||
@Column(name = "copy")
|
||||
private boolean copy;
|
||||
|
||||
public String getIndexId() {
|
||||
return indexId;
|
||||
}
|
||||
|
@ -107,92 +64,12 @@ public class IndexField implements Serializable {
|
|||
this.constant = constant;
|
||||
}
|
||||
|
||||
public IndexFieldType getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(final IndexFieldType type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isIndexable() {
|
||||
return indexable;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setIndexable(final boolean indexable) {
|
||||
this.indexable = indexable;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setResult(final boolean result) {
|
||||
this.result = result;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setHeader(final boolean header) {
|
||||
this.header = header;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isStat() {
|
||||
return stat;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setStat(final boolean stat) {
|
||||
this.stat = stat;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isTokenizable() {
|
||||
return tokenizable;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setTokenizable(final boolean tokenizable) {
|
||||
this.tokenizable = tokenizable;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isMultiValued() {
|
||||
return multiValued;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setMultiValued(final boolean multiValued) {
|
||||
this.multiValued = multiValued;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isStored() {
|
||||
return stored;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setStored(final boolean stored) {
|
||||
this.stored = stored;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public boolean isCopy() {
|
||||
return copy;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setCopy(final boolean copy) {
|
||||
this.copy = copy;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
package eu.dnetlib.domain.index;
|
||||
|
||||
public enum IndexFieldType {
|
||||
STRING, DOUBLE, BOOLEAN, LONG, DATE, DATETIME
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
package eu.dnetlib.common.index.solr;
|
||||
|
||||
import eu.dnetlib.domain.index.IndexField;
|
||||
import jakarta.persistence.Transient;
|
||||
|
||||
public class SolrField extends IndexField {
|
||||
|
||||
private static final long serialVersionUID = -8910762200990817492L;
|
||||
|
||||
@Transient
|
||||
private final String type;
|
||||
@Transient
|
||||
private final boolean multiValued;
|
||||
|
||||
public SolrField(final IndexField field, final String type, final boolean multiValued) {
|
||||
setName(field.getName());
|
||||
setConstant(field.getConstant());
|
||||
setIndexId(field.getIndexId());
|
||||
setXpath(field.getXpath());
|
||||
setResult(field.isResult());
|
||||
this.type = type;
|
||||
this.multiValued = multiValued;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public boolean isMultiValued() {
|
||||
return multiValued;
|
||||
}
|
||||
|
||||
}
|
|
@ -2,15 +2,16 @@ package eu.dnetlib.common.index.solr;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.lang3.BooleanUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
|
@ -18,11 +19,11 @@ import org.apache.solr.client.solrj.SolrQuery;
|
|||
import org.apache.solr.client.solrj.SolrQuery.ORDER;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.Http2SolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.UpdateResponse;
|
||||
import org.apache.solr.client.solrj.response.schema.SchemaRepresentation;
|
||||
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
|
@ -33,10 +34,8 @@ import org.dom4j.Node;
|
|||
|
||||
import eu.dnetlib.domain.index.IndexConfiguration;
|
||||
import eu.dnetlib.domain.index.IndexField;
|
||||
import eu.dnetlib.domain.index.IndexFieldType;
|
||||
import eu.dnetlib.errors.DnetException;
|
||||
import eu.dnetlib.errors.DnetRuntimeException;
|
||||
import eu.dnetlib.utils.DateUtils;
|
||||
|
||||
public class SolrService {
|
||||
|
||||
|
@ -46,9 +45,11 @@ public class SolrService {
|
|||
|
||||
// https://solr.apache.org/guide/solr/latest/deployment-guide/solrj.html
|
||||
|
||||
private final boolean cloud;
|
||||
private final String[] solrUrls;
|
||||
|
||||
public SolrService(final String... solrUrls) {
|
||||
public SolrService(final boolean cloud, final String... solrUrls) {
|
||||
this.cloud = cloud;
|
||||
this.solrUrls = solrUrls;
|
||||
}
|
||||
|
||||
|
@ -60,8 +61,6 @@ public class SolrService {
|
|||
query.setStart(from);
|
||||
query.setRows(limit);
|
||||
|
||||
// TODO (HIGH PRIORITY) the result fields should be obtained using findSolrSchema()
|
||||
|
||||
conf.getFields()
|
||||
.stream()
|
||||
.filter(IndexField::isResult)
|
||||
|
@ -89,7 +88,8 @@ public class SolrService {
|
|||
public int indexRecord(final IndexConfiguration conf, final String xml, final boolean commit) throws DnetException {
|
||||
|
||||
try (final SolrClient solr = newSolrClient()) {
|
||||
final UpdateResponse updateResponse = solr.add(conf.getId(), asSolrDocument(conf, xml));
|
||||
final Set<SolrField> solrFields = findSolrFields(conf);
|
||||
final UpdateResponse updateResponse = solr.add(conf.getId(), asSolrDocument(xml, solrFields));
|
||||
if (commit) {
|
||||
forceCommit(solr, conf.getId());
|
||||
}
|
||||
|
@ -103,7 +103,8 @@ public class SolrService {
|
|||
|
||||
public int indexRecords(final IndexConfiguration conf, final Stream<String> inputStream) throws DnetException {
|
||||
try (final SolrClient solr = newSolrClient()) {
|
||||
final Iterator<SolrInputDocument> iterator = inputStream.map(s -> asSolrDocument(conf, s)).iterator();
|
||||
final Set<SolrField> solrFields = findSolrFields(conf);
|
||||
final Iterator<SolrInputDocument> iterator = inputStream.map(xml -> asSolrDocument(xml, solrFields)).iterator();
|
||||
final UpdateResponse updateResponse = solr.add(conf.getId(), iterator);
|
||||
forceCommit(solr, conf.getId());
|
||||
return updateResponse.getResponse().size();
|
||||
|
@ -123,21 +124,19 @@ public class SolrService {
|
|||
}
|
||||
|
||||
private SolrClient newSolrClient() {
|
||||
return new CloudSolrClient.Builder(Arrays.asList(solrUrls)).build();
|
||||
return cloud ? new CloudSolrClient.Builder(Arrays.asList(solrUrls)).build() : new Http2SolrClient.Builder(solrUrls[0]).build();
|
||||
}
|
||||
|
||||
/**
 * Issues a commit on the given collection by delegating to {@code client.commit(solrCollection)}.
 *
 * @param client
 *            the Solr client used to send the commit
 * @param solrCollection
 *            the name of the collection to commit
 * @throws SolrServerException
 *             propagated unchanged from the client call
 * @throws IOException
 *             propagated unchanged from the client call
 */
private void forceCommit(final SolrClient client, final String solrCollection) throws SolrServerException, IOException {
|
||||
client.commit(solrCollection);
|
||||
}
|
||||
|
||||
private SolrInputDocument asSolrDocument(final IndexConfiguration conf, final String xml) {
|
||||
private SolrInputDocument asSolrDocument(final String xml, final Set<SolrField> solrFields) {
|
||||
try {
|
||||
final Document xmlDoc = DocumentHelper.parseText(xml);
|
||||
|
||||
// TODO (HIGH PRIORITY) the multiValued fields should be obtained using findSolrSchema()
|
||||
|
||||
final SolrInputDocument doc = new SolrInputDocument();
|
||||
conf.getFields()
|
||||
solrFields
|
||||
.stream()
|
||||
.filter(f -> StringUtils.isNotBlank(f.getName()))
|
||||
.forEach(f -> {
|
||||
|
@ -166,14 +165,16 @@ public class SolrService {
|
|||
}
|
||||
}
|
||||
|
||||
private Object convertToType(final String s, final IndexFieldType type) {
|
||||
return switch (type) {
|
||||
case STRING -> s;
|
||||
case LONG -> NumberUtils.toLong(s);
|
||||
case DOUBLE -> NumberUtils.toDouble(s);
|
||||
case BOOLEAN -> BooleanUtils.toBoolean(s);
|
||||
case DATE -> DateUtils.parseDate(s);
|
||||
case DATETIME -> DateUtils.parseDateTime(s);
|
||||
private Object convertToType(final String s, final String solrType) {
|
||||
// TODO (HIGH PRIORITY): complete the mapping
|
||||
|
||||
return switch (solrType) {
|
||||
case "string", "text_general" -> s;
|
||||
// case LONG -> NumberUtils.toLong(s);
|
||||
// case DOUBLE -> NumberUtils.toDouble(s);
|
||||
case "boolean" -> BooleanUtils.toBoolean(s);
|
||||
// case DATE -> DateUtils.parseDate(s);
|
||||
// case DATETIME -> DateUtils.parseDateTime(s);
|
||||
default -> s;
|
||||
};
|
||||
}
|
||||
|
@ -196,13 +197,28 @@ public class SolrService {
|
|||
}
|
||||
}
|
||||
|
||||
protected SchemaRepresentation findSolrSchema(final IndexConfiguration conf) throws DnetException {
|
||||
protected Set<SolrField> findSolrFields(final IndexConfiguration conf) throws DnetException {
|
||||
// TODO (HIGH PRIORITY) test and use to obtain info relative to the schema
|
||||
|
||||
// TODO (HIGH PRIORITY) the method should return also the not configured fields ???
|
||||
try (final SolrClient solr = newSolrClient()) {
|
||||
final SchemaRequest request = new SchemaRequest();
|
||||
final SchemaResponse response = request.process(solr, conf.getId());
|
||||
return response.getSchemaRepresentation();
|
||||
|
||||
final Set<SolrField> res = new HashSet<>();
|
||||
for (final Map<String, Object> map : response.getSchemaRepresentation().getFields()) {
|
||||
final String name = map.getOrDefault("name", "").toString();
|
||||
for (final IndexField f : conf.getFields()) {
|
||||
if (StringUtils.equals(f.getName(), name)) {
|
||||
final String type = map.getOrDefault("type", "string").toString();
|
||||
final boolean multivalued = BooleanUtils.toBoolean(map.getOrDefault("multiValued", "false").toString());
|
||||
res.add(new SolrField(f, type, multivalued));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
|
||||
} catch (final Throwable e) {
|
||||
log.error("error deleting index: " + conf.getId(), e);
|
||||
throw new DnetException("error deleting commit: " + conf.getId(), e);
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
package eu.dnetlib.common.index.solr;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.domain.index.IndexConfiguration;
|
||||
import eu.dnetlib.errors.DnetException;
|
||||
|
||||
class SolrServiceTest {
|
||||
|
||||
// TODO (HIGH PRIORITY) Complete the tests
|
||||
|
||||
private SolrService solr;
|
||||
private IndexConfiguration conf;
|
||||
|
||||
private static final String TEST_INDEX = "gettingstarted";
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
solr = new SolrService(false, "http://localhost:8983/solr");
|
||||
conf = new IndexConfiguration();
|
||||
conf.setId(TEST_INDEX);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testSolrService() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testQuery() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIndexRecord() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testIndexRecords() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCommit() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testExistsIndex() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCreateIndex() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testFindSolrSchema() throws DnetException, JsonProcessingException {
|
||||
|
||||
final Set<SolrField> solrFields = solr.findSolrFields(conf);
|
||||
|
||||
System.out.println(new ObjectMapper().writeValueAsString(solrFields));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDeleteIndex() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDeleteByQuery() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDeleteOldRecords() {
|
||||
fail("Not yet implemented");
|
||||
}
|
||||
|
||||
}
|
|
@ -200,10 +200,13 @@ services:
|
|||
|
||||
solr:
|
||||
image: solr:9.4.0
|
||||
ports:
|
||||
- ${SOLR_PORT}:${SOLR_PORT}
|
||||
expose:
|
||||
- ${SOLR_PORT}
|
||||
networks:
|
||||
- backend
|
||||
- frontend
|
||||
volumes:
|
||||
- solrdata:/var/solr
|
||||
command:
|
||||
|
|
2
start.sh
2
start.sh
|
@ -17,7 +17,7 @@ export PG_VOCS_DB=dnet_vocabularies
|
|||
export PG_CONTEXTS_DB=dnet_contexts
|
||||
export PG_MDSTORES_DATA_DB=dnet_mdstores_data
|
||||
|
||||
export COMPOSE_PROFILES=base,mail,dsm,vocs,mdstores,wfs
|
||||
export COMPOSE_PROFILES=base,index
|
||||
#export COMPOSE_PROFILES=base,mail,dsm,vocs,mdstores,wfs,index,contexts,ui
|
||||
|
||||
docker-compose -f docker-compose.dev.yml up --force-recreate --build
|
||||
|
|
Loading…
Reference in New Issue