From 0b5bf53b45316792a5e0fba058ebc0979d6b54ce Mon Sep 17 00:00:00 2001 From: Serafeim Chatzopoulos Date: Thu, 23 Feb 2023 12:42:42 +0200 Subject: [PATCH] Remove unnecessary indexed fields from Solr --- .../dhp/oa/provision/XmlIndexingJob.java | 2 +- .../utils/StreamingInputDocumentFactory.java | 25 +- .../dhp/oa/provision/EOSCFuture_Test.java | 3 +- .../provision/IndexRecordTransformerTest.java | 6 +- .../dhp/oa/provision/SolrConfigTest.java | 11 +- .../eu/dnetlib/dhp/oa/provision/fields.xml | 273 +++++++----------- 6 files changed, 120 insertions(+), 200 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index e7dbdbd2b..1560fcbd9 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -151,7 +151,7 @@ public class XmlIndexingJob { .sequenceFile(inputPath, Text.class, Text.class) .map(t -> t._2().toString()) .map(s -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), s)) - .map(s -> new StreamingInputDocumentFactory(version, dsId).parseDocument(s)); + .map(s -> new StreamingInputDocumentFactory().parseDocument(s)); switch (outputFormat) { case SOLR: diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index 36028be9e..b42f9ee83 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -36,10 +36,6 @@ public class 
StreamingInputDocumentFactory { private static final String INDEX_FIELD_PREFIX = "__"; - private static final String DS_VERSION = INDEX_FIELD_PREFIX + "dsversion"; - - private static final String DS_ID = INDEX_FIELD_PREFIX + "dsid"; - private static final String RESULT = "result"; private static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT; @@ -65,20 +61,13 @@ public class StreamingInputDocumentFactory { private final ThreadLocal eventFactory = ThreadLocal .withInitial(XMLEventFactory::newInstance); - private final String version; - - private final String dsId; - private String resultName = DEFAULTDNETRESULT; - public StreamingInputDocumentFactory(final String version, final String dsId) { - this(version, dsId, DEFAULTDNETRESULT); + public StreamingInputDocumentFactory() { + this(DEFAULTDNETRESULT); } - public StreamingInputDocumentFactory( - final String version, final String dsId, final String resultName) { - this.version = version; - this.dsId = dsId; + public StreamingInputDocumentFactory(final String resultName) { this.resultName = resultName; } @@ -111,14 +100,6 @@ public class StreamingInputDocumentFactory { } } - if (version != null) { - indexDocument.addField(DS_VERSION, version); - } - - if (dsId != null) { - indexDocument.addField(DS_ID, dsId); - } - if (!indexDocument.containsKey(INDEX_RECORD_ID)) { throw new IllegalStateException("cannot extract record ID from: " + inputDocument); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java index 3e1a501d1..8800abf95 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/EOSCFuture_Test.java @@ -79,8 +79,7 @@ public class EOSCFuture_Test { final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); 
- final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) - .parseDocument(indexRecordXML); + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory().parseDocument(indexRecordXML); final String xmlDoc = ClientUtils.toXML(solrDoc); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index cd5e08426..ce593cf07 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -39,9 +39,6 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; */ public class IndexRecordTransformerTest { - public static final String VERSION = "2021-04-15T10:05:53Z"; - public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; - private ContextMapper contextMapper; @BeforeEach @@ -197,8 +194,7 @@ public class IndexRecordTransformerTest { final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); - final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) - .parseDocument(indexRecordXML); + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory().parseDocument(indexRecordXML); final String xmlDoc = ClientUtils.toXML(solrDoc); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java index ab98b1da2..451e29128 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java @@ -115,16 +115,9 @@ 
public class SolrConfigTest extends SolrTest { for (SolrDocument doc : rsp.getResults()) { System.out .println( - doc.get("score") + "\t" + doc.get("__indexrecordidentifier") + "\t" + - doc.get("resultidentifier") + "\t" + - doc.get("resultauthor") + "\t" + - doc.get("resultacceptanceyear") + "\t" + - doc.get("resultsubject") + "\t" + - doc.get("resulttitle") + "\t" + - doc.get("relprojectname") + "\t" + - doc.get("resultdescription") + "\t" + - doc.get("__all") + "\t"); + doc.get("__result") + "\t" + ); } } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index be2ee7b98..0bf588a57 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -1,165 +1,116 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file