forked from D-Net/dnet-hadoop
Added functionality to create index collection inside dhp-graph provision
This commit is contained in:
parent
89f7007080
commit
56f880c89d
|
@ -45,6 +45,10 @@
|
|||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>stringtemplate</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
|
|
@ -59,6 +59,9 @@ public class SolrAdminApplication implements Closeable {
|
|||
final String zkHost = isLookup.getZkHost();
|
||||
log.info("zkHost: {}", zkHost);
|
||||
|
||||
|
||||
|
||||
|
||||
final String collection = ProvisionConstants.getCollectionName(format);
|
||||
log.info("collection: {}", collection);
|
||||
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.MapType;
import com.fasterxml.jackson.databind.type.TypeFactory;
import org.apache.commons.io.IOUtils;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.dom4j.Document;
import org.dom4j.io.DocumentResult;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.stringtemplate.v4.ST;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
|
||||
|
||||
public class SolrUtil {
|
||||
|
||||
/**
|
||||
* The log.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(SolrUtil.class);
|
||||
|
||||
/**
|
||||
* The Constant CONFIGS_PATH.
|
||||
*/
|
||||
private static final String CONFIGS_PATH = "/configs";
|
||||
|
||||
private static final char DELIMITER = '$';
|
||||
|
||||
private static final String CONF_BASE_PATH ="/eu/dnetlib/dhp/oa/provision/conf";
|
||||
|
||||
private static final String CONF_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/";
|
||||
|
||||
private static final String SCHEMA_TEMPLATE_PATH= "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";
|
||||
|
||||
public static void uploadZookeperConfig(final SolrZkClient zkClient,
|
||||
final String coreName,
|
||||
final boolean overwrite,
|
||||
final String layout){
|
||||
|
||||
final String basepath = CONFIGS_PATH + "/" + coreName;
|
||||
|
||||
log.info("uploading solr configuration to ZK for index collection: " + coreName);
|
||||
try {
|
||||
if (overwrite && zkClient.getSolrZooKeeper().exists(basepath, false) != null) {
|
||||
log.info("cleanup ZK configuration: " + coreName);
|
||||
for (String child : zkClient.getSolrZooKeeper().getChildren(basepath, false)) {
|
||||
final String path = basepath + "/" + child;
|
||||
log.debug("cleanup ZK file: " + path);
|
||||
zkClient.delete(path, -1, true);
|
||||
}
|
||||
zkClient.delete(basepath, -1, true);
|
||||
}
|
||||
if (!zkClient.exists(basepath, true)) {
|
||||
log.info("upload ZK configuration: " + coreName);
|
||||
zkClient.makePath(basepath, true);
|
||||
uploadConfiguration(zkClient, basepath, buildConfiguration(layout));
|
||||
}
|
||||
log.info("upload ZK configuration complete");
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("unable to upload solr configuration", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static void uploadConfiguration(final SolrZkClient zkClient, final String basePath, final Map<String, byte[]> resources) throws KeeperException,
|
||||
InterruptedException, IOException {
|
||||
|
||||
if (!zkClient.exists(basePath, true)) {
|
||||
zkClient.makePath(basePath, true);
|
||||
}
|
||||
|
||||
for (final Map.Entry<String, byte[]> e : resources.entrySet()) {
|
||||
String path = basePath + "/" + e.getKey();
|
||||
log.debug("upload ZK configuration: " + path);
|
||||
zkClient.create(path, e.getValue(), CreateMode.PERSISTENT, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static String loadFileInClassPath(final String aPath) {
|
||||
try {
|
||||
return IOUtils.toString(Objects.requireNonNull(SolrUtil.class.getResourceAsStream(aPath)), Charset.defaultCharset());
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static Map<String,String> getServiceProperties() throws IOException {
|
||||
final String properties = loadFileInClassPath(CONF_BASE_PATH+"/service_properties.json");
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
TypeFactory typeFactory = mapper.getTypeFactory();
|
||||
MapType mapType = typeFactory.constructMapType(HashMap.class, String.class, String.class);
|
||||
return mapper.readValue(properties, mapType);
|
||||
}
|
||||
|
||||
|
||||
public static String getConfig() throws Exception {
|
||||
final Map<String, String> p = getServiceProperties();
|
||||
final String st = loadFileInClassPath(CONF_BASE_PATH+"/solrconfig.xml.st");
|
||||
final ST solrConfig = new ST(st, DELIMITER, DELIMITER);
|
||||
p.forEach(solrConfig::add);
|
||||
return solrConfig.toString();
|
||||
}
|
||||
|
||||
private static Map<String, byte[]> buildConfiguration(final String layout)
|
||||
throws Exception {
|
||||
|
||||
|
||||
Map<String, byte[]> res = new HashMap<>();
|
||||
|
||||
try {
|
||||
log.debug("adding schema.xml to the resource map");
|
||||
res.put("schema.xml", getSchemaXML(layout).getBytes());
|
||||
|
||||
res.put("solrconfig.xml", getConfig().getBytes());
|
||||
log.debug("adding solrconfig.xml to the resource map");
|
||||
|
||||
Files.list(
|
||||
Paths.get(Objects.requireNonNull(SolrUtil.class.getResource(CONF_FILE_BASE_PATH)).getPath()))
|
||||
.map(Path::getFileName)
|
||||
.forEach(s-> {
|
||||
log.debug(String.format("put file from path %s",CONF_FILE_BASE_PATH + s));
|
||||
res.put(String.valueOf(s),
|
||||
|
||||
Objects.requireNonNull(loadFileInClassPath(CONF_FILE_BASE_PATH + s)).getBytes(StandardCharsets.UTF_8));}
|
||||
);
|
||||
|
||||
return res;
|
||||
} catch (Throwable e) {
|
||||
throw new Exception("failed to build configuration", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static String getSchemaXML(final String layout) throws Exception {
|
||||
|
||||
final Document fields = new SAXReader().read(new ByteArrayInputStream(layout.getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
|
||||
Transformer transformer = TransformerFactory.newInstance().newTransformer(new DocumentSource(new SAXReader().read(SolrUtil.class.getResourceAsStream(SCHEMA_TEMPLATE_PATH))));
|
||||
transformer.setParameter("textFieldType", "text_common");
|
||||
|
||||
final DocumentResult result = new DocumentResult();
|
||||
|
||||
transformer.transform(new DocumentSource(fields), result);
|
||||
String xml = result.getDocument().asXML();
|
||||
|
||||
log.debug("new index schema:\n" + xml);
|
||||
|
||||
return xml;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
|
||||
|
||||
<currencyConfig version="1.0">
|
||||
<rates>
|
||||
<!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
|
||||
<rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
|
||||
<rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
|
||||
<rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
|
||||
<rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
|
||||
<rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
|
||||
<rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
|
||||
<rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
|
||||
<rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
|
||||
<rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
|
||||
<rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
|
||||
<rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
|
||||
<rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
|
||||
<rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
|
||||
<rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
|
||||
<rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
|
||||
<rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
|
||||
<rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
|
||||
<rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
|
||||
<rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
|
||||
<rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
|
||||
<rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
|
||||
<rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
|
||||
<rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
|
||||
<rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
|
||||
<rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
|
||||
<rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
|
||||
<rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
|
||||
<rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
|
||||
<rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
|
||||
<rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
|
||||
<rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
|
||||
<rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
|
||||
<rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
|
||||
<rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
|
||||
<rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
|
||||
<rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
|
||||
<rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
|
||||
|
||||
<!-- Cross-rates for some common currencies -->
|
||||
<rate from="EUR" to="GBP" rate="0.869914" />
|
||||
<rate from="EUR" to="NOK" rate="7.800095" />
|
||||
<rate from="GBP" to="NOK" rate="8.966508" />
|
||||
</rates>
|
||||
</currencyConfig>
|
|
@ -0,0 +1,42 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- If this file is found in the config directory, it will only be
|
||||
loaded once at startup. If it is found in Solr's data
|
||||
directory, it will be re-loaded every commit.
|
||||
|
||||
See http://wiki.apache.org/solr/QueryElevationComponent for more info
|
||||
|
||||
-->
|
||||
<elevate>
|
||||
<!-- Query elevation examples
|
||||
<query text="foo bar">
|
||||
<doc id="1" />
|
||||
<doc id="2" />
|
||||
<doc id="3" />
|
||||
</query>
|
||||
|
||||
for use with techproducts example
|
||||
|
||||
<query text="ipod">
|
||||
<doc id="MA147LL/A" /> put the actual ipod at the top
|
||||
<doc id="IW-02" exclude="true" /> exclude this cable
|
||||
</query>
|
||||
-->
|
||||
|
||||
</elevate>
|
|
@ -0,0 +1,20 @@
|
|||
{"params":{
|
||||
"query":{
|
||||
"defType":"edismax",
|
||||
"q.alt":"*:*",
|
||||
"rows":"10",
|
||||
"fl":"*,score",
|
||||
"":{"v":0}
|
||||
},
|
||||
"facets":{
|
||||
"facet":"on",
|
||||
"facet.mincount": "1",
|
||||
"":{"v":0}
|
||||
},
|
||||
"velocity":{
|
||||
"wt": "velocity",
|
||||
"v.template":"browse",
|
||||
"v.layout": "layout",
|
||||
"":{"v":0}
|
||||
}
|
||||
}}
|
|
@ -0,0 +1,21 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Use a protected word file to protect against the stemmer reducing two
|
||||
# unrelated words to the same base word.
|
||||
|
||||
# Some non-words that normally won't be encountered,
|
||||
# just to test that they won't be stemmed.
|
||||
dontstems
|
||||
zwhacky
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
a
|
||||
an
|
||||
and
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
but
|
||||
by
|
||||
for
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
no
|
||||
not
|
||||
of
|
||||
on
|
||||
or
|
||||
s
|
||||
such
|
||||
t
|
||||
that
|
||||
the
|
||||
their
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
to
|
||||
was
|
||||
will
|
||||
with
|
|
@ -0,0 +1,29 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
#some test synonym mappings unlikely to appear in real input text
|
||||
aaafoo => aaabar
|
||||
bbbfoo => bbbfoo bbbbar
|
||||
cccfoo => cccbar cccbaz
|
||||
fooaaa,baraaa,bazaaa
|
||||
|
||||
# Some synonym groups specific to this example
|
||||
GB,gib,gigabyte,gigabytes
|
||||
MB,mib,megabyte,megabytes
|
||||
Television, Televisions, TV, TVs
|
||||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
|
||||
#after us won't split it into two words.
|
||||
|
||||
# Synonym mappings can be used for spelling correction too
|
||||
pixima => pixma
|
||||
|
|
@ -0,0 +1,549 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
<xsl:output omit-xml-declaration="yes" indent="yes"/>
|
||||
|
||||
<xsl:template match="//FIELDS">
|
||||
|
||||
<xsl:param name="textFieldType" select="string('text_common')"/>
|
||||
<xsl:variable name="smallcase" select="'abcdefghijklmnopqrstuvwxyz'"/>
|
||||
<xsl:variable name="uppercase" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'"/>
|
||||
|
||||
<!--
|
||||
D-Net index schema template
|
||||
|
||||
CHANGELOG
|
||||
|
||||
0.1 : first release
|
||||
0.2 : added preserveOriginal="1" for text field type in the index analyzer and catenateWords="1" for the query analyzer
|
||||
0.3 : changed language for SnowballPorterFilterFactory to language="German2" (index/query) in the text field type
|
||||
0.4 : added solr.ASCIIFoldingFilterFactory filter (index/query) in the text field type
|
||||
0.5 : added long_keyword field type, to be used for objIdentifiers
|
||||
0.6 : added field types for spellchecking
|
||||
0.7 : added parameter for text field type
|
||||
0.8 : added field _version_, needed by Solr 4.0.0 for the transaction log
|
||||
0.9 : added type: text_en_splitting
|
||||
0.91 : added type: ngramtext
|
||||
0.92 : added schema optimizations, removing unnecessary stored fields
|
||||
0.93 : added attribute preserveOriginal="1" to fieldtype ngramtext (query analysis) to improve matches
|
||||
0.94 : updated and simplified ngramtext fieldtype
|
||||
0.95 : update to solr 4.4, removed attribute "compress" from field definition, ngramfield doesn't support NGramFilterFactory anymore
|
||||
0.96 : update to solr 4.9
|
||||
0.97 : introduced field type string_ci supporting case insensitivity.
|
||||
1.0 : updated to solr 6.6.0
|
||||
-->
|
||||
<schema name="dnet" version="1.0">
|
||||
|
||||
<!-- Valid attributes for fields:
|
||||
name: mandatory - the name for the field
|
||||
type: mandatory - the name of a field type from the
|
||||
fieldTypes section
|
||||
indexed: true if this field should be indexed (searchable or sortable)
|
||||
stored: true if this field should be retrievable
|
||||
docValues: true if this field should have doc values. Doc values are
|
||||
useful (required, if you are using *Point fields) for faceting,
|
||||
grouping, sorting and function queries. Doc values will make the index
|
||||
faster to load, more NRT-friendly and more memory-efficient.
|
||||
They however come with some limitations: they are currently only
|
||||
supported by StrField, UUIDField, all Trie*Fields and *PointFields,
|
||||
and depending on the field type, they might require the field to be
|
||||
single-valued, be required or have a default value (check the
|
||||
documentation of the field type you're interested in for more information)
|
||||
multiValued: true if this field may contain multiple values per document
|
||||
omitNorms: (expert) set to true to omit the norms associated with
|
||||
this field (this disables length normalization and index-time
|
||||
boosting for the field, and saves some memory). Only full-text
|
||||
fields or fields that need an index-time boost need norms.
|
||||
Norms are omitted for primitive (non-analyzed) types by default.
|
||||
termVectors: [false] set to true to store the term vector for a
|
||||
given field.
|
||||
When using MoreLikeThis, fields used for similarity should be
|
||||
stored for best performance.
|
||||
termPositions: Store position information with the term vector.
|
||||
This will increase storage costs.
|
||||
termOffsets: Store offset information with the term vector. This
|
||||
will increase storage costs.
|
||||
required: The field is required. It will throw an error if the
|
||||
value does not exist
|
||||
default: a value that should be used if no value is specified
|
||||
when adding a document.
|
||||
-->
|
||||
|
||||
<!-- field names should consist of alphanumeric or underscore characters only and
|
||||
not start with a digit. This is not currently strictly enforced,
|
||||
but other field names will not have first class support from all components
|
||||
and back compatibility is not guaranteed. Names with both leading and
|
||||
trailing underscores (e.g. _version_) are reserved.
|
||||
-->
|
||||
|
||||
<xsl:for-each select="./FIELD">
|
||||
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
|
||||
<xsl:variable name="fieldtype">
|
||||
<xsl:choose>
|
||||
<xsl:when test="@type"><xsl:value-of select="@type"/></xsl:when>
|
||||
<xsl:when test="@tokenizable='false'">string</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:value-of select="$textFieldType"/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:variable>
|
||||
<xsl:variable name="isMultivalued">
|
||||
<xsl:choose>
|
||||
<xsl:when test="@multivalued='false'">false</xsl:when>
|
||||
<xsl:otherwise>true</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:variable>
|
||||
<xsl:variable name="isStored">
|
||||
<xsl:choose>
|
||||
<xsl:when test="@stored='true'">true</xsl:when>
|
||||
<xsl:otherwise>false</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:variable>
|
||||
|
||||
<field name="{$fieldname}" type="{$fieldtype}" indexed="{@indexable}" stored="{normalize-space($isStored)}" multiValued="{normalize-space($isMultivalued)}"/>
|
||||
</xsl:for-each>
|
||||
|
||||
<field name="__indexrecordidentifier" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
|
||||
|
||||
<field name="__deleted" type="boolean" indexed="true" stored="false" default="false" omitNorms="true" omitTermFreqAndPositions="true"/>
|
||||
|
||||
<field name="__dsid" type="string" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/>
|
||||
|
||||
<field name="__dsversion" type="pdate" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/>
|
||||
|
||||
<field name="__result" type="string" indexed="false" stored="true" multiValued="false" docValues="false"/>
|
||||
|
||||
<field name="__all" type="{$textFieldType}" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
|
||||
|
||||
<field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
|
||||
|
||||
<!-- field for ping -->
|
||||
<field name="text" type="{$textFieldType}" indexed="false" stored="false"/>
|
||||
|
||||
<!-- Field to use to determine and enforce document uniqueness.
|
||||
Unless this field is marked with required="false", it will be a required field
|
||||
-->
|
||||
<uniqueKey>__indexrecordidentifier</uniqueKey>
|
||||
|
||||
<xsl:for-each select="./FIELD[@copy = 'true']">
|
||||
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
|
||||
<copyField source="{$fieldname}" dest="__all"/>
|
||||
</xsl:for-each>
|
||||
|
||||
<!-- copyField commands copy one field to another at the time a document
|
||||
is added to the index. It's used either to index the same field differently,
|
||||
or to add multiple fields to the same field for easier/faster searching.
|
||||
|
||||
<copyField source="sourceFieldName" dest="destinationFieldName"/>
|
||||
-->
|
||||
|
||||
<!-- field type definitions. The "name" attribute is
|
||||
just a label to be used by field definitions. The "class"
|
||||
attribute and any other attributes determine the real
|
||||
behavior of the fieldType.
|
||||
Class names starting with "solr" refer to java classes in a
|
||||
standard package such as org.apache.solr.analysis
|
||||
-->
|
||||
|
||||
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
|
||||
It supports doc values but in that case the field needs to be
|
||||
single-valued and either required or have a default value.
|
||||
-->
|
||||
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
|
||||
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
||||
|
||||
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
|
||||
|
||||
<!-- sortMissingLast and sortMissingFirst are optional attributes that are
|
||||
currently supported on types that are sorted internally as strings
|
||||
and on numeric types.
|
||||
This includes "string","boolean", "int", "float", "long", "date", "double",
|
||||
including the "Trie" and "Point" variants.
|
||||
- If sortMissingLast="true", then a sort on this field will cause documents
|
||||
without the field to come after documents with the field,
|
||||
regardless of the requested sort order (asc or desc).
|
||||
- If sortMissingFirst="true", then a sort on this field will cause documents
|
||||
without the field to come before documents with the field,
|
||||
regardless of the requested sort order.
|
||||
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
||||
then default lucene sorting will be used which places docs without the
|
||||
field first in an ascending sort and last in a descending sort.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Numeric field types that index values using KD-trees. *Point fields are faster and more efficient than Trie* fields, both at
|
||||
search time and at index time, but some features are still not supported.
|
||||
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
|
||||
-->
|
||||
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
|
||||
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
|
||||
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
|
||||
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
|
||||
|
||||
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
|
||||
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
|
||||
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
|
||||
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
|
||||
|
||||
<!--
|
||||
Default numeric field types. For faster range queries, consider *PointFields (pint/pfloat/plong/pdouble), or the
|
||||
tint/tfloat/tlong/tdouble types.
|
||||
-->
|
||||
<fieldType name="int" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="float" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
|
||||
<fieldType name="ints" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
|
||||
<fieldType name="floats" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
|
||||
<fieldType name="longs" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
|
||||
<fieldType name="doubles" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
|
||||
|
||||
<!--
|
||||
Numeric field types that index each value at various levels of precision
|
||||
to accelerate range queries when the number of values between the range
|
||||
endpoints is large. See the javadoc for NumericRangeQuery for internal
|
||||
implementation details.
|
||||
|
||||
Smaller precisionStep values (specified in bits) will lead to more tokens
|
||||
indexed per value, slightly larger index size, and faster range queries.
|
||||
A precisionStep of 0 disables indexing at different precision levels.
|
||||
|
||||
Consider using pint/pfloat/plong/pdouble instead of Trie* fields if possible
|
||||
-->
|
||||
<fieldType name="tint" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
|
||||
<fieldType name="tfloat" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
|
||||
<fieldType name="tlong" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
|
||||
<fieldType name="tdouble" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
|
||||
|
||||
<fieldType name="tints" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
|
||||
<fieldType name="tfloats" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
|
||||
<fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
|
||||
<fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
|
||||
|
||||
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
||||
is a more restricted form of the canonical representation of dateTime
|
||||
http://www.w3.org/TR/xmlschema-2/#dateTime
|
||||
The trailing "Z" designates UTC time and is mandatory.
|
||||
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
||||
All other components are mandatory.
|
||||
|
||||
Expressions can also be used to denote calculations that should be
|
||||
performed relative to "NOW" to determine the value, ie...
|
||||
|
||||
NOW/HOUR
|
||||
... Round to the start of the current hour
|
||||
NOW-1DAY
|
||||
... Exactly 1 day prior to now
|
||||
NOW/DAY+6MONTHS+3DAYS
|
||||
... 6 months and 3 days in the future from the start of
|
||||
the current day
|
||||
|
||||
Consult the TrieDateField javadocs for more information.
|
||||
-->
|
||||
<!-- KD-tree versions of date fields -->
|
||||
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
|
||||
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
|
||||
|
||||
<fieldType name="date" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
<fieldType name="dates" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
|
||||
|
||||
<fieldType name="tdate" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0"/>
|
||||
<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
|
||||
|
||||
|
||||
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
|
||||
<fieldType name="binary" class="solr.BinaryField"/>
|
||||
|
||||
<!-- The "RandomSortField" is not used to store or search any
|
||||
data. You can declare fields of this type in your schema
|
||||
to generate pseudo-random orderings of your docs for sorting
|
||||
or function purposes. The ordering is generated based on the field
|
||||
name and the version of the index. As long as the index version
|
||||
remains unchanged, and the same field name is reused,
|
||||
the ordering of the docs will be consistent.
|
||||
If you want different pseudo-random orderings of documents,
|
||||
for the same version of the index, use a dynamicField and
|
||||
change the field name in the request.
|
||||
-->
|
||||
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
||||
|
||||
<!-- solr.TextField allows the specification of custom text analyzers
|
||||
specified as a tokenizer and a list of token filters. Different
|
||||
analyzers may be specified for indexing and querying.
|
||||
|
||||
The optional positionIncrementGap puts space between multiple fields of
|
||||
this type on the same document, with the purpose of preventing false phrase
|
||||
matching across fields.
|
||||
|
||||
For more info on customizing your analyzer chain, please see
|
||||
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
||||
-->
|
||||
|
||||
<!-- One can also specify an existing Analyzer class that has a
|
||||
default constructor via the class attribute on the analyzer element.
|
||||
Example:
|
||||
<fieldType name="text_greek" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
||||
</fieldType>
|
||||
-->
|
||||
|
||||
<!-- A text field that only splits on whitespace for exact matching of words -->
|
||||
<!-- <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> -->
|
||||
|
||||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="ngramtext" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="25"/>
|
||||
<filter class="solr.TrimFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldType name="personName" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="personNamePrefix" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="30" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!-- A general text field that has reasonable, generic
|
||||
cross-language defaults: it tokenizes with StandardTokenizer,
|
||||
removes stop words from case-insensitive "stopwords.txt"
|
||||
(empty by default), and down cases. At query time only, it
|
||||
also applies synonyms.
|
||||
-->
|
||||
<fieldType name="text_common" class="solr.TextField" positionIncrementGap="100" multiValued="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
-->
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field with defaults appropriate for English, plus
|
||||
aggressive word-splitting and autophrase features enabled.
|
||||
This field is just like text_en, except it adds
|
||||
WordDelimiterGraphFilter to enable splitting and matching of
|
||||
words on case-change, alpha numeric boundaries, and
|
||||
non-alphanumeric chars. This means certain compound word
|
||||
cases will work, for example query "wi fi" will match
|
||||
document "WiFi" or "wi-fi".
|
||||
-->
|
||||
<!-- <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> -->
|
||||
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!-- Case insensitive stop word removal.
|
||||
-->
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
<!-- <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> -->
|
||||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjunction with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterGraphFilter in conjunction with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Just like text_common except it reverses the characters of
|
||||
each token, to enable more efficient leading wildcard queries.
|
||||
-->
|
||||
<!-- <dynamicField name="*_txt_rev" type="text_common_rev" indexed="true" stored="true"/> -->
|
||||
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
||||
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> -->
|
||||
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!--
|
||||
Example of using PathHierarchyTokenizerFactory at index time, so
|
||||
queries for paths match documents at that path, or in descendent paths
|
||||
-->
|
||||
<!-- <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> -->
|
||||
<fieldType name="descendent_path" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!--
|
||||
Example of using PathHierarchyTokenizerFactory at query time, so
|
||||
queries for paths match documents at that path, or in ancestor paths
|
||||
-->
|
||||
<!-- <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> -->
|
||||
<fieldType name="ancestor_path" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- since fields of this type are by default not stored or indexed,
|
||||
any data added to them will be ignored outright. -->
|
||||
<fieldType name="ignored" stored="false" indexed="false" docValues="false" multiValued="true" class="solr.StrField" />
|
||||
|
||||
<!-- This point type indexes the coordinates as separate fields (subFields)
|
||||
If subFieldType is defined, it references a type, and a dynamic field
|
||||
definition is created matching *___<typename>. Alternately, if
|
||||
subFieldSuffix is defined, that is used to create the subFields.
|
||||
Example: if subFieldType="double", then the coordinates would be
|
||||
indexed in fields myloc_0___double,myloc_1___double.
|
||||
Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
||||
in fields myloc_0_d,myloc_1_d
|
||||
The subFields are an implementation detail of the fieldType, and end
|
||||
users normally should not need to know about them.
|
||||
-->
|
||||
<!-- <dynamicField name="*_point" type="point" indexed="true" stored="true"/> -->
|
||||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
||||
|
||||
<!-- A specialized field for geospatial search filters and distance sorting. -->
|
||||
<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/>
|
||||
|
||||
<!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
|
||||
For more information about this and other Spatial fields new to Solr 4, see:
|
||||
http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
|
||||
-->
|
||||
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
||||
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
|
||||
|
||||
</schema>
|
||||
</xsl:template>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"id":"solr",
|
||||
"address":"localhost:9983",
|
||||
"port":"8983",
|
||||
"webContext":"solr",
|
||||
"numShards":"4",
|
||||
"replicationFactor":"1",
|
||||
"maxShardsPerNode":"4",
|
||||
"host":"localhost",
|
||||
"luceneMatchVersion":"7.5.0",
|
||||
"feedingShutdownTolerance":"30000",
|
||||
"feedingBufferFlushThreshold":"1000",
|
||||
"feedingSimulationMode":"false"
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
<FIELDS><!-- SOURCE FIELD -->
|
||||
<FIELD indexable="true" name="source_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_type" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="false" name="source_publication_date" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_subType" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_pid" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_schema" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="true" name="source_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/><!-- TARGET FIELD -->
|
||||
<FIELD indexable="true" name="target_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_type" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_subType" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_pid" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_schema" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="true" name="target_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="false" name="target_publication_date" stored="true" stat="false" tokenizable="false" value="None"/><!-- RELATION FIELD -->
|
||||
<FIELD indexable="true" name="publicationDate" multivalued="false" stored="true" stat="false" type="pdate" value="None"/>
|
||||
<FIELD indexable="true" name="relation_name" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="relation_inverse" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="publisher_name" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="true" name="linkprovider" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
</FIELDS>
|
File diff suppressed because it is too large
Load Diff
|
@ -1,14 +1,27 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.solr.client.solrj.response.SolrPingResponse;
|
||||
import org.apache.solr.client.solrj.response.UpdateResponse;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class SolrAdminApplicationTest extends SolrTest {
|
||||
|
||||
@Test
|
||||
|
@ -39,4 +52,81 @@ class SolrAdminApplicationTest extends SolrTest {
|
|||
assertEquals(0, rsp.getStatus());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This test verifies that the schema will be generated correctly
|
||||
* by get the profile of the metadataFormat and generating solr schema.xml
|
||||
* we expect that the fiedl in the metadataFormat are all in the field solr schema
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
void testSchemaCreation() throws Exception {
|
||||
|
||||
|
||||
final String layout =getSMFLayout();
|
||||
|
||||
assertNotNull(layout);
|
||||
assertTrue(StringUtils.isNotBlank(layout));
|
||||
|
||||
final String scheme = SolrUtil.getSchemaXML(getSMFLayout());
|
||||
|
||||
assertNotNull(scheme);
|
||||
assertTrue(StringUtils.isNotBlank(scheme));
|
||||
|
||||
|
||||
final Document fields = parseDocument(layout);
|
||||
|
||||
List<Node> params = fields.selectNodes("//FIELD");
|
||||
|
||||
final List<String> exptectedFieldName = new ArrayList<>();
|
||||
for (Node param : params) {
|
||||
|
||||
Element element = (Element) param;
|
||||
String name = element.attributeValue("name");
|
||||
exptectedFieldName.add(name.toLowerCase());
|
||||
}
|
||||
|
||||
|
||||
assertTrue(exptectedFieldName.size()>0);
|
||||
|
||||
|
||||
final Document parsedScheme = parseDocument(scheme);
|
||||
|
||||
params = parsedScheme.selectNodes("//field");
|
||||
|
||||
final List<String> createdFieldName = new ArrayList<>();
|
||||
for (Node param : params) {
|
||||
|
||||
Element element = (Element) param;
|
||||
String name = element.attributeValue("name");
|
||||
createdFieldName.add(name.toLowerCase());
|
||||
}
|
||||
assertTrue(createdFieldName.size()>0);
|
||||
exptectedFieldName.stream().map(createdFieldName::contains).forEach(Assertions::assertTrue);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateCollection() throws IOException {
|
||||
miniCluster.getZkClient();
|
||||
SolrUtil.uploadZookeperConfig(miniCluster.getZkClient(),"SMF-index-scholix",true,getSMFLayout() );
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
private Document parseDocument(final String xml) throws DocumentException {
|
||||
return new SAXReader().read(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
|
||||
private String getSMFLayout() throws IOException {
|
||||
return IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/provision/SMF_layout.xml")));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
<LAYOUT name="index">
|
||||
<FIELDS><!-- SOURCE FIELD -->
|
||||
<FIELD indexable="true" name="source_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_type" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="false" name="source_publication_date" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_subType" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_pid" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_schema" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="source_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="true" name="source_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/><!-- TARGET FIELD -->
|
||||
<FIELD indexable="true" name="target_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_type" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_subType" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_pid" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_schema" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="target_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="true" name="target_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="false" name="target_publication_date" stored="true" stat="false" tokenizable="false" value="None"/><!-- RELATION FIELD -->
|
||||
<FIELD indexable="true" name="publicationDate" multivalued="false" stored="true" stat="false" type="pdate" value="None"/>
|
||||
<FIELD indexable="true" name="relation_name" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="relation_inverse" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
|
||||
<FIELD indexable="true" name="publisher_name" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
<FIELD indexable="true" name="linkprovider" tokenizable="true" stored="true" stat="false" xpath="None"/>
|
||||
</FIELDS>
|
||||
</LAYOUT>
|
Loading…
Reference in New Issue