Added functionality to create index collection inside dhp-graph provision

2022-10-03 15:53:03 +02:00 · 2022-10-03 15:53:03 +02:00 · 56f880c89d
parent 89f7007080
commit 56f880c89d
15 changed files with 2548 additions and 3 deletions
--- a/dhp-workflows/dhp-graph-provision/pom.xml
+++ b/dhp-workflows/dhp-graph-provision/pom.xml
@ -45,6 +45,10 @@
    </build>

    <dependencies>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>stringtemplate</artifactId>
+        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java
@ -59,6 +59,9 @@ public class SolrAdminApplication implements Closeable {
 		final String zkHost = isLookup.getZkHost();
 		log.info("zkHost: {}", zkHost);

+
+
+
 		final String collection = ProvisionConstants.getCollectionName(format);
 		log.info("collection: {}", collection);

--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrUtil.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrUtil.java
@ -0,0 +1,167 @@
+package eu.dnetlib.dhp.oa.provision;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.type.MapType;
+import com.fasterxml.jackson.databind.type.TypeFactory;
+import org.apache.commons.io.IOUtils;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.dom4j.Document;
+import org.dom4j.io.DocumentResult;
+import org.dom4j.io.DocumentSource;
+import org.dom4j.io.SAXReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.stringtemplate.v4.ST;
+
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Static helpers that assemble the configuration of a Solr collection (schema.xml,
+ * solrconfig.xml and the auxiliary files shipped under {@code conf/files}) from classpath
+ * resources and upload it to ZooKeeper.
+ */
+public class SolrUtil {
+
+    /**
+     * The log.
+     */
+    private static final Logger log = LoggerFactory.getLogger(SolrUtil.class);
+
+    /**
+     * ZooKeeper node under which the per-collection configurations are stored.
+     */
+    private static final String CONFIGS_PATH = "/configs";
+
+    /**
+     * Delimiter (both start and stop) of the placeholders in the solrconfig.xml template.
+     */
+    private static final char DELIMITER = '$';
+
+    /**
+     * Classpath folder holding the service properties and the solrconfig.xml template.
+     */
+    private static final String CONF_BASE_PATH ="/eu/dnetlib/dhp/oa/provision/conf";
+
+    /**
+     * Classpath folder holding the auxiliary files uploaded as they are.
+     */
+    private static final String CONF_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/";
+
+    /**
+     * Classpath location of the XSLT that turns a fields layout into a schema.xml.
+     */
+    private static final String SCHEMA_TEMPLATE_PATH= "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";
+
+    /**
+     * Uploads the Solr configuration of the given collection to ZooKeeper, under
+     * {@code /configs/<coreName>}.
+     * <p>
+     * When {@code overwrite} is true and the node already exists, its direct children and the
+     * node itself are deleted first. The configuration is then created only if the node does
+     * not exist, so with {@code overwrite == false} an existing configuration is left untouched.
+     *
+     * @param zkClient  the client used to talk to ZooKeeper
+     * @param coreName  the name of the collection, used as name of the configuration node
+     * @param overwrite whether an existing configuration must be removed and uploaded again
+     * @param layout    the XML fields layout the schema.xml is generated from
+     * @throws RuntimeException wrapping any failure raised while building or uploading the configuration
+     */
+    public static void uploadZookeperConfig(final SolrZkClient zkClient,
+                                     final String coreName,
+                                     final boolean overwrite,
+                                     final String layout){
+
+        final String basepath = CONFIGS_PATH + "/" + coreName;
+
+        log.info("uploading solr configuration to ZK for index collection: {}", coreName);
+        try {
+            if (overwrite && zkClient.getSolrZooKeeper().exists(basepath, false) != null) {
+                log.info("cleanup ZK configuration: {}", coreName);
+                for (String child : zkClient.getSolrZooKeeper().getChildren(basepath, false)) {
+                    final String path = basepath + "/" + child;
+                    log.debug("cleanup ZK file: {}", path);
+                    zkClient.delete(path, -1, true);
+                }
+                zkClient.delete(basepath, -1, true);
+            }
+            if (!zkClient.exists(basepath, true)) {
+                log.info("upload ZK configuration: {}", coreName);
+                zkClient.makePath(basepath, true);
+                uploadConfiguration(zkClient, basepath, buildConfiguration(layout));
+            }
+            log.info("upload ZK configuration complete");
+        } catch (InterruptedException e) {
+            // restore the interrupt flag so that the callers can still observe the interruption
+            Thread.currentThread().interrupt();
+            throw new RuntimeException("unable to upload solr configuration", e);
+        } catch (Exception e) {
+            throw new RuntimeException("unable to upload solr configuration", e);
+        }
+    }
+
+    /**
+     * Creates one persistent ZooKeeper node per resource under {@code basePath}, creating
+     * {@code basePath} itself when it is missing.
+     *
+     * @param zkClient  the client used to talk to ZooKeeper
+     * @param basePath  the parent node of the uploaded resources
+     * @param resources node name to node content
+     */
+    private static void uploadConfiguration(final SolrZkClient zkClient, final String basePath, final Map<String, byte[]> resources) throws KeeperException,
+            InterruptedException, IOException {
+
+        if (!zkClient.exists(basePath, true)) {
+            zkClient.makePath(basePath, true);
+        }
+
+        for (final Map.Entry<String, byte[]> e : resources.entrySet()) {
+            String path = basePath + "/" + e.getKey();
+            log.debug("upload ZK configuration: {}", path);
+            zkClient.create(path, e.getValue(), CreateMode.PERSISTENT, true);
+        }
+    }
+
+
+    /**
+     * Reads a classpath resource as UTF-8 text.
+     *
+     * @param aPath the absolute classpath location of the resource
+     * @return the content of the resource
+     * @throws NullPointerException if the resource does not exist
+     * @throws IOException          if the resource cannot be read
+     */
+    private static String loadFileInClassPath(final String aPath) throws IOException {
+        // try-with-resources: IOUtils.toString does not close the stream it reads from
+        try (InputStream in = SolrUtil.class.getResourceAsStream(aPath)) {
+            Objects.requireNonNull(in, "classpath resource not found: " + aPath);
+            return IOUtils.toString(in, StandardCharsets.UTF_8);
+        }
+    }
+
+    /**
+     * Loads {@code service_properties.json} from the classpath.
+     *
+     * @return the properties as a mutable name/value map
+     * @throws IOException if the file cannot be read or parsed
+     */
+    public static Map<String,String> getServiceProperties() throws IOException {
+        final String properties = loadFileInClassPath(CONF_BASE_PATH+"/service_properties.json");
+        final ObjectMapper mapper = new ObjectMapper();
+        TypeFactory typeFactory = mapper.getTypeFactory();
+        MapType mapType = typeFactory.constructMapType(HashMap.class, String.class, String.class);
+        return mapper.readValue(properties, mapType);
+    }
+
+
+    /**
+     * Produces the solrconfig.xml by filling the {@code solrconfig.xml.st} template with the
+     * service properties.
+     *
+     * @return the rendered solrconfig.xml
+     * @throws Exception if the properties or the template cannot be loaded
+     */
+    public static String getConfig() throws Exception {
+        final Map<String, String> p = getServiceProperties();
+        final String st = loadFileInClassPath(CONF_BASE_PATH+"/solrconfig.xml.st");
+        final ST solrConfig = new ST(st, DELIMITER, DELIMITER);
+        p.forEach(solrConfig::add);
+        // render() evaluates the template; ST#toString() only reports the template name
+        return solrConfig.render();
+    }
+
+    /**
+     * Collects every resource that makes up the collection configuration: the generated
+     * schema.xml, the rendered solrconfig.xml and the regular files found in the
+     * {@code conf/files} folder.
+     *
+     * @param layout the XML fields layout the schema.xml is generated from
+     * @return file name to file content
+     * @throws Exception wrapping any failure raised while building the resources
+     */
+    private static Map<String, byte[]> buildConfiguration(final String layout)
+            throws Exception {
+
+        final Map<String, byte[]> res = new HashMap<>();
+
+        try {
+            log.debug("adding schema.xml to the resource map");
+            res.put("schema.xml", getSchemaXML(layout).getBytes(StandardCharsets.UTF_8));
+
+            log.debug("adding solrconfig.xml to the resource map");
+            res.put("solrconfig.xml", getConfig().getBytes(StandardCharsets.UTF_8));
+
+            // toURI() decodes URL-escaped characters (e.g. %20) that URL#getPath() leaves in place.
+            // NOTE(review): Paths.get(URI) only resolves file system locations; listing the folder
+            // when the resources are packaged inside a jar needs a dedicated FileSystem - confirm
+            // how this class is deployed.
+            final Path confDir = Paths.get(
+                    Objects.requireNonNull(
+                            SolrUtil.class.getResource(CONF_FILE_BASE_PATH),
+                            "classpath resource not found: " + CONF_FILE_BASE_PATH)
+                            .toURI());
+
+            // the directory stream holds an open handle: close it as soon as the listing is consumed
+            try (DirectoryStream<Path> files = Files.newDirectoryStream(confDir)) {
+                for (final Path file : files) {
+                    if (!Files.isRegularFile(file)) {
+                        continue;
+                    }
+                    final String name = String.valueOf(file.getFileName());
+                    log.debug("put file from path {}", CONF_FILE_BASE_PATH + name);
+                    // upload the bytes as they are, without any charset round trip
+                    res.put(name, Files.readAllBytes(file));
+                }
+            }
+
+            return res;
+        } catch (Exception e) {
+            throw new Exception("failed to build configuration", e);
+        }
+    }
+
+
+    /**
+     * Generates the schema.xml by applying the schema template XSLT to the given fields layout.
+     *
+     * @param layout the XML fields layout
+     * @return the generated schema.xml
+     * @throws Exception if the layout or the template cannot be parsed, or the transformation fails
+     */
+    public static String getSchemaXML(final String layout) throws Exception {
+
+        // NOTE(review): SAXReader is used with its default settings; if the layout can come from
+        // an untrusted source consider disabling DTDs and external entities.
+        final Document fields = new SAXReader().read(new ByteArrayInputStream(layout.getBytes(StandardCharsets.UTF_8)));
+
+        final Transformer transformer;
+        try (InputStream template = SolrUtil.class.getResourceAsStream(SCHEMA_TEMPLATE_PATH)) {
+            Objects.requireNonNull(template, "classpath resource not found: " + SCHEMA_TEMPLATE_PATH);
+            transformer = TransformerFactory.newInstance().newTransformer(new DocumentSource(new SAXReader().read(template)));
+        }
+        transformer.setParameter("textFieldType", "text_common");
+
+        final DocumentResult result = new DocumentResult();
+
+        transformer.transform(new DocumentSource(fields), result);
+        String xml = result.getDocument().asXML();
+
+        log.debug("new index schema:\n{}", xml);
+
+        return xml;
+    }
+}
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/currency.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/currency.xml
@ -0,0 +1,67 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
+
+<currencyConfig version="1.0">
+  <rates>
+    <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
+    <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
+    <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
+    <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
+    <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
+    <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
+    <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
+    <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
+    <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
+    <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
+    <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
+    <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
+    <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
+    <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
+    <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
+    <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
+    <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
+    <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
+    <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
+    <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
+    <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
+    <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
+    <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
+    <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
+    <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
+    <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
+    <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
+    <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
+    <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
+    <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
+    <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
+    <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
+    <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
+    <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
+    <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
+    <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
+    <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
+    <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
+    
+    <!-- Cross-rates for some common currencies -->
+    <rate from="EUR" to="GBP" rate="0.869914" />  
+    <rate from="EUR" to="NOK" rate="7.800095" />  
+    <rate from="GBP" to="NOK" rate="8.966508" />  
+  </rates>
+</currencyConfig>
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/elevate.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/elevate.xml
@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+     loaded once at startup.  If it is found in Solr's data
+     directory, it will be re-loaded every commit.
+
+   See http://wiki.apache.org/solr/QueryElevationComponent for more info
+
+-->
+<elevate>
+ <!-- Query elevation examples
+  <query text="foo bar">
+    <doc id="1" />
+    <doc id="2" />
+    <doc id="3" />
+  </query>
+
+for use with techproducts example
+ 
+  <query text="ipod">
+    <doc id="MA147LL/A" />  put the actual ipod at the top 
+    <doc id="IW-02" exclude="true" /> exclude this cable
+  </query>
+-->
+
+</elevate>
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/params.json
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/params.json
@ -0,0 +1,20 @@
+{"params":{
+  "query":{
+    "defType":"edismax",
+    "q.alt":"*:*",
+    "rows":"10",
+    "fl":"*,score",
+    "":{"v":0}
+  },
+  "facets":{
+    "facet":"on",
+    "facet.mincount": "1",
+    "":{"v":0}
+  },
+ "velocity":{
+   "wt": "velocity",
+   "v.template":"browse",
+   "v.layout": "layout",
+   "":{"v":0}
+ }
+}}
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/protwords.txt
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/protwords.txt
@ -0,0 +1,21 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+dontstems
+zwhacky
+
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/stopwords.txt
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/stopwords.txt
@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/synonyms.txt
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/files/synonyms.txt
@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt
@ -0,0 +1,549 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+	<xsl:output omit-xml-declaration="yes" indent="yes"/>
+
+	<xsl:template match="//FIELDS">
+
+		<xsl:param name="textFieldType" select="string('text_common')"/>
+		<xsl:variable name="smallcase" select="'abcdefghijklmnopqrstuvwxyz'"/>
+		<xsl:variable name="uppercase" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'"/>
+
+		<!--
+		D-Net index schema template
+
+		CHANGELOG
+
+		0.1 : first release
+		0.2 : added preserveOriginal="1" for text field type in the index analyzer and catenateWords="1" for the query analyzer
+		0.3 : changed language for SnowballPorterFilterFactory to language="German2" (index/query) in the text field type
+		0.4 : added solr.ASCIIFoldingFilterFactory filter (index/query) in the text field type
+		0.5 : added long_keyword field type, to be used for objIdentifiers
+		0.6 : added field types for spellchecking
+		0.7 : added parameter for text field type
+		0.8 : added field _version_, needed by Solr 4.0.0 for the transaction log
+		0.9   : added type: text_en_splitting
+		0.91  : added type: ngramtext
+		0.92  : added schema optimizations, removing unnecessary stored fields
+		0.93  : added attribute preserveOriginal="1" to fieldtype ngramtext (query analysis) to improve matches
+		0.94  : updated and simplified ngramtext fieldtype
+		0.95  : update to solr 4.4, removed attribute "compress" from field definition, ngramfield doesn't support NGramFilterFactory anymore
+		0.96  : update to solr 4.9
+		0.97  : introduced field type string_ci supporting case insensitivity.
+		1.0   : updated to solr 6.6.0
+		 -->
+		<schema name="dnet" version="1.0">
+
+			<!-- Valid attributes for fields:
+			 name: mandatory - the name for the field
+			 type: mandatory - the name of a field type from the
+			   fieldTypes section
+			 indexed: true if this field should be indexed (searchable or sortable)
+			 stored: true if this field should be retrievable
+			 docValues: true if this field should have doc values. Doc values are
+			   useful (required, if you are using *Point fields) for faceting,
+			   grouping, sorting and function queries. Doc values will make the index
+			   faster to load, more NRT-friendly and more memory-efficient.
+			   They however come with some limitations: they are currently only
+			   supported by StrField, UUIDField, all Trie*Fields and *PointFields,
+			   and depending on the field type, they might require the field to be
+			   single-valued, be required or have a default value (check the
+			   documentation of the field type you're interested in for more information)
+			 multiValued: true if this field may contain multiple values per document
+			 omitNorms: (expert) set to true to omit the norms associated with
+			   this field (this disables length normalization and index-time
+			   boosting for the field, and saves some memory).  Only full-text
+			   fields or fields that need an index-time boost need norms.
+			   Norms are omitted for primitive (non-analyzed) types by default.
+			 termVectors: [false] set to true to store the term vector for a
+			   given field.
+			   When using MoreLikeThis, fields used for similarity should be
+			   stored for best performance.
+			 termPositions: Store position information with the term vector.
+			   This will increase storage costs.
+			 termOffsets: Store offset information with the term vector. This
+			   will increase storage costs.
+			 required: The field is required.  It will throw an error if the
+			   value does not exist
+			 default: a value that should be used if no value is specified
+			   when adding a document.
+			-->
+
+			<!-- field names should consist of alphanumeric or underscore characters only and
+			  not start with a digit.  This is not currently strictly enforced,
+			  but other field names will not have first class support from all components
+			  and back compatibility is not guaranteed.  Names with both leading and
+			  trailing underscores (e.g. _version_) are reserved.
+			-->
+
+			<xsl:for-each select="./FIELD">
+				<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
+				<xsl:variable name="fieldtype">
+					<xsl:choose>
+						<xsl:when test="@type"><xsl:value-of select="@type"/></xsl:when>
+						<xsl:when test="@tokenizable='false'">string</xsl:when>
+						<xsl:otherwise>
+							<xsl:value-of select="$textFieldType"/>
+						</xsl:otherwise>
+					</xsl:choose>
+				</xsl:variable>
+				<xsl:variable name="isMultivalued">
+					<xsl:choose>
+						<xsl:when test="@multivalued='false'">false</xsl:when>
+						<xsl:otherwise>true</xsl:otherwise>
+					</xsl:choose>
+				</xsl:variable>
+				<xsl:variable name="isStored">
+					<xsl:choose>
+						<xsl:when test="@stored='true'">true</xsl:when>
+						<xsl:otherwise>false</xsl:otherwise>
+					</xsl:choose>
+				</xsl:variable>
+
+				<field name="{$fieldname}" type="{$fieldtype}" indexed="{@indexable}" stored="{normalize-space($isStored)}" multiValued="{normalize-space($isMultivalued)}"/>
+			</xsl:for-each>
+
+			<field name="__indexrecordidentifier" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+
+			<field name="__deleted" type="boolean" indexed="true" stored="false" default="false" omitNorms="true" omitTermFreqAndPositions="true"/>
+
+			<field name="__dsid" type="string" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/>
+
+			<field name="__dsversion" type="pdate" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/>
+
+			<field name="__result" type="string" indexed="false" stored="true" multiValued="false" docValues="false"/>
+
+			<field name="__all" type="{$textFieldType}" indexed="true" stored="false" multiValued="true"/>
+
+			<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
+
+			<field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
+
+			<!-- field for ping -->
+			<field name="text" type="{$textFieldType}" indexed="false" stored="false"/>
+
+			<!-- Field to use to determine and enforce document uniqueness.
+				 Unless this field is marked with required="false", it will be a required field
+			  -->
+			<uniqueKey>__indexrecordidentifier</uniqueKey>
+
+			<xsl:for-each select="./FIELD[@copy = 'true']">
+				<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
+				<copyField source="{$fieldname}" dest="__all"/>
+			</xsl:for-each>
+
+			<!-- copyField commands copy one field to another at the time a document
+			   is added to the index.  It's used either to index the same field differently,
+			   or to add multiple fields to the same field for easier/faster searching.
+
+			<copyField source="sourceFieldName" dest="destinationFieldName"/>
+			-->
+
+			<!-- field type definitions. The "name" attribute is
+			   just a label to be used by field definitions.  The "class"
+			   attribute and any other attributes determine the real
+			   behavior of the fieldType.
+				 Class names starting with "solr" refer to java classes in a
+			   standard package such as org.apache.solr.analysis
+			-->
+
+			<!-- The StrField type is not analyzed, but indexed/stored verbatim.
+			   It supports doc values but in that case the field needs to be
+			   single-valued and either required or have a default value.
+			  -->
+			<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
+			<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
+
+			<!-- boolean type: "true" or "false" -->
+			<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+
+			<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
+
+			<!-- The sortMissingLast and sortMissingFirst attributes are optional and are
+				 currently supported on types that are sorted internally as strings
+				 and on numeric types.
+				 This includes "string","boolean", "int", "float", "long", "date", "double",
+				 including the "Trie" and "Point" variants.
+			   - If sortMissingLast="true", then a sort on this field will cause documents
+				 without the field to come after documents with the field,
+				 regardless of the requested sort order (asc or desc).
+			   - If sortMissingFirst="true", then a sort on this field will cause documents
+				 without the field to come before documents with the field,
+				 regardless of the requested sort order.
+			   - If sortMissingLast="false" and sortMissingFirst="false" (the default),
+				 then default lucene sorting will be used which places docs without the
+				 field first in an ascending sort and last in a descending sort.
+			-->
+
+			<!--
+			  Numeric field types that index values using KD-trees. *Point fields are faster and more efficient than Trie* fields, both at
+			  search time and at index time, but some features are still not supported.
+			  Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
+			-->
+			<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
+			<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
+			<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
+			<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
+
+			<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
+			<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
+			<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
+			<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
+
+			<!--
+			  Default numeric field types. For faster range queries, consider *PointFields (pint/pfloat/plong/pdouble), or the
+			  tint/tfloat/tlong/tdouble types.
+			-->
+			<fieldType name="int" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
+			<fieldType name="float" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
+			<fieldType name="long" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
+			<fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
+
+			<fieldType name="ints" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
+			<fieldType name="floats" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
+			<fieldType name="longs" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
+			<fieldType name="doubles" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
+
+			<!--
+			 Numeric field types that index each value at various levels of precision
+			 to accelerate range queries when the number of values between the range
+			 endpoints is large. See the javadoc for NumericRangeQuery for internal
+			 implementation details.
+
+			 Smaller precisionStep values (specified in bits) will lead to more tokens
+			 indexed per value, slightly larger index size, and faster range queries.
+			 A precisionStep of 0 disables indexing at different precision levels.
+
+			 Consider using pint/pfloat/plong/pdouble instead of Trie* fields if possible
+			-->
+			<fieldType name="tint" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
+			<fieldType name="tfloat" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
+			<fieldType name="tlong" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
+			<fieldType name="tdouble" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
+
+			<fieldType name="tints" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+			<fieldType name="tfloats" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+			<fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+			<fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
+
+			<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
+				 is a more restricted form of the canonical representation of dateTime
+				 http://www.w3.org/TR/xmlschema-2/#dateTime
+				 The trailing "Z" designates UTC time and is mandatory.
+				 Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+				 All other components are mandatory.
+
+				 Expressions can also be used to denote calculations that should be
+				 performed relative to "NOW" to determine the value, ie...
+
+					   NOW/HOUR
+						  ... Round to the start of the current hour
+					   NOW-1DAY
+						  ... Exactly 1 day prior to now
+					   NOW/DAY+6MONTHS+3DAYS
+						  ... 6 months and 3 days in the future from the start of
+							  the current day
+
+				 Consult the TrieDateField javadocs for more information.
+			  -->
+			<!-- KD-tree versions of date fields -->
+			<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
+			<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
+
+			<fieldType name="date" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
+			<fieldType name="dates" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
+
+			<fieldType name="tdate" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0"/>
+			<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
+
+
+			<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
+			<fieldType name="binary" class="solr.BinaryField"/>
+
+			<!-- The "RandomSortField" is not used to store or search any
+				 data.  You can declare fields of this type in your schema
+				 to generate pseudo-random orderings of your docs for sorting
+				 or function purposes.  The ordering is generated based on the field
+				 name and the version of the index. As long as the index version
+				 remains unchanged, and the same field name is reused,
+				 the ordering of the docs will be consistent.
+				 If you want different pseudo-random orderings of documents,
+				 for the same version of the index, use a dynamicField and
+				 change the field name in the request.
+			 -->
+			<fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+			<!-- solr.TextField allows the specification of custom text analyzers
+				 specified as a tokenizer and a list of token filters. Different
+				 analyzers may be specified for indexing and querying.
+
+				 The optional positionIncrementGap puts space between multiple fields of
+				 this type on the same document, with the purpose of preventing false phrase
+				 matching across fields.
+
+				 For more info on customizing your analyzer chain, please see
+				 http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+			 -->
+
+			<!-- One can also specify an existing Analyzer class that has a
+				 default constructor via the class attribute on the analyzer element.
+				 Example:
+			<fieldType name="text_greek" class="solr.TextField">
+			  <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
+			</fieldType>
+			-->
+
+			<!-- A text field that only splits on whitespace for exact matching of words -->
+			<!-- <dynamicField name="*_ws" type="text_ws"  indexed="true"  stored="true"/> -->
+
+			<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+				<analyzer>
+					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+				</analyzer>
+			</fieldType>
+
+			<!-- Substring matching on the whole value: at index time the value is kept as a single
+				 token (KeywordTokenizer), lowercased, expanded into n-grams of 3 to 25 characters,
+				 trimmed and de-duplicated; at query time it is only kept whole and lowercased. -->
+			<fieldType name="ngramtext" class="solr.TextField">
+				<analyzer type="index">
+					<tokenizer class="solr.KeywordTokenizerFactory"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="25"/>
+					<filter class="solr.TrimFilterFactory"/>
+					<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.KeywordTokenizerFactory"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+				</analyzer>
+			</fieldType>
+
+
+			<!-- Person names: split with the standard tokenizer and lowercased; the same analyzer
+				 is used for indexing and querying (no stop words, synonyms or stemming). -->
+			<fieldType name="personName" class="solr.TextField"  positionIncrementGap="100">
+				<analyzer>
+					<tokenizer class="solr.StandardTokenizerFactory" />
+					<filter class="solr.LowerCaseFilterFactory" />
+				</analyzer>
+			</fieldType>
+
+			<!-- Partial matching on person names: at index time each lowercased token is expanded
+				 into n-grams of 1 to 30 characters; at query time tokens are only lowercased, so a
+				 query token matches any indexed n-gram equal to it. -->
+			<fieldType name="personNamePrefix" class="solr.TextField"  positionIncrementGap="100">
+				<analyzer type="index">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.LowerCaseFilterFactory" />
+					<filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="30" />
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.LowerCaseFilterFactory" />
+				</analyzer>
+			</fieldType>
+
+
+			<!-- A general text field that has reasonable, generic
+				 cross-language defaults: it tokenizes with StandardTokenizer,
+				   removes stop words from case-insensitive "stopwords.txt"
+				   (empty by default), and down cases.  At query time only, it
+				   also applies synonyms.
+			  -->
+			<fieldType name="text_common" class="solr.TextField" positionIncrementGap="100" multiValued="true">
+				<analyzer type="index">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+					<!-- in this example, we will only use synonyms at query time
+					<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+					<filter class="solr.FlattenGraphFilterFactory"/>
+					-->
+					<filter class="solr.LowerCaseFilterFactory"/>
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+				</analyzer>
+			</fieldType>
+
+			<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+				<analyzer type="index">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.EnglishPossessiveFilterFactory"/>
+					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+					<filter class="solr.PorterStemFilterFactory"/>
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
+					<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.EnglishPossessiveFilterFactory"/>
+					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+					<filter class="solr.PorterStemFilterFactory"/>
+				</analyzer>
+			</fieldType>
+
+			<!-- A text field with defaults appropriate for English, plus
+				 aggressive word-splitting and autophrase features enabled.
+				 This field is just like text_en, except it adds
+				 WordDelimiterGraphFilter to enable splitting and matching of
+				 words on case-change, alpha numeric boundaries, and
+				 non-alphanumeric chars.  This means certain compound word
+				 cases will work, for example query "wi fi" will match
+				 document "WiFi" or "wi-fi".
+			-->
+			<!-- <dynamicField name="*_txt_en_split" type="text_en_splitting"  indexed="true"  stored="true"/> -->
+			<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+				<analyzer type="index">
+					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+					<!-- in this example, we will only use synonyms at query time
+					<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+					-->
+					<!-- Case insensitive stop word removal.
+					-->
+					<filter class="solr.StopFilterFactory"
+							ignoreCase="true"
+							words="stopwords.txt"
+					/>
+					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+					<filter class="solr.PorterStemFilterFactory"/>
+					<filter class="solr.FlattenGraphFilterFactory" />
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+					<filter class="solr.StopFilterFactory"
+							ignoreCase="true"
+							words="stopwords.txt"
+					/>
+					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+					<filter class="solr.PorterStemFilterFactory"/>
+				</analyzer>
+			</fieldType>
+
+			<!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
+				 but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
+			<!-- <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight"  indexed="true"  stored="true"/> -->
+			<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+				<analyzer type="index">
+					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+					<filter class="solr.EnglishMinimalStemFilterFactory"/>
+					<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+						 possible with WordDelimiterGraphFilter in conjunction with stemming. -->
+					<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+					<filter class="solr.FlattenGraphFilterFactory" />
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+					<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+					<filter class="solr.EnglishMinimalStemFilterFactory"/>
+					<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+						 possible with WordDelimiterGraphFilter in conjunction with stemming. -->
+					<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+				</analyzer>
+			</fieldType>
+
+			<!-- Just like text_common except it reverses the characters of
+				   each token, to enable more efficient leading wildcard queries.
+			-->
+			<!-- <dynamicField name="*_txt_rev" type="text_rev"  indexed="true"  stored="true"/> -->
+			<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
+				<analyzer type="index">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+					<filter class="solr.LowerCaseFilterFactory"/>
+					<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+							maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+					<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
+					<filter class="solr.LowerCaseFilterFactory"/>
+				</analyzer>
+			</fieldType>
+
+			<!-- <dynamicField name="*_phon_en" type="phonetic_en"  indexed="true"  stored="true"/> -->
+			<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
+				<analyzer>
+					<tokenizer class="solr.StandardTokenizerFactory"/>
+					<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+				</analyzer>
+			</fieldType>
+
+			<!-- Case-insensitive string: the whole value is kept as a single token and lowercased.
+				 NOTE(review): only a query-type analyzer is declared here; confirm that Solr falls
+				 back to it for indexing as well, otherwise indexed values would not be lowercased. -->
+			<fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+				<analyzer type="query">
+					<tokenizer class="solr.KeywordTokenizerFactory"/>
+					<filter class="solr.LowerCaseFilterFactory"/>
+				</analyzer>
+			</fieldType>
+
+			<!--
+			  Example of using PathHierarchyTokenizerFactory at index time, so
+			  queries for paths match documents at that path, or in descendent paths
+			-->
+			<!-- <dynamicField name="*_descendent_path" type="descendent_path"  indexed="true"  stored="true"/> -->
+			<fieldType name="descendent_path" class="solr.TextField">
+				<analyzer type="index">
+					<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.KeywordTokenizerFactory" />
+				</analyzer>
+			</fieldType>
+
+			<!--
+			  Example of using PathHierarchyTokenizerFactory at query time, so
+			  queries for paths match documents at that path, or in ancestor paths
+			-->
+			<!-- <dynamicField name="*_ancestor_path" type="ancestor_path"  indexed="true"  stored="true"/> -->
+			<fieldType name="ancestor_path" class="solr.TextField">
+				<analyzer type="index">
+					<tokenizer class="solr.KeywordTokenizerFactory" />
+				</analyzer>
+				<analyzer type="query">
+					<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+				</analyzer>
+			</fieldType>
+
+			<!-- since fields of this type are by default not stored or indexed,
+				 any data added to them will be ignored outright.  -->
+			<fieldType name="ignored" stored="false" indexed="false" docValues="false" multiValued="true" class="solr.StrField" />
+
+			<!-- This point type indexes the coordinates as separate fields (subFields)
+			  If subFieldType is defined, it references a type, and a dynamic field
+			  definition is created matching *___<typename>.  Alternately, if
+			  subFieldSuffix is defined, that is used to create the subFields.
+			  Example: if subFieldType="double", then the coordinates would be
+				indexed in fields myloc_0___double,myloc_1___double.
+			  Example: if subFieldSuffix="_d" then the coordinates would be indexed
+				in fields myloc_0_d,myloc_1_d
+			  The subFields are an implementation detail of the fieldType, and end
+			  users normally should not need to know about them.
+			 -->
+			<!-- <dynamicField name="*_point" type="point"  indexed="true"  stored="true"/> -->
+			<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
+
+			<!-- A specialized field for geospatial search filters and distance sorting. -->
+			<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/>
+
+			<!-- An alternative geospatial field type new to Solr 4.  It supports multiValued and polygon shapes.
+			  For more information about this and other Spatial fields new to Solr 4, see:
+			  http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
+			-->
+			<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
+					   geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
+
+		</schema>
+	</xsl:template>
+</xsl:stylesheet>
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/service_properties.json
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/service_properties.json
@ -0,0 +1,14 @@
+ {
+    "id":"solr",
+    "address":"localhost:9983",
+    "port":"8983",
+    "webContext":"solr",
+    "numShards":"4",
+    "replicationFactor":"1",
+	"maxShardsPerNode":"4",
+    "host":"localhost",
+    "luceneMatchVersion":"7.5.0",
+    "feedingShutdownTolerance":"30000",
+    "feedingBufferFlushThreshold":"1000",
+    "feedingSimulationMode":"false"
+ }
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/smf.xml
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/smf.xml
@ -0,0 +1,23 @@
+<FIELDS><!-- SOURCE FIELD -->
+    <FIELD indexable="true" name="source_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="source_type" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="false" name="source_publication_date" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="source_subType" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="source_pid" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="source_schema" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="source_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
+    <FIELD indexable="true" name="source_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/><!-- TARGET FIELD -->
+    <FIELD indexable="true" name="target_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="target_type" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="target_subType" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="target_pid" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="target_schema" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="target_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
+    <FIELD indexable="true" name="target_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/>
+    <FIELD indexable="false" name="target_publication_date" stored="true" stat="false" tokenizable="false" value="None"/><!-- RELATION FIELD -->
+    <FIELD indexable="true" name="publicationDate" multivalued="false" stored="true" stat="false" type="pdate" value="None"/>
+    <FIELD indexable="true" name="relation_name" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="relation_inverse" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
+    <FIELD indexable="true" name="publisher_name" tokenizable="true" stored="true" stat="false" xpath="None"/>
+    <FIELD indexable="true" name="linkprovider" tokenizable="true" stored="true" stat="false" xpath="None"/>
+</FIELDS>
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/solrconfig.xml.st
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/conf/solrconfig.xml.st
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java
@ -1,14 +1,27 @@

 package eu.dnetlib.dhp.oa.provision;

-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.solr.client.solrj.response.SolrPingResponse;
 import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.Element;
+import org.dom4j.Node;
+import org.dom4j.io.SAXReader;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;

+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import static org.junit.jupiter.api.Assertions.*;
+
 class SolrAdminApplicationTest extends SolrTest {

 	@Test
@ -39,4 +52,81 @@ class SolrAdminApplicationTest extends SolrTest {
 		assertEquals(0, rsp.getStatus());
 	}

+
+	/**
+	 * Verifies that the Solr schema.xml generated from the metadata format layout
+	 * declares every field listed in that layout.
+	 * <p>
+	 * Field names are compared after lower-casing both sides.
+	 *
+	 * @throws Exception if the layout resource cannot be read or one of the XML documents cannot be parsed
+	 */
+	@Test
+	void testSchemaCreation() throws Exception {
+
+		final String layout = getSMFLayout();
+
+		assertNotNull(layout);
+		assertTrue(StringUtils.isNotBlank(layout));
+
+		// reuse the layout loaded above instead of reading the resource a second time
+		final String scheme = SolrUtil.getSchemaXML(layout);
+
+		assertNotNull(scheme);
+		assertTrue(StringUtils.isNotBlank(scheme));
+
+		final List<String> expectedFieldNames = extractFieldNames(parseDocument(layout), "//FIELD");
+		assertFalse(expectedFieldNames.isEmpty());
+
+		final List<String> createdFieldNames = extractFieldNames(parseDocument(scheme), "//field");
+		assertFalse(createdFieldNames.isEmpty());
+
+		// assert field by field so that a failure names the missing field
+		for (final String expected : expectedFieldNames) {
+			assertTrue(
+				createdFieldNames.contains(expected),
+				"field missing from the generated schema: " + expected);
+		}
+	}
+
+	/**
+	 * Collects the lower-cased {@code name} attribute of every element selected by the given XPath.
+	 *
+	 * @param doc the document to query
+	 * @param xpath XPath expression selecting the elements that carry a {@code name} attribute
+	 * @return the lower-cased names, in document order
+	 */
+	private List<String> extractFieldNames(final Document doc, final String xpath) {
+		final List<Node> nodes = doc.selectNodes(xpath);
+		final List<String> names = new ArrayList<>();
+		for (final Node node : nodes) {
+			names.add(((Element) node).attributeValue("name").toLowerCase());
+		}
+		return names;
+	}
+
+	/**
+	 * Calls {@code SolrUtil.uploadZookeperConfig} with the mini cluster's ZooKeeper client,
+	 * the name "SMF-index-scholix" and the SMF layout.
+	 * NOTE(review): nothing is asserted afterwards, so the test only fails when the call throws;
+	 * consider checking the uploaded configuration once its location in ZooKeeper is confirmed.
+	 *
+	 * @throws IOException if the layout resource cannot be read
+	 */
+	@Test
+	public void testCreateCollection() throws IOException {
+		SolrUtil.uploadZookeperConfig(miniCluster.getZkClient(), "SMF-index-scholix", true, getSMFLayout());
+	}
+
+
+	private Document parseDocument(final String xml) throws DocumentException {
+		return new SAXReader().read(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
+	}
+
+
+	/**
+	 * Loads the SMF layout test resource from the classpath.
+	 *
+	 * @return the content of SMF_layout.xml, decoded as UTF-8
+	 * @throws IOException if the resource cannot be read
+	 * @throws NullPointerException if the resource is not found on the classpath
+	 */
+	private String getSMFLayout() throws IOException {
+		// try-with-resources closes the stream; the explicit charset avoids the platform default
+		try (java.io.InputStream layout = getClass()
+			.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/SMF_layout.xml")) {
+			return IOUtils.toString(Objects.requireNonNull(layout), StandardCharsets.UTF_8);
+		}
+	}
+
 }
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/SMF_layout.xml
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/SMF_layout.xml
@ -0,0 +1,25 @@
+<LAYOUT name="index">
+    <FIELDS><!-- SOURCE FIELD -->
+        <FIELD indexable="true" name="source_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="source_type" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="false" name="source_publication_date" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="source_subType" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="source_pid" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="source_schema" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="source_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
+        <FIELD indexable="true" name="source_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/><!-- TARGET FIELD -->
+        <FIELD indexable="true" name="target_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="target_type" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="target_subType" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="target_pid" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="target_schema" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="target_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
+        <FIELD indexable="true" name="target_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/>
+        <FIELD indexable="false" name="target_publication_date" stored="true" stat="false" tokenizable="false" value="None"/><!-- RELATION FIELD -->
+        <FIELD indexable="true" name="publicationDate" multivalued="false" stored="true" stat="false" type="pdate" value="None"/>
+        <FIELD indexable="true" name="relation_name" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="relation_inverse" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
+        <FIELD indexable="true" name="publisher_name" tokenizable="true" stored="true" stat="false" xpath="None"/>
+        <FIELD indexable="true" name="linkprovider" tokenizable="true" stored="true" stat="false" xpath="None"/>
+    </FIELDS>
+</LAYOUT>