package eu.dnetlib.pace.config;

import java.io.IOException;
import java.io.Serializable;
import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.BiFunction;

import eu.dnetlib.pace.tree.support.TreeNodeDef;
import eu.dnetlib.pace.util.PaceException;
import org.antlr.stringtemplate.StringTemplate;
import org.apache.commons.io.IOUtils;

import com.google.common.collect.Maps;

import eu.dnetlib.pace.condition.ConditionAlgo;
import eu.dnetlib.pace.model.ClusteringDef;
import eu.dnetlib.pace.model.FieldDef;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.codehaus.jackson.map.ObjectMapper;

/**
 * Deduplication configuration made of a {@link PaceConfig} section and a {@link WfConfig}
 * section.
 *
 * <p>An instance is obtained either by parsing a JSON document ({@link #load(String)}) or by
 * rendering the classpath template {@code dedupConfig.st} with a set of default parameters,
 * optionally overridden by the caller ({@link #loadDefault(Map)}). The {@link Config} methods
 * simply delegate to the {@code pace} section.
 *
 * <p>NOTE(review): the generic type arguments used in this class were reconstructed from the
 * imported model types; confirm them against the declarations in {@code Config} and
 * {@code PaceConfig}.
 */
public class DedupConfig implements Config, Serializable {

    private static final Log log = LogFactory.getLog(DedupConfig.class);

    /** Name of the classpath template rendered by {@link #loadDefault(Map)}. */
    private static final String CONFIG_TEMPLATE = "dedupConfig.st";

    private PaceConfig pace;

    private WfConfig wf;

    /** Template parameters applied before any caller-supplied parameter. */
    private static final Map<String, String> defaults = Maps.newHashMap();

    static {
        defaults.put("threshold", "0");
        defaults.put("dedupRun", "001");
        defaults.put("entityType", "result");
        defaults.put("subEntityType", "resulttype");
        defaults.put("subEntityValue", "publication");
        defaults.put("orderField", "title");
        defaults.put("queueMaxSize", "2000");
        defaults.put("groupMaxSize", "10");
        defaults.put("slidingWindowSize", "200");
        defaults.put("rootBuilder", "result");
        defaults.put("includeChildren", "true");
    }

    /** No-argument constructor; also used by Jackson when binding JSON in {@link #load(String)}. */
    public DedupConfig() {}

    /**
     * Parses a JSON document into a {@code DedupConfig} and initialises the model of its
     * {@code pace} section.
     *
     * @param json the configuration serialised as JSON
     * @return the parsed configuration
     * @throws PaceException if the JSON cannot be read or mapped
     */
    public static DedupConfig load(final String json) {
        final DedupConfig config;
        try {
            config = new ObjectMapper().readValue(json, DedupConfig.class);
            config.getPace().initModel();
            return config;
        } catch (IOException e) {
            throw new PaceException("Error in parsing configuration json", e);
        }
    }

    /**
     * Builds the configuration from the classpath template using only the default parameters.
     *
     * @return the configuration rendered from the template
     * @throws IOException if the template cannot be read from the classpath
     */
    public static DedupConfig loadDefault() throws IOException {
        return loadDefault(new HashMap<>());
    }

    /**
     * Builds the configuration from the classpath template, applying the default parameters
     * first and then the given ones on top of them.
     *
     * @param params template parameters that override, or add to, the defaults
     * @return the configuration rendered from the template and parsed via {@link #load(String)}
     * @throws IOException if the template cannot be read from the classpath
     */
    public static DedupConfig loadDefault(final Map<String, String> params) throws IOException {

        final StringTemplate template = new StringTemplate(new DedupConfig().readFromClasspath(CONFIG_TEMPLATE));

        for (final Entry<String, String> e : defaults.entrySet()) {
            template.setAttribute(e.getKey(), e.getValue());
        }
        for (final Entry<String, String> e : params.entrySet()) {
            if (template.getAttribute(e.getKey()) != null) {
                // An attribute with this name is already visible: replace its value in the
                // attribute map rather than calling setAttribute again (presumably because
                // StringTemplate would turn a repeated setAttribute into a multi-valued
                // attribute -- confirm against the StringTemplate version in use).
                template.getAttributes().computeIfPresent(e.getKey(), (o, o2) -> e.getValue());
            } else {
                template.setAttribute(e.getKey(), e.getValue());
            }
        }

        final String json = template.toString();
        return load(json);
    }

    /**
     * Reads a classpath resource, looked up through this object's class, into a string.
     *
     * @param resource name of the resource, as accepted by {@link Class#getResource(String)}
     * @return the resource content
     * @throws IOException if the resource does not exist or cannot be read
     */
    private String readFromClasspath(final String resource) throws IOException {
        final URL url = getClass().getResource(resource);
        if (url == null) {
            // Class.getResource returns null for a missing resource; report it explicitly
            // instead of letting the read fail with a NullPointerException.
            throw new IOException("Classpath resource not found: " + resource);
        }
        return IOUtils.toString(url);
    }

    public PaceConfig getPace() {
        return pace;
    }

    public void setPace(final PaceConfig pace) {
        this.pace = pace;
    }

    public WfConfig getWf() {
        return wf;
    }

    public void setWf(final WfConfig wf) {
        this.wf = wf;
    }

    /**
     * Serialises this configuration as JSON.
     *
     * @return the JSON representation produced by Jackson
     * @throws PaceException if serialisation fails
     */
    @Override
    public String toString() {
        try {
            return new ObjectMapper().writeValueAsString(this);
        } catch (IOException e) {
            throw new PaceException("unable to serialise configuration", e);
        }
    }

    /** Returns the decision tree of the {@code pace} section. */
    @Override
    public Map<String, TreeNodeDef> decisionTree() {
        return getPace().getDecisionTree();
    }

    /** Returns the model of the {@code pace} section. */
    @Override
    public List<FieldDef> model() {
        return getPace().getModel();
    }

    /** Returns the model map of the {@code pace} section. */
    @Override
    public Map<String, FieldDef> modelMap() {
        return getPace().getModelMap();
    }

    /** Returns the strict condition algorithms of the {@code pace} section. */
    @Override
    public List<ConditionAlgo> sufficientConditions() {
        return getPace().getStrictConditionAlgos();
    }

    /** Returns the condition algorithms of the {@code pace} section. */
    @Override
    public List<ConditionAlgo> necessaryConditions() {
        return getPace().getConditionAlgos();
    }

    /** Returns the clustering definitions of the {@code pace} section. */
    @Override
    public List<ClusteringDef> clusterings() {
        return getPace().getClustering();
    }

    /** Returns the blacklists of the {@code pace} section. */
    @Override
    public Map<String, List<String>> blacklists() {
        return getPace().getBlacklists();
    }
}