forked from D-Net/dnet-hadoop
implementation of the toString methods changed: from Gson to Jackson
This commit is contained in:
parent
3cf3dc1934
commit
8b4762bf54
|
@ -19,6 +19,7 @@ import eu.dnetlib.pace.model.ClusteringDef;
|
||||||
import eu.dnetlib.pace.model.FieldDef;
|
import eu.dnetlib.pace.model.FieldDef;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
|
||||||
public class DedupConfig implements Config, Serializable {
|
public class DedupConfig implements Config, Serializable {
|
||||||
|
|
||||||
|
@ -96,7 +97,11 @@ public class DedupConfig implements Config, Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return new GsonBuilder().setPrettyPrinting().create().toJson(this);
|
try {
|
||||||
|
return new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return e.getStackTrace().toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package eu.dnetlib.pace.config;
|
package eu.dnetlib.pace.config;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -9,6 +10,7 @@ import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
import com.google.gson.GsonBuilder;
|
import com.google.gson.GsonBuilder;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
|
||||||
public class WfConfig implements Serializable {
|
public class WfConfig implements Serializable {
|
||||||
|
|
||||||
|
@ -249,7 +251,11 @@ public class WfConfig implements Serializable {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return new GsonBuilder().setPrettyPrinting().create().toJson(this);
|
try {
|
||||||
|
return new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(this);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return e.getStackTrace().toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,17 +8,76 @@ import eu.dnetlib.pace.model.MapDocumentSerializer;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
|
||||||
public class ConfigTest extends AbstractPaceTest {
|
public class ConfigTest extends AbstractPaceTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void test() throws IOException {
|
public void dedupConfigSerializationTest() throws IOException {
|
||||||
final DedupConfig cfg = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
|
final DedupConfig cfgFromClasspath = DedupConfig.load(readFromClasspath("result.pace.conf.json"));
|
||||||
|
|
||||||
assertNotNull(cfg);
|
assertNotNull(cfgFromClasspath);
|
||||||
|
|
||||||
|
String conf = "{ \n" +
|
||||||
|
"wf\" : { " +
|
||||||
|
" \"threshold\" : \"0.99\", " +
|
||||||
|
" \"run\" : \"001\", " +
|
||||||
|
" \"entityType\" : \"result\", " +
|
||||||
|
" \"orderField\" : \"title\", " +
|
||||||
|
" \"queueMaxSize\" : \"2000\"," +
|
||||||
|
" \"groupMaxSize\" : \"10\"," +
|
||||||
|
" \"slidingWindowSize\" : \"200\"," +
|
||||||
|
" \"rootBuilder\" : [ \"result\" ]," +
|
||||||
|
" \"includeChildren\" : \"true\" " +
|
||||||
|
" }," +
|
||||||
|
"\t\"pace\" : {\t\t\n" +
|
||||||
|
"\t\t\"clustering\" : [\n" +
|
||||||
|
"\t\t\t{ \"name\" : \"acronyms\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"minLen\" : \"2\", \"maxLen\" : \"4\"} },\n" +
|
||||||
|
"\t\t\t{ \"name\" : \"ngrampairs\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"ngramLen\" : \"3\"} },\n" +
|
||||||
|
"\t\t\t{ \"name\" : \"suffixprefix\", \"fields\" : [ \"title\" ], \"params\" : { \"max\" : \"1\", \"len\" : \"3\" } } \n" +
|
||||||
|
"\t\t],\t\t\n" +
|
||||||
|
"\t\t\"strictConditions\" : [\n" +
|
||||||
|
" \t\t\t{ \"name\" : \"exactMatch\", \"fields\" : [ \"pid\" ] }\n" +
|
||||||
|
" \t\t], \n" +
|
||||||
|
" \t\t\"conditions\" : [ \n" +
|
||||||
|
" \t\t\t{ \"name\" : \"yearMatch\", \"fields\" : [ \"dateofacceptance\" ] },\n" +
|
||||||
|
" \t\t\t{ \"name\" : \"titleVersionMatch\", \"fields\" : [ \"title\" ] },\n" +
|
||||||
|
" \t\t\t{ \"name\" : \"sizeMatch\", \"fields\" : [ \"authors\" ] } \n" +
|
||||||
|
" \t\t],\t\t\n" +
|
||||||
|
"\t\t\"model\" : [\n" +
|
||||||
|
"\t\t\t{ \"name\" : \"pid\", \"algo\" : \"Null\", \"type\" : \"String\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"pid[qualifier#classid = {doi}]/value\", \"overrideMatch\" : \"true\" }, \t\n" +
|
||||||
|
"\t\t\t{ \"name\" : \"title\", \"algo\" : \"JaroWinkler\", \"type\" : \"String\", \"weight\" : \"1.0\", \"ignoreMissing\" : \"false\", \"path\" : \"result/metadata/title[qualifier#classid = {main title}]/value\" },\n" +
|
||||||
|
"\t\t\t{ \"name\" : \"dateofacceptance\", \"algo\" : \"Null\", \"type\" : \"String\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"result/metadata/dateofacceptance/value\" } ,\n" +
|
||||||
|
"\t\t\t{ \"name\" : \"authors\", \"algo\" : \"Null\", \"type\" : \"List\", \"weight\" : \"0.0\", \"ignoreMissing\" : \"true\", \"path\" : \"result/author/metadata/fullname/value\" }\n" +
|
||||||
|
"\t\t],\n" +
|
||||||
|
"\t\t\"blacklists\" : {\n" +
|
||||||
|
"\t\t\t\"title\" : [\n" +
|
||||||
|
"\t\t\t\t\"^(Corpus Oral Dialectal \\\\(COD\\\\)\\\\.).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Kiri Karl Morgensternile).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(\\\\[Eksliibris Aleksandr).*\\\\]$\",\n" +
|
||||||
|
"\t\t\t\t\"^(\\\\[Eksliibris Aleksandr).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Eksliibris Aleksandr).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Kiri A\\\\. de Vignolles).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(2 kirja Karl Morgensternile).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Pirita kloostri idaosa arheoloogilised).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Kiri tundmatule).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Eksliibris Nikolai Birukovile).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Eksliibris Nikolai Issakovile).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(WHP Cruise Summary Information of section).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Measurement of the top quark\\\\-pair production cross section with ATLAS in pp collisions at).*$\",\n" +
|
||||||
|
"\t\t\t\t\"^(Measurement of the spin\\\\-dependent structure function).*\"\n" +
|
||||||
|
"\t\t\t] } \t\t\n" +
|
||||||
|
"\t}\n" +
|
||||||
|
"\n" +
|
||||||
|
"}";
|
||||||
|
|
||||||
|
final DedupConfig cfgFromSerialization = DedupConfig.load(cfgFromClasspath.toString());
|
||||||
|
String params = "\"params\":{\"limit\":-1,\"weight\":0.0}";
|
||||||
|
//verify if the serialization produces the same result of the input json
|
||||||
|
// assertEquals(cfgFromSerialization.toString().replaceAll("[\n\t\r ]", "").replaceAll("\"params\":null", params), cfgFromClasspath.toString().replaceAll("[\n\t\r ]", ""));
|
||||||
|
|
||||||
System.out.println(cfg.toString());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue