2019-12-05 14:14:25 +01:00
|
|
|
package eu.dnetlib.pace.util;
|
|
|
|
|
|
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
2019-12-17 09:16:26 +01:00
|
|
|
import com.jayway.jsonpath.Configuration;
|
2019-12-05 14:14:25 +01:00
|
|
|
import com.jayway.jsonpath.JsonPath;
|
2019-12-17 09:16:26 +01:00
|
|
|
import com.jayway.jsonpath.Option;
|
2019-12-05 14:14:25 +01:00
|
|
|
import eu.dnetlib.pace.config.DedupConfig;
|
|
|
|
import eu.dnetlib.pace.config.Type;
|
2023-04-17 11:06:27 +02:00
|
|
|
import eu.dnetlib.pace.model.*;
|
2019-12-05 14:14:25 +01:00
|
|
|
import net.minidev.json.JSONArray;
|
|
|
|
|
2023-04-17 11:06:27 +02:00
|
|
|
import java.math.BigDecimal;
|
2019-12-05 14:14:25 +01:00
|
|
|
import java.util.*;
|
|
|
|
import java.util.function.Predicate;
|
2022-03-09 12:53:09 +01:00
|
|
|
import java.util.stream.Collectors;
|
2019-12-05 14:14:25 +01:00
|
|
|
|
|
|
|
public class MapDocumentUtil {
|
|
|
|
|
|
|
|
public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
|
|
|
|
public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX);
|
|
|
|
|
|
|
|
public static MapDocument asMapDocumentWithJPath(DedupConfig conf, final String json) {
|
|
|
|
MapDocument m = new MapDocument();
|
|
|
|
m.setIdentifier(getJPathString(conf.getWf().getIdPath(), json));
|
|
|
|
Map<String, Field> stringField = new HashMap<>();
|
|
|
|
conf.getPace().getModel().forEach(fdef -> {
|
|
|
|
switch (fdef.getType()) {
|
|
|
|
case String:
|
|
|
|
case Int:
|
2020-04-24 14:36:42 +02:00
|
|
|
stringField.put(fdef.getName(), new FieldValueImpl(fdef.getType(), fdef.getName(), truncateValue(getJPathString(fdef.getPath(), json), fdef.getLength())));
|
2019-12-05 14:14:25 +01:00
|
|
|
break;
|
|
|
|
case URL:
|
|
|
|
String uv = getJPathString(fdef.getPath(), json);
|
|
|
|
if (!urlFilter.test(uv)) uv = "";
|
|
|
|
stringField.put(fdef.getName(), new FieldValueImpl(fdef.getType(), fdef.getName(), uv));
|
|
|
|
break;
|
|
|
|
case List:
|
|
|
|
case JSON:
|
|
|
|
FieldListImpl fi = new FieldListImpl(fdef.getName(), fdef.getType());
|
2020-04-24 14:36:42 +02:00
|
|
|
truncateList(getJPathList(fdef.getPath(), json, fdef.getType()), fdef.getSize())
|
2019-12-05 14:14:25 +01:00
|
|
|
.stream()
|
2019-12-18 09:29:44 +01:00
|
|
|
.map(item -> new FieldValueImpl(Type.String, fdef.getName(), item))
|
2019-12-05 14:14:25 +01:00
|
|
|
.forEach(fi::add);
|
|
|
|
stringField.put(fdef.getName(), fi);
|
|
|
|
break;
|
2023-04-17 11:06:27 +02:00
|
|
|
case DoubleArray:
|
|
|
|
stringField.put(
|
|
|
|
fdef.getName(),
|
|
|
|
new FieldValueImpl(Type.DoubleArray,
|
|
|
|
fdef.getName(),
|
|
|
|
getJPathArray(fdef.getPath(), json))
|
|
|
|
);
|
|
|
|
break;
|
2022-03-09 12:53:09 +01:00
|
|
|
case StringConcat:
|
|
|
|
String[] jpaths = fdef.getPath().split("\\|\\|\\|");
|
|
|
|
stringField.put(
|
|
|
|
fdef.getName(),
|
|
|
|
new FieldValueImpl(Type.String,
|
|
|
|
fdef.getName(),
|
|
|
|
truncateValue(Arrays.stream(jpaths).map(jpath -> getJPathString(jpath, json)).collect(Collectors.joining(" ")),
|
|
|
|
fdef.getLength())
|
|
|
|
)
|
|
|
|
);
|
|
|
|
break;
|
2019-12-05 14:14:25 +01:00
|
|
|
}
|
|
|
|
});
|
|
|
|
m.setFieldMap(stringField);
|
|
|
|
return m;
|
|
|
|
}
|
|
|
|
|
2019-12-06 15:28:30 +01:00
|
|
|
public static List<String> getJPathList(String path, String json, Type type) {
|
2019-12-05 14:14:25 +01:00
|
|
|
if (type == Type.List)
|
2019-12-17 09:16:26 +01:00
|
|
|
return JsonPath.using(Configuration.defaultConfiguration().addOptions(Option.ALWAYS_RETURN_LIST, Option.SUPPRESS_EXCEPTIONS)).parse(json).read(path);
|
2019-12-05 14:14:25 +01:00
|
|
|
Object jresult;
|
|
|
|
List<String> result = new ArrayList<>();
|
|
|
|
try {
|
|
|
|
jresult = JsonPath.read(json, path);
|
|
|
|
} catch (Throwable e) {
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
if (jresult instanceof JSONArray) {
|
|
|
|
|
|
|
|
((JSONArray) jresult).forEach(it -> {
|
|
|
|
|
|
|
|
try {
|
|
|
|
result.add(new ObjectMapper().writeValueAsString(it));
|
|
|
|
} catch (JsonProcessingException e) {
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (jresult instanceof LinkedHashMap) {
|
|
|
|
try {
|
|
|
|
result.add(new ObjectMapper().writeValueAsString(jresult));
|
|
|
|
} catch (JsonProcessingException e) {
|
|
|
|
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
if (jresult instanceof String) {
|
|
|
|
result.add((String) jresult);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-12-06 15:28:30 +01:00
|
|
|
public static String getJPathString(final String jsonPath, final String json) {
|
2019-12-05 14:14:25 +01:00
|
|
|
try {
|
2019-12-13 11:30:02 +01:00
|
|
|
Object o = JsonPath.read(json, jsonPath);
|
|
|
|
if (o instanceof String)
|
|
|
|
return (String)o;
|
|
|
|
if (o instanceof JSONArray && ((JSONArray)o).size()>0)
|
|
|
|
return (String)((JSONArray)o).get(0);
|
|
|
|
return "";
|
|
|
|
} catch (Exception e) {
|
|
|
|
return "";
|
2019-12-05 14:14:25 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-17 11:06:27 +02:00
|
|
|
public static double[] getJPathArray(final String jsonPath, final String json) {
|
|
|
|
try {
|
|
|
|
Object o = JsonPath.read(json, jsonPath);
|
|
|
|
if (o instanceof double[])
|
|
|
|
return (double[]) o;
|
|
|
|
if (o instanceof JSONArray) {
|
|
|
|
Object[] objects = ((JSONArray) o).toArray();
|
|
|
|
double[] array = new double[objects.length];
|
|
|
|
for (int i = 0; i < objects.length; i++) {
|
|
|
|
if (objects[i] instanceof BigDecimal)
|
|
|
|
array[i] = ((BigDecimal)objects[i]).doubleValue();
|
|
|
|
else
|
|
|
|
array[i] = (double) objects[i];
|
|
|
|
}
|
|
|
|
return array;
|
|
|
|
}
|
|
|
|
return new double[0];
|
|
|
|
}
|
|
|
|
catch (Exception e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
return new double[0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-05 14:14:25 +01:00
|
|
|
|
2020-04-24 14:36:42 +02:00
|
|
|
public static String truncateValue(String value, int length) {
|
2020-08-06 10:27:05 +02:00
|
|
|
if (value == null)
|
|
|
|
return "";
|
2020-04-24 14:36:42 +02:00
|
|
|
|
|
|
|
if (length == -1 || length > value.length())
|
|
|
|
return value;
|
|
|
|
|
|
|
|
return value.substring(0, length);
|
|
|
|
}
|
|
|
|
|
|
|
|
public static List<String> truncateList(List<String> list, int size) {
|
|
|
|
if (size == -1 || size > list.size())
|
|
|
|
return list;
|
|
|
|
|
|
|
|
return list.subList(0, size);
|
|
|
|
}
|
|
|
|
|
2019-12-05 14:14:25 +01:00
|
|
|
}
|