forked from D-Net/dnet-hadoop
[BipFinder] Fixed issue for wrong escaped char in doi
This commit is contained in:
parent
89f7007080
commit
188f25eefa
|
@ -11,6 +11,7 @@ import java.util.List;
|
|||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonParser;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -40,7 +41,6 @@ import eu.dnetlib.dhp.utils.DHPUtils;
|
|||
public class PrepareBipFinder implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareBipFinder.class);
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
|
@ -82,9 +82,11 @@ public class PrepareBipFinder implements Serializable {
|
|||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
ObjectMapper mapper = new ObjectMapper()
|
||||
.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
|
||||
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
||||
.textFile(inputPath)
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, BipDeserialize.class));
|
||||
.map(item -> mapper.readValue(item, BipDeserialize.class));
|
||||
|
||||
spark
|
||||
.createDataset(bipDeserializeJavaRDD.flatMap(entry -> entry.keySet().stream().map(key -> {
|
||||
|
|
|
@ -88,7 +88,7 @@ public class PrepareTest {
|
|||
.textFile(workingDir.toString() + "/work/bip")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Result.class));
|
||||
|
||||
Assertions.assertEquals(86, tmp.count());
|
||||
Assertions.assertEquals(87, tmp.count());
|
||||
|
||||
String doi1 = "unresolved::10.0000/096020199389707::doi";
|
||||
|
||||
|
@ -151,6 +151,9 @@ public class PrepareTest {
|
|||
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).count());
|
||||
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi2)).collect().get(0).getInstance().size());
|
||||
|
||||
tmp.filter(r -> r.getId().startsWith("unresolved::10.2111/1551-5028(2004)057"))
|
||||
.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -83,4 +83,5 @@
|
|||
{"10.0000/hoplos.v4i7.41295": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]}
|
||||
{"10.0000/hoplos.v4i7.42830": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]}
|
||||
{"10.0000/hoplos.v4i7.42861": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]}
|
||||
{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]}
|
||||
{"10.0000/hoplos.v4i7.43096": [{"id": "influence", "unit": [{"value": "5.91019644836e-09", "key": "score"}]}, {"id": "popularity_alt", "unit": [{"value": "0.0", "key": "score"}]}, {"id": "popularity", "unit": [{"value": "8.48190886761e-09", "key": "score"}]}]}
|
||||
{"10.2111/1551-5028(2004)057\[0539:sdsocg\]2.0.co;2": [{"id":"influence", "unit":[{"key":"score","value":"6.3290875E-9"},{"key":"class","value":"C"}]}, {"id":"popularity", "unit":[{"key":"score","value":"6.576763E-9"},{"key":"class","value":"C"}]}, {"id":"influence_alt", "unit":[{"key":"score","value":"11"},{"key":"class","value":"C"}]}, {"id":"popularity_alt", "unit":[{"key":"score","value":"1.0142108"},{"key":"class","value":"C"}]}, {"id":"impulse", "unit":[{"key":"score","value":"1"},{"key":"class","value":"C"}]}]}
|
Loading…
Reference in New Issue