forked from D-Net/dnet-hadoop
[OpenCitation] added logic to avoid the generation of self-citation relations
This commit is contained in:
parent
b071f8e415
commit
759ed519f2
|
@ -115,18 +115,20 @@ public class CreateActionSetSparkJob implements Serializable {
|
||||||
final String cited = ID_PREFIX
|
final String cited = ID_PREFIX
|
||||||
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
|
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
|
||||||
|
|
||||||
relationList
|
if(!citing.equals(cited)){
|
||||||
.addAll(
|
relationList
|
||||||
getRelations(
|
.addAll(
|
||||||
citing,
|
getRelations(
|
||||||
cited));
|
citing,
|
||||||
|
cited));
|
||||||
|
|
||||||
if (duplicate && value.getCiting().endsWith(".refs")) {
|
if (duplicate && value.getCiting().endsWith(".refs")) {
|
||||||
citing = ID_PREFIX + IdentifierFactory
|
citing = ID_PREFIX + IdentifierFactory
|
||||||
.md5(
|
.md5(
|
||||||
CleaningFunctions
|
CleaningFunctions
|
||||||
.normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
|
.normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
|
||||||
relationList.addAll(getRelations(citing, cited));
|
relationList.addAll(getRelations(citing, cited));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return relationList;
|
return relationList;
|
||||||
|
|
|
@ -103,6 +103,13 @@ public class ReadCOCITest {
|
||||||
.getPath()),
|
.getPath()),
|
||||||
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4"));
|
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4"));
|
||||||
|
|
||||||
|
fs
|
||||||
|
.copyFromLocalFile(
|
||||||
|
false, new org.apache.hadoop.fs.Path(getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5")
|
||||||
|
.getPath()),
|
||||||
|
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5"));
|
||||||
|
|
||||||
ReadCOCI
|
ReadCOCI
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
|
@ -112,7 +119,7 @@ public class ReadCOCITest {
|
||||||
workingDir.toString() + "/COCI",
|
workingDir.toString() + "/COCI",
|
||||||
"-outputPath",
|
"-outputPath",
|
||||||
workingDir.toString() + "/COCI_json/",
|
workingDir.toString() + "/COCI_json/",
|
||||||
"-inputFile", "input1;input2;input3;input4"
|
"-inputFile", "input1;input2;input3;input4;input5"
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
@ -123,7 +130,7 @@ public class ReadCOCITest {
|
||||||
.textFile(workingDir.toString() + "/COCI_json/*/")
|
.textFile(workingDir.toString() + "/COCI_json/*/")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, COCI.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, COCI.class));
|
||||||
|
|
||||||
Assertions.assertEquals(23, tmp.count());
|
Assertions.assertEquals(24, tmp.count());
|
||||||
|
|
||||||
Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count());
|
Assertions.assertEquals(1, tmp.filter(c -> c.getCiting().equals("10.1207/s15327647jcd3,4-01")).count());
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,2 @@
|
||||||
|
oci,citing,cited,creation,timespan,journal_sc,author_sc
|
||||||
|
02001000007362801000805046300010563030608046333-02001000007362801000805046300010563030608046333,10.1007/s10854-015-3684-x,10.1007/s10854-015-3684-x,2015-09-01,P7Y2M,no,no
|
Loading…
Reference in New Issue