Master branch updates from beta September 2023 #337

Manually merged
claudio.atzori merged 1271 commits from beta into master 2023-09-06 11:31:09 +02:00
4 changed files with 45 additions and 48 deletions
Showing only changes of commit 401dd38074 - Show all commits

View File

@@ -112,18 +112,19 @@ public class CreateActionSetSparkJob implements Serializable {
final String cited = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
if(!citing.equals(cited)){
if (!citing.equals(cited)) {
relationList
.addAll(
getRelations(
citing,
cited));
.addAll(
getRelations(
citing,
cited));
if (duplicate && value.getCiting().endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory
.md5(
CleaningFunctions
.normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
.md5(
CleaningFunctions
.normalizePidValue(
"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited));
}
}

View File

@@ -72,7 +72,7 @@ public class ReadCOCI implements Serializable {
String outputPath,
String delimiter) throws IOException {
for(String inputFile : inputFiles){
for (String inputFile : inputFiles) {
String p_string = workingPath + "/" + inputFile + ".gz";
Dataset<Row> cociData = spark

View File

@@ -12,7 +12,6 @@ public class COCI implements Serializable {
private String cited;
public String getOci() {
return oci;
}
@@ -37,5 +36,4 @@ public class COCI implements Serializable {
this.cited = cited;
}
}

View File

@@ -76,53 +76,51 @@ public class ReadCOCITest {
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz"));
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz"));
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz"));
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz"));
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz"));
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz"));
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz"));
ReadCOCI
.main(
new String[] {
"-isSparkSessionManaged",
Boolean.FALSE.toString(),
"-workingPath",
workingDir.toString() + "/COCI",
"-outputPath",
workingDir.toString() + "/COCI_json/",
"-inputFile", "input1;input2;input3;input4;input5"
});
.main(
new String[] {
"-isSparkSessionManaged",
Boolean.FALSE.toString(),
"-workingPath",
workingDir.toString() + "/COCI",
"-outputPath",
workingDir.toString() + "/COCI_json/",
"-inputFile", "input1;input2;input3;input4;input5"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());