Master branch updates from beta September 2023 #337

Manually merged
claudio.atzori merged 1271 commits from beta into master 2023-09-06 11:31:09 +02:00
4 changed files with 45 additions and 48 deletions
Showing only changes of commit 401dd38074 - Show all commits

View File

@@ -112,18 +112,19 @@ public class CreateActionSetSparkJob implements Serializable {
final String cited = ID_PREFIX final String cited = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited())); + IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
if(!citing.equals(cited)){ if (!citing.equals(cited)) {
relationList relationList
.addAll( .addAll(
getRelations( getRelations(
citing, citing,
cited)); cited));
if (duplicate && value.getCiting().endsWith(".refs")) { if (duplicate && value.getCiting().endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory citing = ID_PREFIX + IdentifierFactory
.md5( .md5(
CleaningFunctions CleaningFunctions
.normalizePidValue("doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs")))); .normalizePidValue(
"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited)); relationList.addAll(getRelations(citing, cited));
} }
} }

View File

@@ -72,7 +72,7 @@ public class ReadCOCI implements Serializable {
String outputPath, String outputPath,
String delimiter) throws IOException { String delimiter) throws IOException {
for(String inputFile : inputFiles){ for (String inputFile : inputFiles) {
String p_string = workingPath + "/" + inputFile + ".gz"; String p_string = workingPath + "/" + inputFile + ".gz";
Dataset<Row> cociData = spark Dataset<Row> cociData = spark

View File

@@ -12,7 +12,6 @@ public class COCI implements Serializable {
private String cited; private String cited;
public String getOci() { public String getOci() {
return oci; return oci;
} }
@@ -37,5 +36,4 @@ public class COCI implements Serializable {
this.cited = cited; this.cited = cited;
} }
} }

View File

@@ -76,53 +76,51 @@ public class ReadCOCITest {
LocalFileSystem fs = FileSystem.getLocal(new Configuration()); LocalFileSystem fs = FileSystem.getLocal(new Configuration());
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input1.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input1.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input2.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input2.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input3.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input3.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input4.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input4.gz"));
fs fs
.copyFromLocalFile( .copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass() false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz") .getResource("/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles/input5.gz")
.getPath()), .getPath()),
new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz")); new org.apache.hadoop.fs.Path(workingDir + "/COCI/input5.gz"));
ReadCOCI ReadCOCI
.main( .main(
new String[] { new String[] {
"-isSparkSessionManaged", "-isSparkSessionManaged",
Boolean.FALSE.toString(), Boolean.FALSE.toString(),
"-workingPath", "-workingPath",
workingDir.toString() + "/COCI", workingDir.toString() + "/COCI",
"-outputPath", "-outputPath",
workingDir.toString() + "/COCI_json/", workingDir.toString() + "/COCI_json/",
"-inputFile", "input1;input2;input3;input4;input5" "-inputFile", "input1;input2;input3;input4;input5"
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());