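Code formatting: reorder imports, rewrap long lines, rename lambda locals, and build the prepareData SQL with String.format (which also adds the missing space before "group by")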

This commit is contained in:
Claudio Atzori 2023-02-22 10:15:25 +01:00
parent 3b876d9327
commit 0c1be41b30
2 changed files with 253 additions and 251 deletions

View File

@ -8,13 +8,12 @@ import java.io.Serializable;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SaveMode;
@ -28,6 +27,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.Tuple2; import scala.Tuple2;
@ -80,12 +80,12 @@ public class SparkAtomicActionUsageJob implements Serializable {
}); });
} }
private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName, String attribute_name) { private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName,
String attribute_name) {
spark spark
.sql( .sql(String.format(
"Select " + attribute_name + " as id, sum(downloads) as downloads, sum(views) as views " + "select %s as id, sum(downloads) as downloads, sum(views) as views " +
"from " + dbname + "." + tableName + "from %s.%s group by %s", attribute_name, dbname, tableName, attribute_name))
"group by " + attribute_name)
.as(Encoders.bean(UsageStatsModel.class)) .as(Encoders.bean(UsageStatsModel.class))
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
@ -93,16 +93,16 @@ public class SparkAtomicActionUsageJob implements Serializable {
.json(workingPath); .json(workingPath);
} }
public static void writeActionSet(SparkSession spark, String inputPath, String outputPath) { public static void writeActionSet(SparkSession spark, String inputPath, String outputPath) {
getFinalIndicatorsResult(spark, inputPath+ "/usageDb"). getFinalIndicatorsResult(spark, inputPath + "/usageDb")
toJavaRDD(). .toJavaRDD()
map(p -> new AtomicAction(p.getClass(),p)) .map(p -> new AtomicAction(p.getClass(), p))
.union(getFinalIndicatorsProject(spark, inputPath + "/projectDb") .union(
getFinalIndicatorsProject(spark, inputPath + "/projectDb")
.toJavaRDD() .toJavaRDD()
.map(p -> new AtomicAction(p.getClass(), p))) .map(p -> new AtomicAction(p.getClass(), p)))
.union(getFinalIndicatorsDatasource(spark, inputPath + "/datasourceDb") .union(
getFinalIndicatorsDatasource(spark, inputPath + "/datasourceDb")
.toJavaRDD() .toJavaRDD()
.map(p -> new AtomicAction(p.getClass(), p))) .map(p -> new AtomicAction(p.getClass(), p)))
.mapToPair( .mapToPair(
@ -127,10 +127,10 @@ public class SparkAtomicActionUsageJob implements Serializable {
return readPath(spark, inputPath, UsageStatsModel.class) return readPath(spark, inputPath, UsageStatsModel.class)
.map((MapFunction<UsageStatsModel, Project>) usm -> { .map((MapFunction<UsageStatsModel, Project>) usm -> {
Project r = new Project(); Project p = new Project();
r.setId("40|" + usm.getId()); p.setId("40|" + usm.getId());
r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews())); p.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
return r; return p;
}, Encoders.bean(Project.class)); }, Encoders.bean(Project.class));
} }
@ -138,15 +138,13 @@ public class SparkAtomicActionUsageJob implements Serializable {
return readPath(spark, inputPath, UsageStatsModel.class) return readPath(spark, inputPath, UsageStatsModel.class)
.map((MapFunction<UsageStatsModel, Datasource>) usm -> { .map((MapFunction<UsageStatsModel, Datasource>) usm -> {
Datasource r = new Datasource(); Datasource d = new Datasource();
r.setId("10|" + usm.getId()); d.setId("10|" + usm.getId());
r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews())); d.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
return r; return d;
}, Encoders.bean(Datasource.class)); }, Encoders.bean(Datasource.class));
} }
private static List<Measure> getMeasure(Long downloads, Long views) { private static List<Measure> getMeasure(Long downloads, Long views) {
DataInfo dataInfo = OafMapperUtils DataInfo dataInfo = OafMapperUtils
.dataInfo( .dataInfo(

View File

@ -8,7 +8,6 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class SparkAtomicActionCountJobTest { public class SparkAtomicActionCountJobTest {
@ -160,7 +160,13 @@ public class SparkAtomicActionCountJobTest {
Assertions Assertions
.assertEquals( .assertEquals(
1, tmp.filter(r -> ((OafEntity)r.getPayload()).getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count()); 1,
tmp
.filter(
r -> ((OafEntity) r.getPayload())
.getId()
.equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.count());
Assertions Assertions
.assertEquals( .assertEquals(
@ -261,7 +267,6 @@ public class SparkAtomicActionCountJobTest {
.get(0) .get(0)
.getValue()); .getValue());
Assertions Assertions
.assertEquals( .assertEquals(
"0", "0",
@ -361,7 +366,6 @@ public class SparkAtomicActionCountJobTest {
.get(0) .get(0)
.getValue()); .getValue());
Assertions Assertions
.assertEquals( .assertEquals(
"0", "0",