forked from D-Net/dnet-hadoop
code formatting
This commit is contained in:
parent
3b876d9327
commit
0c1be41b30
|
@ -8,13 +8,12 @@ import java.io.Serializable;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
|
@ -28,6 +27,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
|
@ -80,12 +80,12 @@ public class SparkAtomicActionUsageJob implements Serializable {
|
|||
});
|
||||
}
|
||||
|
||||
private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName, String attribute_name) {
|
||||
private static void prepareData(String dbname, SparkSession spark, String workingPath, String tableName,
|
||||
String attribute_name) {
|
||||
spark
|
||||
.sql(
|
||||
"Select " + attribute_name + " as id, sum(downloads) as downloads, sum(views) as views " +
|
||||
"from " + dbname + "." + tableName +
|
||||
"group by " + attribute_name)
|
||||
.sql(String.format(
|
||||
"select %s as id, sum(downloads) as downloads, sum(views) as views " +
|
||||
"from %s.%s group by %s", attribute_name, dbname, tableName, attribute_name))
|
||||
.as(Encoders.bean(UsageStatsModel.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
|
@ -93,16 +93,16 @@ public class SparkAtomicActionUsageJob implements Serializable {
|
|||
.json(workingPath);
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static void writeActionSet(SparkSession spark, String inputPath, String outputPath) {
|
||||
getFinalIndicatorsResult(spark, inputPath+ "/usageDb").
|
||||
toJavaRDD().
|
||||
map(p -> new AtomicAction(p.getClass(),p))
|
||||
.union(getFinalIndicatorsProject(spark, inputPath + "/projectDb")
|
||||
getFinalIndicatorsResult(spark, inputPath + "/usageDb")
|
||||
.toJavaRDD()
|
||||
.map(p -> new AtomicAction(p.getClass(), p )))
|
||||
.union(getFinalIndicatorsDatasource(spark, inputPath + "/datasourceDb")
|
||||
.map(p -> new AtomicAction(p.getClass(), p))
|
||||
.union(
|
||||
getFinalIndicatorsProject(spark, inputPath + "/projectDb")
|
||||
.toJavaRDD()
|
||||
.map(p -> new AtomicAction(p.getClass(), p)))
|
||||
.union(
|
||||
getFinalIndicatorsDatasource(spark, inputPath + "/datasourceDb")
|
||||
.toJavaRDD()
|
||||
.map(p -> new AtomicAction(p.getClass(), p)))
|
||||
.mapToPair(
|
||||
|
@ -127,10 +127,10 @@ public class SparkAtomicActionUsageJob implements Serializable {
|
|||
|
||||
return readPath(spark, inputPath, UsageStatsModel.class)
|
||||
.map((MapFunction<UsageStatsModel, Project>) usm -> {
|
||||
Project r = new Project();
|
||||
r.setId("40|" + usm.getId());
|
||||
r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
|
||||
return r;
|
||||
Project p = new Project();
|
||||
p.setId("40|" + usm.getId());
|
||||
p.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
|
||||
return p;
|
||||
}, Encoders.bean(Project.class));
|
||||
}
|
||||
|
||||
|
@ -138,15 +138,13 @@ public class SparkAtomicActionUsageJob implements Serializable {
|
|||
|
||||
return readPath(spark, inputPath, UsageStatsModel.class)
|
||||
.map((MapFunction<UsageStatsModel, Datasource>) usm -> {
|
||||
Datasource r = new Datasource();
|
||||
r.setId("10|" + usm.getId());
|
||||
r.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
|
||||
return r;
|
||||
Datasource d = new Datasource();
|
||||
d.setId("10|" + usm.getId());
|
||||
d.setMeasures(getMeasure(usm.getDownloads(), usm.getViews()));
|
||||
return d;
|
||||
}, Encoders.bean(Datasource.class));
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static List<Measure> getMeasure(Long downloads, Long views) {
|
||||
DataInfo dataInfo = OafMapperUtils
|
||||
.dataInfo(
|
||||
|
|
|
@ -8,7 +8,6 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class SparkAtomicActionCountJobTest {
|
||||
|
@ -79,16 +79,16 @@ public class SparkAtomicActionCountJobTest {
|
|||
JavaRDD<AtomicAction> tmp = sc
|
||||
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
|
||||
.map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class));
|
||||
//.map(aa -> (Result) aa.getPayload());
|
||||
// .map(aa -> (Result) aa.getPayload());
|
||||
|
||||
Assertions.assertEquals(9,tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|")).count());
|
||||
Assertions.assertEquals(9,tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|")).count());
|
||||
Assertions.assertEquals(9,tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|")).count());
|
||||
Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|")).count());
|
||||
Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|")).count());
|
||||
Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|")).count());
|
||||
|
||||
tmp.foreach(r -> Assertions.assertEquals(2, ((OafEntity)r.getPayload()).getMeasures().size()));
|
||||
tmp.foreach(r -> Assertions.assertEquals(2, ((OafEntity) r.getPayload()).getMeasures().size()));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> ((OafEntity)r.getPayload())
|
||||
r -> ((OafEntity) r.getPayload())
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
|
@ -98,14 +98,14 @@ public class SparkAtomicActionCountJobTest {
|
|||
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference()))));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> ((OafEntity)r.getPayload())
|
||||
r -> ((OafEntity) r.getPayload())
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred()))));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> ((OafEntity)r.getPayload())
|
||||
r -> ((OafEntity) r.getPayload())
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
|
@ -116,7 +116,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
|
||||
tmp
|
||||
.foreach(
|
||||
r -> ((OafEntity)r.getPayload())
|
||||
r -> ((OafEntity) r.getPayload())
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
|
@ -130,7 +130,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
u.getDataInfo().getProvenanceaction().getClassid()))));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> ((OafEntity)r.getPayload())
|
||||
r -> ((OafEntity) r.getPayload())
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
|
@ -145,7 +145,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
|
||||
tmp
|
||||
.foreach(
|
||||
r -> ((OafEntity)r.getPayload())
|
||||
r -> ((OafEntity) r.getPayload())
|
||||
.getMeasures()
|
||||
.stream()
|
||||
.forEach(
|
||||
|
@ -160,13 +160,19 @@ public class SparkAtomicActionCountJobTest {
|
|||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp.filter(r -> ((OafEntity)r.getPayload()).getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).count());
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
r -> ((OafEntity) r.getPayload())
|
||||
.getId()
|
||||
.equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -182,7 +188,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"5",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -199,7 +205,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -215,7 +221,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"1",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -232,7 +238,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"2",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -248,7 +254,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"6",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -261,12 +267,11 @@ public class SparkAtomicActionCountJobTest {
|
|||
.get(0)
|
||||
.getValue());
|
||||
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -282,7 +287,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"5",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -299,7 +304,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -315,7 +320,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"1",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -332,7 +337,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"2",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -348,7 +353,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"6",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -361,12 +366,11 @@ public class SparkAtomicActionCountJobTest {
|
|||
.get(0)
|
||||
.getValue());
|
||||
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -382,7 +386,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"5",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -399,7 +403,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"0",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -415,7 +419,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"1",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -432,7 +436,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"2",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
@ -448,7 +452,7 @@ public class SparkAtomicActionCountJobTest {
|
|||
.assertEquals(
|
||||
"6",
|
||||
tmp
|
||||
.map(r -> ((OafEntity)r.getPayload()))
|
||||
.map(r -> ((OafEntity) r.getPayload()))
|
||||
.filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f"))
|
||||
.collect()
|
||||
.get(0)
|
||||
|
|
Loading…
Reference in New Issue