refactoring
This commit is contained in:
parent
55ea485783
commit
4c9bc4c3a5
|
@ -82,14 +82,16 @@ public class SparkAtomicActionUsageJob implements Serializable {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void prepareResultData(String dbname, SparkSession spark, String workingPath, String tableName, String resultAttributeName, String datasourceAttributeName) {
|
private static void prepareResultData(String dbname, SparkSession spark, String workingPath, String tableName,
|
||||||
|
String resultAttributeName, String datasourceAttributeName) {
|
||||||
spark
|
spark
|
||||||
.sql(
|
.sql(
|
||||||
String
|
String
|
||||||
.format(
|
.format(
|
||||||
"select %s as id, %s as datasourceId, sum(downloads) as downloads, sum(views) as views " +
|
"select %s as id, %s as datasourceId, sum(downloads) as downloads, sum(views) as views " +
|
||||||
"from %s.%s group by %s, %s",
|
"from %s.%s group by %s, %s",
|
||||||
resultAttributeName, datasourceAttributeName, dbname, tableName, resultAttributeName, datasourceAttributeName))
|
resultAttributeName, datasourceAttributeName, dbname, tableName, resultAttributeName,
|
||||||
|
datasourceAttributeName))
|
||||||
.as(Encoders.bean(UsageStatsResultModel.class))
|
.as(Encoders.bean(UsageStatsResultModel.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
|
@ -131,20 +133,22 @@ public class SparkAtomicActionUsageJob implements Serializable {
|
||||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Measure newMeasureInstance(String id) {
|
public static Measure newMeasureInstance(String id) {
|
||||||
Measure m = new Measure();
|
Measure m = new Measure();
|
||||||
m.setId(id);
|
m.setId(id);
|
||||||
m.setUnit(new ArrayList<>());
|
m.setUnit(new ArrayList<>());
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Dataset<Result> getFinalIndicatorsResult(SparkSession spark, String inputPath) {
|
private static Dataset<Result> getFinalIndicatorsResult(SparkSession spark, String inputPath) {
|
||||||
|
|
||||||
return readPath(spark, inputPath, UsageStatsResultModel.class)
|
return readPath(spark, inputPath, UsageStatsResultModel.class)
|
||||||
.groupByKey((MapFunction<UsageStatsResultModel, String>) usm -> usm.getId(), Encoders.STRING())
|
.groupByKey((MapFunction<UsageStatsResultModel, String>) usm -> usm.getId(), Encoders.STRING())
|
||||||
.mapGroups((MapGroupsFunction<String, UsageStatsResultModel, Result>) (k,it) -> {
|
.mapGroups((MapGroupsFunction<String, UsageStatsResultModel, Result>) (k, it) -> {
|
||||||
Result r = new Result();
|
Result r = new Result();
|
||||||
r.setId("50|" + k);
|
r.setId("50|" + k);
|
||||||
//id = download or view and unit = list of key value pairs
|
// id = download or view and unit = list of key value pairs
|
||||||
Measure download = newMeasureInstance("downloads");
|
Measure download = newMeasureInstance("downloads");
|
||||||
Measure view = newMeasureInstance("views");
|
Measure view = newMeasureInstance("views");
|
||||||
UsageStatsResultModel first = it.next();
|
UsageStatsResultModel first = it.next();
|
||||||
|
@ -178,8 +182,14 @@ public class SparkAtomicActionUsageJob implements Serializable {
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
ModelConstants.DNET_PROVENANCE_ACTIONS,
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
"");
|
"");
|
||||||
download.getUnit().add(OafMapperUtils.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getDownloads()), dataInfo));
|
download
|
||||||
view.getUnit().add(OafMapperUtils.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getViews()), dataInfo));
|
.getUnit()
|
||||||
|
.add(
|
||||||
|
OafMapperUtils
|
||||||
|
.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getDownloads()), dataInfo));
|
||||||
|
view
|
||||||
|
.getUnit()
|
||||||
|
.add(OafMapperUtils.newKeyValueInstance(usm.getDatasourceId(), String.valueOf(usm.getViews()), dataInfo));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Dataset<Project> getFinalIndicatorsProject(SparkSession spark, String inputPath) {
|
private static Dataset<Project> getFinalIndicatorsProject(SparkSession spark, String inputPath) {
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.usagestats;
|
package eu.dnetlib.dhp.actionmanager.usagestats;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
* @Date 30/06/23
|
* @Date 30/06/23
|
||||||
*/
|
*/
|
||||||
public class UsageStatsResultModel extends UsageStatsModel{
|
public class UsageStatsResultModel extends UsageStatsModel {
|
||||||
private String datasourceId ;
|
private String datasourceId;
|
||||||
|
|
||||||
public String getDatasourceId() {
|
public String getDatasourceId() {
|
||||||
return datasourceId;
|
return datasourceId;
|
||||||
|
|
|
@ -8,7 +8,6 @@ import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Measure;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Measure;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
|
||||||
|
@ -144,7 +144,8 @@ public class SparkAtomicActionCountJobTest {
|
||||||
"Inferred by OpenAIRE",
|
"Inferred by OpenAIRE",
|
||||||
u.getDataInfo().getProvenanceaction().getClassname()))));
|
u.getDataInfo().getProvenanceaction().getClassname()))));
|
||||||
|
|
||||||
tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|"))
|
tmp
|
||||||
|
.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|"))
|
||||||
.foreach(
|
.foreach(
|
||||||
r -> ((OafEntity) r.getPayload())
|
r -> ((OafEntity) r.getPayload())
|
||||||
.getMeasures()
|
.getMeasures()
|
||||||
|
@ -169,37 +170,60 @@ public class SparkAtomicActionCountJobTest {
|
||||||
.equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
.equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
OafEntity entity = (OafEntity) tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")).first()
|
OafEntity entity = (OafEntity) tmp
|
||||||
|
.filter(
|
||||||
|
aa -> ((OafEntity) aa.getPayload()).getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
|
||||||
|
.first()
|
||||||
.getPayload();
|
.getPayload();
|
||||||
|
|
||||||
entity
|
entity
|
||||||
.getMeasures()
|
.getMeasures()
|
||||||
.stream()
|
.stream()
|
||||||
.forEach(
|
.forEach(
|
||||||
m -> Assertions.assertEquals(3, m.getUnit().size() ));
|
m -> Assertions.assertEquals(3, m.getUnit().size()));
|
||||||
|
|
||||||
Measure downloads = entity.getMeasures()
|
Measure downloads = entity
|
||||||
|
.getMeasures()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(m -> m.getId().equals("downloads"))
|
.filter(m -> m.getId().equals("downloads"))
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.get();
|
.get();
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
String.valueOf(0),
|
||||||
|
downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake1")).findFirst().get().getValue());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
String.valueOf(0),
|
||||||
|
downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake2")).findFirst().get().getValue());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
String.valueOf(1),
|
||||||
|
downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake3")).findFirst().get().getValue());
|
||||||
|
|
||||||
Assertions.assertEquals(String.valueOf(0), downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake1")).findFirst().get().getValue());
|
Measure views = entity
|
||||||
Assertions.assertEquals(String.valueOf(0), downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake2")).findFirst().get().getValue());
|
.getMeasures()
|
||||||
Assertions.assertEquals(String.valueOf(1), downloads.getUnit().stream().filter(u -> u.getKey().equals("10|fake3")).findFirst().get().getValue());
|
|
||||||
|
|
||||||
Measure views = entity.getMeasures()
|
|
||||||
.stream()
|
.stream()
|
||||||
.filter(m -> m.getId().equals("views"))
|
.filter(m -> m.getId().equals("views"))
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.get();
|
.get();
|
||||||
|
|
||||||
Assertions.assertEquals(String.valueOf(5), views.getUnit().stream().filter(u -> u.getKey().equals("10|fake1")).findFirst().get().getValue());
|
Assertions
|
||||||
Assertions.assertEquals(String.valueOf(1), views.getUnit().stream().filter(u -> u.getKey().equals("10|fake2")).findFirst().get().getValue());
|
.assertEquals(
|
||||||
Assertions.assertEquals(String.valueOf(3), views.getUnit().stream().filter(u -> u.getKey().equals("10|fake3")).findFirst().get().getValue());
|
String.valueOf(5),
|
||||||
|
views.getUnit().stream().filter(u -> u.getKey().equals("10|fake1")).findFirst().get().getValue());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
String.valueOf(1),
|
||||||
|
views.getUnit().stream().filter(u -> u.getKey().equals("10|fake2")).findFirst().get().getValue());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
String.valueOf(3),
|
||||||
|
views.getUnit().stream().filter(u -> u.getKey().equals("10|fake3")).findFirst().get().getValue());
|
||||||
|
|
||||||
tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|"))
|
tmp
|
||||||
|
.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|"))
|
||||||
.foreach(
|
.foreach(
|
||||||
r -> ((OafEntity) r.getPayload())
|
r -> ((OafEntity) r.getPayload())
|
||||||
.getMeasures()
|
.getMeasures()
|
||||||
|
@ -214,7 +238,6 @@ public class SparkAtomicActionCountJobTest {
|
||||||
"count",
|
"count",
|
||||||
u.getKey()))));
|
u.getKey()))));
|
||||||
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
"0",
|
"0",
|
||||||
|
@ -413,6 +436,7 @@ public class SparkAtomicActionCountJobTest {
|
||||||
.get(0)
|
.get(0)
|
||||||
.getValue());
|
.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testMatch() {
|
void testMatch() {
|
||||||
String usageScoresPath = getClass()
|
String usageScoresPath = getClass()
|
||||||
|
@ -490,7 +514,8 @@ public class SparkAtomicActionCountJobTest {
|
||||||
"Inferred by OpenAIRE",
|
"Inferred by OpenAIRE",
|
||||||
u.getDataInfo().getProvenanceaction().getClassname()))));
|
u.getDataInfo().getProvenanceaction().getClassname()))));
|
||||||
|
|
||||||
tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|"))
|
tmp
|
||||||
|
.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|"))
|
||||||
.foreach(
|
.foreach(
|
||||||
r -> ((OafEntity) r.getPayload())
|
r -> ((OafEntity) r.getPayload())
|
||||||
.getMeasures()
|
.getMeasures()
|
||||||
|
@ -505,7 +530,8 @@ public class SparkAtomicActionCountJobTest {
|
||||||
"count",
|
"count",
|
||||||
u.getKey()))));
|
u.getKey()))));
|
||||||
|
|
||||||
tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|"))
|
tmp
|
||||||
|
.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|"))
|
||||||
.foreach(
|
.foreach(
|
||||||
r -> ((OafEntity) r.getPayload())
|
r -> ((OafEntity) r.getPayload())
|
||||||
.getMeasures()
|
.getMeasures()
|
||||||
|
@ -520,7 +546,8 @@ public class SparkAtomicActionCountJobTest {
|
||||||
"10|fake1",
|
"10|fake1",
|
||||||
u.getKey()))));
|
u.getKey()))));
|
||||||
|
|
||||||
tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|"))
|
tmp
|
||||||
|
.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|"))
|
||||||
.foreach(
|
.foreach(
|
||||||
r -> ((OafEntity) r.getPayload())
|
r -> ((OafEntity) r.getPayload())
|
||||||
.getMeasures()
|
.getMeasures()
|
||||||
|
|
Loading…
Reference in New Issue