[Indicators] refactoring according to ticket #9741

This commit is contained in:
Miriam Baglioni 2024-05-02 11:47:45 +02:00
parent a6b0f1d371
commit e2ea886ee8
3 changed files with 89 additions and 40 deletions

View File

@ -29,11 +29,12 @@ public class Constants {
public static final String USAGE_COUNT_DOWNLOADS = "downloads"; public static final String USAGE_COUNT_DOWNLOADS = "downloads";
public static final String USAGE_COUNT_VIEWS = "views"; public static final String USAGE_COUNT_VIEWS = "views";
public static final String IMPACT_POPULARITY = "popularity"; public static final String BIP_POPULARITY = "popularity";
public static final String IMPACT_POPULARITY_ALT = "popularity_alt"; public static final String BIP_POPULARITY_ALT = "popularity_alt";
public static final String IMPACT_INFLUENCE = "influence"; public static final String BIP_INFLUENCE = "influence";
public static final String IMPACT_INFLUENCE_ALT = "influence_alt"; public static final String BIP_INFLUENCE_ALT = "influence_alt";
public static final String IMPACT_IMPULSE = "impulse"; public static final String BIP_IMPULSE = "impulse";
public static final String BIP_CITATION_COUNT = "citation";
static { static {
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2); ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);

View File

@ -24,6 +24,7 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants; import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
import eu.dnetlib.dhp.oa.model.BipIndicators;
import eu.dnetlib.dhp.oa.model.Indicator; import eu.dnetlib.dhp.oa.model.Indicator;
import eu.dnetlib.dhp.oa.model.Score; import eu.dnetlib.dhp.oa.model.Score;
import eu.dnetlib.dhp.oa.model.UsageCounts; import eu.dnetlib.dhp.oa.model.UsageCounts;
@ -89,7 +90,8 @@ public class Utils {
} }
public static Dataset<String> getEntitiesId(SparkSession spark, String inputPath) { public static Dataset<String> getEntitiesId(SparkSession spark, String inputPath) {
Dataset<String> dumpedIds = Utils Dataset<String> dumpedIds;
dumpedIds = Utils
.readPath(spark, inputPath + "/publication", GraphResult.class) .readPath(spark, inputPath + "/publication", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING()) .map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
.union( .union(
@ -149,6 +151,19 @@ public class Utils {
Encoders.bean(Relation.class)); Encoders.bean(Relation.class));
} }
/**
* "measures":[
* {"id":"influence","unit":[{"key":"score","value":"3.1177596E-9"},{"key":"class","value":"C5"}]},
* {"id":"popularity","unit":[{"key":"score","value":"1.6041533E-9"},{"key":"class","value":"C5"}]},
* {"id":"influence_alt","unit":[{"key":"score","value":"0"},{"key":"class","value":"C5"}]},
* {"id":"popularity_alt","unit":[{"key":"score","value":"0.0"},{"key":"class","value":"C5"}]},
* {"id":"impulse","unit":[{"key":"score","value":"0"},{"key":"class","value":"C5"}]}]
* @param measures
* @return
*/
public static Indicator getIndicator(List<Measure> measures) { public static Indicator getIndicator(List<Measure> measures) {
Indicator i = new Indicator(); Indicator i = new Indicator();
for (eu.dnetlib.dhp.schema.oaf.Measure m : measures) { for (eu.dnetlib.dhp.schema.oaf.Measure m : measures) {
@ -159,9 +174,48 @@ public class Utils {
case USAGE_COUNT_VIEWS: case USAGE_COUNT_VIEWS:
getUsageCounts(i).setViews(m.getUnit().get(0).getValue()); getUsageCounts(i).setViews(m.getUnit().get(0).getValue());
break; break;
default: case BIP_INFLUENCE:
getImpactMeasure(i).add(getScore(m.getId(), m.getUnit())); m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setInfluenceClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setInfluence(Double.parseDouble(u.getValue()));
});
break; break;
case BIP_POPULARITY:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setPopularityClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setPopularity(Double.parseDouble(u.getValue()));
});
break;
case BIP_INFLUENCE_ALT:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setInfluenceAltClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setInfluenceAlt(Double.parseDouble(u.getValue()));
});
break;
case BIP_POPULARITY_ALT:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setPopularityAltClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setPopularityAlt(Double.parseDouble(u.getValue()));
});
break;
case BIP_IMPULSE:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setImpulseClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setImpulse(Double.parseDouble(u.getValue()));
});
break;
default:
throw new RuntimeException("No mapping found for indicator " + m.getId());
} }
} }
@ -178,11 +232,11 @@ public class Utils {
} }
@NotNull @NotNull
private static List<Score> getImpactMeasure(Indicator i) { private static BipIndicators getImpactMeasure(Indicator i) {
if (i.getBipIndicators() == null) { if (i.getCitationImpact() == null) {
i.setBipIndicators(new ArrayList<>()); i.setCitationImpact(new BipIndicators());
} }
return i.getBipIndicators(); return i.getCitationImpact();
} }
private static Score getScore(String indicator, List<KeyValue> unit) { private static Score getScore(String indicator, List<KeyValue> unit) {

View File

@ -140,7 +140,7 @@ public class DumpJobTest {
GraphResult gr = verificationDataset.first(); GraphResult gr = verificationDataset.first();
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent()); Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent());
Assertions.assertFalse(Optional.ofNullable(gr.getIndicators().getBipIndicators()).isPresent()); Assertions.assertFalse(Optional.ofNullable(gr.getIndicators().getCitationImpact()).isPresent());
} }
@ -363,10 +363,10 @@ public class DumpJobTest {
Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateOfCollection()); Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateOfCollection());
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent()); Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent());
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getBipIndicators()).isPresent()); Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getCitationImpact()).isPresent());
Assertions // Assertions
.assertEquals(5, gr.getIndicators().getBipIndicators().size()); // .assertEquals(5, gr.getIndicators().getBipIndicators().size());
Assertions Assertions
.assertTrue(gr.getIndicators().getUsageCounts() != null); .assertTrue(gr.getIndicators().getUsageCounts() != null);
@ -375,31 +375,25 @@ public class DumpJobTest {
Assertions Assertions
.assertTrue(Integer.valueOf(gr.getIndicators().getUsageCounts().getViews()) >= 0); .assertTrue(Integer.valueOf(gr.getIndicators().getUsageCounts().getViews()) >= 0);
List<Score> bip = gr.getIndicators().getBipIndicators(); BipIndicators bip = gr.getIndicators().getCitationImpact();
for (Score in : bip) { Assertions.assertEquals("6.01504990349e-09", bip.getInfluence());
switch (in.getIndicator()) { Assertions.assertEquals("C", bip.getInfluenceClass());
case "influence": //
Assertions.assertEquals("6.01504990349e-09", in.getScore()); // case "popularity_alt":
Assertions.assertEquals("C", in.getClazz()); // Assertions.assertEquals("2.304", in.getScore());
break; // Assertions.assertEquals("C", in.getClazz());
case "popularity_alt": // break;
Assertions.assertEquals("2.304", in.getScore());
Assertions.assertEquals("C", in.getClazz());
break;
case "popularity":
Assertions.assertEquals("1.81666032463e-08", in.getScore());
Assertions.assertEquals("C", in.getClazz());
break;
case "influence_alt":
Assertions.assertEquals("8.0", in.getScore());
Assertions.assertEquals("C", in.getClazz());
break;
case "impulse":
Assertions.assertEquals("8.0", in.getScore());
Assertions.assertEquals("C", in.getClazz());
}
} Assertions.assertEquals("1.81666032463e-08", bip.getPopularity());
Assertions.assertEquals("C", bip.getPopularityClass());
// case "influence_alt":
// Assertions.assertEquals("8.0", in.getScore());
// Assertions.assertEquals("C", in.getClazz());
// break;
Assertions.assertEquals("8.0", bip.getImpulse());
Assertions.assertEquals("C", bip.getImpulseClass());
Assertions.assertEquals("0", gr.getIndicators().getUsageCounts().getDownloads()); Assertions.assertEquals("0", gr.getIndicators().getUsageCounts().getDownloads());
Assertions.assertEquals("1", gr.getIndicators().getUsageCounts().getViews()); Assertions.assertEquals("1", gr.getIndicators().getUsageCounts().getViews());