[Indicators] refactoring according to ticket #9741

This commit is contained in:
Miriam Baglioni 2024-05-02 11:47:45 +02:00
parent a6b0f1d371
commit e2ea886ee8
3 changed files with 89 additions and 40 deletions

View File

@ -29,11 +29,12 @@ public class Constants {
public static final String USAGE_COUNT_DOWNLOADS = "downloads";
public static final String USAGE_COUNT_VIEWS = "views";
public static final String IMPACT_POPULARITY = "popularity";
public static final String IMPACT_POPULARITY_ALT = "popularity_alt";
public static final String IMPACT_INFLUENCE = "influence";
public static final String IMPACT_INFLUENCE_ALT = "influence_alt";
public static final String IMPACT_IMPULSE = "impulse";
public static final String BIP_POPULARITY = "popularity";
public static final String BIP_POPULARITY_ALT = "popularity_alt";
public static final String BIP_INFLUENCE = "influence";
public static final String BIP_INFLUENCE_ALT = "influence_alt";
public static final String BIP_IMPULSE = "impulse";
public static final String BIP_CITATION_COUNT = "citation";
static {
ACCESS_RIGHTS_COAR_MAP.put(ModelConstants.ACCESS_RIGHT_OPEN, CABF2);

View File

@ -24,6 +24,7 @@ import com.google.gson.Gson;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
import eu.dnetlib.dhp.oa.model.BipIndicators;
import eu.dnetlib.dhp.oa.model.Indicator;
import eu.dnetlib.dhp.oa.model.Score;
import eu.dnetlib.dhp.oa.model.UsageCounts;
@ -89,7 +90,8 @@ public class Utils {
}
public static Dataset<String> getEntitiesId(SparkSession spark, String inputPath) {
Dataset<String> dumpedIds = Utils
Dataset<String> dumpedIds;
dumpedIds = Utils
.readPath(spark, inputPath + "/publication", GraphResult.class)
.map((MapFunction<GraphResult, String>) r -> r.getId(), Encoders.STRING())
.union(
@ -149,6 +151,19 @@ public class Utils {
Encoders.bean(Relation.class));
}
/**
* "measures":[{"id":"influence",
* "unit":[{"key":"score","value":"3.1177596E-9"},{"key":"class","value":"C5"}]},
* {"id":"popularity",
* "unit":[{"key":"score","value":"1.6041533E-9"},{
* "key":"class","value":"C5"}]},
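* {"id":"influence_alt",
* "unit":[{"key":"score","value":"0"},{"key":"class","value":"C5"}]},
* {"id":"popularity_alt",
* "unit":[{"key":"score","value":"0.0"},{"key":"class","value":"C5"}]},
* {"id":"impulse",
* "unit":[{"key":"score","value":"0"},{"key":"class","value":"C5"}]}]
* @param measures the measures to map; each one is dispatched on its id and its "score"/"class" units are read
* @return the Indicator built from the given measures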
*/
public static Indicator getIndicator(List<Measure> measures) {
Indicator i = new Indicator();
for (eu.dnetlib.dhp.schema.oaf.Measure m : measures) {
@ -159,9 +174,48 @@ public class Utils {
case USAGE_COUNT_VIEWS:
getUsageCounts(i).setViews(m.getUnit().get(0).getValue());
break;
default:
getImpactMeasure(i).add(getScore(m.getId(), m.getUnit()));
case BIP_INFLUENCE:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setInfluenceClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setInfluence(Double.parseDouble(u.getValue()));
});
break;
case BIP_POPULARITY:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setPopularityClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setPopularity(Double.parseDouble(u.getValue()));
});
break;
case BIP_INFLUENCE_ALT:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setInfluenceAltClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setInfluenceAlt(Double.parseDouble(u.getValue()));
});
break;
case BIP_POPULARITY_ALT:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setPopularityAltClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setPopularityAlt(Double.parseDouble(u.getValue()));
});
break;
case BIP_IMPULSE:
m.getUnit().forEach(u -> {
if (u.getKey().equals("class"))
getImpactMeasure(i).setImpulseClass(u.getValue());
if (u.getKey().equals("score"))
getImpactMeasure(i).setImpulse(Double.parseDouble(u.getValue()));
});
break;
default:
throw new RuntimeException("No mapping found for indicator " + m.getId());
}
}
@ -178,11 +232,11 @@ public class Utils {
}
@NotNull
private static List<Score> getImpactMeasure(Indicator i) {
if (i.getBipIndicators() == null) {
i.setBipIndicators(new ArrayList<>());
private static BipIndicators getImpactMeasure(Indicator i) {
if (i.getCitationImpact() == null) {
i.setCitationImpact(new BipIndicators());
}
return i.getBipIndicators();
return i.getCitationImpact();
}
private static Score getScore(String indicator, List<KeyValue> unit) {

View File

@ -140,7 +140,7 @@ public class DumpJobTest {
GraphResult gr = verificationDataset.first();
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent());
Assertions.assertFalse(Optional.ofNullable(gr.getIndicators().getBipIndicators()).isPresent());
Assertions.assertFalse(Optional.ofNullable(gr.getIndicators().getCitationImpact()).isPresent());
}
@ -363,10 +363,10 @@ public class DumpJobTest {
Assertions.assertEquals("2020-03-23T00:20:51.392Z", gr.getDateOfCollection());
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getUsageCounts()).isPresent());
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getBipIndicators()).isPresent());
Assertions.assertTrue(Optional.ofNullable(gr.getIndicators().getCitationImpact()).isPresent());
Assertions
.assertEquals(5, gr.getIndicators().getBipIndicators().size());
// Assertions
// .assertEquals(5, gr.getIndicators().getBipIndicators().size());
Assertions
.assertTrue(gr.getIndicators().getUsageCounts() != null);
@ -375,31 +375,25 @@ public class DumpJobTest {
Assertions
.assertTrue(Integer.valueOf(gr.getIndicators().getUsageCounts().getViews()) >= 0);
List<Score> bip = gr.getIndicators().getBipIndicators();
for (Score in : bip) {
switch (in.getIndicator()) {
case "influence":
Assertions.assertEquals("6.01504990349e-09", in.getScore());
Assertions.assertEquals("C", in.getClazz());
break;
case "popularity_alt":
Assertions.assertEquals("2.304", in.getScore());
Assertions.assertEquals("C", in.getClazz());
break;
case "popularity":
Assertions.assertEquals("1.81666032463e-08", in.getScore());
Assertions.assertEquals("C", in.getClazz());
break;
case "influence_alt":
Assertions.assertEquals("8.0", in.getScore());
Assertions.assertEquals("C", in.getClazz());
break;
case "impulse":
Assertions.assertEquals("8.0", in.getScore());
Assertions.assertEquals("C", in.getClazz());
}
BipIndicators bip = gr.getIndicators().getCitationImpact();
Assertions.assertEquals("6.01504990349e-09", bip.getInfluence());
Assertions.assertEquals("C", bip.getInfluenceClass());
//
// case "popularity_alt":
// Assertions.assertEquals("2.304", in.getScore());
// Assertions.assertEquals("C", in.getClazz());
// break;
}
Assertions.assertEquals("1.81666032463e-08", bip.getPopularity());
Assertions.assertEquals("C", bip.getPopularityClass());
// case "influence_alt":
// Assertions.assertEquals("8.0", in.getScore());
// Assertions.assertEquals("C", in.getClazz());
// break;
Assertions.assertEquals("8.0", bip.getImpulse());
Assertions.assertEquals("C", bip.getImpulseClass());
Assertions.assertEquals("0", gr.getIndicators().getUsageCounts().getDownloads());
Assertions.assertEquals("1", gr.getIndicators().getUsageCounts().getViews());