ref 21031: Add support to Jupyter

Updated Jupyter Accesses Harvester
This commit is contained in:
Giancarlo Panichi 2021-03-26 13:24:45 +01:00
parent 13481c35a5
commit 38ec08e0a3
5 changed files with 290 additions and 224 deletions

View File

@ -21,6 +21,7 @@ import org.gcube.common.scope.impl.ScopeBean;
import org.gcube.common.scope.impl.ScopeBean.Type;
import org.gcube.dataharvest.harvester.CatalogueAccessesHarvester;
import org.gcube.dataharvest.harvester.CoreServicesAccessesHarvester;
import org.gcube.dataharvest.harvester.JupyterAccessesHarvester;
import org.gcube.dataharvest.harvester.MethodInvocationHarvester;
import org.gcube.dataharvest.harvester.SocialInteractionsHarvester;
import org.gcube.dataharvest.harvester.VREAccessesHarvester;
@ -53,10 +54,11 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
public static final String DRY_RUN_INPUT_PARAMETER = "dryRun";
/**
* Allows partial harvesting of data of the current period.
* This means that in MONTHLY aggregation type the current month is harvested instead of the previous month which
* is done when the month is completed.
* This allow the portlet to display monthly data in the current moth even the data is partial (till the current day).
* Allows partial harvesting of data of the current period. This means that
* in MONTHLY aggregation type the current month is harvested instead of the
* previous month which is done when the month is completed. This allows the
* portlet to display monthly data in the current month even if the data is
* partial (till the current day).
*/
public static final String PARTIAL_HARVESTING = "partialHarvesting";
@ -85,7 +87,6 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
};
/**
 * Exposes the holder of the plugin configuration properties.
 *
 * @return the {@link InheritableThreadLocal} kept in the static
 *         {@code properties} field; callers read or replace the
 *         {@link Properties} through its {@code get()}/{@code set()}
 */
public static InheritableThreadLocal<Properties> getProperties() {
return properties;
}
@ -107,7 +108,6 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
};
/**
 * Looks up the {@link ScopeDescriptor} associated with a context in the
 * collection currently held by {@code scopeDescriptors}.
 *
 * @param context
 *            lookup key; presumably the context full name - confirm
 *            against the code that fills the collection
 * @return the result of {@code scopeDescriptors.get().get(context)};
 *         presumably {@code null} when nothing is registered for the key
 *         - TODO confirm
 */
public static ScopeDescriptor getScopeDescriptor(String context) {
return scopeDescriptors.get().get(context);
}
@ -205,8 +205,8 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
end = DateUtils.getEndDateFromStartDate(aggregationType, start, 1, partialHarvesting);
logger.debug("Harvesting from {} to {} (ReRun:{} - GetVREUsers:{} - DryRun:{})",
DateUtils.format(start), DateUtils.format(end), reRun, getVREUsers, dryRun);
logger.debug("Harvesting from {} to {} (ReRun:{} - GetVREUsers:{} - DryRun:{})", DateUtils.format(start),
DateUtils.format(end), reRun, getVREUsers, dryRun);
Properties properties = getConfigParameters();
getProperties().set(properties);
@ -227,7 +227,6 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
}
scopeDescriptors.set(scopeDescriptorMap);
Set<Dimension> dimensionSet = dao.getDimensions();
Map<String, Dimension> dimensionMap = new HashMap<>();
for (Dimension dimension : dimensionSet) {
@ -241,6 +240,7 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
String initialToken = SecurityTokenProvider.instance.get();
VREAccessesHarvester vreAccessesHarvester = null;
JupyterAccessesHarvester jupyterAccessesHarvester = null;
for (String context : contexts) {
// Setting the token for the context
@ -267,17 +267,18 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
accountingRecords.addAll(records);
} catch (Exception e) {
logger.error("Error harvesting {} for {}", CatalogueAccessesHarvester.class.getSimpleName(), context, e);
logger.error("Error harvesting {} for {}", CatalogueAccessesHarvester.class.getSimpleName(),
context, e);
}
}
if (vreAccessesHarvester == null) {
if (scopeBean.is(Type.INFRASTRUCTURE)) {
vreAccessesHarvester = new VREAccessesHarvester(start, end);
} else {
// This code should be never used because the scopes are sorted by fullname
// This code should never be used because the scopes are
// sorted by fullname
ScopeBean parent = scopeBean.enclosingScope();
while (!parent.is(Type.INFRASTRUCTURE)) {
@ -295,6 +296,29 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
}
// Create the Jupyter accesses harvester only once (it is null until the
// first context is processed) and always under the token of the
// INFRASTRUCTURE context.
if (jupyterAccessesHarvester == null) {
    if (scopeBean.is(Type.INFRASTRUCTURE)) {
        jupyterAccessesHarvester = new JupyterAccessesHarvester(start, end);
    } else {
        // This code should never be used because the scopes are
        // sorted by fullname
        ScopeBean parent = scopeBean.enclosingScope();
        // Climb one level per iteration. Re-reading
        // scopeBean.enclosingScope() here would never change parent and
        // the loop would never terminate for a scope whose direct parent
        // is not the infrastructure.
        while (!parent.is(Type.INFRASTRUCTURE)) {
            parent = parent.enclosingScope();
        }

        // Switching to the token of the infrastructure context
        Utils.setContext(contextAuthorization.getTokenForContext(parent.toString()));

        jupyterAccessesHarvester = new JupyterAccessesHarvester(start, end);

        // Setting back token for the context
        Utils.setContext(contextAuthorization.getTokenForContext(context));
    }
}
if ((context.startsWith(SO_BIG_DATA_VO) || context.startsWith(SO_BIG_DATA_EU_VRE)
|| context.startsWith(SO_BIG_DATA_IT_VRE))
@ -311,8 +335,24 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
accountingRecords.addAll(harvested);
/*
List<HarvestedData> harvested = vreAccessesHarvester.getData();
data.addAll(harvested);
* List<HarvestedData> harvested =
* vreAccessesHarvester.getData(); data.addAll(harvested);
*/
} catch (Exception e) {
logger.error("Error harvesting VRE Accesses for {}", context, e);
}
try {
// Collecting Google Analytics Data for Jupyter Accesses
logger.info("Going to harvest Jupyter Accesses for {}", context);
List<AccountingRecord> harvested = jupyterAccessesHarvester.getAccountingRecords();
accountingRecords.addAll(harvested);
/*
* List<HarvestedData> harvested =
* jupyterAccessesHarvester.getData();
* data.addAll(harvested);
*/
} catch (Exception e) {
logger.error("Error harvesting VRE Accesses for {}", context, e);
@ -327,8 +367,8 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
accountingRecords.addAll(harvested);
/*
List<HarvestedData> harvested = socialHarvester.getData();
data.addAll(harvested);
* List<HarvestedData> harvested =
* socialHarvester.getData(); data.addAll(harvested);
*/
} catch (Exception e) {
logger.error("Error harvesting Social Interactions for {}", context, e);
@ -337,9 +377,11 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
try {
// Collecting info on VRE users
if (getVREUsers) {
// Harvesting Users only for VREs (not for VO and ROOT which is the sum of the children contexts)
// Harvesting Users only for VREs (not for VO and ROOT
// which is the sum of the children contexts)
// The VREUsers can be only Harvested for the last month
if(scopeBean.is(Type.VRE) && start.equals(DateUtils.getPreviousPeriod(aggregationType, partialHarvesting).getTime())) {
if (scopeBean.is(Type.VRE) && start
.equals(DateUtils.getPreviousPeriod(aggregationType, partialHarvesting).getTime())) {
logger.info("Going to harvest Context Users for {}", context);
VREUsersHarvester vreUsersHarvester = new VREUsersHarvester(start, end);
@ -347,8 +389,9 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
accountingRecords.addAll(harvested);
/*
List<HarvestedData> harvested = vreUsersHarvester.getData();
data.addAll(harvested);
* List<HarvestedData> harvested =
* vreUsersHarvester.getData();
* data.addAll(harvested);
*/
}
}
@ -359,17 +402,19 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
if (context.startsWith(SO_BIG_DATA_CATALOGUE_CONTEXT)) {
try {
// Collecting info on Resource Catalogue (Dataset, Application, Deliverables, Methods)
// Collecting info on Resource Catalogue (Dataset,
// Application, Deliverables, Methods)
logger.info("Going to harvest Resource Catalogue Information for {}", context);
ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start, end,
contexts);
ResourceCatalogueHarvester resourceCatalogueHarvester = new ResourceCatalogueHarvester(start,
end, contexts);
List<AccountingRecord> harvested = resourceCatalogueHarvester.getAccountingRecords();
accountingRecords.addAll(harvested);
/*
List<HarvestedData> harvested = resourceCatalogueHarvester.getData();
data.addAll(harvested);
* List<HarvestedData> harvested =
* resourceCatalogueHarvester.getData();
* data.addAll(harvested);
*/
} catch (Exception e) {
@ -402,8 +447,9 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
accountingRecords.addAll(harvested);
/*
List<HarvestedData> harvested = tagMeMethodInvocationHarvester.getData();
data.addAll(harvested);
* List<HarvestedData> harvested =
* tagMeMethodInvocationHarvester.getData();
* data.addAll(harvested);
*/
} catch (Exception e) {
@ -415,13 +461,13 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
logger.info("Going to harvest Method Invocations for {}", context);
MethodInvocationHarvester methodInvocationHarvester = new MethodInvocationHarvester(start, end);
List<AccountingRecord> harvested = methodInvocationHarvester.getAccountingRecords();
accountingRecords.addAll(harvested);
/*
List<HarvestedData> harvested = methodInvocationHarvester.getData();
data.addAll(harvested);
* List<HarvestedData> harvested =
* methodInvocationHarvester.getData();
* data.addAll(harvested);
*/
} catch (Exception e) {
logger.error("Error harvesting Method Invocations for {}", context, e);
@ -432,7 +478,8 @@ public class AccountingDashboardHarvesterPlugin extends Plugin {
Utils.setContext(initialToken);
logger.debug("Harvest Measures from {} to {} are {}", DateUtils.format(start), DateUtils.format(end), accountingRecords);
logger.debug("Harvest Measures from {} to {} are {}", DateUtils.format(start), DateUtils.format(end),
accountingRecords);
if (!dryRun) {
dao.insertRecords(accountingRecords.toArray(new AccountingRecord[1]));
// dbaseManager.insertMonthlyData(start, end, data, reRun);

View File

@ -80,6 +80,7 @@ public class JupyterAccessesHarvester extends BasicHarvester {
/**
 * Creates the harvester for the given period and immediately loads the
 * accesses of that period into {@code vreAccesses} through
 * {@code getAllAccesses(start, end)}.
 *
 * @param start
 *            beginning of the period; forwarded to the superclass
 *            constructor and to {@code getAllAccesses}
 * @param end
 *            end of the period; forwarded to the superclass constructor
 *            and to {@code getAllAccesses}
 * @throws Exception
 *             declared by this constructor; presumably raised by the
 *             superclass constructor or by {@code getAllAccesses} -
 *             confirm
 */
public JupyterAccessesHarvester(Date start, Date end) throws Exception {
    super(start, end);
    // Use the real class name so the entry can be found by searching logs
    logger.debug("JupyterAccessesHarvester: {}, {}", start, end);
    vreAccesses = getAllAccesses(start, end);
}
@ -94,9 +95,10 @@ public class JupyterAccessesHarvester extends BasicHarvester {
ScopeBean scopeBean = new ScopeBean(context);
String lowerCasedContext = scopeBean.name().toLowerCase();
logger.debug("JupyerAccessHArvester lowerCasedContext: {}",lowerCasedContext);
for (VREAccessesReportRow row : vreAccesses) {
String pagePath = row.getPagePath().toLowerCase();
//logger.debug("JupyerAccessHArvester pagePath: {}",lowerCasedContext);
if (pagePath != null && !pagePath.isEmpty()) {
if (pagePath.contains(lowerCasedContext)) {
if (pagePath.contains("jupyter") || pagePath.contains("jupiter")) {
@ -111,7 +113,7 @@ public class JupyterAccessesHarvester extends BasicHarvester {
ScopeDescriptor scopeDescriptor = AccountingDashboardHarvesterPlugin.getScopeDescriptor();
AccountingRecord ar = new AccountingRecord(scopeDescriptor, instant,
getDimension(HarvestedDataKey.ACCESSES), (long) measure);
getDimension(HarvestedDataKey.JUPYTER_ACCESSES), (long) measure);
logger.debug("{} : {}", ar.getDimension().getId(), ar.getMeasure());
accountingRecords.add(ar);

View File

@ -494,55 +494,6 @@ public class AccountingDataHarvesterPluginTest extends ContextTest {
}
}
// @Test
/**
 * Harvests the Jupyter accesses month by month (September 2018 to March
 * 2019) for the listed contexts and logs the accumulated records. The
 * harvester is built with the ROOT context set, the records are read with
 * each target context set, and ROOT is set again once all months are done.
 *
 * @throws Exception
 *             any failure raised while harvesting, after it has been
 *             logged
 */
public void testJupyterccessesHarvester() throws Exception {
    // {year, month} of the first day of every period to harvest
    final int[][] yearMonths = {
            { 2018, Calendar.SEPTEMBER },
            { 2018, Calendar.OCTOBER },
            { 2018, Calendar.NOVEMBER },
            { 2018, Calendar.DECEMBER },
            { 2019, Calendar.JANUARY },
            { 2019, Calendar.FEBRUARY },
            { 2019, Calendar.MARCH } };
    final String[] targetContexts = { "/d4science.research-infrastructures.eu/D4OS/Blue-CloudLab" };
    final AggregationType aggregation = AggregationType.MONTHLY;

    try {
        // AccountingDao dao = getAccountingDao();

        List<Date> periodStarts = new ArrayList<>();
        for (int[] yearMonth : yearMonths) {
            periodStarts.add(DateUtils.getStartCalendar(yearMonth[0], yearMonth[1], 1).getTime());
        }

        List<AccountingRecord> collected = new ArrayList<>();

        for (Date periodStart : periodStarts) {
            Date periodEnd = DateUtils.getEndDateFromStartDate(aggregation, periodStart, 1, false);

            // The harvester is instantiated with the ROOT context set
            ContextTest.setContextByName(ROOT);
            JupyterAccessesHarvester jupyterHarvester = new JupyterAccessesHarvester(periodStart, periodEnd);

            for (String targetContext : targetContexts) {
                setContextByNameAndScopeDescriptor(targetContext);
                collected.addAll(jupyterHarvester.getAccountingRecords());
                logger.debug("{} - {}", targetContext, collected);
            }
        }

        logger.debug("{}", collected);

        ContextTest.setContextByName(ROOT);

        // dao.insertRecords(accountingRecords.toArray(new
        // AccountingRecord[1]));
    } catch (Exception e) {
        logger.error("", e);
        throw e;
    }
}
// @Test
public void testSocialInteraction() {

View File

@ -0,0 +1,65 @@
package org.gcube.dataharvest.jupyter;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import org.gcube.accounting.accounting.summary.access.model.update.AccountingRecord;
import org.gcube.dataharvest.harvester.JupyterAccessesHarvester;
import org.gcube.dataharvest.utils.AggregationType;
import org.gcube.dataharvest.utils.ContextTest;
import org.gcube.dataharvest.utils.DateUtils;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * JUnit test that runs {@link JupyterAccessesHarvester} for a fixed set of
 * months against the {@code SCOPE} context and logs the harvested
 * {@link AccountingRecord}s. Nothing is persisted: the DAO calls are kept
 * only as commented-out hints.
 */
public class AccountingDataHarvesterJupyterTest extends ContextTest {

    private static final Logger logger = LoggerFactory.getLogger(AccountingDataHarvesterJupyterTest.class);

    /** Context set while the harvester is instantiated and restored at the end of the test. */
    public static final String ROOT = "/d4science.research-infrastructures.eu";

    /** Context set while the accounting records are read from the harvester. */
    private static final String SCOPE = "/d4science.research-infrastructures.eu/D4OS/Blue-CloudLab";

    /**
     * Harvests the Jupyter accesses of January, February and March 2021 with
     * MONTHLY aggregation and logs the accumulated records.
     *
     * @throws Exception
     *             any failure raised while harvesting, after it has been
     *             logged
     */
    @Test
    public void testJupyterccessesHarvester() throws Exception {
        try {
            // AccountingDao dao = getAccountingDao();

            List<Date> starts = new ArrayList<>();
            starts.add(DateUtils.getStartCalendar(2021, Calendar.JANUARY, 1).getTime());
            starts.add(DateUtils.getStartCalendar(2021, Calendar.FEBRUARY, 1).getTime());
            starts.add(DateUtils.getStartCalendar(2021, Calendar.MARCH, 1).getTime());

            AggregationType measureType = AggregationType.MONTHLY;

            List<AccountingRecord> accountingRecords = new ArrayList<>();

            for (Date start : starts) {
                Date end = DateUtils.getEndDateFromStartDate(measureType, start, 1, false);

                // The harvester is instantiated with the ROOT context set
                ContextTest.setContextByName(ROOT);
                JupyterAccessesHarvester jupyterAccessesHarvester = new JupyterAccessesHarvester(start, end);

                // The records are read with the target context set
                ContextTest.setContextByName(SCOPE);
                List<AccountingRecord> harvested = jupyterAccessesHarvester.getAccountingRecords();
                accountingRecords.addAll(harvested);

                logger.debug("{} - {}", SCOPE, accountingRecords);
            }

            logger.debug("{}", accountingRecords);

            // dao.insertRecords(accountingRecords.toArray(new
            // AccountingRecord[1]));
        } catch (Throwable e) {
            logger.error(e.getLocalizedMessage(), e);
            throw e;
        } finally {
            // Restore the ROOT context even when harvesting fails, so a
            // failure does not leave SCOPE set as the current context.
            ContextTest.setContextByName(ROOT);
        }
    }

}

View File

@ -1,3 +1,4 @@
/*.gcubekey
/*.key
/*.properties
/howto.txt