396 lines
17 KiB
Java
396 lines
17 KiB
Java
package org.gcube.dataharvest.harvester;
|
|
|
|
import static org.gcube.resources.discovery.icclient.ICFactory.clientFor;
|
|
import static org.gcube.resources.discovery.icclient.ICFactory.queryFor;
|
|
|
|
import java.io.IOException;
|
|
import java.io.StringReader;
|
|
import java.security.GeneralSecurityException;
|
|
import java.time.Instant;
|
|
import java.time.LocalDate;
|
|
import java.time.ZoneId;
|
|
import java.time.format.DateTimeFormatter;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Date;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
|
|
import javax.xml.parsers.DocumentBuilder;
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
|
|
import org.gcube.accounting.accounting.summary.access.model.ScopeDescriptor;
|
|
import org.gcube.accounting.accounting.summary.access.model.update.AccountingRecord;
|
|
import org.gcube.common.authorization.client.exceptions.ObjectNotFound;
|
|
import org.gcube.common.encryption.encrypter.StringEncrypter;
|
|
import org.gcube.common.resources.gcore.GenericResource;
|
|
import org.gcube.common.resources.gcore.ServiceEndpoint;
|
|
import org.gcube.common.resources.gcore.ServiceEndpoint.AccessPoint;
|
|
import org.gcube.common.resources.gcore.ServiceEndpoint.Property;
|
|
import org.gcube.common.resources.gcore.utils.Group;
|
|
import org.gcube.common.resources.gcore.utils.XPathHelper;
|
|
import org.gcube.common.scope.api.ScopeProvider;
|
|
import org.gcube.common.scope.impl.ScopeBean;
|
|
import org.gcube.dataharvest.datamodel.AnalyticsReportCredentials;
|
|
import org.gcube.dataharvest.datamodel.CatalogueAccessesReportRow;
|
|
import org.gcube.dataharvest.datamodel.HarvestedDataKey;
|
|
import org.gcube.resources.discovery.client.api.DiscoveryClient;
|
|
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.w3c.dom.Node;
|
|
import org.xml.sax.InputSource;
|
|
|
|
import com.google.analytics.data.v1beta.BetaAnalyticsDataClient;
|
|
import com.google.analytics.data.v1beta.BetaAnalyticsDataSettings;
|
|
import com.google.analytics.data.v1beta.DateRange;
|
|
import com.google.analytics.data.v1beta.DateRange.Builder;
|
|
import com.google.analytics.data.v1beta.Dimension;
|
|
import com.google.analytics.data.v1beta.Metric;
|
|
import com.google.analytics.data.v1beta.Row;
|
|
import com.google.analytics.data.v1beta.RunReportRequest;
|
|
import com.google.analytics.data.v1beta.RunReportResponse;
|
|
import com.google.api.client.json.JsonFactory;
|
|
import com.google.api.client.json.gson.GsonFactory;
|
|
import com.google.api.gax.core.FixedCredentialsProvider;
|
|
import com.google.auth.oauth2.ServiceAccountCredentials;
|
|
|
|
|
|
public class CatalogueAccessesHarvester extends BasicHarvester {
|
|
|
|
private static Logger logger = LoggerFactory.getLogger(CatalogueAccessesHarvester.class);
|
|
|
|
private static final JsonFactory JSON_FACTORY = GsonFactory.getDefaultInstance();
|
|
|
|
|
|
private static final String MAPPING_RESOURCE_CATEGORY = "BigGAnalyticsMapping";
|
|
private static final String SERVICE_ENDPOINT_CATEGORY = "OnlineService";
|
|
private static final String SERVICE_ENDPOINT_NAME = "GA4AnalyticsDataService";
|
|
private static final String AP_CATALOGUE_PAGEVIEWS_PROPERTY = "catalogue-pageviews";
|
|
private static final String AP_CLIENT_PROPERTY = "client_id";
|
|
private static final String AP_PRIVATEKEY_PROPERTY = "private_key_id";
|
|
|
|
private static final String REGEX_CATALOGUE_ACCESSES = "^\\/$";
|
|
private static final String REGEX_CATALOGUE_DATASET_LIST_ACCESSES = "^\\/dataset(\\?([a-zA-Z0-9_.-]*.+))*";
|
|
private static final String REGEX_CATALOGUE_DATASET_ACCESSES = "^\\/dataset\\/[a-zA-Z0-9_.-]+$";
|
|
private static final String REGEX_CATALOGUE_RESOURCE_ACCESSES = "^\\/dataset\\/[a-zA-Z0-9_.-]+\\/resource\\/[a-zA-Z0-9_.-]+$";
|
|
|
|
private HashMap<String, List<CatalogueAccessesReportRow>> catalogueAccesses;
|
|
|
|
/**
 * Creates the harvester and eagerly loads the catalogue accesses of the given period.
 *
 * @param start beginning of the period, also forwarded to the superclass
 * @param end end of the period, also forwarded to the superclass
 * @throws Exception if the superclass constructor or the retrieval of the accesses fails
 */
public CatalogueAccessesHarvester(Date start, Date end) throws Exception {
    super(start, end);
    this.catalogueAccesses = getAllAccesses(start, end);
}
|
|
|
|
@Override
|
|
public List<AccountingRecord> getAccountingRecords() throws Exception {
|
|
try {
|
|
ArrayList<AccountingRecord> accountingRecords = new ArrayList<AccountingRecord>();
|
|
for (String dashboardContext : catalogueAccesses.keySet()) {
|
|
int catalogueTotalAccesses = 0;
|
|
int catalogueDatasetListAccesses = 0;
|
|
int catalogueDatasetAccesses = 0;
|
|
int catalogueResourceAccesses = 0;
|
|
logger.debug("Catalogue accesses for {} ", dashboardContext);
|
|
for(CatalogueAccessesReportRow row : catalogueAccesses.get(dashboardContext)) {
|
|
// String pagePath = row.getPagePath();
|
|
switch (row.getKey()) {
|
|
case CATALOGUE_ACCESSES:
|
|
catalogueTotalAccesses += row.getVisitNumber();
|
|
break;
|
|
case CATALOGUE_DATASET_LIST_ACCESSES:
|
|
catalogueDatasetListAccesses += row.getVisitNumber();
|
|
break;
|
|
case CATALOGUE_DATASET_ACCESSES:
|
|
catalogueDatasetAccesses += row.getVisitNumber();
|
|
break;
|
|
case CATALOGUE_RESOURCE_ACCESSES:
|
|
catalogueResourceAccesses += row.getVisitNumber();
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
ScopeDescriptor scopeDescriptor = new ScopeDescriptor();
|
|
|
|
try {
|
|
ScopeBean scopeBean = new ScopeBean(dashboardContext);
|
|
scopeDescriptor.setId(dashboardContext);
|
|
scopeDescriptor.setName(scopeBean.name());
|
|
|
|
AccountingRecord ar1 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_ACCESSES), (long) catalogueTotalAccesses);
|
|
AccountingRecord ar2 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_DATASET_LIST_ACCESSES), (long) catalogueDatasetListAccesses);
|
|
AccountingRecord ar3 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_DATASET_ACCESSES), (long) catalogueDatasetAccesses);
|
|
AccountingRecord ar4 = new AccountingRecord(scopeDescriptor, instant, getDimension(HarvestedDataKey.CATALOGUE_RESOURCE_ACCESSES), (long) catalogueResourceAccesses);
|
|
logger.debug("{} : {}", ar1.getDimension().getId(), ar1.getMeasure());
|
|
accountingRecords.add(ar1);
|
|
logger.debug("{} : {}", ar2.getDimension().getId(), ar2.getMeasure());
|
|
accountingRecords.add(ar2);
|
|
logger.debug("{} : {}", ar3.getDimension().getId(), ar3.getMeasure());
|
|
accountingRecords.add(ar3);
|
|
logger.debug("{} : {}", ar4.getDimension().getId(), ar4.getMeasure());
|
|
accountingRecords.add(ar4);
|
|
} catch (NullPointerException e) {
|
|
logger.warn("I found no correspondance in the Genereric Resource for a PropertyId you should check this, type: BigGAnalyticsMapping name: AccountingDashboardMapping");
|
|
e.printStackTrace();
|
|
}
|
|
|
|
}
|
|
logger.debug("Returning {} accountingRecords ", accountingRecords.size());
|
|
return accountingRecords;
|
|
|
|
} catch(Exception e) {
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
*/
|
|
private static HashMap<String, List<CatalogueAccessesReportRow>> getAllAccesses(Date start, Date end) throws Exception {
|
|
Builder dateRangeBuilder = getDateRangeBuilderForAnalytics(start, end);
|
|
logger.debug("Getting Catalogue accesses in this time range {}", dateRangeBuilder.toString());
|
|
|
|
AnalyticsReportCredentials credentialsFromD4S = getAuthorisedApplicationInfoFromIs();
|
|
|
|
logger.debug("Getting credentials credentialsFromD4S");
|
|
|
|
BetaAnalyticsDataSettings serviceSettings = initializeAnalyticsReporting(credentialsFromD4S);
|
|
|
|
logger.debug("initializeAnalyticsReporting service settings");
|
|
|
|
|
|
HashMap<String,List<RunReportResponse>> responses = getReportResponses(serviceSettings, credentialsFromD4S.getViewIds(), dateRangeBuilder);
|
|
HashMap<String, List<CatalogueAccessesReportRow>> toReturn = new HashMap<>();
|
|
|
|
for(String view : responses.keySet()) {
|
|
String dashboardContext = getAccountingDashboardContextGivenGAViewID(view);
|
|
logger.info("\n\n**************** Parsing responses for this catalogue view, which corresponds to Dashboard Context: " + dashboardContext);
|
|
List<CatalogueAccessesReportRow> viewReport = parseResponse(view, responses.get(view), dashboardContext);
|
|
logger.trace("Got {} entries from view id={}", viewReport.size(), view);
|
|
toReturn.put(dashboardContext, viewReport);
|
|
}
|
|
return toReturn;
|
|
}
|
|
|
|
/**
|
|
* Initializes an Google Analytics Data API service object.
|
|
*
|
|
* @return An authorized Google Analytics Data API
|
|
* @throws IOException
|
|
* @throws GeneralSecurityException
|
|
*/
|
|
private static BetaAnalyticsDataSettings initializeAnalyticsReporting(AnalyticsReportCredentials cred) throws IOException {
|
|
return BetaAnalyticsDataSettings.newBuilder()
|
|
.setCredentialsProvider(FixedCredentialsProvider.create(
|
|
ServiceAccountCredentials.fromPkcs8(cred.getClientId(), cred.getClientEmail(), cred.getPrivateKeyPem(), cred.getPrivateKeyId(), null)))
|
|
.build();
|
|
}
|
|
|
|
/**
|
|
* Queries Analytics Data API service
|
|
*
|
|
* @param service Analytics Data API service service settings.
|
|
* @return Row Analytics Data API service
|
|
* @throws IOException
|
|
*/
|
|
private static HashMap<String,List<RunReportResponse>> getReportResponses(BetaAnalyticsDataSettings betaAnalyticsDataSettings,
|
|
List<String> viewIDs, Builder dateRangeBuilder) throws IOException {
|
|
|
|
HashMap<String,List<RunReportResponse>> reports = new HashMap<>();
|
|
|
|
try (BetaAnalyticsDataClient analyticsData = BetaAnalyticsDataClient.create(betaAnalyticsDataSettings)) {
|
|
|
|
for(String propertyId : viewIDs) {
|
|
List<RunReportResponse> gReportResponses = new ArrayList<>();
|
|
logger.debug("Getting data from Analytics Data API for propertyId: " + propertyId);
|
|
RunReportRequest request =
|
|
RunReportRequest.newBuilder()
|
|
.setProperty("properties/" + propertyId)
|
|
.addDimensions(Dimension.newBuilder().setName("pagePath"))
|
|
.addMetrics(Metric.newBuilder().setName("screenPageViews"))
|
|
.addDateRanges(dateRangeBuilder)
|
|
.build();
|
|
|
|
// Make the request.
|
|
RunReportResponse response = analyticsData.runReport(request);
|
|
gReportResponses.add(response);
|
|
// Iterate through every row of the API response.
|
|
// for (Row row : response.getRowsList()) {
|
|
// System.out.printf(
|
|
// "%s, %s%n", row.getDimensionValues(0).getValue(), row.getMetricValues(0).getValue());
|
|
// }
|
|
reports.put(propertyId, gReportResponses);
|
|
}
|
|
}
|
|
return reports;
|
|
}
|
|
|
|
/**
|
|
* Parses and prints the Analytics Data API service respose
|
|
*
|
|
* @param dashboardContext
|
|
*/
|
|
private static List<CatalogueAccessesReportRow> parseResponse(String viewId, List<RunReportResponse> responses, String dashboardContext) {
|
|
logger.debug("parsing Response for " + viewId);
|
|
|
|
List<CatalogueAccessesReportRow> toReturn = new ArrayList<>();
|
|
for (RunReportResponse response : responses) {
|
|
for (Row row: response.getRowsList()) {
|
|
String dimension = row.getDimensionValues(0).getValue();
|
|
String metric = row.getMetricValues(0).getValue();
|
|
CatalogueAccessesReportRow var = new CatalogueAccessesReportRow();
|
|
boolean validEntry = false;
|
|
String pagePath = dimension;
|
|
logger.trace("parsing pagepath {}: value: {}", pagePath, Integer.parseInt(metric));
|
|
if (pagePath.matches(REGEX_CATALOGUE_RESOURCE_ACCESSES)) {
|
|
var.setKey(HarvestedDataKey.CATALOGUE_RESOURCE_ACCESSES);
|
|
validEntry = true;
|
|
}
|
|
else if (pagePath.matches(REGEX_CATALOGUE_DATASET_ACCESSES)) {
|
|
var.setKey(HarvestedDataKey.CATALOGUE_DATASET_ACCESSES);
|
|
validEntry = true;
|
|
}
|
|
else if (pagePath.matches(REGEX_CATALOGUE_DATASET_LIST_ACCESSES)) {
|
|
var.setKey(HarvestedDataKey.CATALOGUE_DATASET_LIST_ACCESSES);
|
|
validEntry = true;
|
|
}
|
|
else if (pagePath.matches(REGEX_CATALOGUE_ACCESSES)) {
|
|
var.setKey(HarvestedDataKey.CATALOGUE_ACCESSES);
|
|
validEntry = true;
|
|
}
|
|
if (validEntry) {
|
|
var.setDashboardContext(dashboardContext);
|
|
var.setPagePath(dimension);
|
|
var.setVisitNumber(Integer.parseInt(metric));
|
|
toReturn.add(var);
|
|
}
|
|
}
|
|
}
|
|
return toReturn;
|
|
}
|
|
|
|
|
|
private static List<ServiceEndpoint> getAnalyticsReportingConfigurationFromIS(String infrastructureScope)
|
|
throws Exception {
|
|
String scope = infrastructureScope;
|
|
String currScope = ScopeProvider.instance.get();
|
|
ScopeProvider.instance.set(scope);
|
|
SimpleQuery query = queryFor(ServiceEndpoint.class);
|
|
query.addCondition("$resource/Profile/Category/text() eq '" + SERVICE_ENDPOINT_CATEGORY + "'");
|
|
query.addCondition("$resource/Profile/Name/text() eq '" + SERVICE_ENDPOINT_NAME + "'");
|
|
DiscoveryClient<ServiceEndpoint> client = clientFor(ServiceEndpoint.class);
|
|
List<ServiceEndpoint> toReturn = client.submit(query);
|
|
ScopeProvider.instance.set(currScope);
|
|
return toReturn;
|
|
}
|
|
|
|
/**
|
|
* This method look up in the IS the Gateway which corresponds to a given Google Analytics viewId
|
|
* @param viewID
|
|
* @return the gateway name, e.g. "Blue-Cloud Gateway" or null if no correspondance was found
|
|
* @throws Exception
|
|
* @throws ObjectNotFound
|
|
*/
|
|
private static String getAccountingDashboardContextGivenGAViewID(String viewID) throws ObjectNotFound, Exception {
|
|
String toReturn = null;
|
|
String context = org.gcube.dataharvest.utils.Utils.getCurrentContext();
|
|
String currScope = ScopeProvider.instance.get();
|
|
ScopeProvider.instance.set(context);
|
|
SimpleQuery query = queryFor(GenericResource.class);
|
|
query.addCondition("$resource/Profile/SecondaryType/text() eq '" + MAPPING_RESOURCE_CATEGORY + "'");
|
|
query.addCondition("$resource/Profile/Body/Property/viewID/text() eq '" + viewID + "'");
|
|
DiscoveryClient<GenericResource> client = clientFor(GenericResource.class);
|
|
List<GenericResource> list = client.submit(query);
|
|
if(list.size() > 1) {
|
|
logger.error("Too many Generic Resources having GA viewID " + viewID
|
|
+ " in this scope having SecondaryType " + MAPPING_RESOURCE_CATEGORY);
|
|
} else if(list.size() == 0) {
|
|
logger.warn("There is no Generic Resources having GA viewID " + viewID + " and SecondaryType "
|
|
+ MAPPING_RESOURCE_CATEGORY + " in this context: " + context);
|
|
} else {
|
|
GenericResource found = list.get(0);
|
|
String elem = new StringBuilder("<body>").append(found.profile().bodyAsString()).append("</body>").toString();
|
|
DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
|
|
Node node = docBuilder.parse(new InputSource(new StringReader(elem))).getDocumentElement();
|
|
XPathHelper helper = new XPathHelper(node);
|
|
List<String> currValue = helper.evaluate("//Property/viewID/text()");
|
|
if (currValue != null && currValue.size() > 0) {
|
|
List<String> contexts = currValue;
|
|
for (int i = 0; i < contexts.size(); i++) {
|
|
if (currValue.get(i).trim().compareTo(viewID) == 0) {
|
|
toReturn = helper.evaluate("//Property/DashboardContext/text()").get(i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
logger.debug("Found DashboardContext for viewId {} : {} ", viewID, toReturn);
|
|
}
|
|
ScopeProvider.instance.set(currScope);
|
|
return toReturn;
|
|
}
|
|
|
|
/**
|
|
* @throws Exception
|
|
*/
|
|
private static AnalyticsReportCredentials getAuthorisedApplicationInfoFromIs() throws Exception {
|
|
AnalyticsReportCredentials reportCredentials = new AnalyticsReportCredentials();
|
|
|
|
String context = org.gcube.dataharvest.utils.Utils.getCurrentContext();
|
|
try {
|
|
List<ServiceEndpoint> list = getAnalyticsReportingConfigurationFromIS(context);
|
|
if(list.size() > 1) {
|
|
logger.error("Too many Service Endpoints having name " + SERVICE_ENDPOINT_NAME
|
|
+ " in this scope having Category " + SERVICE_ENDPOINT_CATEGORY);
|
|
} else if(list.size() == 0) {
|
|
logger.warn("There is no Service Endpoint having name " + SERVICE_ENDPOINT_NAME + " and Category "
|
|
+ SERVICE_ENDPOINT_CATEGORY + " in this context: " + context);
|
|
} else {
|
|
|
|
for(ServiceEndpoint res : list) {
|
|
Group<AccessPoint> apGroup = res.profile().accessPoints();
|
|
AccessPoint[] accessPoints = (AccessPoint[]) apGroup.toArray(new AccessPoint[apGroup.size()]);
|
|
AccessPoint found = accessPoints[0];
|
|
reportCredentials.setClientEmail(found.username());
|
|
String decryptedPrivateKey = StringEncrypter.getEncrypter().decrypt(found.password());
|
|
reportCredentials.setPrivateKeyPem(decryptedPrivateKey.trim());
|
|
|
|
for(Property prop : found.properties()) {
|
|
if(prop.name().compareTo(AP_CATALOGUE_PAGEVIEWS_PROPERTY) == 0) {
|
|
String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value());
|
|
String[] views = decryptedValue.split(";");
|
|
reportCredentials.setViewIds(Arrays.asList(views));
|
|
}
|
|
if(prop.name().compareTo(AP_CLIENT_PROPERTY) == 0) {
|
|
String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value());
|
|
reportCredentials.setClientId(decryptedValue);
|
|
}
|
|
if(prop.name().compareTo(AP_PRIVATEKEY_PROPERTY) == 0) {
|
|
String decryptedValue = StringEncrypter.getEncrypter().decrypt(prop.value());
|
|
reportCredentials.setPrivateKeyId(decryptedValue);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch(Exception e) {
|
|
e.printStackTrace();
|
|
return null;
|
|
}
|
|
return reportCredentials;
|
|
}
|
|
|
|
private static LocalDate asLocalDate(Date date) {
|
|
return Instant.ofEpochMilli(date.getTime()).atZone(ZoneId.systemDefault()).toLocalDate();
|
|
}
|
|
|
|
private static Builder getDateRangeBuilderForAnalytics(Date start, Date end) {
|
|
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); //required by Analytics
|
|
String startDate = asLocalDate(start).format(formatter);
|
|
String endDate = asLocalDate(end).format(formatter);
|
|
Builder dateRangeBuilder = DateRange.newBuilder().setStartDate(startDate).setEndDate(endDate);
|
|
|
|
return dateRangeBuilder;
|
|
}
|
|
|
|
}
|