accounting-dashboard-harves.../src/main/java/org/gcube/dataharvest/harvester/sobigdata/ResourceCatalogueHarvester....

175 lines
5.4 KiB
Java

package org.gcube.dataharvest.harvester.sobigdata;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.gcube.dataharvest.datamodel.HarvestedData;
import org.gcube.dataharvest.harvester.BasicHarvester;
import org.gcube.dataharvest.utils.Utils;
import org.gcube.portlets.user.urlshortener.UrlEncoderUtil;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Harvester that queries the SoBigData catalogue Solr index and reports how
 * many catalogue items of each kind (deliverables, methods, datasets and
 * applications) have a {@code metadata_created} value between the
 * harvester's start and end dates.
 * <p>
 * For every kind one total is produced for the ResourceCatalogue context
 * and, when at least one item was found, one count for each of the four
 * known thematic groups.
 *
 * @author Eric Perrone (ISTI - CNR)
 * @author Luca Frosini (ISTI - CNR)
 */
public class ResourceCatalogueHarvester extends BasicHarvester {

    private static final Logger logger = LoggerFactory.getLogger(ResourceCatalogueHarvester.class);

    /** Solr select endpoint of the SoBigData catalogue index. */
    private static final String SOLR_SELECT_URL = "https://ckan-solr-d4s.d4science.org/solr/sobigdata/select?";

    /** Common prefix of the SoBigData contexts the harvested values are attached to. */
    private static final String SOBIGDATA_CONTEXT = "/d4science.research-infrastructures.eu/SoBigData/";

    // Per-group tallies for the result set currently processed by buildList.
    private int cityOfCitizensCounter = 0;
    private int migrationStudiesCounter = 0;
    private int societalDebatesCounter = 0;
    private int wellBeingAndEconomyCounter = 0;

    /**
     * Creates a harvester for the given time interval.
     *
     * @param start beginning of the interval, forwarded to the superclass
     * @param end end of the interval, forwarded to the superclass
     * @throws ParseException propagated from the superclass constructor
     */
    public ResourceCatalogueHarvester(Date start, Date end) throws ParseException {
        super(start, end);
    }

    /**
     * Collects the harvested values of all four catalogue item kinds, in the
     * order deliverables, methods, datasets, applications.
     *
     * @return the concatenation of the four per-kind lists
     * @throws Exception if any of the underlying queries fails
     */
    @Override
    public List<HarvestedData> getData() throws Exception {
        List<HarvestedData> data = new ArrayList<>();
        data.addAll(getDataDeliverable());
        data.addAll(getDataMethod());
        data.addAll(getDataDataset());
        data.addAll(getDataApplication());
        return data;
    }

    /**
     * Harvests the items whose system type is "SoBigData.eu: Deliverable".
     *
     * @return the harvested values for deliverables
     * @throws Exception if the query fails or reports a non-zero status
     */
    public List<HarvestedData> getDataDeliverable() throws Exception {
        String json = executeQuery("Deliverable");
        return buildList(json, HarvestedData.NEW_CATALOGUE_DELIVERABLES);
    }

    /**
     * Harvests the items whose system type is "SoBigData.eu: Method".
     *
     * @return the harvested values for methods
     * @throws Exception if the query fails or reports a non-zero status
     */
    public List<HarvestedData> getDataMethod() throws Exception {
        String json = executeQuery("Method");
        return buildList(json, HarvestedData.NEW_CATALOGUE_METHODS);
    }

    /**
     * Harvests the items whose system type is "SoBigData.eu: Dataset".
     *
     * @return the harvested values for datasets
     * @throws Exception if the query fails or reports a non-zero status
     */
    public List<HarvestedData> getDataDataset() throws Exception {
        String json = executeQuery("Dataset");
        return buildList(json, HarvestedData.NEW_CATALOGUE_DATASETS);
    }

    /**
     * Harvests the items whose system type is "SoBigData.eu: Application".
     *
     * @return the harvested values for applications
     * @throws Exception if the query fails or reports a non-zero status
     */
    public List<HarvestedData> getDataApplication() throws Exception {
        String json = executeQuery("Application");
        return buildList(json, HarvestedData.NEW_CATALOGUE_APPLICATIONS);
    }

    /**
     * Turns a Solr JSON response into harvested values.
     * <p>
     * The first element always carries {@code numFound} for the
     * ResourceCatalogue context. When {@code numFound} is greater than zero,
     * four more elements follow with the number of group memberships counted
     * in the returned {@code docs} for each known group.
     *
     * @param json the raw Solr response
     * @param dataType the {@link HarvestedData} type constant to tag the values with
     * @return the harvested values built from the response
     * @throws Exception if the response header reports a non-zero status or
     *         the response cannot be parsed
     */
    private List<HarvestedData> buildList(String json, int dataType) throws Exception {
        List<HarvestedData> data = new ArrayList<>();

        JSONObject jsonObject = new JSONObject(json);
        int status = jsonObject.getJSONObject("responseHeader").getInt("status");
        if(status != 0) {
            // This method serves every item kind, so the message must not name a specific one.
            String err = "Catalogue query for data type " + dataType + " in error: status " + status;
            logger.error(err);
            throw new Exception(err);
        }

        JSONObject response = jsonObject.getJSONObject("response");
        int numFound = response.getInt("numFound");
        addHarvestedData(data, dataType, SOBIGDATA_CONTEXT + "ResourceCatalogue", numFound);
        if(numFound <= 0) {
            return data;
        }

        // Start from zero for this result set: the counters are instance
        // fields shared by the four per-kind queries.
        resetGroupCounters();

        // NOTE(review): "docs" holds only the documents Solr actually returned;
        // the query sends no "rows" parameter, so the group counts presumably
        // cover just the first result page when numFound is larger - confirm.
        JSONArray docs = response.getJSONArray("docs");
        for(Object item : docs) {
            JSONObject doc = (JSONObject) item;
            JSONArray groups = doc.optJSONArray("groups");
            if(groups == null) {
                logger.debug("Document without groups");
                continue;
            }
            for(Object group : groups) {
                counterByGroup((String) group);
            }
        }

        addHarvestedData(data, dataType, SOBIGDATA_CONTEXT + "CityOfCitizens", cityOfCitizensCounter);
        // NOTE(review): this context does not follow the pattern of the other
        // three groups; kept unchanged - confirm the intended context name.
        addHarvestedData(data, dataType, "/Migration Studies", migrationStudiesCounter);
        addHarvestedData(data, dataType, SOBIGDATA_CONTEXT + "SocietalDebates", societalDebatesCounter);
        addHarvestedData(data, dataType, SOBIGDATA_CONTEXT + "WellBeingAndEconomy", wellBeingAndEconomyCounter);
        return data;
    }

    /** Creates one harvested value, logs it at debug level and appends it to {@code data}. */
    private void addHarvestedData(List<HarvestedData> data, int dataType, String context, int value) {
        HarvestedData h = new HarvestedData(dataType, context, value);
        logger.debug(h.toString());
        data.add(h);
    }

    /**
     * Runs the Solr query selecting the items created between the start and
     * end dates whose {@code extras_systemtype} is
     * {@code "SoBigData.eu: <fqSubString>"}.
     *
     * @param fqSubString the item kind appended to the system type filter
     * @return the JSON response body returned by {@code Utils.getJson}
     * @throws Exception if building or executing the request fails
     */
    private String executeQuery(String fqSubString) throws Exception {
        String q = UrlEncoderUtil.encodeQuery("metadata_created:[" + Utils.dateToStringWithTZ(startDate) + " TO "
                + Utils.dateToStringWithTZ(endDate) + "]");
        String fq = UrlEncoderUtil.encodeQuery("extras_systemtype:\"SoBigData.eu: " + fqSubString + "\"");
        String query = SOLR_SELECT_URL + "q=" + q + "&fq=" + fq + "&wt=json&indent=true";
        logger.debug(query);
        return Utils.getJson(query);
    }

    /** Sets the four group counters back to zero before a new result set is counted. */
    private void resetGroupCounters() {
        cityOfCitizensCounter = 0;
        migrationStudiesCounter = 0;
        societalDebatesCounter = 0;
        wellBeingAndEconomyCounter = 0;
    }

    /**
     * Increments the counter of the given group. Group names other than the
     * four known ones are ignored.
     *
     * @param groupName the group identifier found in a document
     */
    private void counterByGroup(String groupName) {
        switch(groupName) {
            case "city-of-citizens-group":
                cityOfCitizensCounter++;
                break;
            case "migration-studies":
                migrationStudiesCounter++;
                break;
            case "societal-debates-group":
                societalDebatesCounter++;
                break;
            case "well-being-and-economy-group":
                wellBeingAndEconomyCounter++;
                break;
            default:
                // Not one of the tracked groups: nothing to count.
                break;
        }
    }
}