176 lines
5.4 KiB
Java
176 lines
5.4 KiB
Java
package org.gcube.dataharvest.harvester.sobigdata;
|
|
|
|
import java.text.ParseException;
|
|
import java.util.ArrayList;
|
|
import java.util.Date;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
|
|
import org.gcube.dataharvest.datamodel.HarvestedData;
|
|
import org.gcube.dataharvest.harvester.BasicHarvester;
|
|
import org.gcube.dataharvest.utils.DateUtils;
|
|
import org.gcube.dataharvest.utils.Utils;
|
|
import org.gcube.portlets.user.urlshortener.UrlEncoderUtil;
|
|
import org.json.JSONArray;
|
|
import org.json.JSONException;
|
|
import org.json.JSONObject;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
/**
|
|
* @author Eric Perrone (ISTI - CNR)
|
|
* @author Luca Frosini (ISTI - CNR)
|
|
*/
|
|
public class ResourceCatalogueHarvester extends BasicHarvester {
|
|
|
|
private static Logger logger = LoggerFactory.getLogger(ResourceCatalogueHarvester.class);
|
|
|
|
private int cityOfCitizensCounter = 0;
|
|
private int migrationStudiesCounter = 0;
|
|
private int societalDebatesCounter = 0;
|
|
private int wellBeingAndEconomyCounter = 0;
|
|
|
|
public ResourceCatalogueHarvester(Date start, Date end) throws ParseException {
|
|
super(start, end);
|
|
}
|
|
|
|
@Override
|
|
public List<HarvestedData> getData() throws Exception {
|
|
|
|
ArrayList<HarvestedData> data = new ArrayList<HarvestedData>();
|
|
List<HarvestedData> dataDeliverable = getDataDeliverable();
|
|
for(HarvestedData harvest : dataDeliverable) {
|
|
data.add(harvest);
|
|
}
|
|
|
|
List<HarvestedData> dataMethod = getDataMethod();
|
|
for(HarvestedData harvest : dataMethod) {
|
|
data.add(harvest);
|
|
}
|
|
|
|
List<HarvestedData> dataDataset = getDataDataset();
|
|
for(HarvestedData harvest : dataDataset) {
|
|
data.add(harvest);
|
|
}
|
|
|
|
List<HarvestedData> dataApplication = getDataApplication();
|
|
for(HarvestedData harvest : dataApplication) {
|
|
data.add(harvest);
|
|
}
|
|
|
|
return data;
|
|
}
|
|
|
|
public List<HarvestedData> getDataDeliverable() throws Exception {
|
|
String json = executeQuery("Deliverable");
|
|
return buildList(json, HarvestedData.NEW_CATALOGUE_DELIVERABLES);
|
|
}
|
|
|
|
public List<HarvestedData> getDataMethod() throws Exception {
|
|
String json = executeQuery("Method");
|
|
return buildList(json, HarvestedData.NEW_CATALOGUE_METHODS);
|
|
}
|
|
|
|
public List<HarvestedData> getDataDataset() throws Exception {
|
|
String json = executeQuery("Dataset");
|
|
return buildList(json, HarvestedData.NEW_CATALOGUE_DATASETS);
|
|
}
|
|
|
|
public List<HarvestedData> getDataApplication() throws Exception {
|
|
String json = executeQuery("Application");
|
|
return buildList(json, HarvestedData.NEW_CATALOGUE_APPLICATIONS);
|
|
}
|
|
|
|
private List<HarvestedData> buildList(String json, int dataType) throws Exception {
|
|
ArrayList<HarvestedData> data = new ArrayList<HarvestedData>();
|
|
JSONObject jsonObject = new JSONObject(json);
|
|
|
|
JSONObject responseHeader = jsonObject.getJSONObject("responseHeader");
|
|
int status = responseHeader.getInt("status");
|
|
if(status != 0) {
|
|
String err = "Query Deliverable in error: status " + status;
|
|
logger.error(err);
|
|
throw new Exception(err, null);
|
|
}
|
|
|
|
JSONObject response = jsonObject.getJSONObject("response");
|
|
|
|
int numFound = response.getInt("numFound");
|
|
HarvestedData h = new HarvestedData(dataType,
|
|
"/d4science.research-infrastructures.eu/SoBigData/ResourceCatalogue", numFound);
|
|
logger.debug(h.toString());
|
|
data.add(h);
|
|
if(numFound > 0) {
|
|
|
|
JSONArray docs = response.getJSONArray("docs");
|
|
for(Object item : docs) {
|
|
JSONObject doc = (JSONObject) item;
|
|
try {
|
|
JSONArray groups = doc.getJSONArray("groups");
|
|
Iterator<Object> git = groups.iterator();
|
|
while(git.hasNext()) {
|
|
String groupItem = (String) git.next();
|
|
counterByGroup(groupItem);
|
|
}
|
|
} catch(JSONException x) {
|
|
logger.debug("Document without groups");
|
|
}
|
|
}
|
|
|
|
h = new HarvestedData(dataType, "/d4science.research-infrastructures.eu/SoBigData/CityOfCitizens",
|
|
cityOfCitizensCounter);
|
|
logger.debug(h.toString());
|
|
data.add(h);
|
|
h = new HarvestedData(dataType, "/Migration Studies", migrationStudiesCounter);
|
|
logger.debug(h.toString());
|
|
data.add(h);
|
|
h = new HarvestedData(dataType, "/d4science.research-infrastructures.eu/SoBigData/SocietalDebates",
|
|
societalDebatesCounter);
|
|
logger.debug(h.toString());
|
|
data.add(h);
|
|
h = new HarvestedData(dataType, "/d4science.research-infrastructures.eu/SoBigData/WellBeingAndEconomy",
|
|
wellBeingAndEconomyCounter);
|
|
logger.debug(h.toString());
|
|
data.add(h);
|
|
|
|
}
|
|
|
|
return data;
|
|
|
|
}
|
|
|
|
private String executeQuery(String fqSubString) throws Exception {
|
|
String query = "https://ckan-solr-d4s.d4science.org/solr/sobigdata/select?";
|
|
String q = UrlEncoderUtil.encodeQuery("metadata_created:[" + DateUtils.dateToStringWithTZ(startDate) + " TO "
|
|
+ DateUtils.dateToStringWithTZ(endDate) + "]");
|
|
query += "q=" + q;
|
|
String fq = UrlEncoderUtil.encodeQuery("extras_systemtype:\"SoBigData.eu: " + fqSubString + "\"");
|
|
query += "&fq=" + fq + "&wt=json&indent=true";
|
|
logger.debug(query);
|
|
|
|
String json = Utils.getJson(query);
|
|
// logger.debug(json);
|
|
|
|
return json;
|
|
}
|
|
|
|
private void counterByGroup(String groupName) {
|
|
cityOfCitizensCounter = migrationStudiesCounter = societalDebatesCounter = wellBeingAndEconomyCounter = 0;
|
|
switch(groupName) {
|
|
case "city-of-citizens-group":
|
|
cityOfCitizensCounter++;
|
|
break;
|
|
case "migration-studies":
|
|
migrationStudiesCounter++;
|
|
break;
|
|
case "societal-debates-group":
|
|
societalDebatesCounter++;
|
|
break;
|
|
case "well-being-and-economy-group":
|
|
wellBeingAndEconomyCounter++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|