Added method getVREHashtagsWithOccurrenceFilteredByTime to filter out hashtags that are too old

git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/portal/social-networking-library@128535 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2016-05-10 10:14:46 +00:00
parent df008d8b66
commit fb3dfaa024
3 changed files with 214 additions and 52 deletions

View File

@ -3,6 +3,7 @@ package org.gcube.portal.databook.server;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@ -1703,6 +1704,59 @@ public final class DBCassandraAstyanaxImpl implements DatabookStore {
}
return toReturn;
}
/**
 * {@inheritDoc}
 * <p>
 * Implementation notes: the hashtag counters are read from the row of
 * {@code cf_HashtagsCounter} whose key is {@code vreid}. For every hashtag the
 * feeds returned by {@code getVREFeedsByHashtag} are inspected and the hashtag
 * is kept only when the first feed in reverse natural order (presumably the
 * most recent one) has a time not lower than {@code timestamp} and the stored
 * counter is greater than zero.
 * <p>
 * If the counters cannot be read because of a connection problem the error is
 * logged and an empty map is returned. Hashtags whose feeds cannot be
 * retrieved are logged and skipped.
 */
@Override
public Map<String, Integer> getVREHashtagsWithOccurrenceFilteredByTime(String vreid, long timestamp){
    Map<String, Integer> toReturn = new HashMap<String, Integer>();
    OperationResult<Rows<String, String>> result;
    try {
        result = conn.getKeyspace().prepareQuery(cf_HashtagsCounter)
                .getKeySlice(vreid)
                .execute();
    } catch (ConnectionException e) {
        // without a result there is nothing to iterate on: dereferencing it would only raise a NullPointerException
        _log.error("Unable to retrieve the hashtags counters for vre " + vreid, e);
        return toReturn;
    }
    // Iterate rows and their columns (one column per hashtag)
    for (Row<String, String> row : result.getResult()) {
        for (Column<String> column : row.getColumns()) {
            String hashtag = column.getName();
            // retrieve the feeds list for this hashtag
            List<Feed> feeds;
            try {
                feeds = getVREFeedsByHashtag(vreid, hashtag);
            } catch (Exception e) {
                _log.error("Unable to retrieve the list of feeds for hashtag " + hashtag + " in vre " + vreid, e);
                continue;
            }
            if (feeds == null || feeds.isEmpty()) {
                _log.info("There are no feeds containing hashtag " + hashtag + " in vre " + vreid);
                continue;
            }
            // pick the element that would come first when sorting in reverse natural order
            // (same element as sorting and taking index 0, but in a single pass and without
            // reordering the list returned by getVREFeedsByHashtag)
            Feed mostRecent = Collections.min(feeds, Collections.<Feed>reverseOrder());
            if (mostRecent.getTime().getTime() < timestamp) {
                // the hashtag has not been used since the reference time
                continue;
            }
            int curValue = Integer.parseInt(column.getStringValue());
            if (curValue > 0) {
                toReturn.put(hashtag, curValue);
            }
        }
    }
    return toReturn;
}
/**
* {@inheritDoc}
*/

View File

@ -1,27 +1,25 @@
package org.gcube.portal.databook.server;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Date;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.UUID;
import java.util.Map;
import java.util.Map.Entry;
import org.gcube.portal.databook.shared.Attachment;
import org.gcube.portal.databook.shared.Comment;
import org.gcube.portal.databook.shared.Feed;
import org.gcube.portal.databook.shared.FeedType;
import org.gcube.portal.databook.shared.PrivacyLevel;
import org.gcube.portal.databook.shared.ex.CommentIDNotFoundException;
import org.gcube.portal.databook.shared.ex.ColumnNameNotFoundException;
import org.gcube.portal.databook.shared.ex.FeedIDNotFoundException;
import org.gcube.portal.databook.shared.ex.FeedTypeNotFoundException;
import org.gcube.portal.databook.shared.ex.PrivacyLevelTypeNotFoundException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.model.ColumnFamily;
import com.netflix.astyanax.serializers.StringSerializer;
public class DatabookCassandraTest {
private static DBCassandraAstyanaxImpl store;
@ -37,20 +35,123 @@ public class DatabookCassandraTest {
}
// @Test
// public void getComment(){
//
// String uuid = "820969b2-4632-4197-9fd6-5aafab781faa";
//
// Comment c;
// try {
// c = store.readCommentById(uuid);
// System.err.println(c);
// } catch (CommentIDNotFoundException e) {
// // TODO Auto-generated catch block
// System.err.println(e.toString());
// }
// }
// @Test
// public void getHashTagsFilteredByTime() throws PrivacyLevelTypeNotFoundException, FeedTypeNotFoundException, FeedIDNotFoundException, ColumnNameNotFoundException{
//
// int windowSize = 6; // go back windowSize months
//
// String vreUnderTest = "/gcube/devsec/devVRE";
//
// // reference time
// Calendar calendar = Calendar.getInstance();
// int currentMonth = calendar.get(Calendar.MONTH); // jan = 0, ..... dec = 11
// calendar.set(Calendar.MONTH, currentMonth - windowSize); // the year is automatically decreased if needed
// SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
// System.out.println("Reference time for trending topics is " + format.format(calendar.getTime()));
//
// Map<String, Integer> res =
// store.getVREHashtagsWithOccurrenceFilteredByTime(
// vreUnderTest, calendar.getTimeInMillis());
//
// // find max score inside the list (counter)
// int max = 0;
// for(Entry<String, Integer> entry : res.entrySet()){
//
// max = max < entry.getValue() ? entry.getValue() : max;
//
// }
//
// // normalize
// Map<String, Double> normalized = new HashMap<String, Double>();
// for(Entry<String, Integer> entry : res.entrySet()){
//
// normalized.put(entry.getKey(), (double)entry.getValue() / (double)max);
//
// }
//
// // create the weight for each entry as:
// // w = 0.6 * normalized_score + 0.4 * freshness
// // freshness is evaluated as (window_size - latest_feed_for_hashtag_in_window_month)/window_size
// Map<String, Double> scoredList = new HashMap<String, Double>();
// for(Entry<String, Integer> entry : res.entrySet()){
//
// double weight = 0.6 * normalized.get(entry.getKey());
//
// // retrieve the last feed for this hashtag and locate it into the window
// List<Feed> mostRecentFeedForHashtag = store.getVREFeedsByHashtag(vreUnderTest, entry.getKey());
//
// // retrieve the most recent one among these feeds
// Collections.sort(mostRecentFeedForHashtag, Collections.reverseOrder());
//
// // locate into the window
// Calendar locateInWindow = Calendar.getInstance();
// locateInWindow.setTimeInMillis(mostRecentFeedForHashtag.get(0).getTime().getTime());
//
// // get the month
// int sub = currentMonth - locateInWindow.get(Calendar.MONTH);
// int value = sub >= 0? sub : 12 - Math.abs(sub);
// double freshness = (double)(windowSize - value) / (double)(windowSize);
// System.out.println("freshness is " + freshness + " because the last feed has month " + locateInWindow.get(Calendar.MONTH));
//
// weight += 0.4 * freshness;
//
// scoredList.put(entry.getKey(), weight);
// }
//
// // print sorted
// Map<String, Double> scoredListSorted = sortByValue(scoredList);
// for(Entry<String, Double> entry : scoredListSorted.entrySet()){
//
// System.out.println("[hashtag=" + entry.getKey() + " , weight=" + entry.getValue() + "]");
// }
// }
//
// public static <K, V extends Comparable<? super V>> Map<K, V>
// sortByValue( Map<K, V> map )
// {
// List<Map.Entry<K, V>> list =
// new LinkedList<Map.Entry<K, V>>( map.entrySet() );
// Collections.sort( list, new Comparator<Map.Entry<K, V>>()
// {
// public int compare( Map.Entry<K, V> o1, Map.Entry<K, V> o2 )
// {
// return (o2.getValue()).compareTo( o1.getValue() );
// }
// });
//
// Map<K, V> result = new LinkedHashMap<K, V>();
// for (Map.Entry<K, V> entry : list)
// {
// result.put( entry.getKey(), entry.getValue() );
// }
// return result;
// }
// @Test
// public void getHashTags() throws PrivacyLevelTypeNotFoundException, FeedTypeNotFoundException, FeedIDNotFoundException, ColumnNameNotFoundException{
// List<Feed> resList = store.getVREFeedsByHashtag("/gcube/devsec/devVRE", "#test");
//
// for (Feed feed : resList) {
// System.out.println(feed.getTime());
// }
//
// }
// @Test
// public void getComment(){
//
// String uuid = "820969b2-4632-4197-9fd6-5aafab781faa";
//
// Comment c;
// try {
// c = store.readCommentById(uuid);
// System.err.println(c);
// } catch (CommentIDNotFoundException e) {
// // TODO Auto-generated catch block
// System.err.println(e.toString());
// }
// }
// @Test
// public void vreIds(){
@ -192,30 +293,30 @@ public class DatabookCassandraTest {
// }
// /**
// * use exclusively to add a new (Dynamic) CF to a keyspace
// */
// @Test
// public void addInvitesDynamicColumnFamilies() {
// System.out.println("UserNotificationsUnread");
// ColumnFamily<String, String> cf_UserNotificationsUnreadTimeline = new ColumnFamily<String, String>(
// DBCassandraAstyanaxImpl.USER_NOTIFICATIONS_UNREAD, // Column Family Name
// StringSerializer.get(), // Key Serializer
// StringSerializer.get()); // Column Serializer
//
// try {
//
// new CassandraClusterConnection(false).getKeyspace().createColumnFamily(cf_UserNotificationsUnreadTimeline, ImmutableMap.<String, Object>builder()
// .put("default_validation_class", "UTF8Type")
// .put("key_validation_class", "UTF8Type")
// .put("comparator_type", "UTF8Type")
// .build());
//
// } catch (ConnectionException e) {
// e.printStackTrace();
// }
// System.out.println("UserNotificationsUnread END");
// }
// /**
// * use exclusively to add a new (Dynamic) CF to a keyspace
// */
// @Test
// public void addInvitesDynamicColumnFamilies() {
// System.out.println("UserNotificationsUnread");
// ColumnFamily<String, String> cf_UserNotificationsUnreadTimeline = new ColumnFamily<String, String>(
// DBCassandraAstyanaxImpl.USER_NOTIFICATIONS_UNREAD, // Column Family Name
// StringSerializer.get(), // Key Serializer
// StringSerializer.get()); // Column Serializer
//
// try {
//
// new CassandraClusterConnection(false).getKeyspace().createColumnFamily(cf_UserNotificationsUnreadTimeline, ImmutableMap.<String, Object>builder()
// .put("default_validation_class", "UTF8Type")
// .put("key_validation_class", "UTF8Type")
// .put("comparator_type", "UTF8Type")
// .build());
//
// } catch (ConnectionException e) {
// e.printStackTrace();
// }
// System.out.println("UserNotificationsUnread END");
// }
// private List<String> getKeys() {

View File

@ -370,6 +370,13 @@ public interface DatabookStore {
* @return a HashMap<String, Integer> of vre Hashtags associated with their occurrence
*/
Map<String, Integer> getVREHashtagsWithOccurrence(String vreid);
/**
 * Get a map of the hashtags of a VRE where the key is the hashtag and the value is
 * the occurrence of the hashtag in the VRE, leaving out the hashtags that are too old.
 * A hashtag is considered too old when it has not been used at or after the given timestamp.
 * @param vreid vre identifier (scope)
 * @param timestamp reference time: hashtags whose most recent use is before this value
 * are not returned (presumably expressed in milliseconds since the epoch, to be confirmed
 * against the implementation)
 * @return a {@code Map<String, Integer>} of vre hashtags associated with their occurrence
 */
Map<String, Integer> getVREHashtagsWithOccurrenceFilteredByTime(String vreid, long timestamp);
/**
*
* @param vreid VRE identifier