diff --git a/src/main/java/org/gcube/portal/databook/server/DBCassandraAstyanaxImpl.java b/src/main/java/org/gcube/portal/databook/server/DBCassandraAstyanaxImpl.java index f0daa71..366963b 100644 --- a/src/main/java/org/gcube/portal/databook/server/DBCassandraAstyanaxImpl.java +++ b/src/main/java/org/gcube/portal/databook/server/DBCassandraAstyanaxImpl.java @@ -3,6 +3,7 @@ package org.gcube.portal.databook.server; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -1703,6 +1704,59 @@ public final class DBCassandraAstyanaxImpl implements DatabookStore { } return toReturn; } + /** + * {@inheritDoc} + */ + @Override + public Map getVREHashtagsWithOccurrenceFilteredByTime(String vreid, long timestamp){ + OperationResult> result = null; + try { + result = conn.getKeyspace().prepareQuery(cf_HashtagsCounter) + .getKeySlice(vreid) + .execute(); + } catch (ConnectionException e) { + e.printStackTrace(); + } + + HashMap toReturn = new HashMap (); + + // Iterate rows and their columns + for (Row row : result.getResult()) { + for (Column column : row.getColumns()) { + + // retrieve the feeds list for this hashtag + List feeds = null; + try{ + feeds = getVREFeedsByHashtag(vreid, column.getName()); + }catch(Exception e){ + _log.error("Unable to retrieve the list of feeds for hashtag" + column.getName() + " in vre " + vreid); + continue; + } + + if(feeds.isEmpty()){ + + _log.info("There are no feeds containing hashtag " + column.getName() + " in vre " + vreid); + continue; + + } + + // retrieve the most recent one among these feeds + Collections.sort(feeds, Collections.reverseOrder()); + + if(feeds.get(0).getTime().getTime() < timestamp){ + continue; + } + + // else.. + int curValue = Integer.parseInt(column.getStringValue()); + + if (curValue > 0) + toReturn.put(column.getName(), curValue); + } + } + return toReturn; + } + /** * {@inheritDoc} */ diff --git a/src/main/java/org/gcube/portal/databook/server/DatabookCassandraTest.java b/src/main/java/org/gcube/portal/databook/server/DatabookCassandraTest.java index a7154fa..2e2943b 100644 --- a/src/main/java/org/gcube/portal/databook/server/DatabookCassandraTest.java +++ b/src/main/java/org/gcube/portal/databook/server/DatabookCassandraTest.java @@ -1,27 +1,25 @@ package org.gcube.portal.databook.server; -import static org.junit.Assert.assertTrue; - -import java.util.ArrayList; -import java.util.Date; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; -import java.util.UUID; +import java.util.Map; +import java.util.Map.Entry; -import org.gcube.portal.databook.shared.Attachment; -import org.gcube.portal.databook.shared.Comment; import org.gcube.portal.databook.shared.Feed; -import org.gcube.portal.databook.shared.FeedType; -import org.gcube.portal.databook.shared.PrivacyLevel; -import org.gcube.portal.databook.shared.ex.CommentIDNotFoundException; +import org.gcube.portal.databook.shared.ex.ColumnNameNotFoundException; import org.gcube.portal.databook.shared.ex.FeedIDNotFoundException; +import org.gcube.portal.databook.shared.ex.FeedTypeNotFoundException; +import org.gcube.portal.databook.shared.ex.PrivacyLevelTypeNotFoundException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import com.netflix.astyanax.connectionpool.exceptions.ConnectionException; -import com.netflix.astyanax.model.ColumnFamily; -import com.netflix.astyanax.serializers.StringSerializer; - public class DatabookCassandraTest { private static DBCassandraAstyanaxImpl store; @@ -37,20 +35,123 @@ public class DatabookCassandraTest { } -// @Test -// public void getComment(){ -// -// String uuid = "820969b2-4632-4197-9fd6-5aafab781faa"; -// -// Comment c; -// try { -// c = store.readCommentById(uuid); -// System.err.println(c); -// } catch (CommentIDNotFoundException e) { -// // TODO Auto-generated catch block -// System.err.println(e.toString()); -// } -// } + // @Test + // public void getHashTagsFilteredByTime() throws PrivacyLevelTypeNotFoundException, FeedTypeNotFoundException, FeedIDNotFoundException, ColumnNameNotFoundException{ + // + // int windowSize = 6; // go back windowSize months + // + // String vreUnderTest = "/gcube/devsec/devVRE"; + // + // // reference time + // Calendar calendar = Calendar.getInstance(); + // int currentMonth = calendar.get(Calendar.MONTH); // jan = 0, ..... dec = 11 + // calendar.set(Calendar.MONTH, currentMonth - windowSize); // the year is automatically decreased if needed + // SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); + // System.out.println("Reference time for trending topics is " + format.format(calendar.getTime())); + // + // Map res = + // store.getVREHashtagsWithOccurrenceFilteredByTime( + // vreUnderTest, calendar.getTimeInMillis()); + // + // // find max score inside the list (counter) + // int max = 0; + // for(Entry entry : res.entrySet()){ + // + // max = max < entry.getValue() ? entry.getValue() : max; + // + // } + // + // // normalize + // Map normalized = new HashMap(); + // for(Entry entry : res.entrySet()){ + // + // normalized.put(entry.getKey(), (double)entry.getValue() / (double)max); + // + // } + // + // // create the weight for each entry as: + // // w = 0.6 * normalized_score + 0.4 * freshness + // // freshness is evaluated as (window_size - latest_feed_for_hashtag_in_window_month)/window_size + // Map scoredList = new HashMap(); + // for(Entry entry : res.entrySet()){ + // + // double weight = 0.6 * normalized.get(entry.getKey()); + // + // // retrieve the last feed for this hashtag and locate it into the window + // List mostRecentFeedForHashtag = store.getVREFeedsByHashtag(vreUnderTest, entry.getKey()); + // + // // retrieve the most recent one among these feeds + // Collections.sort(mostRecentFeedForHashtag, Collections.reverseOrder()); + // + // // locate into the window + // Calendar locateInWindow = Calendar.getInstance(); + // locateInWindow.setTimeInMillis(mostRecentFeedForHashtag.get(0).getTime().getTime()); + // + // // get the month + // int sub = currentMonth - locateInWindow.get(Calendar.MONTH); + // int value = sub >= 0? sub : 12 - Math.abs(sub); + // double freshness = (double)(windowSize - value) / (double)(windowSize); + // System.out.println("freshness is " + freshness + " because the last feed has month " + locateInWindow.get(Calendar.MONTH)); + // + // weight += 0.4 * freshness; + // + // scoredList.put(entry.getKey(), weight); + // } + // + // // print sorted + // Map scoredListSorted = sortByValue(scoredList); + // for(Entry entry : scoredListSorted.entrySet()){ + // + // System.out.println("[hashtag=" + entry.getKey() + " , weight=" + entry.getValue() + "]"); + // } + // } + // + // public static > Map + // sortByValue( Map map ) + // { + // List> list = + // new LinkedList>( map.entrySet() ); + // Collections.sort( list, new Comparator>() + // { + // public int compare( Map.Entry o1, Map.Entry o2 ) + // { + // return (o2.getValue()).compareTo( o1.getValue() ); + // } + // }); + // + // Map result = new LinkedHashMap(); + // for (Map.Entry entry : list) + // { + // result.put( entry.getKey(), entry.getValue() ); + // } + // return result; + // } + + + // @Test + // public void getHashTags() throws PrivacyLevelTypeNotFoundException, FeedTypeNotFoundException, FeedIDNotFoundException, ColumnNameNotFoundException{ + // List resList = store.getVREFeedsByHashtag("/gcube/devsec/devVRE", "#test"); + // + // for (Feed feed : resList) { + // System.out.println(feed.getTime()); + // } + // + // } + + // @Test + // public void getComment(){ + // + // String uuid = "820969b2-4632-4197-9fd6-5aafab781faa"; + // + // Comment c; + // try { + // c = store.readCommentById(uuid); + // System.err.println(c); + // } catch (CommentIDNotFoundException e) { + // // TODO Auto-generated catch block + // System.err.println(e.toString()); + // } + // } // @Test // public void vreIds(){ @@ -192,30 +293,30 @@ public class DatabookCassandraTest { // } -// /** -// * use exclusively to add a new (Dynamic) CF to a keyspace -// */ -// @Test -// public void addInvitesDynamicColumnFamilies() { -// System.out.println("UserNotificationsUnread"); -// ColumnFamily cf_UserNotificationsUnreadTimeline = new ColumnFamily( -// DBCassandraAstyanaxImpl.USER_NOTIFICATIONS_UNREAD, // Column Family Name -// StringSerializer.get(), // Key Serializer -// StringSerializer.get()); // Column Serializer -// -// try { -// -// new CassandraClusterConnection(false).getKeyspace().createColumnFamily(cf_UserNotificationsUnreadTimeline, ImmutableMap.builder() -// .put("default_validation_class", "UTF8Type") -// .put("key_validation_class", "UTF8Type") -// .put("comparator_type", "UTF8Type") -// .build()); -// -// } catch (ConnectionException e) { -// e.printStackTrace(); -// } -// System.out.println("UserNotificationsUnread END"); -// } + // /** + // * use exclusively to add a new (Dynamic) CF to a keyspace + // */ + // @Test + // public void addInvitesDynamicColumnFamilies() { + // System.out.println("UserNotificationsUnread"); + // ColumnFamily cf_UserNotificationsUnreadTimeline = new ColumnFamily( + // DBCassandraAstyanaxImpl.USER_NOTIFICATIONS_UNREAD, // Column Family Name + // StringSerializer.get(), // Key Serializer + // StringSerializer.get()); // Column Serializer + // + // try { + // + // new CassandraClusterConnection(false).getKeyspace().createColumnFamily(cf_UserNotificationsUnreadTimeline, ImmutableMap.builder() + // .put("default_validation_class", "UTF8Type") + // .put("key_validation_class", "UTF8Type") + // .put("comparator_type", "UTF8Type") + // .build()); + // + // } catch (ConnectionException e) { + // e.printStackTrace(); + // } + // System.out.println("UserNotificationsUnread END"); + // } // private List getKeys() { diff --git a/src/main/java/org/gcube/portal/databook/server/DatabookStore.java b/src/main/java/org/gcube/portal/databook/server/DatabookStore.java index 482b50b..68e8165 100644 --- a/src/main/java/org/gcube/portal/databook/server/DatabookStore.java +++ b/src/main/java/org/gcube/portal/databook/server/DatabookStore.java @@ -370,6 +370,13 @@ public interface DatabookStore { * @return a HashMap of vre Hashtags associated with their occurrence */ Map getVREHashtagsWithOccurrence(String vreid); + /** + * get a map of vre hashtags where the key is the hashtag and the value is the occurrence of the hashtag in the VRE + * @param vreid vre identifier (scope) + * @param timestamp do not consider hashtags used before timestamp + * @return a HashMap of vre Hashtags associated with their occurrence + */ + Map getVREHashtagsWithOccurrenceFilteredByTime(String vreid, long timestamp); /** * * @param vreid VRE identifier