[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r23852: {arm} I'm not spotting a good solution for deduplicating large log (in arm/trunk: . src src/interface)
Author: atagar
Date: 2010-11-22 03:10:55 +0000 (Mon, 22 Nov 2010)
New Revision: 23852
Modified:
arm/trunk/armrc.sample
arm/trunk/src/interface/configStatePanel.py
arm/trunk/src/interface/logPanel.py
arm/trunk/src/settings.cfg
Log:
I'm not spotting a good solution for deduplicating large logs, so sidestepping the issue for now by disabling the feature when it takes too long.
Modified: arm/trunk/armrc.sample
===================================================================
--- arm/trunk/armrc.sample 2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/armrc.sample 2010-11-22 03:10:55 UTC (rev 23852)
@@ -69,7 +69,7 @@
# when editing config values the current value is prepopulated if true, and
# left blank otherwise
# state.colWidth.*
-# maximum column content width
+# column content width
# state.showPrivateOptions
# tor provides config options of the form "__<option>" that can be dangerous
# to set, if true arm provides these on the config panel
Modified: arm/trunk/src/interface/configStatePanel.py
===================================================================
--- arm/trunk/src/interface/configStatePanel.py 2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/src/interface/configStatePanel.py 2010-11-22 03:10:55 UTC (rev 23852)
@@ -232,14 +232,17 @@
optionColWidth, valueColWidth = 0, 0
# constructs a mapping of entries to their current values
+ # TODO: just skip dynamic widths entirely?
entryToValues = {}
for entry in self.confContents:
entryToValues[entry] = entry.get(FIELD_VALUE)
- optionColWidth = max(optionColWidth, len(entry.get(FIELD_OPTION)))
- valueColWidth = max(valueColWidth, len(entryToValues[entry]))
+ #optionColWidth = max(optionColWidth, len(entry.get(FIELD_OPTION)))
+ #valueColWidth = max(valueColWidth, len(entryToValues[entry]))
- optionColWidth = min(self._config["features.config.state.colWidth.option"], optionColWidth)
- valueColWidth = min(self._config["features.config.state.colWidth.value"], valueColWidth)
+ #optionColWidth = min(self._config["features.config.state.colWidth.option"], optionColWidth)
+ #valueColWidth = min(self._config["features.config.state.colWidth.value"], valueColWidth)
+ optionColWidth = self._config["features.config.state.colWidth.option"]
+ valueColWidth = self._config["features.config.state.colWidth.value"]
descriptionColWidth = max(0, width - scrollOffset - optionColWidth - valueColWidth - 2)
for lineNum in range(scrollLoc, len(self.confContents)):
Modified: arm/trunk/src/interface/logPanel.py
===================================================================
--- arm/trunk/src/interface/logPanel.py 2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/src/interface/logPanel.py 2010-11-22 03:10:55 UTC (rev 23852)
@@ -72,6 +72,9 @@
CACHED_DUPLICATES_ARGUMENTS = None # events
CACHED_DUPLICATES_RESULT = None
+# duration we'll wait for the deduplication function before giving up (in ms)
+DEDUPLICATION_TIMEOUT = 100
+
def daysSince(timestamp=None):
"""
Provides the number of days since the epoch converted to local time (rounded
@@ -326,7 +329,8 @@
"""
Deduplicates a list of log entries, providing back a tuple listing with the
log entry and count of duplicates following it. Entries in different days are
- not considered to be duplicates.
+ not considered to be duplicates. This times out, returning None if it takes
+ longer than DEDUPLICATION_TIMEOUT.
Arguments:
events - chronologically ordered listing of events
@@ -339,35 +343,17 @@
# loads common log entries from the config if they haven't been
if COMMON_LOG_MESSAGES == None: loadLogMessages()
+ startTime = time.time()
eventsRemaining = list(events)
returnEvents = []
while eventsRemaining:
entry = eventsRemaining.pop(0)
- duplicateIndices = []
+ duplicateIndices = isDuplicate(entry, eventsRemaining, True)
- for i in range(len(eventsRemaining)):
- forwardEntry = eventsRemaining[i]
-
- # if showing dates then do duplicate detection for each day, rather
- # than globally
- if forwardEntry.type == DAYBREAK_EVENT: break
-
- if entry.type == forwardEntry.type:
- isDuplicate = False
- if entry.msg == forwardEntry.msg: isDuplicate = True
- elif entry.type in COMMON_LOG_MESSAGES:
- for commonMsg in COMMON_LOG_MESSAGES[entry.type]:
- # if it starts with an asterisk then check the whole message rather
- # than just the start
- if commonMsg[0] == "*":
- isDuplicate = commonMsg[1:] in entry.msg and commonMsg[1:] in forwardEntry.msg
- else:
- isDuplicate = entry.msg.startswith(commonMsg) and forwardEntry.msg.startswith(commonMsg)
-
- if isDuplicate: break
-
- if isDuplicate: duplicateIndices.append(i)
+ # checks if the call timeout has been reached
+ if (time.time() - startTime) > DEDUPLICATION_TIMEOUT / 1000.0:
+ return None
# drops duplicate entries
duplicateIndices.reverse()
@@ -380,6 +366,48 @@
return returnEvents
+def isDuplicate(event, eventSet, getDuplicates = False):
+ """
+ True if the event is a duplicate for something in the eventSet, false
+ otherwise. If the getDuplicates flag is set this provides the indices of
+ the duplicates instead.
+
+ Arguments:
+ event - event to search for duplicates of
+ eventSet - set to look for the event in
+ getDuplicates - instead of providing back a boolean this gives a list of
+ the duplicate indices in the eventSet
+ """
+
+ duplicateIndices = []
+ for i in range(len(eventSet)):
+ forwardEntry = eventSet[i]
+
+ # if showing dates then do duplicate detection for each day, rather
+ # than globally
+ if forwardEntry.type == DAYBREAK_EVENT: break
+
+ if event.type == forwardEntry.type:
+ isDuplicate = False
+ if event.msg == forwardEntry.msg: isDuplicate = True
+ elif event.type in COMMON_LOG_MESSAGES:
+ for commonMsg in COMMON_LOG_MESSAGES[event.type]:
+ # if it starts with an asterisk then check the whole message rather
+ # than just the start
+ if commonMsg[0] == "*":
+ isDuplicate = commonMsg[1:] in event.msg and commonMsg[1:] in forwardEntry.msg
+ else:
+ isDuplicate = event.msg.startswith(commonMsg) and forwardEntry.msg.startswith(commonMsg)
+
+ if isDuplicate: break
+
+ if isDuplicate:
+ if getDuplicates: duplicateIndices.append(i)
+ else: return True
+
+ if getDuplicates: return duplicateIndices
+ else: return False
+
class LogEntry():
"""
Individual log file entry, having the following attributes:
@@ -779,7 +807,14 @@
isDatesShown = self.regexFilter == None and self._config["features.log.showDateDividers"]
eventLog = getDaybreaks(self.msgLog, self._isPaused) if isDatesShown else list(self.msgLog)
- if not self.showDuplicates: deduplicatedLog = getDuplicates(eventLog)
+ if not self.showDuplicates:
+ deduplicatedLog = getDuplicates(eventLog)
+
+ if deduplicatedLog == None:
+ msg = "Deduplication took too long. Its current implementation has difficulty handling large logs so disabling it to keep the interface responsive."
+ log.log(log.WARN, msg)
+ self.showDuplicates = True
+ deduplicatedLog = [(entry, 0) for entry in eventLog]
else: deduplicatedLog = [(entry, 0) for entry in eventLog]
# determines if we have the minimum width to show date dividers
Modified: arm/trunk/src/settings.cfg
===================================================================
--- arm/trunk/src/settings.cfg 2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/src/settings.cfg 2010-11-22 03:10:55 UTC (rev 23852)
@@ -44,6 +44,7 @@
# [ARM_DEBUG] recreating panel 'graph' with the dimensions of 14/124
# [ARM_DEBUG] redrawing the log panel with the corrected content height (estimat was off by 4)
# [ARM_DEBUG] GETINFO accounting/bytes-left (runtime: 0.0006)
+# [ARM_DEBUG] GETCONF MyFamily (runtime: 0.0007)
msg.BW READ:
msg.DEBUG connection_handle_write(): After TLS write of
@@ -73,6 +74,7 @@
msg.ARM_DEBUG GETINFO accounting/bytes-left
msg.ARM_DEBUG GETINFO accounting/interval-end
msg.ARM_DEBUG GETINFO accounting/hibernating
+msg.ARM_DEBUG GETCONF
# some config options are fetched via special values
torrc.map HiddenServiceDir => HiddenServiceOptions