[tor-commits] [metrics-web/release] Remove unused code from the censorship detector.

commit 4a30e7a9fbdded5318506302543b5d5e07571670
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date:   Mon Mar 12 17:49:40 2018 +0100

    Remove unused code from the censorship detector.
 src/main/python/clients/detector.py | 199 +-----------------------------------
 1 file changed, 2 insertions(+), 197 deletions(-)

diff --git a/src/main/python/clients/detector.py b/src/main/python/clients/detector.py
index 6cf1c7d..3d17bf0 100644
--- a/src/main/python/clients/detector.py
+++ b/src/main/python/clients/detector.py
@@ -60,8 +60,6 @@ import country_info
 # write utf8 to file
 import codecs
-days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
 def get_country_name_from_cc(country_code):
   if (country_code.lower() in country_info.countries):
     return country_info.countries[country_code.lower()]
@@ -208,145 +206,6 @@ def make_tendencies_minmax(l, INTERVAL = 1):
   ## print minx[-1], maxx[-1]
   return minx, maxx
-# Makes pretty plots
-def raw_plot(series, minc, maxc, labels, xtitle):
-    assert len(xtitle) == 3
-    fname, stitle, slegend = xtitle
-    font = {'family' : 'Bitstream Vera Sans',
-        'weight' : 'normal',
-        'size'   : 8}
-    matplotlib.rc('font', **font)
-    ylim( (-max(series)*0.1, max(series)*1.1) )
-    plot(labels, series, linewidth=1.0, label="Users")
-    wherefill = []
-    for mm,mx in zip(minc, maxc):
-      wherefill += [not (mm == None and mx == None)]
-      assert mm < mx or (mm == None and mx == None)
-    fill_between(labels, minc, maxc, where=wherefill, color="gray", label="Prediction")
-    vdown = []
-    vup = []
-    for i,v in enumerate(series):
-      if minc[i] != None and v < minc[i]:
-        vdown += [v]
-        vup += [None]
-      elif maxc[i] != None and v > maxc[i]:
-        vdown += [None]
-        vup += [v]
-      else:
-        vup += [None]
-        vdown += [None]
-    plot(labels, vdown, 'o', ms=10, lw=2, alpha=0.5, mfc='orange', label="Downturns")
-    plot(labels, vup, 'o', ms=10, lw=2, alpha=0.5, mfc='green', label="Upturns")
-    legend(loc=2)
-    xlabel('Time (days)')
-    ylabel('Users')
-    title(stitle)
-    grid(True)
-    F = gcf()
-    F.set_size_inches(10,5)
-    F.savefig(fname,  format="png", dpi = (150))
-    close()
-def absolute_plot(series, minc, maxc, labels,INTERVAL, xtitle):
-  in_minc = []
-  in_maxc = []
-  for i, v in enumerate(series):
-    if i > 0 and i - INTERVAL >= 0 and series[i] != None and series[i-INTERVAL] != None and series[i-INTERVAL] != 0 and minc[i]!= None and maxc[i]!= None:
-      in_minc += [minc[i] * poisson.ppf(1-0.9999, series[i-INTERVAL])]
-      in_maxc += [maxc[i] * poisson.ppf(0.9999, series[i-INTERVAL])]
-      if not in_minc[-1] < in_maxc[-1]:
-        print in_minc[-1], in_maxc[-1], series[i-INTERVAL], minc[i], maxc[i]
-      assert in_minc[-1] < in_maxc[-1]
-    else:
-      in_minc += [None]
-      in_maxc += [None]
-  raw_plot(series, in_minc, in_maxc, labels, xtitle)
-"""Return the number of downscores and upscores of a time series
-'series', given tendencies 'minc' and 'maxc' for the time interval
-If 'scoring_interval' is specifed we only consider upscore/downscore
-that happened in the latest 'scoring_interval' days.
-def censor_score(series, minc, maxc, INTERVAL, scoring_interval=None):
-  upscore = 0
-  downscore = 0
-  if scoring_interval is None:
-    scoring_interval = len(series)
-  assert(len(series) >= scoring_interval)
-  for i, v in enumerate(series):
-    if i > 0 and i - INTERVAL >= 0 and series[i] != None and series[i-INTERVAL] != None and series[i-INTERVAL] != 0 and minc[i]!= None and maxc[i]!= None:
-      in_minc = minc[i] * poisson.ppf(1-0.9999, series[i-INTERVAL])
-      in_maxc = maxc[i] * poisson.ppf(0.9999, series[i-INTERVAL])
-      if (i >= (len(series) - scoring_interval)):
-        downscore += 1 if minc[i] != None and v < in_minc else 0
-        upscore += 1 if maxc[i] != None and v > in_maxc else 0
-  return downscore, upscore
-def plot_target(tss, TARGET, xtitle, minx, maxx, DAYS=365, INTERV = 7):
-  ctarget = tss.get_country_series(TARGET)
-  c = n_day_rel(ctarget, INTERV)
-  absolute_plot(ctarget[-DAYS:], minx[-DAYS:], maxx[-DAYS:], tss.all_dates[-DAYS:],INTERV, xtitle = xtitle)
-def write_censorship_report_prologue(report_file, dates, notification_period):
-  if (notification_period == 1):
-    date_str = "%s" % (dates[-1]) # no need for date range if it's just one day
-  else:
-    date_str = "%s to %s" % (dates[-notification_period], dates[-1])
-  prologue = "=======================\n"
-  prologue += "Automatic Censorship Report for %s\n" % (date_str)
-  prologue += "=======================\n\n"
-  report_file.write(prologue)
-## Make a league table of censorship + nice graphs
-def plot_all(tss, minx, maxx, INTERV, DAYS=None, rdir="img"):
-  rdir = os.path.realpath(rdir)
-  if not os.path.exists(rdir) or not os.path.isdir(rdir):
-    print "ERROR: %s does not exist or is not a directory." % rdir
-    return
-  summary_file = file(os.path.join(rdir, "summary.txt"), "w")
-  if DAYS == None:
-    DAYS = 6*31
-  s = tss.get_largest(200)
-  scores = []
-  for num, li in s:
-    print ".",
-    ds,us = censor_score(tss.get_country_series(li)[-DAYS:], minx[-DAYS:], maxx[-DAYS:], INTERV)
-    # print ds, us
-    scores += [(ds,num, us, li)]
-  scores.sort()
-  scores.reverse()
-  s = "\n=======================\n"
-  s+= "Report for %s to %s\n" % (tss.all_dates[-DAYS], tss.all_dates[-1])
-  s+= "=======================\n"
-  print s
-  summary_file.write(s)
-  for a,nx, b,c in scores:
-    if a > 0:
-      s = "%s -- down: %2d (up: %2d affected: %s)" % (c, a, b, nx)
-      print s
-      summary_file.write(s + "\n")
-      xtitle = (os.path.join(rdir, "%03d-%s-censor.png" % (a,c)), "Tor report for %s -- down: %2d (up: %2d affected: %s)" % (c, a, b, nx),"")
-      plot_target(tss, c,xtitle, minx, maxx, DAYS, INTERV)
-  summary_file.close()
 """Write a CSV report on the minimum/maximum users of each country per date."""
 def write_all(tss, minc, maxc, RANGES_FILE, INTERVAL=7):
   ranges_file = file(RANGES_FILE, "w")
@@ -368,70 +227,16 @@ def write_all(tss, minc, maxc, RANGES_FILE, INTERVAL=7):
         ranges_file.write("%s,%s,%s,%s\n" % (tss.all_dates[i], c, minv, maxv))
-"""Return a URL that points to a graph in metrics.tpo that displays
-the number of direct Tor users in country 'country_code', for a
-'period'-days period.
-Let's hope that the metrics.tpo URL scheme doesn't change often.
-def get_tor_usage_graph_url_for_cc_and_date(country_code, dates, period):
-  url = "https://metrics.torproject.org/users.html?graph=userstats-relay-country&start=%s&end=%s&country=%s&events=on#userstats-relay-country\n" % \
-      (dates[-period], dates[-1], country_code)
-  return url
-"""Write a file containing a short censorship report over the last
-'notification_period' days.
-def write_ml_report(tss, minx, maxx, INTERV, DAYS, notification_period=None):
-  if notification_period is None:
-    notification_period = DAYS
-  report_file = codecs.open('short_censorship_report.txt', 'w', 'utf-8')
-  file_prologue_written = False
-  s = tss.get_largest(None) # no restrictions, get 'em all.
-  scores = []
-  for num, li in s:
-    ds,us = censor_score(tss.get_country_series(li)[-DAYS:], minx[-DAYS:], maxx[-DAYS:], INTERV, notification_period)
-    scores += [(ds,num, us, li)]
-  scores.sort()
-  scores.reverse()
-  for downscores,users_n,upscores,country_code in scores:
-    if (downscores > 0) or (upscores > 0):
-      if not file_prologue_written:
-        write_censorship_report_prologue(report_file, tss.all_dates, notification_period)
-        file_prologue_written = True
-      if ((upscores > 0) and (downscores == 0)):
-        s = "We detected an unusual spike of Tor users in %s (%d upscores, %d users):\n" % \
-            (get_country_name_from_cc(country_code), upscores, users_n)
-      else:
-        s = "We detected %d potential censorship events in %s (users: %d, upscores: %d):\n" % \
-            (downscores, get_country_name_from_cc(country_code), users_n, upscores)
-      # Also give out a link for the appropriate usage graph for a 90-days period.
-      s += get_tor_usage_graph_url_for_cc_and_date(country_code, tss.all_dates, 90)
-      report_file.write(s + "\n")
-  report_file.close()
 # INTERV is the time interval to model connection rates;
 # consider maximum DAYS days back.
 def detect(CSV_FILE = "userstats-detector.csv",
-           RANGES_FILE = "userstats-ranges.csv", GRAPH_DIR = "img",
-           INTERV = 7, DAYS = 6 * 31, REPORT = True):
+           RANGES_FILE = "userstats-ranges.csv",
+           INTERV = 7, DAYS = 6 * 31):
   tss = torstatstore(CSV_FILE)
   l = tss.get_largest_locations(50)
   minx, maxx = make_tendencies_minmax(l, INTERV)
-  #plot_all(tss, minx, maxx, INTERV, DAYS, rdir=GRAPH_DIR)
   write_all(tss, minx, maxx, RANGES_FILE, INTERV)
-  if REPORT:
-    # Make our short report; only consider events of the last day
-    write_ml_report(tss, minx, maxx, INTERV, DAYS, 1)
 def main():

