[or-cvs] [metrics-db/master] Added the new database schema with update/refresh system.
Author: Kevin Berry <xckjb88@xxxxxxxxx>
Date: Tue, 25 May 2010 19:08:03 -0400
Subject: Added the new database schema with update/refresh system.
Commit: 9f328ce854411cbc10c38f2b4c1433193a32662b
The new schema is to be used in conjunction with R to plot graphs. The new
tables include network_size, relay_versions, relay_platforms, and
total_bandwidth; R can query them directly, avoiding complex joins for the
sake of speed (see the example query below). The new system keeps these
summary tables up to date by tracking when changes are made to the
descriptor or statusentry tables.
Moved everything database-related to db/.
Modified run.sh to run the refresh script when ERNIE runs.
Added the Fast and Stable flags to the network_size table.
Added versions 0.1.0, 0.1.1, and 0.2.3 to relay_versions.
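As a rough illustration of the intended use, R can now read a pre-aggregated
time series with a single flat query instead of joining descriptor and
statusentry itself; for example (the date filter is only a placeholder):

  SELECT date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable
  FROM network_size
  WHERE date >= '2010-01-01'
  ORDER BY date;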
---
db/refresh.sql | 21 ++
db/tordir-denormalized.sql | 145 +++++++++++++
db/tordir.sql | 481 ++++++++++++++++++++++++++++++++++++++++---
run.sh | 17 ++
4 files changed, 630 insertions(+), 34 deletions(-)
create mode 100644 db/refresh.sql
create mode 100644 db/tordir-denormalized.sql
diff --git a/db/refresh.sql b/db/refresh.sql
new file mode 100644
index 0000000..9e55016
--- /dev/null
+++ b/db/refresh.sql
@@ -0,0 +1,21 @@
+-- Copyright 2010 The Tor Project
+-- See LICENSE for licensing information
+
+-- REFRESH.SQL
+-- This script should be run every time ERNIE is run; it keeps the
+-- aggregate tables up to date by calling the refresh functions.
+
+-- Run this script as a single transaction, in case we need to roll back changes.
+BEGIN;
+
+SELECT * FROM refresh_relay_statuses_per_day();
+SELECT * FROM refresh_network_size();
+SELECT * FROM refresh_relay_platforms();
+SELECT * FROM refresh_relay_versions();
+SELECT * FROM refresh_total_bandwidth();
+
+-- Clear the updates table, since we have just updated everything.
+DELETE FROM updates;
+
+-- Commit the transaction.
+COMMIT;
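As a quick sanity check (not part of the patch itself), the updates table can
be inspected before running this script to see which dates the refresh
functions will recompute; it should be empty again right after COMMIT:

  SELECT DISTINCT date FROM updates ORDER BY date;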
diff --git a/db/tordir-denormalized.sql b/db/tordir-denormalized.sql
new file mode 100644
index 0000000..60fc5d0
--- /dev/null
+++ b/db/tordir-denormalized.sql
@@ -0,0 +1,145 @@
+-- Copyright 2010 The Tor Project
+-- See LICENSE for licensing information
+
+-- NOTE THAT WE ARE CURRENTLY NOT USING DENORMALIZED TABLES! THIS FILE IS
+-- ONLY HERE FOR REFERENCE AND IN CASE WE'LL WANT TO USE DENORMALIZED
+-- TABLES IN THE FUTURE!
+
+-- Denormalization is a data warehousing technique that speeds up queries
+-- on very large data sets by avoiding joins. This script keeps a
+-- denormalized table up to date through row-level triggers. Keep in mind
+-- that inserts with this technique will be slow. A similar end result
+-- could be achieved by creating a denormalized table after all of the
+-- inserts and populating it with something like:
+--
+-- SELECT *
+-- INTO descriptor_statusentry
+-- FROM descriptor LEFT JOIN statusentry
+-- ON descriptor.descriptor=statusentry.descriptor
+
+-- TABLE descriptor_statusentry: Denormalized table containing both
+-- descriptors and status entries in one big table. The table
+-- reflects a left join of descriptor on statusentry.
+
+CREATE TABLE descriptor_statusentry (
+ descriptor character(40) NOT NULL,
+ address character varying(15),
+ orport integer,
+ dirport integer,
+ bandwidthavg bigint,
+ bandwidthburst bigint,
+ bandwidthobserved bigint,
+ platform character varying(256),
+ published timestamp without time zone,
+ uptime bigint,
+ validafter timestamp without time zone,
+ isauthority boolean DEFAULT false,
+ isbadexit boolean DEFAULT false,
+ isbaddirectory boolean DEFAULT false,
+ isexit boolean DEFAULT false,
+ isfast boolean DEFAULT false,
+ isguard boolean DEFAULT false,
+ ishsdir boolean DEFAULT false,
+ isnamed boolean DEFAULT false,
+ isstable boolean DEFAULT false,
+ isrunning boolean DEFAULT false,
+ isunnamed boolean DEFAULT false,
+ isvalid boolean DEFAULT false,
+ isv2dir boolean DEFAULT false,
+ isv3dir boolean DEFAULT false
+);
+
+CREATE INDEX descriptorstatusid ON descriptor_statusentry
+ USING btree (descriptor, validafter);
+
+--TRIGGER mirror_statusentry()
+--Reflect any changes to statusentry in descriptor_statusentry
+CREATE FUNCTION mirror_statusentry() RETURNS TRIGGER
+AS $mirror_statusentry$
+ DECLARE
+ rd descriptor%ROWTYPE;
+ BEGIN
+ IF (TG_OP = 'INSERT') THEN
+ SELECT * INTO rd FROM descriptor WHERE descriptor=NEW.descriptor;
+ INSERT INTO descriptor_statusentry
+ VALUES (new.descriptor, rd.address, rd.orport, rd.dirport,
+ rd.bandwidthavg, rd.bandwidthburst, rd.bandwidthobserved,
+ rd.platform, rd.published, rd.uptime, new.validafter,
+ new.isauthority, new.isbadexit, new.isbaddirectory,
+ new.isexit, new.isfast, new.isguard, new.ishsdir,
+ new.isnamed, new.isstable, new.isrunning, new.isunnamed,
+ new.isvalid, new.isv2dir, new.isv3dir);
+
+ DELETE FROM descriptor_statusentry
+ WHERE descriptor=NEW.descriptor AND validafter IS NULL;
+
+ ELSIF (TG_OP = 'UPDATE') THEN
+ UPDATE descriptor_statusentry
+ SET isauthority=NEW.isauthority,
+ isbadexit=NEW.isbadexit, isbaddirectory=NEW.isbaddirectory,
+ isexit=NEW.isexit, isfast=NEW.isfast, isguard=NEW.isguard,
+ ishsdir=NEW.ishsdir, isnamed=NEW.isnamed,
+ isstable=NEW.isstable,isrunning=NEW.isrunning,
+ isunnamed=NEW.isunnamed, isvalid=NEW.isvalid,
+ isv2dir=NEW.isv2dir, isv3dir=NEW.isv3dir
+ WHERE descriptor=NEW.descriptor AND validafter=NEW.validafter;
+ ELSIF (TG_OP = 'DELETE') THEN
+ DELETE FROM descriptor_statusentry
+ WHERE validafter=OLD.validafter AND descriptor=OLD.descriptor;
+ END IF;
+ RETURN NEW;
+END;
+$mirror_statusentry$ LANGUAGE plpgsql;
+
+--Reflect changes in descriptor_statusentry when changes are made to
+--the descriptor table
+CREATE FUNCTION mirror_descriptor() RETURNS TRIGGER AS $mirror_descriptor$
+ DECLARE
+ dcount INTEGER;
+ BEGIN
+ IF (TG_OP = 'INSERT') THEN
+ SELECT COUNT(*) INTO dcount
+ FROM descriptor_statusentry
+ WHERE descriptor=NEW.descriptor AND validafter IS NOT NULL;
+
+ IF (dcount = 0) THEN
+ INSERT INTO descriptor_statusentry VALUES (
+ NEW.descriptor, NEW.address, NEW.orport, NEW.dirport,
+ NEW.bandwidthavg, NEW.bandwidthburst,
+ NEW.bandwidthobserved, NEW.platform, NEW.published,
+ NEW.uptime, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null);
+ ELSE
+ UPDATE descriptor_statusentry
+ SET address=NEW.address, orport=NEW.orport,
+ dirport=NEW.dirport, bandwidthavg=NEW.bandwidthavg,
+ bandwidthburst=NEW.bandwidthburst,
+ bandwidthobserved=NEW.bandwidthobserved,
+ platform=NEW.platform, published=NEW.published,
+ uptime=NEW.uptime
+ WHERE descriptor=NEW.descriptor;
+ END IF;
+ ELSIF (TG_OP = 'UPDATE') THEN
+ UPDATE descriptor_statusentry
+ SET address=NEW.address, orport=NEW.orport, dirport=NEW.dirport,
+ bandwidthavg=NEW.bandwidthavg,
+ bandwidthburst=NEW.bandwidthburst,
+ bandwidthobserved=NEW.bandwidthobserved,
+ platform=NEW.platform, published=NEW.published,
+ uptime=NEW.uptime
+ WHERE descriptor=NEW.descriptor;
+ ELSIF (TG_OP = 'DELETE') THEN
+ END IF;
+ RETURN NEW;
+END;
+$mirror_descriptor$ LANGUAGE plpgsql;
+
+CREATE TRIGGER mirror_statusentry
+AFTER INSERT OR UPDATE OR DELETE
+ON statusentry
+ FOR EACH ROW EXECUTE PROCEDURE mirror_statusentry();
+
+CREATE TRIGGER mirror_descriptor
+AFTER INSERT OR UPDATE OR DELETE
+ON descriptor
+ FOR EACH ROW EXECUTE PROCEDURE mirror_descriptor();
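If the denormalized table were ever put to use, queries could stay flat and
skip the descriptor/statusentry join entirely; a sketch of such a query,
counting running relays per platform in each consensus:

  SELECT validafter, platform, COUNT(*) AS relays
  FROM descriptor_statusentry
  WHERE isrunning IS TRUE
  GROUP BY validafter, platform;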
diff --git a/db/tordir.sql b/db/tordir.sql
index 97c12d8..a421730 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -1,34 +1,447 @@
-CREATE TABLE descriptor (
- descriptor CHARACTER(40) NOT NULL,
- address CHARACTER VARYING(15) NOT NULL,
- orport INTEGER NOT NULL,
- dirport INTEGER NOT NULL,
- bandwidthavg BIGINT NOT NULL,
- bandwidthburst BIGINT NOT NULL,
- bandwidthobserved BIGINT NOT NULL,
- platform CHARACTER VARYING(256),
- published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- uptime BIGINT,
- CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
-);
-
-CREATE TABLE statusentry (
- validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- descriptor CHARACTER(40) NOT NULL,
- isauthority BOOLEAN DEFAULT FALSE NOT NULL,
- isbadexit BOOLEAN DEFAULT FALSE NOT NULL,
- isbaddirectory BOOLEAN DEFAULT FALSE NOT NULL,
- isexit BOOLEAN DEFAULT FALSE NOT NULL,
- isfast BOOLEAN DEFAULT FALSE NOT NULL,
- isguard BOOLEAN DEFAULT FALSE NOT NULL,
- ishsdir BOOLEAN DEFAULT FALSE NOT NULL,
- isnamed BOOLEAN DEFAULT FALSE NOT NULL,
- isstable BOOLEAN DEFAULT FALSE NOT NULL,
- isrunning BOOLEAN DEFAULT FALSE NOT NULL,
- isunnamed BOOLEAN DEFAULT FALSE NOT NULL,
- isvalid BOOLEAN DEFAULT FALSE NOT NULL,
- isv2dir BOOLEAN DEFAULT FALSE NOT NULL,
- isv3dir BOOLEAN DEFAULT FALSE NOT NULL,
- CONSTRAINT statusentry_pkey PRIMARY KEY (validafter, descriptor)
-);
-
+-- Copyright 2010 The Tor Project
+-- See LICENSE for licensing information
+
+-- TABLE descriptor
+-- Contains all of the descriptors published by routers.
+CREATE TABLE descriptor (
+ descriptor CHARACTER(40) NOT NULL,
+ address CHARACTER VARYING(15) NOT NULL,
+ orport INTEGER NOT NULL,
+ dirport INTEGER NOT NULL,
+ bandwidthavg BIGINT NOT NULL,
+ bandwidthburst BIGINT NOT NULL,
+ bandwidthobserved BIGINT NOT NULL,
+ platform CHARACTER VARYING(256),
+ published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ uptime BIGINT,
+ CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
+);
+
+-- TABLE statusentry
+-- Contains all of the consensuses published by the directories. Each
+-- statusentry references a valid descriptor.
+CREATE TABLE statusentry (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ descriptor CHARACTER(40) NOT NULL,
+ isauthority BOOLEAN DEFAULT FALSE NOT NULL,
+ isbadexit BOOLEAN DEFAULT FALSE NOT NULL,
+ isbaddirectory BOOLEAN DEFAULT FALSE NOT NULL,
+ isexit BOOLEAN DEFAULT FALSE NOT NULL,
+ isfast BOOLEAN DEFAULT FALSE NOT NULL,
+ isguard BOOLEAN DEFAULT FALSE NOT NULL,
+ ishsdir BOOLEAN DEFAULT FALSE NOT NULL,
+ isnamed BOOLEAN DEFAULT FALSE NOT NULL,
+ isstable BOOLEAN DEFAULT FALSE NOT NULL,
+ isrunning BOOLEAN DEFAULT FALSE NOT NULL,
+ isunnamed BOOLEAN DEFAULT FALSE NOT NULL,
+ isvalid BOOLEAN DEFAULT FALSE NOT NULL,
+ isv2dir BOOLEAN DEFAULT FALSE NOT NULL,
+ isv3dir BOOLEAN DEFAULT FALSE NOT NULL,
+ CONSTRAINT statusentry_pkey PRIMARY KEY (validafter, descriptor)
+);
+
+-- TABLE network_size
+-- TODO Instead of having a separate column for each flag we could add
+-- two columns 'flag' and 'relays' to add more flags more easily.
+CREATE TABLE network_size (
+ date TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ avg_running INTEGER NOT NULL,
+ avg_exit INTEGER NOT NULL,
+ avg_guard INTEGER NOT NULL,
+ avg_fast INTEGER NOT NULL,
+ avg_stable INTEGER NOT NULL,
+ CONSTRAINT network_size_pkey PRIMARY KEY(date)
+);
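A sketch of the more generic layout suggested in the TODO above, keyed by
date and flag so that new flags need no schema change (the table name
network_size_per_flag is hypothetical and not part of this patch):

  CREATE TABLE network_size_per_flag (
      date TIMESTAMP WITHOUT TIME ZONE NOT NULL,
      flag CHARACTER VARYING(20) NOT NULL,
      relays INTEGER NOT NULL,
      CONSTRAINT network_size_per_flag_pkey PRIMARY KEY(date, flag)
  );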
+
+-- TABLE relay_platforms
+CREATE TABLE relay_platforms (
+ date TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ avg_linux INTEGER NOT NULL,
+ avg_darwin INTEGER NOT NULL,
+ avg_bsd INTEGER NOT NULL,
+ avg_windows INTEGER NOT NULL,
+ avg_other INTEGER NOT NULL,
+ CONSTRAINT relay_platforms_pkey PRIMARY KEY(date)
+);
+
+-- TABLE relay_versions
+-- TODO It might be more flexible to use columns 'date', 'versions', and
+-- 'relays'.
+CREATE TABLE relay_versions (
+ date TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ "0.1.0" INTEGER NOT NULL,
+ "0.1.1" INTEGER NOT NULL,
+ "0.1.2" INTEGER NOT NULL,
+ "0.2.0" INTEGER NOT NULL,
+ "0.2.1" INTEGER NOT NULL,
+ "0.2.2" INTEGER NOT NULL,
+ "0.2.3" INTEGER NOT NULL,
+ CONSTRAINT relay_versions_pkey PRIMARY KEY(date)
+);
+
+-- TABLE total_bandwidth
+-- Contains information for the whole network's total bandwidth which is
+-- used in the bandwidth graphs.
+-- TODO We should add bwadvertised as MIN(bwavg, bwobserved) which is used
+-- by 0.2.0.x clients for path selection.
+CREATE TABLE total_bandwidth (
+ date TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ bwavg BIGINT NOT NULL,
+ bwburst BIGINT NOT NULL,
+ bwobserved BIGINT NOT NULL,
+ CONSTRAINT total_bandwidth_pkey PRIMARY KEY(date)
+);
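The bwadvertised value from the TODO above could presumably be computed the
same way as the other aggregates, taking the per-relay minimum with
PostgreSQL's LEAST(); a sketch, not part of this patch:

  SELECT DATE(validafter),
         (SUM(LEAST(bandwidthavg, bandwidthobserved))
          / relay_statuses_per_day.count)::BIGINT AS bwadvertised
  FROM descriptor RIGHT JOIN statusentry
    ON descriptor.descriptor = statusentry.descriptor
  JOIN relay_statuses_per_day
    ON DATE(validafter) = relay_statuses_per_day.date
  WHERE isrunning = TRUE
  GROUP BY DATE(validafter), relay_statuses_per_day.count;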
+
+-- TABLE relay_statuses_per_day
+-- A helper table holding the number of consensuses published per day; the
+-- refresh_* functions use it to compute daily averages for the tables above.
+CREATE TABLE relay_statuses_per_day (
+ date DATE NOT NULL,
+ count INTEGER NOT NULL,
+ CONSTRAINT relay_statuses_per_day_pkey PRIMARY KEY(date)
+);
+
+-- TABLE updates
+-- A helper table that keeps track of which dates have new or changed data
+-- and therefore need to be recomputed on the next refresh.
+CREATE TABLE updates (
+ "date" date NOT NULL,
+ CONSTRAINT updates_pkey PRIMARY KEY(date)
+);
+
+CREATE LANGUAGE plpgsql;
+
+-- FUNCTION update_status
+-- This keeps the updates table up to date for the time graphs.
+CREATE OR REPLACE FUNCTION update_status() RETURNS TRIGGER AS $$
+ BEGIN
+ IF (TG_OP='INSERT' OR TG_OP='UPDATE') THEN
+ IF (SELECT COUNT(*) FROM updates
+ WHERE DATE=DATE(new.validafter)) = 0 THEN
+ INSERT INTO updates
+ VALUES (DATE(NEW.validafter));
+ END IF;
+ END IF;
+ IF (TG_OP='DELETE' OR TG_OP='UPDATE') THEN
+ IF (SELECT COUNT(*) FROM updates
+ WHERE DATE=DATE(old.validafter)) = 0 THEN
+ INSERT INTO updates
+ VALUES (DATE(OLD.validafter));
+ END IF;
+ END IF;
+ RETURN NULL; -- result is ignored since this is an AFTER trigger
+END;
+$$ LANGUAGE plpgsql;
+
+-- TRIGGER update_status
+-- This calls the function update_status() each time a row is inserted,
+-- updated, or deleted from the statusentry table.
+CREATE TRIGGER update_status
+AFTER INSERT OR UPDATE OR DELETE
+ON statusentry
+ FOR EACH ROW EXECUTE PROCEDURE update_status();
+
+-- FUNCTION update_desc
+-- Marks both the descriptor's publication date and the following day as
+-- needing a refresh, since next-day consensuses can still reference it.
+CREATE OR REPLACE FUNCTION update_desc() RETURNS TRIGGER AS $$
+ BEGIN
+ IF (TG_OP='INSERT' OR TG_OP='UPDATE') THEN
+ BEGIN
+ IF (SELECT COUNT(*) FROM updates
+ WHERE DATE=DATE(new.published)) = 0 THEN
+ INSERT INTO updates
+ VALUES (DATE(NEW.published));
+ END IF;
+ IF (SELECT COUNT(*) FROM updates
+ WHERE DATE=DATE(new.published)+1) = 0 THEN
+ INSERT INTO updates
+ VALUES (DATE(NEW.published)+1);
+ END IF;
+ END;
+ END IF;
+ IF (TG_OP='DELETE' OR TG_OP='UPDATE') THEN
+ BEGIN
+ IF (SELECT COUNT(*) FROM updates
+ WHERE DATE=DATE(old.published)) = 0 THEN
+ INSERT INTO updates
+ VALUES (DATE(OLD.published));
+ END IF;
+ IF (SELECT COUNT(*) FROM updates
+ WHERE DATE=DATE(old.published)+1) = 0 THEN
+ INSERT INTO updates
+ VALUES (DATE(OLD.published)+1);
+ END IF;
+ END;
+ END IF;
+ RETURN NULL; -- result is ignored since this is an AFTER trigger
+END;
+$$ LANGUAGE plpgsql;
+
+-- TRIGGER update_desc
+-- This calls the function update_desc() each time a row is inserted,
+-- updated, or deleted from the descriptor table.
+CREATE TRIGGER update_desc
+AFTER INSERT OR UPDATE OR DELETE
+ON descriptor
+ FOR EACH ROW EXECUTE PROCEDURE update_desc();
+
+-- FUNCTION refresh_relay_statuses_per_day()
+-- Rebuilds the helper table used by the refresh_* functions below.
+CREATE OR REPLACE FUNCTION refresh_relay_statuses_per_day()
+RETURNS INTEGER AS $$
+ BEGIN
+ DELETE FROM relay_statuses_per_day;
+ INSERT INTO relay_statuses_per_day (date, count)
+ SELECT DATE(validafter) AS date, COUNT(*) AS count
+ FROM (SELECT DISTINCT validafter
+ FROM statusentry) distinct_consensuses
+ GROUP BY DATE(validafter);
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- refresh_* functions
+-- The following functions keep their corresponding aggregate tables
+-- up to date. They should be called every time ERNIE is run, or whenever
+-- new data has finished being added to the descriptor or statusentry
+-- tables. They use the updates table to find which dates have new or
+-- changed data and recompute only the corresponding rows.
+
+-- FUNCTION refresh_network_size()
+CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
+ BEGIN
+    -- Insert rows for dates that are not yet present in network_size.
+ INSERT INTO network_size
+ (date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
+ SELECT
+ DATE(validafter) as date,
+ COUNT(*) / relay_statuses_per_day.count AS avg_running,
+ SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_exit,
+ SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_guard,
+ SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_fast,
+ SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_stable
+ FROM statusentry
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) NOT IN
+ (SELECT DATE(date) FROM network_size)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+    -- Update rows that were already inserted but whose underlying
+    -- data has changed since, based on the dates recorded in the
+    -- 'updates' table.
+ UPDATE network_size
+ SET avg_running=new_ns.avg_running,
+ avg_exit=new_ns.avg_exit,
+ avg_guard=new_ns.avg_guard,
+ avg_fast=new_ns.avg_fast,
+ avg_stable=new_ns.avg_stable
+ FROM (SELECT
+ DATE(validafter) as date,
+ COUNT(*) / relay_statuses_per_day.count AS avg_running,
+ SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_exit,
+ SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_guard,
+ SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_fast,
+ SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_stable
+ FROM statusentry
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) IN (SELECT DISTINCT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count)
+ AS new_ns
+ WHERE new_ns.date=network_size.date;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_platforms()
+CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
+ BEGIN
+ INSERT INTO relay_platforms
+ (date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other)
+ SELECT DATE(validafter),
+ SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_linux,
+ SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_darwin,
+ SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_bsd,
+ SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_windows,
+ SUM(CASE WHEN platform NOT LIKE '%Windows%'
+ AND platform NOT LIKE '%Darwin%'
+ AND platform NOT LIKE '%BSD%'
+ AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_other
+ FROM descriptor RIGHT JOIN statusentry
+ ON statusentry.descriptor = descriptor.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) NOT IN (SELECT DATE(date) FROM relay_platforms)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ UPDATE relay_platforms
+ SET avg_linux=new_rp.avg_linux,
+ avg_darwin=new_rp.avg_darwin,
+ avg_windows=new_rp.avg_windows,
+ avg_bsd=new_rp.avg_bsd,
+ avg_other=new_rp.avg_other
+ FROM (SELECT DATE(validafter),
+ SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_linux,
+ SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_darwin,
+ SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_bsd,
+ SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_windows,
+ SUM(CASE WHEN platform NOT LIKE '%Windows%'
+ AND platform NOT LIKE '%Darwin%'
+ AND platform NOT LIKE '%BSD%'
+ AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_other
+ FROM descriptor RIGHT JOIN statusentry
+ ON statusentry.descriptor = descriptor.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) IN (SELECT DISTINCT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count) as new_rp
+ WHERE new_rp.date=relay_platforms.date;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_versions()
+CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
+ BEGIN
+ INSERT INTO relay_versions
+ (date, "0.1.0", "0.1.1", "0.1.2", "0.2.0", "0.2.1", "0.2.2", "0.2.3")
+ SELECT DATE(validafter),
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.1.0' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.1.0",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.1.1' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.1.1",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.1.2' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.1.2",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.0' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.0",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.1' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.1",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.2' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.2",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.3' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.3"
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) NOT IN (SELECT DATE(date) FROM relay_versions)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ UPDATE relay_versions
+ SET "0.1.0"=new_rv."0.1.0",
+ "0.1.1"=new_rv."0.1.1",
+ "0.1.2"=new_rv."0.1.2",
+ "0.2.0"=new_rv."0.2.0",
+ "0.2.1"=new_rv."0.2.1",
+ "0.2.2"=new_rv."0.2.2",
+ "0.2.3"=new_rv."0.2.3"
+ FROM (SELECT DATE(validafter),
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.1.0' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.1.0",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.1.1' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.1.1",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.1.2' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.1.2",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.0' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.0",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.1' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.1",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.2' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.2",
+ SUM(CASE WHEN substring(platform, 5, 5)
+ LIKE '0.2.3' THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS "0.2.3"
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) IN (SELECT DISTINCT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count) AS new_rv
+ WHERE new_rv.date=relay_versions.date;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_total_bandwidth()
+-- This keeps the table total_bandwidth up-to-date when necessary.
+CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$
+ BEGIN
+ INSERT INTO total_bandwidth
+ (bwavg, bwburst, bwobserved, date)
+ SELECT (SUM(bandwidthavg)
+ / relay_statuses_per_day.count)::BIGINT AS bwavg,
+ (SUM(bandwidthburst)
+ / relay_statuses_per_day.count)::BIGINT AS bwburst,
+ (SUM(bandwidthobserved)
+ / relay_statuses_per_day.count)::BIGINT AS bwobserved,
+ DATE(validafter)
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) NOT IN (SELECT date FROM total_bandwidth)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ UPDATE total_bandwidth
+ SET bwavg=new_tb.bwavg,
+ bwburst=new_tb.bwburst,
+ bwobserved=new_tb.bwobserved
+ FROM (SELECT (SUM(bandwidthavg)
+ / relay_statuses_per_day.count)::BIGINT AS bwavg,
+ (SUM(bandwidthburst)
+ / relay_statuses_per_day.count)::BIGINT AS bwburst,
+ (SUM(bandwidthobserved)
+ / relay_statuses_per_day.count)::BIGINT AS bwobserved,
+ DATE(validafter)
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE AND
+ DATE(validafter) IN (SELECT DISTINCT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count) AS new_tb
+ WHERE new_tb.date = total_bandwidth.date;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
diff --git a/run.sh b/run.sh
index cc885db..30af813 100755
--- a/run.sh
+++ b/run.sh
@@ -1,4 +1,21 @@
#!/bin/sh
+
# TODO is there a better way to suppress Ant's output?
ant -q | grep -Ev "^$|^BUILD SUCCESSFUL|^Total time: "
+# Refresh the aggregate tables if we are writing to the relay descriptor database
+n=`grep "^WriteRelayDescriptorDatabase" config | awk '{print $2}'`
+if [ "$n" -eq 1 ]; then
+ # TODO Alternatively use a more reliable URI parser?
+ #db=$( echo "$uri" | ruby -ruri -e 'puts URI.parse(gets.chomp).path' )
+
+ uri=`grep "^RelayDescriptorDatabaseJDBC" config | awk '{print $2}'`
+ conn=`echo $uri | awk -F"//" '{print $2}'`
+ host=`echo $conn | sed -e 's/\/.*//'`
+ db=`echo $conn | awk -F"/" '{print $2}' | sed -e 's/\?.*//'`
+ user=`echo $conn | awk -F"=" '{print $2}' | sed -e 's/\&.*//'`
+ pswd=`echo $conn | awk -F"password=" '{print $2}'`
+
+ export PGPASSWORD="$pswd"
+ psql -d $db -h $host -A -t -q -U $user -f db/refresh.sql
+fi
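For reference, the parsing above assumes a JDBC URI of the form
jdbc:postgresql://<host>/<database>?user=<user>&password=<password> in the
config file, e.g. (hypothetical values):

  RelayDescriptorDatabaseJDBC jdbc:postgresql://localhost/tordir?user=ernie&password=secret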
--
1.7.1