[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-web/master] Add database schema.
commit 12ead2691cc540f5d2bbd5c6918ec50fff29b188
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Thu Mar 3 19:46:13 2011 +0100
Add database schema.
---
db/tordir.sql | 744 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 744 insertions(+), 0 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql
new file mode 100644
index 0000000..11e49dc
--- /dev/null
+++ b/db/tordir.sql
@@ -0,0 +1,744 @@
+-- Copyright 2010 The Tor Project
+-- See LICENSE for licensing information
+
+-- TABLE descriptor
+-- Contains all of the descriptors published by routers.
+CREATE TABLE descriptor (
+ descriptor CHARACTER(40) NOT NULL,
+ nickname CHARACTER VARYING(19) NOT NULL,
+ address CHARACTER VARYING(15) NOT NULL,
+ orport INTEGER NOT NULL,
+ dirport INTEGER NOT NULL,
+ fingerprint CHARACTER(40) NOT NULL,
+ bandwidthavg BIGINT NOT NULL,
+ bandwidthburst BIGINT NOT NULL,
+ bandwidthobserved BIGINT NOT NULL,
+ platform CHARACTER VARYING(256),
+ published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ uptime BIGINT,
+ extrainfo CHARACTER(40),
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
+);
+
+-- TABLE extrainfo
+-- Contains all of the extra-info descriptors published by the routers.
+CREATE TABLE extrainfo (
+ extrainfo CHARACTER(40) NOT NULL,
+ nickname CHARACTER VARYING(19) NOT NULL,
+ fingerprint CHARACTER(40) NOT NULL,
+ published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT extrainfo_pkey PRIMARY KEY (extrainfo)
+);
+
+-- Contains bandwidth histories reported by relays in extra-info
+-- descriptors. Each row contains the reported bandwidth in 15-minute
+-- intervals for each relay and date.
+CREATE TABLE bwhist (
+ fingerprint CHARACTER(40) NOT NULL,
+ date DATE NOT NULL,
+ read BIGINT[],
+ read_sum BIGINT,
+ written BIGINT[],
+ written_sum BIGINT,
+ dirread BIGINT[],
+ dirread_sum BIGINT,
+ dirwritten BIGINT[],
+ dirwritten_sum BIGINT,
+ CONSTRAINT bwhist_pkey PRIMARY KEY (fingerprint, date)
+);
+
+CREATE INDEX bwhist_date ON bwhist (date);
+
+-- TABLE statusentry
+-- Contains all of the consensus entries published by the directories.
+-- Each statusentry references a valid descriptor.
+CREATE TABLE statusentry (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ nickname CHARACTER VARYING(19) NOT NULL,
+ fingerprint CHARACTER(40) NOT NULL,
+ descriptor CHARACTER(40) NOT NULL,
+ published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ address CHARACTER VARYING(15) NOT NULL,
+ orport INTEGER NOT NULL,
+ dirport INTEGER NOT NULL,
+ isauthority BOOLEAN DEFAULT FALSE NOT NULL,
+ isbadexit BOOLEAN DEFAULT FALSE NOT NULL,
+ isbaddirectory BOOLEAN DEFAULT FALSE NOT NULL,
+ isexit BOOLEAN DEFAULT FALSE NOT NULL,
+ isfast BOOLEAN DEFAULT FALSE NOT NULL,
+ isguard BOOLEAN DEFAULT FALSE NOT NULL,
+ ishsdir BOOLEAN DEFAULT FALSE NOT NULL,
+ isnamed BOOLEAN DEFAULT FALSE NOT NULL,
+ isstable BOOLEAN DEFAULT FALSE NOT NULL,
+ isrunning BOOLEAN DEFAULT FALSE NOT NULL,
+ isunnamed BOOLEAN DEFAULT FALSE NOT NULL,
+ isvalid BOOLEAN DEFAULT FALSE NOT NULL,
+ isv2dir BOOLEAN DEFAULT FALSE NOT NULL,
+ isv3dir BOOLEAN DEFAULT FALSE NOT NULL,
+ version CHARACTER VARYING(50),
+ bandwidth BIGINT,
+ ports TEXT,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT statusentry_pkey PRIMARY KEY (validafter, fingerprint)
+);
+
+-- TABLE consensus
+-- Contains all of the consensuses published by the directories.
+CREATE TABLE consensus (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT consensus_pkey PRIMARY KEY (validafter)
+);
+
+-- TABLE vote
+-- Contains all of the votes published by the directories
+CREATE TABLE vote (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ dirsource CHARACTER(40) NOT NULL,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT vote_pkey PRIMARY KEY (validafter, dirsource)
+);
+
+-- TABLE connbidirect
+-- Contain conn-bi-direct stats strings
+CREATE TABLE connbidirect (
+ source CHARACTER(40) NOT NULL,
+ statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ seconds INTEGER NOT NULL,
+ belownum BIGINT NOT NULL,
+ readnum BIGINT NOT NULL,
+ writenum BIGINT NOT NULL,
+ bothnum BIGINT NOT NULL,
+ CONSTRAINT connbidirect_pkey PRIMARY KEY (source, statsend)
+);
+
+-- Create the various indexes we need for searching relays
+CREATE INDEX statusentry_address ON statusentry (address);
+CREATE INDEX statusentry_fingerprint ON statusentry (fingerprint);
+CREATE INDEX statusentry_nickname ON statusentry (LOWER(nickname));
+CREATE INDEX statusentry_validafter ON statusentry (validafter);
+
+-- And create an index that we use for precalculating statistics
+CREATE INDEX statusentry_descriptor ON statusentry (descriptor);
+CREATE INDEX statusentry_validafter_date ON statusentry (DATE(validafter));
+
+-- TABLE network_size
+CREATE TABLE network_size (
+ date DATE NOT NULL,
+ avg_running INTEGER NOT NULL,
+ avg_exit INTEGER NOT NULL,
+ avg_guard INTEGER NOT NULL,
+ avg_fast INTEGER NOT NULL,
+ avg_stable INTEGER NOT NULL,
+ CONSTRAINT network_size_pkey PRIMARY KEY(date)
+);
+
+-- TABLE network_size_hour
+CREATE TABLE network_size_hour (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ avg_running INTEGER NOT NULL,
+ avg_exit INTEGER NOT NULL,
+ avg_guard INTEGER NOT NULL,
+ avg_fast INTEGER NOT NULL,
+ avg_stable INTEGER NOT NULL,
+ CONSTRAINT network_size_hour_pkey PRIMARY KEY(validafter)
+);
+
+-- TABLE relay_platforms
+CREATE TABLE relay_platforms (
+ date DATE NOT NULL,
+ avg_linux INTEGER NOT NULL,
+ avg_darwin INTEGER NOT NULL,
+ avg_bsd INTEGER NOT NULL,
+ avg_windows INTEGER NOT NULL,
+ avg_other INTEGER NOT NULL,
+ CONSTRAINT relay_platforms_pkey PRIMARY KEY(date)
+);
+
+-- TABLE relay_versions
+CREATE TABLE relay_versions (
+ date DATE NOT NULL,
+ version CHARACTER(5) NOT NULL,
+ relays INTEGER NOT NULL,
+ CONSTRAINT relay_versions_pkey PRIMARY KEY(date, version)
+);
+
+-- TABLE total_bandwidth
+-- Contains information for the whole network's total bandwidth which is
+-- used in the bandwidth graphs.
+CREATE TABLE total_bandwidth (
+ date DATE NOT NULL,
+ bwavg BIGINT NOT NULL,
+ bwburst BIGINT NOT NULL,
+ bwobserved BIGINT NOT NULL,
+ bwadvertised BIGINT NOT NULL,
+ CONSTRAINT total_bandwidth_pkey PRIMARY KEY(date)
+);
+
+-- TABLE total_bwhist
+-- Contains the total number of read/written and the number of dir bytes
+-- read/written by all relays in the network on a given day. The dir bytes
+-- are an estimate based on the subset of relays that count dir bytes.
+CREATE TABLE total_bwhist (
+ date DATE NOT NULL,
+ read BIGINT,
+ written BIGINT,
+ CONSTRAINT total_bwhist_pkey PRIMARY KEY(date)
+);
+
+-- TABLE user_stats
+-- Aggregate statistics on directory requests and byte histories that we
+-- use to estimate user numbers.
+CREATE TABLE user_stats (
+ date DATE NOT NULL,
+ country CHARACTER(2) NOT NULL,
+ r BIGINT,
+ dw BIGINT,
+ dr BIGINT,
+ drw BIGINT,
+ drr BIGINT,
+ bw BIGINT,
+ br BIGINT,
+ bwd BIGINT,
+ brd BIGINT,
+ bwr BIGINT,
+ brr BIGINT,
+ bwdr BIGINT,
+ brdr BIGINT,
+ bwp BIGINT,
+ brp BIGINT,
+ bwn BIGINT,
+ brn BIGINT,
+ CONSTRAINT user_stats_pkey PRIMARY KEY(date, country)
+);
+
+-- TABLE relay_statuses_per_day
+-- A helper table which is commonly used to update the tables above in the
+-- refresh_* functions.
+CREATE TABLE relay_statuses_per_day (
+ date DATE NOT NULL,
+ count INTEGER NOT NULL,
+ CONSTRAINT relay_statuses_per_day_pkey PRIMARY KEY(date)
+);
+
+-- Dates to be included in the next refresh run.
+CREATE TABLE scheduled_updates (
+ id SERIAL,
+ date DATE NOT NULL
+);
+
+-- Dates in the current refresh run. When starting a refresh run, we copy
+-- the rows from scheduled_updates here in order to delete just those
+-- lines after the refresh run. Otherwise we might forget scheduled dates
+-- that have been added during a refresh run. If this happens we're going
+-- to update these dates in the next refresh run.
+CREATE TABLE updates (
+ id INTEGER,
+ date DATE
+);
+
+CREATE LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_statuses_per_day()
+-- Updates helper table which is used to refresh the aggregate tables.
+CREATE OR REPLACE FUNCTION refresh_relay_statuses_per_day()
+RETURNS INTEGER AS $$
+ BEGIN
+ DELETE FROM relay_statuses_per_day
+ WHERE date IN (SELECT date FROM updates);
+ INSERT INTO relay_statuses_per_day (date, count)
+ SELECT DATE(validafter) AS date, COUNT(*) AS count
+ FROM consensus
+ WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY DATE(validafter);
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION array_sum (BIGINT[]) RETURNS BIGINT AS $$
+ SELECT SUM($1[i])::bigint
+ FROM generate_series(array_lower($1, 1), array_upper($1, 1)) index(i);
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION insert_bwhist(
+ insert_fingerprint CHARACTER(40), insert_date DATE,
+ insert_read BIGINT[], insert_written BIGINT[],
+ insert_dirread BIGINT[], insert_dirwritten BIGINT[])
+ RETURNS INTEGER AS $$
+ BEGIN
+ IF (SELECT COUNT(*) FROM bwhist
+ WHERE fingerprint = insert_fingerprint AND date = insert_date) = 0
+ THEN
+ INSERT INTO bwhist (fingerprint, date, read, written, dirread,
+ dirwritten)
+ VALUES (insert_fingerprint, insert_date, insert_read, insert_written,
+ insert_dirread, insert_dirwritten);
+ ELSE
+ BEGIN
+ UPDATE bwhist
+ SET read[array_lower(insert_read, 1):
+ array_upper(insert_read, 1)] = insert_read,
+ written[array_lower(insert_written, 1):
+ array_upper(insert_written, 1)] = insert_written,
+ dirread[array_lower(insert_dirread, 1):
+ array_upper(insert_dirread, 1)] = insert_dirread,
+ dirwritten[array_lower(insert_dirwritten, 1):
+ array_upper(insert_dirwritten, 1)] = insert_dirwritten
+ WHERE fingerprint = insert_fingerprint AND date = insert_date;
+ -- Updating twice is an ugly workaround for PostgreSQL bug 5840
+ UPDATE bwhist
+ SET read[array_lower(insert_read, 1):
+ array_upper(insert_read, 1)] = insert_read,
+ written[array_lower(insert_written, 1):
+ array_upper(insert_written, 1)] = insert_written,
+ dirread[array_lower(insert_dirread, 1):
+ array_upper(insert_dirread, 1)] = insert_dirread,
+ dirwritten[array_lower(insert_dirwritten, 1):
+ array_upper(insert_dirwritten, 1)] = insert_dirwritten
+ WHERE fingerprint = insert_fingerprint AND date = insert_date;
+ END;
+ END IF;
+ UPDATE bwhist
+ SET read_sum = array_sum(read),
+ written_sum = array_sum(written),
+ dirread_sum = array_sum(dirread),
+ dirwritten_sum = array_sum(dirwritten)
+ WHERE fingerprint = insert_fingerprint AND date = insert_date;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- refresh_* functions
+-- The following functions keep their corresponding aggregate tables
+-- up-to-date. They should be called every time ERNIE is run, or when new
+-- data is finished being added to the descriptor or statusentry tables.
+-- They find what new data has been entered or updated based on the
+-- updates table.
+
+-- FUNCTION refresh_network_size()
+CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM network_size
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO network_size
+ (date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
+ SELECT
+ DATE(validafter) AS date,
+ COUNT(*) / relay_statuses_per_day.count AS avg_running,
+ SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_exit,
+ SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_guard,
+ SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_fast,
+ SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_stable
+ FROM statusentry
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_network_size_hour()
+CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM network_size_hour
+ WHERE DATE(validafter) IN (SELECT date FROM updates);
+
+ INSERT INTO network_size_hour
+ (validafter, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
+ SELECT validafter, COUNT(*) AS avg_running,
+ SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END) AS avg_exit,
+ SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END) AS avg_guard,
+ SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END) AS avg_fast,
+ SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END) AS avg_stable
+ FROM statusentry
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY validafter;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_platforms()
+CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM relay_platforms
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO relay_platforms
+ (date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other)
+ SELECT DATE(validafter),
+ SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_linux,
+ SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_darwin,
+ SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_bsd,
+ SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_windows,
+ SUM(CASE WHEN platform NOT LIKE '%Windows%'
+ AND platform NOT LIKE '%Darwin%'
+ AND platform NOT LIKE '%BSD%'
+ AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_other
+ FROM descriptor RIGHT JOIN statusentry
+ ON statusentry.descriptor = descriptor.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND DATE(relay_statuses_per_day.date) >=
+ (SELECT MIN(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) <=
+ (SELECT MAX(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) IN
+ (SELECT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_versions()
+CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM relay_versions
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO relay_versions
+ (date, version, relays)
+ SELECT DATE(validafter), SUBSTRING(platform, 5, 5) AS version,
+ COUNT(*) / relay_statuses_per_day.count AS relays
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND DATE(relay_statuses_per_day.date) >=
+ (SELECT MIN(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) <=
+ (SELECT MAX(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) IN
+ (SELECT date FROM updates)
+ AND platform IS NOT NULL
+ GROUP BY 1, 2, relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_total_bandwidth()
+-- This keeps the table total_bandwidth up-to-date when necessary.
+CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM total_bandwidth
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO total_bandwidth
+ (bwavg, bwburst, bwobserved, bwadvertised, date)
+ SELECT (SUM(bandwidthavg)
+ / relay_statuses_per_day.count)::BIGINT AS bwavg,
+ (SUM(bandwidthburst)
+ / relay_statuses_per_day.count)::BIGINT AS bwburst,
+ (SUM(bandwidthobserved)
+ / relay_statuses_per_day.count)::BIGINT AS bwobserved,
+ (SUM(LEAST(bandwidthavg, bandwidthobserved))
+ / relay_statuses_per_day.count)::BIGINT AS bwadvertised,
+ DATE(validafter)
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND DATE(relay_statuses_per_day.date) >=
+ (SELECT MIN(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) <=
+ (SELECT MAX(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) IN
+ (SELECT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$
+ BEGIN
+ DELETE FROM total_bwhist WHERE date IN (SELECT date FROM updates);
+ INSERT INTO total_bwhist (date, read, written)
+ SELECT date, SUM(read_sum) AS read, SUM(written_sum) AS written
+ FROM bwhist
+ WHERE date >= (SELECT MIN(date) FROM updates)
+ AND date <= (SELECT MAX(date) FROM updates)
+ AND date IN (SELECT date FROM updates)
+ GROUP BY date;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_user_stats()
+-- This function refreshes our user statistics by weighting reported
+-- directory request statistics of directory mirrors with bandwidth
+-- histories.
+CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
+ BEGIN
+ -- Start by deleting user statistics of the dates we're about to
+ -- regenerate.
+ DELETE FROM user_stats WHERE date IN (SELECT date FROM updates);
+ -- Now insert new user statistics.
+ INSERT INTO user_stats (date, country, r, dw, dr, drw, drr, bw, br, bwd,
+ brd, bwr, brr, bwdr, brdr, bwp, brp, bwn, brn)
+ SELECT
+ -- We want to learn about total requests by date and country.
+ dirreq_stats_by_country.date AS date,
+ dirreq_stats_by_country.country AS country,
+ dirreq_stats_by_country.r AS r,
+ -- In order to weight the reported directory requests, we're
+ -- counting bytes of relays (except directory authorities)
+ -- matching certain criteria: whether or not they are reporting
+ -- directory requests, whether or not they are reporting
+ -- directory bytes, and whether their directory port is open or
+ -- closed.
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE dirwritten END) AS dw,
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE dirread END) AS dr,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE dirwritten END) AS dwr,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE dirread END) AS drr,
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE written END) AS bw,
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE read END) AS br,
+ SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwd,
+ SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brd,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwr,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brr,
+ SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
+ OR authority IS NOT NULL THEN NULL ELSE written END) AS bwdr,
+ SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
+ OR authority IS NOT NULL THEN NULL ELSE read END) AS brdr,
+ SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwp,
+ SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brp,
+ SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwn,
+ SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brn
+ FROM (
+ -- The first sub-select tells us the total number of directory
+ -- requests per country reported by all directory mirrors.
+ SELECT dirreq_stats_by_date.date AS date, country, SUM(requests) AS r
+ FROM (
+ SELECT fingerprint, date, country, SUM(requests) AS requests
+ FROM (
+ -- There are two selects here, because in most cases the directory
+ -- request statistics cover two calendar dates.
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date,
+ country, FLOOR(requests * (CASE
+ WHEN EXTRACT(EPOCH FROM DATE(statsend)) >
+ EXTRACT(EPOCH FROM statsend) - seconds
+ THEN EXTRACT(EPOCH FROM statsend) -
+ EXTRACT(EPOCH FROM DATE(statsend))
+ ELSE seconds END) / seconds) AS requests
+ FROM dirreq_stats
+ UNION
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date,
+ country, FLOOR(requests *
+ (EXTRACT(EPOCH FROM DATE(statsend)) -
+ EXTRACT(EPOCH FROM statsend) + seconds)
+ / seconds) AS requests
+ FROM dirreq_stats
+ WHERE EXTRACT(EPOCH FROM DATE(statsend)) -
+ EXTRACT(EPOCH FROM statsend) + seconds > 0
+ ) dirreq_stats_split
+ GROUP BY 1, 2, 3
+ ) dirreq_stats_by_date
+ -- We're only interested in requests by directory mirrors, not
+ -- directory authorities, so we exclude all relays with the Authority
+ -- flag.
+ RIGHT JOIN (
+ SELECT fingerprint, DATE(validafter) AS date
+ FROM statusentry
+ WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND isauthority IS FALSE
+ GROUP BY 1, 2
+ ) statusentry_dirmirrors
+ ON dirreq_stats_by_date.fingerprint =
+ statusentry_dirmirrors.fingerprint
+ AND dirreq_stats_by_date.date = statusentry_dirmirrors.date
+ GROUP BY 1, 2
+ ) dirreq_stats_by_country
+ LEFT JOIN (
+ -- In the next step, we expand the result by bandwidth histories of
+ -- all relays.
+ SELECT fingerprint, date, read_sum AS read, written_sum AS written,
+ dirread_sum AS dirread, dirwritten_sum AS dirwritten
+ FROM bwhist
+ WHERE date >= (SELECT MIN(date) FROM updates)
+ AND date <= (SELECT MAX(date) FROM updates)
+ AND date IN (SELECT date FROM updates)
+ ) bwhist_by_relay
+ ON dirreq_stats_by_country.date = bwhist_by_relay.date
+ LEFT JOIN (
+ -- For each relay, tell how often it had an open directory port and
+ -- how often it had the Authority flag assigned on a given date.
+ SELECT fingerprint, DATE(validafter) AS date,
+ SUM(CASE WHEN dirport > 0 THEN 1 ELSE NULL END) AS opendirport,
+ SUM(CASE WHEN isauthority IS TRUE THEN 1 ELSE NULL END) AS authority
+ FROM statusentry
+ WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY 1, 2
+ ) statusentry_by_relay
+ ON bwhist_by_relay.fingerprint = statusentry_by_relay.fingerprint
+ AND bwhist_by_relay.date = statusentry_by_relay.date
+ LEFT JOIN (
+ -- For each relay, tell if it has reported directory request
+ -- statistics on a given date. Again, we have to take into account
+ -- that statistics intervals cover more than one calendar date in most
+ -- cases. The exact number of requests isn't relevant here, but only
+ -- whether the relay reported directory requests or not.
+ SELECT fingerprint, date, 1 AS requests
+ FROM (
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date
+ FROM dirreq_stats
+ WHERE DATE(statsend) >= (SELECT MIN(date) FROM updates)
+ AND DATE(statsend) <= (SELECT MAX(date) FROM updates)
+ AND DATE(statsend) IN (SELECT date FROM updates)
+ AND country = 'zy'
+ UNION
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date
+ FROM dirreq_stats
+ WHERE DATE(statsend) - 1 >= (SELECT MIN(date) FROM updates)
+ AND DATE(statsend) - 1 <= (SELECT MAX(date) FROM updates)
+ AND DATE(statsend) - 1 IN (SELECT date FROM updates)
+ AND country = 'zy'
+ AND EXTRACT(EPOCH FROM DATE(statsend)) -
+ EXTRACT(EPOCH FROM statsend) + seconds > 0
+ ) dirreq_stats_split
+ GROUP BY 1, 2
+ ) dirreq_stats_by_relay
+ ON bwhist_by_relay.fingerprint = dirreq_stats_by_relay.fingerprint
+ AND bwhist_by_relay.date = dirreq_stats_by_relay.date
+ WHERE dirreq_stats_by_country.country IS NOT NULL
+ -- Group by date, country, and total reported directory requests,
+ -- summing up the bandwidth histories.
+ GROUP BY 1, 2, 3;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- non-relay statistics
+-- The following tables contain pre-aggregated statistics that are not
+-- based on relay descriptors or that are not yet derived from the relay
+-- descriptors in the database.
+
+-- TABLE bridge_network_size
+-- Contains average number of running bridges.
+CREATE TABLE bridge_network_size (
+ "date" DATE NOT NULL,
+ avg_running INTEGER NOT NULL,
+ CONSTRAINT bridge_network_size_pkey PRIMARY KEY(date)
+);
+
+-- TABLE dirreq_stats
+-- Contains daily users by country.
+CREATE TABLE dirreq_stats (
+ source CHARACTER(40) NOT NULL,
+ statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ seconds INTEGER NOT NULL,
+ country CHARACTER(2) NOT NULL,
+ requests INTEGER NOT NULL,
+ CONSTRAINT dirreq_stats_pkey
+ PRIMARY KEY (source, statsend, seconds, country)
+);
+
+-- TABLE bridge_stats
+-- Contains daily bridge users by country.
+CREATE TABLE bridge_stats (
+ "date" DATE NOT NULL,
+ country CHARACTER(2) NOT NULL,
+ users INTEGER NOT NULL,
+ CONSTRAINT bridge_stats_pkey PRIMARY KEY ("date", country)
+);
+
+-- TABLE torperf_stats
+-- Quantiles and medians of daily torperf results.
+CREATE TABLE torperf_stats (
+ "date" DATE NOT NULL,
+ source CHARACTER VARYING(32) NOT NULL,
+ q1 INTEGER NOT NULL,
+ md INTEGER NOT NULL,
+ q3 INTEGER NOT NULL,
+ timeouts INTEGER NOT NULL,
+ failures INTEGER NOT NULL,
+ requests INTEGER NOT NULL,
+ CONSTRAINT torperf_stats_pkey PRIMARY KEY("date", source)
+);
+
+-- TABLE gettor_stats
+-- Packages requested from GetTor
+CREATE TABLE gettor_stats (
+ "date" DATE NOT NULL,
+ bundle CHARACTER VARYING(32) NOT NULL,
+ downloads INTEGER NOT NULL,
+ CONSTRAINT gettor_stats_pkey PRIMARY KEY("date", bundle)
+);
+
+-- Refresh all statistics in the database.
+CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
+ BEGIN
+ DELETE FROM updates;
+ INSERT INTO updates SELECT * FROM scheduled_updates;
+ PERFORM refresh_relay_statuses_per_day();
+ PERFORM refresh_network_size();
+ PERFORM refresh_network_size_hour();
+ PERFORM refresh_relay_platforms();
+ PERFORM refresh_relay_versions();
+ PERFORM refresh_total_bandwidth();
+ PERFORM refresh_total_bwhist();
+ PERFORM refresh_user_stats();
+ DELETE FROM scheduled_updates WHERE id IN (SELECT id FROM updates);
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits