#!/usr/bin/env python3

import getopt
import multiprocessing
import sys

import stem
import stem.descriptor
import stem.descriptor.reader
import stem.descriptor.networkstatus

import numpy as np
import pandas as pd

import common

def process_network_status(network_status):
    """Return the relay uptime attributed to each date by one consensus.

    The number of routers carrying the Running flag is counted once, then
    multiplied by the second element of every segment that
    common.segment_datetime_interval yields for the interval from
    ``valid_after`` to ``fresh_until``.
    NOTE(review): that element is presumably the number of hours of the
    interval that fall on the segment's date -- confirm against
    common.segment_datetime_interval.

    Args:
        network_status: a stem NetworkStatusDocumentV3.

    Returns:
        A DataFrame with columns "date" and "relay_uptime_hours", one row
        per segment of the interval.
    """
    # isinstance rather than an exact type comparison, so that subclasses of
    # the document class are accepted as well.
    assert isinstance(network_status, stem.descriptor.networkstatus.NetworkStatusDocumentV3), type(network_status)

    # We assume the intervals seen by this function are non-overlapping.
    num_running = sum(stem.Flag.RUNNING in router.flags for router in network_status.routers.values())
    # Materialize the segments so they can be traversed once per column.
    segments = list(common.segment_datetime_interval(network_status.valid_after, network_status.fresh_until))
    return pd.DataFrame({
        "date": [date for (date, _, _) in segments],
        "relay_uptime_hours": [num_running * frac_int for (_, frac_int, _) in segments],
    })

def process_file(f):
    """Summarize one input path as relay uptime hours per date.

    Every document the descriptor reader yields for ``f`` is passed to
    process_network_status; the resulting rows are then summed by date.

    Returns a DataFrame with a "date" column and the per-date sums of the
    remaining columns.
    """
    # DocumentHandler.DOCUMENT makes the reader yield whole network status
    # documents rather than their individual router entries.
    handler = stem.descriptor.DocumentHandler.DOCUMENT
    with stem.descriptor.reader.DescriptorReader([f], document_handler = handler) as reader:
        per_document = [process_network_status(document) for document in reader]
    combined = pd.concat(per_document)
    return combined.groupby("date").sum().reset_index()

if __name__ == "__main__":
    # No options are defined; gnu_getopt is used only to collect the
    # positional arguments, which are the paths handed to process_file.
    _, inputs = getopt.gnu_getopt(sys.argv[1:], "")
    if not inputs:
        # pd.concat raises an opaque "No objects to concatenate" ValueError
        # when given nothing, so fail early with a readable message instead.
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("usage: {} PATH...".format(sys.argv[0]))

    # Each input is processed in a worker process; results arrive in
    # arbitrary order, which is fine because they are re-grouped by date.
    with multiprocessing.Pool(common.NUM_PROCESSES) as pool:
        per_file = pd.concat(pool.imap_unordered(process_file, inputs))

    # The same date can appear in the results of more than one input, so sum
    # again across inputs before writing the CSV to standard output.
    totals = per_file.groupby("date").sum().reset_index()
    totals.to_csv(sys.stdout, index = False, float_format = "%.2f", columns = [
        "date",
        "relay_uptime_hours",
    ])
