Commit 7ec662c8 authored by Joshua Tauberer

status checks: use a worker pool that lives across flask requests, see #327

parent 348d2b87
...@@ -11,6 +11,12 @@ from mailconfig import get_mail_users, get_mail_users_ex, get_admins, add_mail_u ...@@ -11,6 +11,12 @@ from mailconfig import get_mail_users, get_mail_users_ex, get_admins, add_mail_u
from mailconfig import get_mail_user_privileges, add_remove_mail_user_privilege from mailconfig import get_mail_user_privileges, add_remove_mail_user_privilege
from mailconfig import get_mail_aliases, get_mail_aliases_ex, get_mail_domains, add_mail_alias, remove_mail_alias from mailconfig import get_mail_aliases, get_mail_aliases_ex, get_mail_domains, add_mail_alias, remove_mail_alias
# Create a worker pool for the status checks. The pool should
# live across HTTP requests so we don't balloon the system with
# processes.
import multiprocessing.pool
pool = multiprocessing.pool.Pool(processes=10)
env = utils.load_environment() env = utils.load_environment()
auth_service = auth.KeyAuthService() auth_service = auth.KeyAuthService()
...@@ -318,7 +324,7 @@ def system_status(): ...@@ -318,7 +324,7 @@ def system_status():
def print_line(self, message, monospace=False): def print_line(self, message, monospace=False):
self.items[-1]["extra"].append({ "text": message, "monospace": monospace }) self.items[-1]["extra"].append({ "text": message, "monospace": monospace })
output = WebOutput() output = WebOutput()
run_checks(env, output) run_checks(env, output, pool)
return json_response(output.items) return json_response(output.items)
@app.route('/system/updates') @app.route('/system/updates')
......
...@@ -17,12 +17,12 @@ from mailconfig import get_mail_domains, get_mail_aliases ...@@ -17,12 +17,12 @@ from mailconfig import get_mail_domains, get_mail_aliases
from utils import shell, sort_domains, load_env_vars_from_file from utils import shell, sort_domains, load_env_vars_from_file
def run_checks(env, output): def run_checks(env, output, pool):
# run systems checks # run systems checks
output.add_heading("System") output.add_heading("System")
# check that services are running # check that services are running
if not run_services_checks(env, output): if not run_services_checks(env, output, pool):
# If critical services are not running, stop. If bind9 isn't running, # If critical services are not running, stop. If bind9 isn't running,
# all later DNS checks will time out and that will take forever to # all later DNS checks will time out and that will take forever to
# go through, and if running over the web will cause a fastcgi timeout. # go through, and if running over the web will cause a fastcgi timeout.
...@@ -37,11 +37,8 @@ def run_checks(env, output): ...@@ -37,11 +37,8 @@ def run_checks(env, output):
# perform other checks asynchronously # perform other checks asynchronously
pool = multiprocessing.pool.Pool(processes=1) run_network_checks(env, output)
r1 = pool.apply_async(run_network_checks, [env]) run_domain_checks(env, output, pool)
r2 = run_domain_checks(env)
r1.get().playback(output)
r2.playback(output)
def get_ssh_port(): def get_ssh_port():
# Returns ssh port # Returns ssh port
...@@ -54,7 +51,7 @@ def get_ssh_port(): ...@@ -54,7 +51,7 @@ def get_ssh_port():
if e == "port": if e == "port":
returnNext = True returnNext = True
def run_services_checks(env, output): def run_services_checks(env, output, pool):
# Check that system services are running. # Check that system services are running.
services = [ services = [
...@@ -82,7 +79,6 @@ def run_services_checks(env, output): ...@@ -82,7 +79,6 @@ def run_services_checks(env, output):
all_running = True all_running = True
fatal = False fatal = False
pool = multiprocessing.pool.Pool(processes=10)
ret = pool.starmap(check_service, ((i, service, env) for i, service in enumerate(services)), chunksize=1) ret = pool.starmap(check_service, ((i, service, env) for i, service in enumerate(services)), chunksize=1)
for i, running, fatal2, output2 in sorted(ret): for i, running, fatal2, output2 in sorted(ret):
all_running = all_running and running all_running = all_running and running
...@@ -189,10 +185,9 @@ def check_free_disk_space(env, output): ...@@ -189,10 +185,9 @@ def check_free_disk_space(env, output):
else: else:
output.print_error(disk_msg) output.print_error(disk_msg)
def run_network_checks(env): def run_network_checks(env, output):
# Also see setup/network-checks.sh. # Also see setup/network-checks.sh.
output = BufferedOutput()
output.add_heading("Network") output.add_heading("Network")
# Stop if we cannot make an outbound connection on port 25. Many residential # Stop if we cannot make an outbound connection on port 25. Many residential
...@@ -220,9 +215,7 @@ def run_network_checks(env): ...@@ -220,9 +215,7 @@ def run_network_checks(env):
which may prevent recipients from receiving your email. See http://www.spamhaus.org/query/ip/%s.""" which may prevent recipients from receiving your email. See http://www.spamhaus.org/query/ip/%s."""
% (env['PUBLIC_IP'], zen, env['PUBLIC_IP'])) % (env['PUBLIC_IP'], zen, env['PUBLIC_IP']))
return output def run_domain_checks(env, output, pool):
def run_domain_checks(env):
# Get the list of domains we handle mail for. # Get the list of domains we handle mail for.
mail_domains = get_mail_domains(env) mail_domains = get_mail_domains(env)
...@@ -242,13 +235,10 @@ def run_domain_checks(env): ...@@ -242,13 +235,10 @@ def run_domain_checks(env):
# Parallelize the checks across a worker pool. # Parallelize the checks across a worker pool.
args = ((domain, env, dns_domains, dns_zonefiles, mail_domains, web_domains) args = ((domain, env, dns_domains, dns_zonefiles, mail_domains, web_domains)
for domain in domains_to_check) for domain in domains_to_check)
pool = multiprocessing.pool.Pool(processes=10)
ret = pool.starmap(run_domain_checks_on_domain, args, chunksize=1) ret = pool.starmap(run_domain_checks_on_domain, args, chunksize=1)
ret = dict(ret) # (domain, output) => { domain: output } ret = dict(ret) # (domain, output) => { domain: output }
output = BufferedOutput()
for domain in sort_domains(ret, env): for domain in sort_domains(ret, env):
ret[domain].playback(output) ret[domain].playback(output)
return output
def run_domain_checks_on_domain(domain, env, dns_domains, dns_zonefiles, mail_domains, web_domains): def run_domain_checks_on_domain(domain, env, dns_domains, dns_zonefiles, mail_domains, web_domains):
output = BufferedOutput() output = BufferedOutput()
...@@ -777,12 +767,14 @@ class BufferedOutput: ...@@ -777,12 +767,14 @@ class BufferedOutput:
for attr, args, kwargs in self.buf: for attr, args, kwargs in self.buf:
getattr(output, attr)(*args, **kwargs) getattr(output, attr)(*args, **kwargs)
if __name__ == "__main__": if __name__ == "__main__":
import sys import sys
from utils import load_environment from utils import load_environment
env = load_environment() env = load_environment()
if len(sys.argv) == 1: if len(sys.argv) == 1:
run_checks(env, ConsoleOutput()) pool = multiprocessing.pool.Pool(processes=10)
run_checks(env, ConsoleOutput(), pool)
elif sys.argv[1] == "--check-primary-hostname": elif sys.argv[1] == "--check-primary-hostname":
# See if the primary hostname appears resolvable and has a signed certificate. # See if the primary hostname appears resolvable and has a signed certificate.
domain = env['PRIMARY_HOSTNAME'] domain = env['PRIMARY_HOSTNAME']
...@@ -795,3 +787,5 @@ if __name__ == "__main__": ...@@ -795,3 +787,5 @@ if __name__ == "__main__":
if cert_status != "OK": if cert_status != "OK":
sys.exit(1) sys.exit(1)
sys.exit(0) sys.exit(0)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment