Commit febfa72d authored by Joshua Tauberer

race condition between backups and status checks - connection refused

At the end of the backup, wait a bit for dovecot and postfix to finish restarting.

Hopefully fixes #381.
parent c03e0003
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
import os, os.path, shutil, glob, re, datetime import os, os.path, shutil, glob, re, datetime
import dateutil.parser, dateutil.relativedelta, dateutil.tz import dateutil.parser, dateutil.relativedelta, dateutil.tz
from utils import exclusive_process, load_environment, shell from utils import exclusive_process, load_environment, shell, wait_for_service
# Destroy backups when the most recent increment in the chain # Destroy backups when the most recent increment in the chain
# that depends on it is this many days old. # that depends on it is this many days old.
...@@ -242,6 +242,13 @@ def perform_backup(full_backup): ...@@ -242,6 +242,13 @@ def perform_backup(full_backup):
['su', env['STORAGE_USER'], '-c', post_script], ['su', env['STORAGE_USER'], '-c', post_script],
env=env) env=env)
# Our nightly cron job executes system status checks immediately after this
# backup. Since it checks that dovecot and postfix are running, block for a
# bit (maximum of 10 seconds each) to give each a chance to finish restarting
# before the status checks might catch them down. See #381.
wait_for_service(25, True, env, 10)
wait_for_service(993, True, env, 10)
def run_duplicity_verification(): def run_duplicity_verification():
env = load_environment() env = load_environment()
backup_root = os.path.join(env["STORAGE_ROOT"], 'backup') backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
......
...@@ -184,3 +184,19 @@ def du(path): ...@@ -184,3 +184,19 @@ def du(path):
seen.add(stat.st_ino) seen.add(stat.st_ino)
total_size += stat.st_size total_size += stat.st_size
return total_size return total_size
def wait_for_service(port, public, env, timeout):
    """Block until a TCP service is accepting connections, or give up.

    Repeatedly attempts an IPv4 TCP connection to ``port`` until one
    succeeds or roughly ``timeout`` seconds have elapsed.

    Args:
        port: TCP port number to connect to.
        public: If true, connect to ``env['PUBLIC_IP']``; otherwise connect
            to the loopback address ``127.0.0.1``.
        env: Mapping that provides ``'PUBLIC_IP'``; only read when
            ``public`` is true.
        timeout: Overall time budget in seconds. Each individual connection
            attempt is limited to ``timeout/3`` seconds, and the pause
            between attempts is ``min(timeout/4, 1)`` seconds.

    Returns:
        True as soon as a connection attempt succeeds; False if an attempt
        fails after the time budget has been used up.
    """
    import socket
    import time

    # The target address does not change between attempts.
    host = env['PUBLIC_IP'] if public else "127.0.0.1"

    start = time.perf_counter()
    while True:
        # A fresh socket is needed for every attempt. The ``with`` block
        # guarantees it is closed on every path (success, failure, or an
        # unexpected exception) so retries do not leak file descriptors.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(timeout/3)
            try:
                s.connect((host, port))
                return True
            except OSError:
                # Connection refused, timed out, unreachable, etc.
                # Give up only once the overall budget is exhausted.
                if time.perf_counter() > start+timeout:
                    return False
        # Pause briefly before retrying so we do not spin on a service
        # that is still starting up.
        time.sleep(min(timeout/4, 1))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment