Commit 2e6c4103 authored by Leo Koppelkamm

Make backups more configurable

Backup location and maximum age can now be configured in the admin panel.
For now only S3 is supported, but adding other duplicity-supported backends should be straightforward.
parent 0293e043
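Note: the new settings are read from a small YAML file, custom.yaml, kept under the backup root (see get_backup_config() and write_backup_config() in the diff below). As a rough sketch of what an S3 configuration might look like — the key names come from the diff, while the storage root, bucket, and credentials here are entirely made up:

```python
import os
import rtyaml

# Illustrative storage root; backup.py actually derives this from load_environment().
backup_root = "/home/user-data/backup"

# Key names mirror default_config / backup_set_custom() below; values are examples only.
config = {
    "target": "s3://s3.amazonaws.com/my-backup-bucket/box",  # an assumed duplicity S3 URL
    "target_type": "s3",
    "target_user": "AKIAEXAMPLE",        # S3 access key id (made up)
    "target_pass": "EXAMPLESECRETKEY",   # S3 secret access key (made up)
    "max_age_in_days": 7,
}

# write_backup_config() in the diff does essentially this:
with open(os.path.join(backup_root, "custom.yaml"), "w") as f:
    f.write(rtyaml.dump(config))
```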
@@ -10,20 +10,29 @@
 import os, os.path, shutil, glob, re, datetime
 import dateutil.parser, dateutil.relativedelta, dateutil.tz
+import rtyaml
 from utils import exclusive_process, load_environment, shell, wait_for_service

+# Root folder
+backup_root = os.path.join(load_environment()["STORAGE_ROOT"], 'backup')
+
+# Default settings
 # Destroy backups when the most recent increment in the chain
 # that depends on it is this many days old.
-keep_backups_for_days = 3
+default_config = {
+    "max_age_in_days": 3,
+    "target": "file://" + os.path.join(backup_root, 'encrypted'),
+    "target_type": "file"
+}

 def backup_status(env):
     # What is the current status of backups?
-    # Loop through all of the files in STORAGE_ROOT/backup/encrypted to
-    # get a list of all of the backups taken and sum up file sizes to
-    # see how large the storage is.
+    # Query duplicity to get a list of all backups.
+    # Use the number of volumes to estimate the size.
+    config = get_backup_config()
     now = datetime.datetime.now(dateutil.tz.tzlocal())

     def reldate(date, ref, clip):
         if ref < date: return clip
         rd = dateutil.relativedelta.relativedelta(ref, date)
@@ -33,29 +42,42 @@ def backup_status(env):
         if rd.days > 1: return "%d days, %d hours" % (rd.days, rd.hours)
         if rd.days == 1: return "%d day, %d hours" % (rd.days, rd.hours)
         return "%d hours, %d minutes" % (rd.hours, rd.minutes)

+    def parse_line(line):
+        keys = line.strip().split()
+        date = dateutil.parser.parse(keys[1])
+        return {
+            "date": keys[1],
+            "date_str": date.strftime("%x %X"),
+            "date_delta": reldate(date, now, "the future?"),
+            "full": keys[0] == "full",
+            "size": int(keys[2]) * 250 * 1000000,
+        }
+
+    # Write duplicity status to file
+    shell('check_call', [
+        "/usr/bin/duplicity",
+        "collection-status",
+        "--log-file", os.path.join(backup_root, "duplicity_status"),
+        "--gpg-options", "--cipher-algo=AES256",
+        config["target"],
+        ],
+        get_env())
+
     backups = { }
-    backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
     backup_dir = os.path.join(backup_root, 'encrypted')
-    os.makedirs(backup_dir, exist_ok=True) # os.listdir fails if directory does not exist
-    for fn in os.listdir(backup_dir):
-        m = re.match(r"duplicity-(full|full-signatures|(inc|new-signatures)\.(?P<incbase>\d+T\d+Z)\.to)\.(?P<date>\d+T\d+Z)\.", fn)
-        if not m: raise ValueError(fn)
-
-        key = m.group("date")
-        if key not in backups:
-            date = dateutil.parser.parse(m.group("date"))
-            backups[key] = {
-                "date": m.group("date"),
-                "date_str": date.strftime("%x %X"),
-                "date_delta": reldate(date, now, "the future?"),
-                "full": m.group("incbase") is None,
-                "previous": m.group("incbase"),
-                "size": 0,
-            }
-
-        backups[key]["size"] += os.path.getsize(os.path.join(backup_dir, fn))
+    # Parse backup data from status file
+    with open(os.path.join(backup_root, "duplicity_status"),'r') as status_file:
+        for line in status_file:
+            if line.startswith(" full") or line.startswith(" inc"):
+                backup = parse_line(line)
+                backups[backup["date"]] = backup
+
+    # Remove status file
+    os.remove(os.path.join(backup_root, "duplicity_status"))

     # Ensure the rows are sorted reverse chronologically.
     # This is relied on by should_force_full() and the next step.
     backups = sorted(backups.values(), key = lambda b : b["date"], reverse=True)
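The new status path shells out to duplicity collection-status with --log-file and then parses the chain lines back out with parse_line(). A small sketch of what one such line yields; the exact log format written by duplicity is an assumption here, but the three fields the code relies on are the chain type, the timestamp, and the volume count (each volume is at most 250 MB, per --volsize):

```python
import dateutil.parser

# Hypothetical duplicity_status line; the real --log-file format is assumed.
line = " inc 20150405T010000Z 1\n"

keys = line.strip().split()
print(keys[0] == "full")               # False -> an incremental backup
print(dateutil.parser.parse(keys[1]))  # 2015-04-05 01:00:00+00:00
print(int(keys[2]) * 250 * 1000000)    # 250000000 -> size estimated from the volume count
```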
@@ -79,11 +101,11 @@ def backup_status(env):
     # when the threshold is met.
     deleted_in = None
     if incremental_count > 0 and first_full_size is not None:
-        deleted_in = "approx. %d days" % round(keep_backups_for_days + (.5 * first_full_size - incremental_size) / (incremental_size/incremental_count) + .5)
+        deleted_in = "approx. %d days" % round(config["max_age_in_days"] + (.5 * first_full_size - incremental_size) / (incremental_size/incremental_count) + .5)

     # When will a backup be deleted?
     saw_full = False
-    days_ago = now - datetime.timedelta(days=keep_backups_for_days)
+    days_ago = now - datetime.timedelta(days=config["max_age_in_days"])
     for bak in backups:
         if deleted_in:
             # Subsequent backups are deleted when the most recent increment
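For a concrete sense of the estimate above (only the source of the retention window changes in this commit, from keep_backups_for_days to config["max_age_in_days"]), here is the same arithmetic with made-up sizes: increments are extrapolated at their average size until they reach half the size of the full backup, at which point a new chain would start and this one ages out max_age_in_days later.

```python
# Illustrative numbers only.
max_age_in_days = 3
first_full_size = 10e9     # 10 GB full backup
incremental_size = 2e9     # 2 GB of increments so far...
incremental_count = 4      # ...spread over 4 incremental backups

deleted_in = "approx. %d days" % round(
    max_age_in_days
    + (.5 * first_full_size - incremental_size) / (incremental_size / incremental_count)
    + .5)

print(deleted_in)  # approx. 10 days
```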
@@ -124,12 +146,35 @@ def should_force_full(env):
     # (I love for/else blocks. Here it's just to show off.)
     return True

+def get_passphrase():
+    # Get the encryption passphrase. secret_key.txt is 2048 random
+    # bits base64-encoded and with line breaks every 65 characters.
+    # gpg will only take the first line of text, so sanity check that
+    # that line is long enough to be a reasonable passphrase. It
+    # only needs to be 43 base64-characters to match AES256's key
+    # length of 32 bytes.
+    with open(os.path.join(backup_root, 'secret_key.txt')) as f:
+        passphrase = f.readline().strip()
+    if len(passphrase) < 43: raise Exception("secret_key.txt's first line is too short!")
+
+    return passphrase
+
+def get_env():
+    config = get_backup_config()
+    env = { "PASSPHRASE" : get_passphrase() }
+
+    if config["target_type"] == 's3':
+        env["AWS_ACCESS_KEY_ID"] = config["target_user"]
+        env["AWS_SECRET_ACCESS_KEY"] = config["target_pass"]
+
+    return env
+
 def perform_backup(full_backup):
     env = load_environment()
     exclusive_process("backup")

+    config = get_backup_config()
-    backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
     backup_cache_dir = os.path.join(backup_root, 'cache')
     backup_dir = os.path.join(backup_root, 'encrypted')
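The 43-character minimum enforced by get_passphrase() comes straight from the comment above it: each base64 character encodes 6 bits, so covering AES-256's 32-byte (256-bit) key takes at least ceil(256 / 6) characters.

```python
import math

# 32-byte AES-256 key = 256 bits; base64 packs 6 bits per character.
print(math.ceil(256 / 6))  # 43 -- the minimum first-line length required of secret_key.txt
```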
@@ -169,17 +214,6 @@ def perform_backup(full_backup):
     shell('check_call', ["/usr/sbin/service", "dovecot", "stop"])
     shell('check_call', ["/usr/sbin/service", "postfix", "stop"])

-    # Get the encryption passphrase. secret_key.txt is 2048 random
-    # bits base64-encoded and with line breaks every 65 characters.
-    # gpg will only take the first line of text, so sanity check that
-    # that line is long enough to be a reasonable passphrase. It
-    # only needs to be 43 base64-characters to match AES256's key
-    # length of 32 bytes.
-    with open(os.path.join(backup_root, 'secret_key.txt')) as f:
-        passphrase = f.readline().strip()
-    if len(passphrase) < 43: raise Exception("secret_key.txt's first line is too short!")
-    env_with_passphrase = { "PASSPHRASE" : passphrase }
-
     # Run a backup of STORAGE_ROOT (but excluding the backups themselves!).
     # --allow-source-mismatch is needed in case the box's hostname is changed
     # after the first backup. See #396.
@@ -192,10 +226,10 @@
             "--volsize", "250",
             "--gpg-options", "--cipher-algo=AES256",
             env["STORAGE_ROOT"],
-            "file://" + backup_dir,
+            config["target"],
             "--allow-source-mismatch"
             ],
-            env_with_passphrase)
+            get_env())
     finally:
         # Start services again.
         shell('check_call', ["/usr/sbin/service", "dovecot", "start"])
@@ -210,12 +244,12 @@ def perform_backup(full_backup):
     shell('check_call', [
         "/usr/bin/duplicity",
         "remove-older-than",
-        "%dD" % keep_backups_for_days,
+        "%dD" % config["max_age_in_days"],
         "--archive-dir", backup_cache_dir,
         "--force",
-        "file://" + backup_dir
+        config["target"]
         ],
-        env_with_passphrase)
+        get_env())

     # From duplicity's manual:
     # "This should only be necessary after a duplicity session fails or is
@@ -227,13 +261,14 @@ def perform_backup(full_backup):
         "cleanup",
         "--archive-dir", backup_cache_dir,
         "--force",
-        "file://" + backup_dir
+        config["target"]
         ],
-        env_with_passphrase)
+        get_env())

     # Change ownership of backups to the user-data user, so that the after-backup
     # script can access them.
-    shell('check_call', ["/bin/chown", "-R", env["STORAGE_USER"], backup_dir])
+    if config["target_type"] == 'file':
+        shell('check_call', ["/bin/chown", "-R", env["STORAGE_USER"], backup_dir])

     # Execute a post-backup script that does the copying to a remote server.
     # Run as the STORAGE_USER user, not as root. Pass our settings in
@@ -241,8 +276,8 @@ def perform_backup(full_backup):
     post_script = os.path.join(backup_root, 'after-backup')
     if os.path.exists(post_script):
         shell('check_call',
-            ['su', env['STORAGE_USER'], '-c', post_script],
-            env=env)
+            ['su', env['STORAGE_USER'], '-c', post_script, config["target"]],
+            env=get_env())

     # Our nightly cron job executes system status checks immediately after this
     # backup. Since it checks that dovecot and postfix are running, block for a
@@ -253,10 +288,10 @@
 def run_duplicity_verification():
     env = load_environment()
-    backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
+    config = get_backup_config()
     backup_cache_dir = os.path.join(backup_root, 'cache')
     backup_dir = os.path.join(backup_root, 'encrypted')
-    env_with_passphrase = { "PASSPHRASE" : open(os.path.join(backup_root, 'secret_key.txt')).read() }

     shell('check_call', [
         "/usr/bin/duplicity",
         "--verbosity", "info",
@@ -264,9 +299,47 @@ def run_duplicity_verification():
         "--compare-data",
         "--archive-dir", backup_cache_dir,
         "--exclude", backup_root,
-        "file://" + backup_dir,
+        config["target"],
         env["STORAGE_ROOT"],
-    ], env_with_passphrase)
+    ], get_env())

+def backup_set_custom(target, target_user, target_pass, target_type, max_age):
+    config = get_backup_config()
+
+    # max_age must be an int
+    if isinstance(max_age, str):
+        max_age = int(max_age)
+
+    config["target"] = target
+    config["target_user"] = target_user
+    config["target_pass"] = target_pass
+    config["target_type"] = target_type
+    config["max_age_in_days"] = max_age
+
+    write_backup_config(config)
+
+    return "Updated backup config"
+
+def get_backup_config():
+    try:
+        config = rtyaml.load(open(os.path.join(backup_root, 'custom.yaml')))
+        if not isinstance(config, dict): raise ValueError() # caught below
+    except:
+        return default_config
+
+    merged_config = default_config.copy()
+    merged_config.update(config)
+
+    # max_age must be an int
+    if isinstance(merged_config["max_age_in_days"], str):
+        merged_config["max_age_in_days"] = int(merged_config["max_age_in_days"])
+
+    return merged_config
+
+def write_backup_config(newconfig):
+    with open(os.path.join(backup_root, 'custom.yaml'), "w") as f:
+        f.write(rtyaml.dump(newconfig))
+
 if __name__ == "__main__":
     import sys
@@ -274,6 +347,7 @@ if __name__ == "__main__":
         # Run duplicity's verification command to check a) the backup files
         # are readable, and b) report if they are up to date.
         run_duplicity_verification()
     else:
         # Perform a backup. Add --full to force a full backup rather than
         # possibly performing an incremental backup.
......
@@ -402,6 +402,24 @@ def backup_status():
     from backup import backup_status
     return json_response(backup_status(env))

+@app.route('/system/backup/get-custom')
+@authorized_personnel_only
+def backup_get_custom():
+    from backup import get_backup_config
+    return json_response(get_backup_config())
+
+@app.route('/system/backup/set-custom', methods=["POST"])
+@authorized_personnel_only
+def backup_set_custom():
+    from backup import backup_set_custom
+    return json_response(backup_set_custom(
+        request.form.get('target', ''),
+        request.form.get('target_user', ''),
+        request.form.get('target_pass', ''),
+        request.form.get('target_type', ''),
+        request.form.get('max_age', '')
+    ))
+
 # MUNIN

 @app.route('/munin/')
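These two routes mirror the existing status endpoint and are what the admin panel's JavaScript (further down) calls. As a rough sketch of exercising them directly, assuming the panel's usual authenticated /admin proxy and entirely made-up host and credentials:

```python
import requests

BASE = "https://box.example.com/admin"    # hypothetical admin-panel URL
AUTH = ("admin@example.com", "password")  # hypothetical admin credentials

# Read the current backup settings (what show_custom_backup() does in the UI).
print(requests.get(BASE + "/system/backup/get-custom", auth=AUTH).json())

# Switch backups to S3; field names match the set-custom route above, values are examples.
r = requests.post(BASE + "/system/backup/set-custom", auth=AUTH, data={
    "target": "s3://s3.amazonaws.com/my-backup-bucket/box",
    "target_type": "s3",
    "target_user": "AKIAEXAMPLE",
    "target_pass": "EXAMPLESECRETKEY",
    "max_age": "7",
})
print(r.text)  # "Updated backup config"
```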
@@ -432,4 +450,3 @@ if __name__ == '__main__':
     # Start the application server. Listens on 127.0.0.1 (IPv4 only).
     app.run(port=10222)
@@ -7,12 +7,53 @@
 <h3>Copying Backup Files</h3>

-<p>The box makes an incremental backup each night. The backup is stored on the machine itself. You are responsible for copying the backup files off of the machine.</p>
-<p>Many cloud providers make this easy by allowing you to take snapshots of the machine's disk.</p>
+<p>The box makes an incremental backup each night. By default the backup is stored on the machine itself, but you can also have it stored on Amazon S3.</p>

 <p>You can also use SFTP (FTP over SSH) to copy files from <tt id="backup-location"></tt>. These files are encrypted, so they are safe to store anywhere. Copy the encryption password from <tt id="backup-encpassword-file"></tt> also but keep it in a safe location.</p>

+<h3>Backup Configuration</h3>
+
+<form class="form-horizontal" role="form" onsubmit="set_custom_backup(); return false;">
+  <div class="form-group">
+    <label for="target" class="col-sm-2 control-label">Backup target</label>
+    <div class="col-sm-2">
+      <select class="form-control" rows="1" id="target-type" onchange="toggle_form()">
+        <option value="file">Store locally</option>
+        <option value="s3">Amazon S3</option>
+      </select>
+    </div>
+  </div>
+  <div class="form-group">
+    <label for="target" class="col-sm-2 control-label">Maximum time to keep old backups (in days)</label>
+    <div class="col-sm-8">
+      <input type="number" class="form-control" rows="1" id="max-age">
+    </div>
+  </div>
+  <div class="form-group form-advanced">
+    <label for="target" class="col-sm-2 control-label">S3 URL</label>
+    <div class="col-sm-8">
+      <textarea class="form-control" rows="1" id="target"></textarea>
+    </div>
+  </div>
+  <div class="form-group form-advanced">
+    <label for="target-user" class="col-sm-2 control-label">S3&nbsp;Key</label>
+    <div class="col-sm-8">
+      <textarea class="form-control" rows="1" id="target-user"></textarea>
+    </div>
+  </div>
+  <div class="form-group form-advanced">
+    <label for="target-pass" class="col-sm-2 control-label">S3&nbsp;Secret</label>
+    <div class="col-sm-8">
+      <textarea class="form-control" rows="1" id="target-pass"></textarea>
+    </div>
+  </div>
+  <div class="form-group">
+    <div class="col-sm-offset-2 col-sm-11">
+      <button id="set-s3-backup-button" type="submit" class="btn btn-primary">Save</button>
+    </div>
+  </div>
+</form>
+
 <h3>Current Backups</h3>

 <p>The backup directory currently contains the backups listed below. The total size on disk of the backups is currently <span id="backup-total-size"></span>.</p>
@@ -27,8 +68,17 @@
     <tbody>
     </tbody>
 </table>

 <script>
+function toggle_form() {
+  var target_type = $("#target-type").val();
+  if (target_type == 'file') {
+    $(".form-advanced").hide();
+  } else {
+    $(".form-advanced").show();
+  }
+}
+
 function nice_size(bytes) {
   var powers = ['bytes', 'KB', 'MB', 'GB', 'TB'];
   while (true) {
@@ -46,6 +96,8 @@ function nice_size(bytes) {
 }

 function show_system_backup() {
+  show_custom_backup()
   $('#backup-status tbody').html("<tr><td colspan='2' class='text-muted'>Loading...</td></tr>")
   api(
     "/system/backup/status",
@@ -83,4 +135,45 @@ function show_system_backup() {
     $('#backup-total-size').text(nice_size(total_disk_size));
   })
 }

+function show_custom_backup() {
+  api(
+    "/system/backup/get-custom",
+    "GET",
+    { },
+    function(r) {
+      $("#target").val(r.target);
+      $("#target-type").val(r.target_type);
+      $("#target-user").val(r.target_user);
+      $("#target-pass").val(r.target_pass);
+      $("#max-age").val(r.max_age_in_days);
+      toggle_form()
+    })
+}
+
+function set_custom_backup() {
+  var target = $("#target").val();
+  var target_type = $("#target-type").val();
+  var target_user = $("#target-user").val();
+  var target_pass = $("#target-pass").val();
+  var max_age = $("#max-age").val();
+
+  api(
+    "/system/backup/set-custom",
+    "POST",
+    {
+      target: target,
+      target_type: target_type,
+      target_user: target_user,
+      target_pass: target_pass,
+      max_age: max_age
+    },
+    function(r) {
+      // Responses are multiple lines of pre-formatted text.
+      show_modal_error("Backup configuration", $("<pre/>").text(r));
+    },
+    function(r) {
+      show_modal_error("Backup configuration (error)", r);
+    });
+
+  return false;
+}
+
 </script>
@@ -4,8 +4,9 @@ source setup/functions.sh

 # build-essential libssl-dev libffi-dev python3-dev: Required to pip install cryptography.
 apt_install python3-flask links duplicity libyaml-dev python3-dnspython python3-dateutil \
-	build-essential libssl-dev libffi-dev python3-dev
-hide_output pip3 install --upgrade rtyaml email_validator idna cryptography
+	build-essential libssl-dev libffi-dev python3-dev python-pip
+hide_output pip3 install --upgrade rtyaml email_validator idna cryptography boto
+hide_output pip install --upgrade boto
 # email_validator is repeated in setup/questions.sh

 # Create a backup directory and a random key for encrypting backups.
......