Commit 99e51f8a authored by Joshua Tauberer

Use boto to get the actual file sizes of backup files when S3 is used

parent 3b4b57c0
...@@ -37,6 +37,7 @@ def backup_status(env): ...@@ -37,6 +37,7 @@ def backup_status(env):
if rd.days == 1: return "%d day, %d hours" % (rd.days, rd.hours) if rd.days == 1: return "%d day, %d hours" % (rd.days, rd.hours)
return "%d hours, %d minutes" % (rd.hours, rd.minutes) return "%d hours, %d minutes" % (rd.hours, rd.minutes)
# Get duplicity collection status and parse for a list of backups.
def parse_line(line): def parse_line(line):
keys = line.strip().split() keys = line.strip().split()
date = dateutil.parser.parse(keys[1]) date = dateutil.parser.parse(keys[1])
...@@ -45,10 +46,9 @@ def backup_status(env): ...@@ -45,10 +46,9 @@ def backup_status(env):
"date_str": date.strftime("%x %X"), "date_str": date.strftime("%x %X"),
"date_delta": reldate(date, now, "the future?"), "date_delta": reldate(date, now, "the future?"),
"full": keys[0] == "full", "full": keys[0] == "full",
"size": int(keys[2]) * 250 * 1000000, "size": 0, # collection-status doesn't give us the size
"volumes": keys[2], # number of archive volumes for this backup (not really helpful)
} }
# Get duplicity collection status
collection_status = shell('check_output', [ collection_status = shell('check_output', [
"/usr/bin/duplicity", "/usr/bin/duplicity",
"collection-status", "collection-status",
...@@ -58,16 +58,18 @@ def backup_status(env): ...@@ -58,16 +58,18 @@ def backup_status(env):
config["target"], config["target"],
], ],
get_env(env)) get_env(env))
for line in collection_status.split('\n'):
# Split multi line string into list
collection_status = collection_status.split('\n')
# Parse backup data from status file
for line in collection_status:
if line.startswith(" full") or line.startswith(" inc"): if line.startswith(" full") or line.startswith(" inc"):
backup = parse_line(line) backup = parse_line(line)
backups[backup["date"]] = backup backups[backup["date"]] = backup
# Look at the target to get the sizes of each of the backups. There is more than one file per backup.
for fn, size in list_target_files(config):
m = re.match(r"duplicity-(full|full-signatures|(inc|new-signatures)\.(?P<incbase>\d+T\d+Z)\.to)\.(?P<date>\d+T\d+Z)\.", fn)
if not m: continue # not a part of a current backup chain
key = m.group("date")
backups[key]["size"] += size
# Ensure the rows are sorted reverse chronologically. # Ensure the rows are sorted reverse chronologically.
# This is relied on by should_force_full() and the next step. # This is relied on by should_force_full() and the next step.
backups = sorted(backups.values(), key = lambda b : b["date"], reverse=True) backups = sorted(backups.values(), key = lambda b : b["date"], reverse=True)
...@@ -297,18 +299,20 @@ def run_duplicity_verification(): ...@@ -297,18 +299,20 @@ def run_duplicity_verification():
env["STORAGE_ROOT"], env["STORAGE_ROOT"],
], get_env(env)) ], get_env(env))
def validate_target(config): def list_target_files(config):
import urllib.parse import urllib.parse
try: try:
p = urllib.parse.urlparse(config["target"]) p = urllib.parse.urlparse(config["target"])
except ValueError: except ValueError:
return "invalid target" return "invalid target"
if p.scheme == "s3": if p.scheme == "file":
import boto.s3 return [(fn, os.path.getsize(os.path.join(p.path, fn))) for fn in os.listdir(p.path)]
from boto.exception import BotoServerError
elif p.scheme == "s3":
# match to a Region # match to a Region
import boto.s3
from boto.exception import BotoServerError
for region in boto.s3.regions(): for region in boto.s3.regions():
if region.endpoint == p.hostname: if region.endpoint == p.hostname:
break break
...@@ -333,6 +337,11 @@ def validate_target(config): ...@@ -333,6 +337,11 @@ def validate_target(config):
raise ValueError("Incorrect region for this bucket.") raise ValueError("Incorrect region for this bucket.")
raise ValueError(e.reason) raise ValueError(e.reason)
return [(key.name[len(path):], key.size) for key in bucket.list(prefix=path)]
else:
raise ValueError(config["target"])
def backup_set_custom(env, target, target_user, target_pass, min_age): def backup_set_custom(env, target, target_user, target_pass, min_age):
config = get_backup_config(env, for_save=True) config = get_backup_config(env, for_save=True)
...@@ -351,7 +360,7 @@ def backup_set_custom(env, target, target_user, target_pass, min_age): ...@@ -351,7 +360,7 @@ def backup_set_custom(env, target, target_user, target_pass, min_age):
if config["target"] != "local": if config["target"] != "local":
# "local" isn't supported by the following function, which expects a full url in the target key, # "local" isn't supported by the following function, which expects a full url in the target key,
# which is what is there except when loading the config prior to saving # which is what is there except when loading the config prior to saving
validate_target(config) list_target_files(config)
except ValueError as e: except ValueError as e:
return str(e) return str(e)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment