I created a script that I dropped into /etc/cron.hourly
which does the following:
- Use rsync to mirror my root partition to a btrfs partition on another hard drive (which only updates modified files).
- Use `btrfs subvolume snapshot` to create a snapshot of that mirror (which only uses additional storage for modified files).
- Move "old" snapshots into a trash directory so I can delete them later if I want to save space.
It is as follows:
#!/usr/bin/env python
from datetime import datetime, timedelta
import os
import pathlib
import shutil
import subprocess
import sys
import portalocker
# strftime/strptime pattern used to name snapshot directories and to parse
# those names back into datetimes.
DATETIME_FORMAT = '%Y-%m-%d-%H%M'

# Layout of the backup filesystem: the rsync mirror, the snapshots taken of
# it, and the holding area for snapshots that were pruned.
BACKUP_DIRECTORY = pathlib.Path('/backups/internal')
MIRROR_DIRECTORY = BACKUP_DIRECTORY.joinpath('mirror')
SNAPSHOT_DIRECTORY = BACKUP_DIRECTORY.joinpath('snapshots')
TRASH_DIRECTORY = BACKUP_DIRECTORY.joinpath('trash')

# Each entry is handed to rsync as an --exclude pattern.
EXCLUDED = [
    # Top-level directories.
    '/backups', '/dev', '/media', '/lost+found', '/mnt', '/nix',
    '/proc', '/run', '/sys', '/tmp', '/var',
    # Per-user directories.
    '/home/*/.cache',
    '/home/*/.local/share/flatpak',
    '/home/*/.local/share/Trash',
    '/home/*/.steam',
    '/home/*/Downloads',
    '/home/*/Trash',
]

# Options passed to every rsync invocation.
OPTIONS = [
    '-avAXH',  # archive mode, verbose, keep ACLs, xattrs and hard links
    '--delete',
    '--delete-excluded',
    '--numeric-ids',
    '--relative',
    '--progress',
]
def execute(command, *options):
    """Echo a command line, run it, and raise if it exits non-zero.

    Args:
        command: Path of the executable to run.
        *options: Arguments for the executable. Strings and path-like
            objects are both accepted.

    Raises:
        subprocess.CalledProcessError: If the process returns a non-zero
            exit status.
    """
    # Flush explicitly: when stdout is a pipe or a file rather than a
    # terminal, Python buffers this line while the child writes straight to
    # the descriptor, so the echo would otherwise show up after the child's
    # own output.
    print('>', command, *options, flush=True)
    subprocess.run([command, *options], check=True)
def snapshots_to_trash(snapshot_datetimes, now):
    """Pick the snapshots that the retention policy no longer keeps.

    The policy is applied from the newest snapshot to the oldest:

    * every snapshot from the last 24 hours is kept;
    * back to 30 days ago, only the newest snapshot of each calendar day;
    * back to 90 days ago, only the newest snapshot of each ISO week;
    * beyond that, only the newest snapshot of each calendar month.

    Args:
        snapshot_datetimes: Iterable of naive ``datetime`` objects, one per
            existing snapshot, in any order. It is not modified.
        now: The ``datetime`` the retention windows are measured from.

    Returns:
        A list of the datetimes whose snapshots should be moved to the
        trash, newest first.
    """
    remaining = sorted(snapshot_datetimes)
    discarded = []

    def keep_newest_per(bucket_of):
        # The newest remaining snapshot survives; every older one that falls
        # into the same bucket is discarded.
        bucket = bucket_of(remaining.pop())
        while remaining and bucket_of(remaining[-1]) == bucket:
            discarded.append(remaining.pop())

    # Keep everything from the last 24 hours.
    one_day_ago = now - timedelta(days=1)
    while remaining and remaining[-1] >= one_day_ago:
        remaining.pop()

    # Keep daily snapshots for the last month.
    today = now.date()
    last_daily_to_keep = today - timedelta(days=30)
    while remaining and remaining[-1].date() >= last_daily_to_keep:
        keep_newest_per(lambda moment: moment.date())

    # Keep weekly snapshots for the last three months. The bucket includes
    # the ISO year so equal week numbers from different years never merge.
    last_weekly_to_keep = today - timedelta(days=90)
    while remaining and remaining[-1].date() >= last_weekly_to_keep:
        keep_newest_per(lambda moment: tuple(moment.isocalendar()[:2]))

    # Keep monthly snapshots forever. The bucket includes the year so that
    # e.g. January 2023 and January 2022 are not treated as the same month
    # when no snapshot exists in between.
    while remaining:
        keep_newest_per(lambda moment: (moment.year, moment.month))

    return discarded


def main():
    """Mirror the root filesystem, snapshot the mirror, and prune snapshots.

    The backup filesystem is remounted read-write for the run and read-only
    again afterwards. A lock file on that filesystem prevents overlapping
    runs.

    Raises:
        SystemExit: If another backup already holds the lock.
        subprocess.CalledProcessError: If an external command fails.
    """
    execute(
        '/usr/bin/mount',
        '-o', 'rw,remount',
        BACKUP_DIRECTORY,
    )

    another_backup_running = False
    try:
        # fail_when_locked=True makes a held lock raise AlreadyLocked right
        # away, which is the exception handled below.
        with portalocker.Lock(
            os.path.join(BACKUP_DIRECTORY, 'lock'),
            fail_when_locked=True,
        ):
            execute(
                '/usr/bin/rsync',
                '/',
                MIRROR_DIRECTORY,
                *OPTIONS,
                *(f'--exclude={excluded_path}' for excluded_path in EXCLUDED),
            )

            # One timestamp names the snapshot and anchors the retention
            # windows, so the two can never disagree.
            now = datetime.now()
            execute(
                '/usr/bin/btrfs',
                'subvolume',
                'snapshot',
                '-r',
                MIRROR_DIRECTORY,
                SNAPSHOT_DIRECTORY / now.strftime(DATETIME_FORMAT),
            )

            snapshot_datetimes = []
            for filename in os.listdir(SNAPSHOT_DIRECTORY):
                try:
                    snapshot_datetimes.append(
                        datetime.strptime(filename, DATETIME_FORMAT)
                    )
                except ValueError:
                    # A stray entry must not abort pruning of real snapshots.
                    print(
                        'Ignoring entry that is not a snapshot:', filename,
                        file=sys.stderr,
                    )

            expired = snapshots_to_trash(snapshot_datetimes, now)
            if expired:
                # Without an existing trash directory shutil.move would
                # rename the first expired snapshot to "trash" itself.
                TRASH_DIRECTORY.mkdir(parents=True, exist_ok=True)
            for expired_datetime in expired:
                snapshot = (
                    SNAPSHOT_DIRECTORY
                    / expired_datetime.strftime(DATETIME_FORMAT)
                )
                # Snapshots are created read-only (-r); make the trashed
                # copy writable again.
                execute(
                    '/usr/bin/btrfs', 'property', 'set', '-ts',
                    snapshot, 'ro', 'false',
                )
                shutil.move(snapshot, TRASH_DIRECTORY)
    except portalocker.AlreadyLocked:
        another_backup_running = True
        sys.exit('Backup already in progress.')
    finally:
        # Never remount read-only underneath a backup that another process
        # is still writing.
        if not another_backup_running:
            execute(
                '/usr/bin/mount',
                '-o', 'ro,remount',
                BACKUP_DIRECTORY,
            )


if __name__ == '__main__':
    main()