#!/usr/bin/env python
"""
open/dulcinea/bin/gc_stored_files.py

Delete files that are not referenced by StoredFile instances.
"""
import errno
import glob
import os
import sys
import time

from durus.storage import gen_durus_id_class
from qp.lib.site import Site

# New files are not deleted, since someone could be in the process of
# uploading them or attaching them in another transaction.
MIN_AGE = 60*60*24

# Running total, in bytes, of the files removed by maybe_remove().
size = 0


def _is_missing(error):
    """Return True if `error` is an OSError reporting a nonexistent path."""
    return error.errno == errno.ENOENT


def maybe_remove(filename):
    """Delete `filename` if it was last modified more than MIN_AGE seconds ago.

    The name of every file actually removed is printed and its size is
    added to the module-level `size` counter.  A file that disappears
    before it can be examined or unlinked is silently skipped; any other
    OSError propagates to the caller.
    """
    global size
    try:
        # One stat call supplies both the age and the size.
        info = os.stat(filename)
    except OSError as error:
        if _is_missing(error):
            return
        raise
    if (time.time() - info.st_mtime) <= MIN_AGE:
        return
    try:
        os.unlink(filename)
    except OSError as error:
        # Someone else removed the file first: nothing left to collect.
        if _is_missing(error):
            return
        raise
    print(filename)
    size += info.st_size


def main(prog, args):
    """Remove unreferenced stored files for the site named by `args[0]`.

    `prog` is the program name used in the usage message and `args` is
    the list of remaining command-line arguments, which must contain
    exactly one element; otherwise the process exits with the usage
    message.
    """
    usage = "usage: %s [site]" % prog
    if len(args) != 1:
        sys.exit(usage)
    site = Site(args[0])
    connection = site.make_file_connection(readonly=True)

    # Collect the full path of every StoredFile found in the storage.
    used = set()
    for durus_id, class_name in gen_durus_id_class(connection.get_storage(),
                                                   'StoredFile'):
        stored_file = connection.get(durus_id)
        used.add(stored_file.get_full_path())

    root_directory = site.get('file_store')
    print('root_directory %s' % root_directory)

    # Entries two directory levels below the root (pattern ?/??/*) are
    # candidates for removal unless a StoredFile refers to them.
    for filename in glob.glob(os.path.join(root_directory, '?/??/*')):
        if filename not in used:
            maybe_remove(filename)

    # Everything matching tmp/upload.* is a candidate regardless of
    # references (presumably leftovers of uploads -- TODO confirm).
    for filename in glob.glob(os.path.join(root_directory, 'tmp/upload.*')):
        maybe_remove(filename)

    print("%s MB" % (size / (1024*1024)))


if __name__ == '__main__':
    main(sys.argv[0], sys.argv[1:])