Cleanup Storage

Updated May 05, 2023 12:59

Files uploaded to ftrack are not deleted when the Component/ComponentLocation is deleted from the web interface, whether directly or indirectly by deleting a parent object. Please note that the Python API client can be used to delete the actual files when removing components from the location: https://ftrack-python-api.rtd.ftrack.com/en/stable/locations/tutorial.html

To delete files that have been left behind after deleting entities in the web interface, follow the procedure below:

Locate files that are no longer connected to an entity in ftrack using the example script below, and move the “orphaned” files to a separate directory, which you may delete once you have validated the result.
The script uses the API operation “_validate_entity_keys”, which, given an `entity_type` and a list of `entity_keys`, returns the `entity_keys` that are present in the database.
This operation requires Administrative access and is used to guarantee that no entities are filtered out due to the user missing access to a project.

Requirements

ftrack version 4.7.2 or later.
A Python environment with `ftrack_python_api` installed and access to the ftrack storage where the server stores uploaded files.

Example script usage



usage: cleanup.py [-h] [--server_url SERVER_URL] [--api_user API_USER] [--api_key API_KEY] [--dry_run] [--destination DESTINATION] [PATHS ...]

positional arguments:
  PATHS                 Paths to scan.

optional arguments:
  -h, --help            show this help message and exit
  --server_url SERVER_URL
                        The ftrack server to connect to. Defaults to the FTRACK_SERVER_URL environment variable.
  --api_user API_USER   The ftrack user to operate as, requires `Administrative` access. Defaults to the FTRACK_API_USER environment variable.
  --api_key API_KEY     The ftrack api key. Defaults to the FTRACK_APIKEY environment variable.
  --dry_run             If used together with destination no files will be moved.
  --destination DESTINATION
                        A directory to move orphaned files to.

Example script

import os
import re
import uuid
import shutil
import logging
import argparse
import itertools
import collections
import configparser

import ftrack_api

# Module-level logger used by the helpers below.
logger = logging.getLogger(__name__)

# Number of candidate files collected before they are validated against the
# server in a single request.
CHUNK_SIZE = 100

# Pattern for a 36 character, dash separated identifier (8-4-4-4-12). Note
# that it accepts any lowercase letter or digit, not only hexadecimal ones.
UUID_REGEX = re.compile(
    '[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z][0-9a-z]{3}-[0-9a-z]{12}'
)

# A matched file: `root` joined with `path` is the file on disk and `uuid` is
# the identifier reconstructed from the trailing components of that path.
UUID_PATH = collections.namedtuple('UUID_PATH', ('root', 'path', 'uuid'))


def _extract_uuid(path):
    '''Return a `UUID_PATH` when the tail of *path* spells out a uuid.

    The components of *path* are concatenated, starting from the last one
    and working backwards, until at least 36 characters have been
    collected. If the collected string matches `UUID_REGEX` the path is
    split in two:

    * ``root`` - the leading components that are not part of the uuid.
    * ``path`` - the trailing components that make up the uuid.

    Joining ``root`` and ``path`` with `os.path.join` gives back *path*.

    Return ``None`` when no uuid could be extracted.

    '''
    _parts = path.split(os.sep)

    _uuid = ''
    # Count the consumed components explicitly. Relying on the loop index
    # is only correct when the loop is left through ``break`` and gives an
    # off-by-one split when every component was consumed.
    consumed = 0
    for part in reversed(_parts):
        if len(_uuid) >= 36:
            break

        _uuid = part + _uuid
        consumed += 1

    if not UUID_REGEX.match(_uuid):
        return None

    split_at = len(_parts) - consumed
    root = os.sep.join(_parts[:split_at])
    if split_at and not root:
        # The only component left is the empty string in front of a leading
        # separator, i.e. the uuid directories sit directly below the
        # filesystem root. Keep the separator so that joining `root` and
        # `path` still yields an absolute path.
        root = os.sep

    return UUID_PATH(
        root=root,
        path=os.sep.join(_parts[split_at:]),
        uuid=_uuid
    )


def _walk_directories(_path):
    '''Yield a `UUID_PATH` for every file below *_path* that looks like a uuid.

    *_path* is resolved with `os.path.realpath` before it is walked. A path
    that does not exist is reported through the module logger and produces
    no results.

    '''
    if os.path.exists(_path):
        resolved_root = os.path.realpath(_path)

        for directory, _subdirs, filenames in os.walk(resolved_root):
            candidates = (
                _extract_uuid(os.path.join(directory, filename))
                for filename in filenames
            )

            # Files whose path does not spell out a uuid yield nothing.
            for candidate in candidates:
                if candidate:
                    yield candidate
    else:
        logger.error('Path not exist.. skipping.  "{0}"'.format(_path))


def _valid_components(uuids, session):
    '''Yield the entries of *uuids* that have no Component on the server.

    Despite its name this generator yields the entries that did NOT
    validate, i.e. the orphaned files. The path of each such file is also
    printed to stdout.

    *uuids* is a list of `UUID_PATH` like objects exposing ``root``,
    ``path`` and ``uuid``. *session* must provide a ``call`` method that
    accepts a list of operations; the ``_validate_entity_keys`` operation
    is used and the ``data`` of its result is treated as the collection of
    keys that exist.

    '''
    if not uuids:
        # Nothing to validate, so skip the server round trip instead of
        # sending a request with an empty key list.
        return

    # Named `candidate` rather than `uuid` to avoid shadowing the imported
    # `uuid` module.
    candidate_keys = [candidate.uuid for candidate in uuids]

    existing_keys = session.call([
        {
            'action': '_validate_entity_keys',
            'entity_type': 'Component',
            'entity_keys': candidate_keys
        }
    ])[0]['data']

    for candidate in uuids:
        if candidate.uuid in existing_keys:
            continue

        print(
            os.path.join(candidate.root, candidate.path)
        )

        yield candidate

def _build_parser():
    '''Return the argument parser describing the command line interface.'''
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--server_url',
        default=os.environ.get('FTRACK_SERVER_URL'),
        help=(
            'The ftrack server to connect to. Defaults to the '
            'FTRACK_SERVER_URL environment variable.'
        )
    )

    parser.add_argument(
        '--api_user',
        default=os.environ.get('FTRACK_API_USER'),
        help=(
            'The ftrack user to operate as, requires `Administrative` access. '
            'Defaults to the FTRACK_API_USER environment variable.'
        )
    )

    parser.add_argument(
        '--api_key',
        default=os.environ.get('FTRACK_APIKEY'),
        help=(
            'The ftrack api key. Defaults to the FTRACK_APIKEY environment '
            'variable.'
        )
    )

    parser.add_argument(
        '--dry_run',
        default=False,
        action='store_true',
        help=(
            'If used together with destination no files will be moved.'
        )
    )

    parser.add_argument(
        '--destination',
        help='A directory to move orphaned files to.'
    )

    parser.add_argument(
        'PATHS',
        nargs='*',
        help='Paths to scan.'
    )

    return parser


def _find_orphaned_files(paths, session):
    '''Return the uuid named files below *paths* unknown to the server.

    Candidates are collected while walking *paths* and validated through
    *session* in batches of `CHUNK_SIZE`.

    '''
    orphaned_files = []
    chunk = []

    for potential in itertools.chain.from_iterable(
        _walk_directories(path) for path in paths
    ):
        chunk.append(potential)

        if len(chunk) >= CHUNK_SIZE:
            orphaned_files.extend(
                _valid_components(chunk, session)
            )

            chunk = []

    # Validate the remainder. Skipped when empty so that no request with an
    # empty key list is sent to the server.
    if chunk:
        orphaned_files.extend(
            _valid_components(chunk, session)
        )

    return orphaned_files


def _move_orphaned_files(orphaned_files, destination, dry_run):
    '''Move *orphaned_files* below *destination*, keeping their uuid path.

    When *dry_run* is true nothing is touched on disk, but the intended
    moves are still logged.

    '''
    for orphaned_file in orphaned_files:
        src_path = os.path.join(
            orphaned_file.root, orphaned_file.path
        )

        dst_path = os.path.join(
            destination, orphaned_file.path
        )

        if not dry_run:
            # Create the parent folder, not `dst_path` itself. Creating a
            # directory at `dst_path` would make `shutil.move` place the
            # file inside it under an extra nesting level.
            dst_folder = os.path.dirname(dst_path)
            if not os.path.isdir(dst_folder):
                os.makedirs(dst_folder)

            shutil.move(
                src_path, dst_path
            )

        logger.info(
            'moved {0} -> {1} - dry run : {2}'.format(
                src_path, dst_path, dry_run
            )
        )


def main():
    '''Scan the given paths for orphaned files and optionally move them.

    Command line arguments are parsed, an `ftrack_api.Session` is created
    from them and every uuid named file found below the given paths is
    validated against the server. Files without a matching Component are
    printed to stdout and, when ``--destination`` is given, moved there
    unless ``--dry_run`` is set.

    Raise `RuntimeError` if no path was given and `IOError` if the
    destination does not exist or is not a directory.

    '''
    args = _build_parser().parse_args()

    if not args.PATHS:
        raise RuntimeError(
            'At least one path must be provided!'
        )

    # Validate the destination up front so that a mistyped directory fails
    # immediately rather than after the whole storage has been scanned.
    if args.destination:
        if not os.path.exists(args.destination):
            raise IOError(
                'Could not locate destination: {0}'.format(
                    args.destination
                )
            )

        if not os.path.isdir(args.destination):
            raise IOError(
                'Path is not a directory: {0}'.format(
                    args.destination
                )
            )

    logging.basicConfig(
        level=logging.INFO
    )

    logger.info(
        'Outputting files that could not be matched '
        'against a component to stdout'
    )

    session = ftrack_api.Session(
        server_url=args.server_url,
        api_user=args.api_user,
        api_key=args.api_key
    )

    orphaned_files = _find_orphaned_files(args.PATHS, session)

    if args.destination:
        _move_orphaned_files(
            orphaned_files, args.destination, args.dry_run
        )


# Run the cleanup only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()

Related to

on-prem