#!/usr/bin/env python
"""
Python module to check the state of files using gfal and return their locality
# ===================================================================== #
# author: Ron Trompert <ron.trompert@surfsara.nl> -- SURFsara #
# helpdesk: Grid Services <grid.support@surfsara.nl> -- SURFsara #
# #
# usage: python state.py #
# description: #
# Display the status of each file listed in "files". The paths #
# should have the '/pnfs/...' format. Script output: #
# ONLINE: means that the file is only on disk #
# NEARLINE: means that the file in only on tape #
# ONLINE_AND_NEARLINE: means that the file is on disk #
# and tape #
# ===================================================================== #
"""
from __future__ import print_function
import sys
from collections import Counter
try:
import gfal2 as gfal # pylint: disable=import-error
gfal.set_verbose(gfal.verbose_level.warning)
except ImportError:
print("GFAL CANNOT BE IMPORTED")
from GRID_LRT.Staging.srmlist import srmlist
from GRID_LRT.auth import grid_credentials
[docs]def main(filename, verbose=True):
"""Main function that takes in a file name and returns a list of tuples of
filenames and staging statuses. The input file can be both srm:// and gsiftp:// links.
Args:
:param filename: The filename holding the links whose have to be checked
:type filename: str
:param verbose: A toggle to turn off printing out the status of each file.
True by default will print everything out
:type verbose: bool
Returns:
:ret results: A list of tuples containing the file_name and the State
Usage:
>>> from GRID_LRT.Staging import state_all
>>> filename='/home/apmechev/GRIDTOOLS/GRID_LRT/GRID_LRT/tests/srm_50_sara.txt'
>>> results=state_all.main(filename)
>>> results=state_all.main(filename, verbose=False)
>>> results[0]
('L229507_SB150_uv.dppp.MS_f6fc7fc5.tar', 'ONLINE_AND_NEARLINE')
"""
grid_credentials.grid_credentials_enabled() # Check if credenitals enabled
s_list = load_file_into_srmlist(filename)
print("files are at "+s_list.lta_location)
results = []
for i in s_list.gfal_links():
results.append(check_status(i, verbose))
return results
[docs]def load_file_into_srmlist(filename):
"""Helper function that loads a file into an srmlist object (will be
added to the actual srmlist class later)
"""
s_list = srmlist()
for i in open(filename, 'r').read().split():
s_list.append(i)
return s_list
[docs]def check_status(surl_link, verbose=True):
""" Obtain the status of a file from the given surl.
Args:
:param surl: the SURL pointing to the file.
:type surl: str
:parame verbose: print the status to the terminal.
:type verbose: bool
Returns:
:(filename, status): a tuple containing the file and status as
stored in the 'user.status' attribute.
"""
context = gfal.creat_context()
status = context.getxattr(surl_link, 'user.status')
filename = surl_link.split('/')[-1]
if status == 'ONLINE_AND_NEARLINE' or status == 'ONLINE':
color = "\033[32m"
else:
color = "\033[31m"
if verbose:
print('{:s} is {:s}{:s}\033[0m'.format(filename, color, status))
return (filename, status.strip())
[docs]def percent_staged(results):
"""Takes list of tuples of (srm, status) and counts the percentage of files
that are staged (0->1) and retunrs this percentage as float
Usage:
>>> from GRID_LRT.Staging import state_all
>>> filename='/home/apmechev/GRIDTOOLS/GRID_LRT/GRID_LRT/tests/srm_50_sara.txt'
>>> results=state_all.main(filename, verbose=False)
>>> state_all.percent_staged(results)
"""
total_files = len(results)
counts = Counter(x[1] for x in results)
staged = dict(counts).get('ONLINE_AND_NEARLINE', 0)+dict(counts).get('ONLINE', 0)
unstaged = dict(counts).get('NEARLINE', 0)
# assert staged + unstaged == total_files
print(str(float(staged/(staged + unstaged))*100)+" percent of files staged")
return float(staged/total_files)
[docs]def check_status_file(surl_list): # TODO: implement
"""Unimplemented task"""
print(surl_list)
if __name__ == '__main__':
INPUT = sys.argv[1]
if (INPUT.lower()).startswith('srm://'):
# Single file.
check_status(INPUT)
else:
# Assume a file list.
check_status_file(INPUT)