In [1]:
import os, glob

In [2]:
# Set the location of the log files on the remote host and where to store them locally.
# hostname must be an ssh host alias defined in .ssh/config.
# NOTE(review): LOCAL_LOGS is a user-specific absolute path -- adjust it for your machine.
REMOTE_LOGS = '/global/projecta/projectdirs/matgen/builder_logs/'
LOCAL_LOGS = '/Users/aykol/tmp_builderlogs/'
hostname = 'matgen'

In [3]:
def get_logs(sync_logs, hostname, remote_logdir, local_logdir):
    """
    Rsync builder logs from a remote host into a directory on the local machine.

    Only files matching the glob patterns of the requested log types are
    transferred; everything else in the remote directory is excluded.

    Args:
        - sync_logs (str) or (list/tuple/set of str): types of logs to sync to
                  the local filesystem. Options are 'phasediagram', 'substrate',
                  'reject', 'electronic', 'material', 'diffraction',
                  'elasticity' and 'initializer'.
                  If only 'all' is given, it will sync all possible types of logs.
        - hostname: ssh host alias defined in .ssh/config
        - remote_logdir: path to the log dir on the remote host
        - local_logdir: path to the log dir on the current filesystem.
                  Will be created if it does not exist.

    Raises:
        ValueError: if a requested log type is not a valid option. This is
            checked before the local directory is created or rsync is run.
        OSError: if the rsync executable cannot be started.

    A non-zero rsync exit status is reported with a warning rather than an
    exception, so a partial transfer does not abort the notebook.
    """
    # Imported locally so the function does not depend on the notebook's import cell.
    import subprocess
    import warnings

    log_options = {"material": "*IncrMaterialBuild*", "substrate": "*substrate*",
                   "elasticity": "*Elasticity*", "phasediagram": "*IncrPhaseDiagram*",
                   "diffraction": "*IncrDiffract*", "electronic": "*IncrElectronicS*",
                   "initializer": "*Incr_initializer*", "reject": "*Incr_reject*"
                  }

    if sync_logs == 'all':
        # list(...) rather than .keys(): on Python 3 .keys() is a view, not a list.
        sync_logs = list(log_options)
    elif not isinstance(sync_logs, (list, tuple, set)):
        sync_logs = [sync_logs]

    # The command is an argument list executed without a shell, so the
    # patterns need no quoting and paths/hostnames are never shell-interpreted.
    sync_command = ['rsync', '-av']
    for log_type in sync_logs:
        if log_type not in log_options:
            raise ValueError("{} is not a valid builder log. Sync stopped.".format(log_type))
        sync_command.append('--include={}'.format(log_options[log_type]))
    # Includes must precede the catch-all exclude: rsync applies the first matching rule.
    sync_command.append('--exclude=*')
    sync_command.append("{}:{}".format(hostname, remote_logdir))

    if not os.path.exists(local_logdir):
        os.makedirs(local_logdir)
    sync_command.append(local_logdir)

    status = subprocess.call(sync_command)
    if status != 0:
        warnings.warn("rsync exited with status {}; local logs may be incomplete.".format(status))

In [4]:
def parse_logs(parse_logs, local_logdir):
    """
    Extract ERROR messages from locally stored builder logs.

    Each log line is split on whitespace. A line is kept only if its third
    token is "ERROR"; its first two tokens are joined with '_' to form the
    timestamp. The tokens after "ERROR" are split into a grouping key and a
    message at the first token containing 'mp-' or 'task'. If no such token
    exists (or it is the very first token after "ERROR"), the first three
    tokens after "ERROR" become the key and the remainder the message.

    Args:
        - parse_logs (str) or (list/tuple/set of str): types of logs to parse.
                  Options are 'phasediagram', 'substrate', 'reject',
                  'electronic', 'material', 'diffraction', 'elasticity' and
                  'initializer'. If only 'all' is given, every type is parsed.
        - local_logdir: directory containing the log files.

    Returns:
        dict: {log_type: {key: [[message, timestamp], ...]}}. A log type with
        no matching files or no ERROR lines maps to an empty dict.

    Raises:
        KeyError: if a requested log type is not a valid option.
    """
    log_options = {"material": "*IncrMaterialBuild*", "substrate": "*substrate*",
                   "elasticity": "*Elasticity*", "phasediagram": "*IncrPhaseDiagram*",
                   "diffraction": "*IncrDiffract*", "electronic": "*IncrElectronicS*",
                   "initializer": "*Incr_initializer*", "reject": "*Incr_reject*"
                  }
    if parse_logs == 'all':
        # list(...) rather than .keys(): on Python 3 .keys() is a view, not a list.
        parse_logs = list(log_options)
    elif not isinstance(parse_logs, (list, tuple, set)):
        parse_logs = [parse_logs]

    parsed_messages = {}

    for log_type in parse_logs:
        messages_by_key = parsed_messages[log_type] = {}
        pattern = os.path.join(local_logdir, log_options[log_type])
        # Sorted so the order of collected entries is reproducible between runs.
        for fp in sorted(glob.glob(pattern)):
            with open(fp, 'r') as f:
                # Iterate lazily; log files can hold hundreds of thousands of lines.
                for line in f:
                    tokens = line.split()
                    # Blank or short lines (e.g. traceback continuations) have no level token.
                    if len(tokens) < 3 or tokens[2] != "ERROR":
                        continue
                    timestamp = '_'.join(tokens[:2])

                    split_at = None
                    for z in range(3, len(tokens)):
                        if ('mp-' in tokens[z]) or ('task' in tokens[z]):
                            split_at = z
                            break
                    # No identifier token, or it directly follows "ERROR" (which
                    # would give an empty key): fall back to a three-token key.
                    if split_at is None or split_at == 3:
                        split_at = 6

                    key = '_'.join(tokens[3:split_at])
                    message = '_'.join(tokens[split_at:])
                    messages_by_key.setdefault(key, []).append([message, timestamp])
    return parsed_messages

In [5]:
# Sync the specified types of logs; 'all' syncs every type.
# This will take a while when done for the first time.
get_logs('all', hostname, REMOTE_LOGS, LOCAL_LOGS)

In [6]:
# The parser currently extracts ERROR lines only. More detailed parsing is underway.
# Like get_logs, it takes a particular list of log types, or 'all', as input.
p = parse_logs('all', LOCAL_LOGS)

In [7]:
# Log types present in the parsed result (one top-level key per requested type).
p.keys()


Out[7]:
['phasediagram',
 'substrate',
 'material',
 'diffraction',
 'elasticity',
 'initializer',
 'reject',
 'electronic']

In [8]:
# For every log type, list each error key together with how many entries it collected.
for log_type, errors_by_key in p.items():
    for error_key, entries in errors_by_key.items():
        print(log_type, error_key, len(entries))


('phasediagram', 'Unable_to_process', 9402)
('diffraction', 'Unable_to_import', 182259)
('initializer', 'Unable_to_process', 162880)
('electronic', 'Unable_to_insert_electronic_structure_for', 1436)
('electronic', 'Unable_to_get_electronic_structure_for', 433170)

In [9]:
# Last parsed [message, timestamp] entry under the phase-diagram 'Unable_to_process' key.
print(p['phasediagram']['Unable_to_process'][-1])


['Mg-Hg-Pm:_None', '2016-09-09_12:51:54,500']

In [10]:
# Last parsed [message, timestamp] entry under the diffraction 'Unable_to_import' key.
print(p['diffraction']['Unable_to_import'][-1])


['task_id_mp-601438_due_to_SymmetryFinder', '2017-02-05_06:08:07,665']

In [11]:
# Last parsed [message, timestamp] entry for electronic-structure insert failures.
print(p['electronic']['Unable_to_insert_electronic_structure_for'][-1])


["task_id_mp-867512_for_GGA_Uniform_v2_due_to_error_'BSON_document_too_large_(181426513_bytes)_-_the_connected_server_supports_BSON_document_sizes_up_to_16793598_bytes.'", '2017-02-05_07:05:30,229']

In [12]:
# Last parsed [message, timestamp] entry for electronic-structure retrieval failures.
print(p['electronic']['Unable_to_get_electronic_structure_for'][-1])


["task_id_mp-988137_for_GGA_Uniform_v2_due_to_error_''dos_fs_id''", '2017-02-05_07:05:02,975']

In [ ]: