#!/usr/bin/env python
 
 
 
from __future__ import print_function
 
 
 
desc = '''Generate the difference of two YAML files into a new YAML file (works on
 
pair of directories too).  A new attribute 'Added' is set to True or False
 
depending whether the entry is added or removed from the first input to the
 
next.
 
 
 
The tools requires PyYAML.'''
 
 
 
import yaml
 
# Try to use the C parser.
 
try:
 
    from yaml import CLoader as Loader
 
except ImportError:
 
    from yaml import Loader
 
 
 
import optrecord
 
import argparse
 
from collections import defaultdict
 
 
 
if __name__ == '__main__':
 
    parser = argparse.ArgumentParser(description=desc)
 
    parser.add_argument(
 
        'yaml_dir_or_file_1',
 
        help='An optimization record file or a directory searched for optimization '
 
             'record files that are used as the old version for the comparison')
 
    parser.add_argument(
 
        'yaml_dir_or_file_2',
 
        help='An optimization record file or a directory searched for optimization '
 
             'record files that are used as the new version for the comparison')
 
    parser.add_argument(
 
        '--jobs',
 
        '-j',
 
        default=None,
 
        type=int,
 
        help='Max job count (defaults to %(default)s, the current CPU count)')
 
    parser.add_argument(
 
        '--max-size',
 
        '-m',
 
        default=100000,
 
        type=int,
 
        help='Maximum number of remarks stored in an output file')
 
    parser.add_argument(
 
        '--no-progress-indicator',
 
        '-n',
 
        action='store_true',
 
        default=False,
 
        help='Do not display any indicator of how many YAML files were read.')
 
    parser.add_argument('--output', '-o', default='diff{}.opt.yaml')
 
    args = parser.parse_args()
 
 
 
    files1 = optrecord.find_opt_files(args.yaml_dir_or_file_1)
 
    files2 = optrecord.find_opt_files(args.yaml_dir_or_file_2)
 
 
 
    print_progress = not args.no_progress_indicator
 
    all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress)
 
    all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress)
 
 
 
    added = set(all_remarks2.values()) - set(all_remarks1.values())
 
    removed = set(all_remarks1.values()) - set(all_remarks2.values())
 
 
 
    for r in added:
 
        r.Added = True
 
    for r in removed:
 
        r.Added = False
 
 
 
    result = list(added | removed)
 
    for r in result:
 
        r.recover_yaml_structure()
 
 
 
    for i in range(0, len(result), args.max_size):
 
        with open(args.output.format(i / args.max_size), 'w') as stream:
 
            yaml.dump_all(result[i:i + args.max_size], stream)