Copy content from one file to another but no duplicates bash/python

My file1 looks like this:

############# IMPORTANT ################
########################################

# more comments
# more comments 2

# values 1
#######################
export VALUE_IMPORTANT_1=10 # comments
export VALUE_IMPORTANT_2=40 # comments

# values 2
######################
export SIMPLE_RULE=222
export IMPORTANT_RUN_COMMAND=190

# values 3
#################
export COMMAND_EXEC=3
export NUMBER_STORED=3

My file2 looks like this:

############# IMPORTANT ################
########################################

# more comments
# more comments 2

# values 1
#######################
export VALUE_IMPORTANT_1=5 # comments
export VALUE_IMPORTANT_2=8 # comments
export VALUE_IMPORTANT_3=2 # comments
export VALUE_IMPORTANT_4=2 # comments

# values 2
######################
export SIMPLE_RULE=45
export IMPORTANT_RUN_COMMAND=2
export COMMANDER=45

# values 3
#################
export COMMAND_EXEC=2
export NUMBER_STORED=5
export RUN_ONLY_ONCE=0

# values 4
##############
export NEW_VALUE=5

What I'd like is to copy only the variables from file2 that don't exist in file1, without changing the values in file1 when file2 has a different value for the same variable.

So I'd like the final file to look like this:

############# IMPORTANT ################
########################################

# more comments
# more comments 2

# values 1
#######################
export VALUE_IMPORTANT_1=10 # comments
export VALUE_IMPORTANT_2=40 # comments
export VALUE_IMPORTANT_3=2 # comments
export VALUE_IMPORTANT_4=2 # comments

# values 2
######################
export SIMPLE_RULE=222
export IMPORTANT_RUN_COMMAND=190
export COMMANDER=45

# values 3
#################
export COMMAND_EXEC=3
export NUMBER_STORED=3
export RUN_ONLY_ONCE=0

# values 4
##############
export NEW_VALUE=5

Is there a way to do this in bash or python [preferably bash]?


Solution 1:

This simple script will merge the files, but it won't keep the comments or the order of the variables. This is an example of the output:

export VALUE_IMPORTANT_1=10 
export VALUE_IMPORTANT_2=40 
export SIMPLE_RULE=222
export IMPORTANT_RUN_COMMAND=190
export COMMAND_EXEC=3
export NUMBER_STORED=3
export VALUE_IMPORTANT_3=2 
export VALUE_IMPORTANT_4=2 
export COMMANDER=45
export RUN_ONLY_ONCE=0
export NEW_VALUE=5

Script:

def get_file_values(file):
    """Parse the ``export NAME=value`` lines of a file into a dict.

    Args:
        file: Path of the file to read.

    Returns:
        A dict mapping each variable name to its value (both as strings,
        with surrounding whitespace removed), in the order the variables
        appear in the file. If a name occurs more than once, the last
        occurrence wins.

    Comment lines (including indented ones), blank lines and lines that do
    not contain an ``=`` are skipped instead of raising an error.
    """
    variables = {}
    with open(file) as f:
        for line in f:
            # Everything after the first "#" is treated as a comment
            statement = line.split("#", 1)[0].strip()
            # Nothing left, or not an assignment: ignore the line
            if "=" not in statement:
                continue
            # Drop only a leading "export " keyword, never text inside the value
            if statement.startswith("export "):
                statement = statement[len("export "):]
            # Split on the first "=" only, so values may themselves contain "="
            key, value = statement.split("=", 1)
            variables[key.strip()] = value.strip()
    return variables


# Parse both inputs into {name: value} dictionaries
file_1_vars = get_file_values("file1")
file_2_vars = get_file_values("file2")
# Bring over only the names file1 does not define; file1's values are kept
keys_1 = file_1_vars.keys()
file_1_vars.update(
    {key: value for key, value in file_2_vars.items() if key not in keys_1}
)

# Emit one export statement per variable, joined by newlines
export_lines = [f"export {key}={value}" for key, value in file_1_vars.items()]
with open("file3", "w") as out:
    out.write("\n".join(export_lines))

EDIT: Keep variable order and block division

The code looks a bit messy, but it does the job; see its output below.

def get_file_values(file):
    """Parse a file into its named blocks of ``export NAME=value`` variables.

    A block is recognised by three consecutive lines: a title line starting
    with ``"# "``, a separator line starting with ``"##"``, and a first line
    starting with ``"export"``. The block continues for as long as the
    following lines start with ``"export"``.

    Args:
        file: Path of the file to read.

    Returns:
        A dict keyed by block title whose values are dicts mapping variable
        names to values (strings, stripped of whitespace), e.g.:
        {'values 1': {'VALUE_IMPORTANT_1': '10', 'VALUE_IMPORTANT_2': '40'},
         'values 2': {'SIMPLE_RULE': '222', 'IMPORTANT_RUN_COMMAND': '190'},
         'values 3': {'COMMAND_EXEC': '3', 'NUMBER_STORED': '3'}}

    ``export`` lines without an ``=`` are skipped. If the same block title
    appears more than once, its variables are accumulated in one entry.
    """
    with open(file) as f:
        f_lines = f.readlines()

    max_f_lines = len(f_lines)
    i = 0
    var_blocks = {}
    while i < max_f_lines - 2:
        # A block header is: "# title", then "##...", then the first export
        is_block_start = (
            f_lines[i].startswith("# ")
            and f_lines[i + 1].startswith("##")
            and f_lines[i + 2].startswith("export")
        )
        if not is_block_start:
            i += 1
            continue

        block = f_lines[i][2:].strip()
        # setdefault keeps variables already collected under a repeated title
        block_vars = var_blocks.setdefault(block, {})
        i += 2  # jump to the first export line of the block
        while i < max_f_lines and f_lines[i].startswith("export"):
            # Remove the newline and any trailing comment
            statement = f_lines[i].strip().split("#", 1)[0].strip()
            # Drop only a leading "export " keyword, never text inside the value
            if statement.startswith("export "):
                statement = statement[len("export "):]
            # Partition on the first "=" so values may themselves contain "="
            key, separator, value = statement.partition("=")
            if separator:  # ignore export lines that carry no assignment
                block_vars[key.strip()] = value.strip()
            i += 1
    return var_blocks


# Parse both files into {block title: {name: value}} dictionaries
file_1_vars = get_file_values("file1")
file_2_vars = get_file_values("file2")
# Block titles of each file
keys_1 = file_1_vars.keys()
keys_2 = file_2_vars.keys()

# Start from file1's blocks, in file1's order
new_file_vars = {}
for key in keys_1:
    # Copy file1's variables first so that their values take precedence
    merged = dict(file_1_vars[key])
    # Append the variables that only file2 defines for this block
    for k, v in file_2_vars.get(key, {}).items():
        merged.setdefault(k, v)
    new_file_vars[key] = merged

# Then append the blocks that exist only in file2
for key in keys_2:
    if key not in keys_1:
        new_file_vars[key] = dict(file_2_vars[key])

# Write the merged result: fixed banner, then one section per block
with open("file3", "w") as out:
    out.write("############# IMPORTANT ################\n"
              "########################################\n\n")
    for key, block_vars in new_file_vars.items():
        out.write(f"# {key}\n{'#' * 40}\n")
        out.writelines(f"export {k}={v}\n" for k, v in block_vars.items())
        out.write("\n")  # Blank line separating this block from the next

Output file:

############# IMPORTANT ################
########################################

# values 1
########################################
export VALUE_IMPORTANT_1=10
export VALUE_IMPORTANT_2=40
export VALUE_IMPORTANT_3=2
export VALUE_IMPORTANT_4=2

# values 2
########################################
export SIMPLE_RULE=222
export IMPORTANT_RUN_COMMAND=190
export COMMANDER=45

# values 3
########################################
export COMMAND_EXEC=3
export NUMBER_STORED=3
export RUN_ONLY_ONCE=0

# values 4
########################################
export NEW_VALUE=5