Flexibly appending new data to YAML files

Solution 1:

You can use PyYAML's low-level event interface. Assuming you have an input YAML file and want to write the modifications to an output YAML file, you can write a function that goes through PyYAML's generated event stream and inserts the requested additional values at the specified locations:

import yaml
from yaml.events import *

class AppendableEvents:
  """A batch of YAML events together with the location they belong to.

  `path` lists, outermost first, the mapping keys / sequence indexes that
  lead to the target collection; `events` holds the events to insert there.
  Both are stored exactly as passed in.
  """

  def __init__(self, path, events):
    self.events = events
    self.path = path

  def correct_position(self, levels):
    """Return True when `levels` matches `path` entry for entry.

    `levels` is a sequence of objects exposing a `cur_id` attribute. The
    match requires the same length and, at every depth, a `cur_id` equal
    to the corresponding path entry.
    """
    if len(levels) != len(self.path):
      return False
    # Same length is guaranteed here, so zip() drops nothing.
    differs = any(
      wanted != level.cur_id for wanted, level in zip(self.path, levels))
    return not differs

class Level:
  """Position tracker for one currently open collection.

  `mode` is stored as given. `cur_id` starts at -1 when the mode is
  "item" and at the empty string for any other mode.
  """

  def __init__(self, mode):
    if mode == "item":
      initial_id = -1
    else:
      initial_id = ""
    self.cur_id = initial_id
    self.mode = mode

def append_to_yaml(yamlFile, targetFile, items):
  """Copy a YAML file event by event, appending extra events on the way.

  The input is parsed into PyYAML events. While streaming through them a
  stack of `Level` objects records where we are: for a mapping the key
  currently being processed, for a sequence the index of the current item.
  Whenever a collection ends, every entry of `items` whose path equals the
  current position gets its events inserted just before that collection's
  end event, i.e. the new content is appended to the collection.

  Args:
    yamlFile: path of the YAML file to read.
    targetFile: path of the file the modified event stream is emitted to.
    items: iterable of objects with `correct_position(levels)` and an
      `events` list (see `AppendableEvents`).

  Raises:
    ValueError: if a mapping or sequence is used as a mapping key; only
      scalar keys can be addressed by a path.
  """
  events = []
  levels = []
  with open(yamlFile, 'r') as handle:
    for event in yaml.parse(handle):
      if isinstance(event, CollectionStartEvent):
        if levels:
          parent = levels[-1]
          if parent.mode == "key":
            # we can only handle scalar keys
            raise ValueError("encountered complex key!")
          if parent.mode == "value":
            # the nested collection is this key's value; next comes a key
            parent.mode = "key"
          else:
            # mode == "item": a nested collection is a sequence item too,
            # so it must advance the index just like a scalar item does
            parent.cur_id += 1
        if isinstance(event, MappingStartEvent):
          levels.append(Level("key"))
        else: # SequenceStartEvent
          levels.append(Level("item"))
      elif isinstance(event, (ScalarEvent, AliasEvent)):
        # An alias occupies a single node slot exactly like a scalar (for
        # example the value of a merge key `<<: *base`), so it has to take
        # part in the key/value alternation and in item counting.
        if levels:
          current = levels[-1]
          if current.mode == "item":
            current.cur_id += 1
          elif current.mode == "key":
            # An alias used as key has no name of its own; None never
            # equals a string or index in a path, so it is unaddressable.
            current.cur_id = \
              event.value if isinstance(event, ScalarEvent) else None
            current.mode = "value"
          else: # mode == "value"
            current.mode = "key"
      elif isinstance(event, CollectionEndEvent):
        # After popping, `levels` is the path to the collection that is
        # ending; here we check whether we want to append anything to it.
        levels.pop()
        for item in items:
          if item.correct_position(levels):
            events.extend(item.events)
      # Stream and document events need no tracking; like every other
      # event they are passed through unchanged.
      events.append(event)
  with open(targetFile, mode="w") as handle:
    yaml.emit(events, handle)

To use it, you must provide the additional content you want to append as a list of YAML events, and specify the desired position as a list of keys (or sequence indexes):

def key(name):
  """Build a scalar event carrying `name`, for use as a mapping key.

  No anchor and no tag are set; both implicit flags are True.
  """
  return ScalarEvent(anchor=None, tag=None, implicit=(True, True), value=name)

def literal_value(content):
  """Build a scalar event for `content` that requests the "|" style.

  No anchor and no tag are set; the implicit pair is (False, True).
  """
  return ScalarEvent(
    anchor=None, tag=None, implicit=(False, True), value=content, style="|")

# Example: read file1.yaml, append two key/value pairs and write the result
# to file1_modified.yaml. Each AppendableEvents pairs a path (list of keys)
# with the events to insert at the end of the collection found at that path;
# here that is one key event followed by one literal-style value event.
append_to_yaml("file1.yaml", "file1_modified.yaml", [
  # inserted at the end of the collection reached via test3 -> service1
  AppendableEvents(["test3", "service1"], [
    key("my-appended-key"),
    literal_value("\"my appended value\"\n")]),
  # inserted at the end of the collection stored under test3 itself
  AppendableEvents(["test3"], [
    key("my_second_appended_key"),
    literal_value("\"my second appended value\"\n")])])

This code correctly transforms your file1.yaml into the given modified file. In general, this also allows you to append complex (sequence or mapping) nodes. Here's a basic example of how to do that:

def seq(*scalarValues):
  """Return the event list for a sequence holding the given scalars.

  The list consists of a sequence start event, one scalar event per
  argument (in order), and a sequence end event.
  """
  opening = SequenceStartEvent(anchor=None, tag=None, implicit=True)
  entries = [
    ScalarEvent(anchor=None, tag=None, implicit=(True, False), value=text)
    for text in scalarValues]
  return [opening, *entries, SequenceEndEvent()]

def map(*scalarValues):
  """Return the event list for a mapping built from the given scalars.

  The arguments are read as key, value, key, value, ... and wrapped in a
  mapping start / mapping end event pair.

  NOTE: the name shadows the builtin `map`; it is kept because existing
  callers use it.

  Raises:
    ValueError: if an odd number of scalars is given. The last key would
      have no value, and the resulting event list could not be emitted.
  """
  # Validate up front: a dangling key would otherwise only surface later
  # as an emitter error far away from the actual mistake.
  if len(scalarValues) % 2 != 0:
    raise ValueError(
      "map() needs an even number of scalars (key, value, key, value, ...)")
  return [MappingStartEvent(None, None, True)] + \
    [ScalarEvent(None, None, (True, False), v) for v in scalarValues] + \
    [MappingEndEvent()]

# Example with complex values: read file1.yaml and write file1_modified.yaml
# again, this time appending a key whose value is a whole sequence and a key
# whose value is a whole mapping. seq() and map() return lists of events, so
# they are concatenated onto the list holding the key event.
append_to_yaml("file1.yaml", "file1_modified.yaml", [
  # my-appended-key gets the sequence one, two, three
  AppendableEvents(["test3", "service1"], [
    key("my-appended-key")] + seq("one", "two", "three")),
  # my_second_appended_key gets the mapping foo: bar
  AppendableEvents(["test3"], [
    key("my_second_appended_key")] + map("foo", "bar"))])