From 983b192a4541004fa09a1f266129390dc78c45e2 Mon Sep 17 00:00:00 2001 From: Ryan Petrello Date: Fri, 12 Jan 2018 17:27:54 -0500 Subject: [PATCH] replace our memcached-based fact cache implementation with local files see: https://github.com/ansible/ansible-tower/issues/7840 --- awx/main/models/jobs.py | 134 ++++++++---------- awx/main/tasks.py | 23 ++-- awx/main/tests/unit/models/test_jobs.py | 175 ++++++++++-------------- awx/main/tests/unit/test_tasks.py | 17 +++ awx/plugins/fact_caching/awx.py | 128 ----------------- docs/fact_cache.md | 45 ++++-- 6 files changed, 192 insertions(+), 330 deletions(-) delete mode 100755 awx/plugins/fact_caching/awx.py diff --git a/awx/main/models/jobs.py b/awx/main/models/jobs.py index d0729cc48b61..4f5e549e0497 100644 --- a/awx/main/models/jobs.py +++ b/awx/main/models/jobs.py @@ -2,26 +2,26 @@ # All Rights Reserved. # Python +import codecs import datetime import hashlib import hmac import logging +import os import time import json -import base64 from urlparse import urljoin +import six + # Django from django.conf import settings from django.db import models #from django.core.cache import cache -import memcache from django.db.models import Q, Count from django.utils.dateparse import parse_datetime -from dateutil import parser -from dateutil.tz import tzutc from django.utils.encoding import force_text, smart_str -from django.utils.timezone import utc +from django.utils.timezone import utc, now from django.utils.translation import ugettext_lazy as _ from django.core.exceptions import ValidationError @@ -714,85 +714,61 @@ def get_notification_templates(self): def get_notification_friendly_name(self): return "Job" - @property - def memcached_fact_key(self): - return '{}'.format(self.inventory.id) - - def memcached_fact_host_key(self, host_name): - return '{}-{}'.format(self.inventory.id, base64.b64encode(host_name.encode('utf-8'))) - - def memcached_fact_modified_key(self, host_name): - return 
'{}-{}-modified'.format(self.inventory.id, base64.b64encode(host_name.encode('utf-8'))) - - def _get_inventory_hosts(self, only=['name', 'ansible_facts', 'modified',]): - return self.inventory.hosts.only(*only) - - def _get_memcache_connection(self): - return memcache.Client([settings.CACHES['default']['LOCATION']], debug=0) - - def start_job_fact_cache(self): + def _get_inventory_hosts(self, only=['name', 'ansible_facts', 'ansible_facts_modified', 'modified',]): if not self.inventory: - return - - cache = self._get_memcache_connection() - - host_names = [] - - for host in self._get_inventory_hosts(): - host_key = self.memcached_fact_host_key(host.name) - modified_key = self.memcached_fact_modified_key(host.name) - - if cache.get(modified_key) is None: - if host.ansible_facts_modified: - host_modified = host.ansible_facts_modified.replace(tzinfo=tzutc()).isoformat() - else: - host_modified = datetime.datetime.now(tzutc()).isoformat() - cache.set(host_key, json.dumps(host.ansible_facts)) - cache.set(modified_key, host_modified) - - host_names.append(host.name) - - cache.set(self.memcached_fact_key, host_names) - - def finish_job_fact_cache(self): - if not self.inventory: - return - - cache = self._get_memcache_connection() + return [] + return self.inventory.hosts.only(*only) + def start_job_fact_cache(self, destination, modification_times, timeout=None): + destination = os.path.join(destination, 'facts') + os.makedirs(destination, mode=0700) hosts = self._get_inventory_hosts() + if timeout is None: + timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT + if timeout > 0: + # exclude hosts with fact data older than `settings.ANSIBLE_FACT_CACHE_TIMEOUT seconds` + timeout = now() - datetime.timedelta(seconds=timeout) + hosts = hosts.filter(ansible_facts_modified__gte=timeout) for host in hosts: - host_key = self.memcached_fact_host_key(host.name) - modified_key = self.memcached_fact_modified_key(host.name) - - modified = cache.get(modified_key) - if modified is None: - 
cache.delete(host_key) - continue - - # Save facts if cache is newer than DB - modified = parser.parse(modified, tzinfos=[tzutc()]) - if not host.ansible_facts_modified or modified > host.ansible_facts_modified: - ansible_facts = cache.get(host_key) - try: - ansible_facts = json.loads(ansible_facts) - except Exception: - ansible_facts = None - - if ansible_facts is None: - cache.delete(host_key) - continue - host.ansible_facts = ansible_facts - host.ansible_facts_modified = modified - if 'insights' in ansible_facts and 'system_id' in ansible_facts['insights']: - host.insights_system_id = ansible_facts['insights']['system_id'] - host.save() + filepath = os.sep.join(map(six.text_type, [destination, host.name])) + with codecs.open(filepath, 'w', encoding='utf-8') as f: + os.chmod(f.name, 0600) + json.dump(host.ansible_facts, f) + # make note of the time we wrote the file so we can check if it changed later + modification_times[filepath] = os.path.getmtime(filepath) + + def finish_job_fact_cache(self, destination, modification_times): + destination = os.path.join(destination, 'facts') + for host in self._get_inventory_hosts(): + filepath = os.sep.join(map(six.text_type, [destination, host.name])) + if os.path.exists(filepath): + # If the file changed since we wrote it pre-playbook run... 
+ modified = os.path.getmtime(filepath) + if modified > modification_times.get(filepath, 0): + with codecs.open(filepath, 'r', encoding='utf-8') as f: + try: + ansible_facts = json.load(f) + except ValueError: + continue + host.ansible_facts = ansible_facts + host.ansible_facts_modified = now() + if 'insights' in ansible_facts and 'system_id' in ansible_facts['insights']: + host.insights_system_id = ansible_facts['insights']['system_id'] + host.save() + system_tracking_logger.info( + 'New fact for inventory {} host {}'.format( + smart_str(host.inventory.name), smart_str(host.name)), + extra=dict(inventory_id=host.inventory.id, host_name=host.name, + ansible_facts=host.ansible_facts, + ansible_facts_modified=host.ansible_facts_modified.isoformat())) + else: + # if the file goes missing, ansible removed it (likely via clear_facts) + host.ansible_facts = {} + host.ansible_facts_modified = now() system_tracking_logger.info( - 'New fact for inventory {} host {}'.format( - smart_str(host.inventory.name), smart_str(host.name)), - extra=dict(inventory_id=host.inventory.id, host_name=host.name, - ansible_facts=host.ansible_facts, - ansible_facts_modified=host.ansible_facts_modified.isoformat())) + 'Facts cleared for inventory {} host {}'.format( + smart_str(host.inventory.name), smart_str(host.name))) + host.save() class JobHostSummary(CreatedModifiedModel): diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 71552d329bc8..b60ee6b16dc5 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -812,6 +812,15 @@ def run(self, pk, isolated_host=None, **kwargs): # Fetch ansible version once here to support version-dependent features. 
kwargs['ansible_version'] = get_ansible_version() kwargs['private_data_dir'] = self.build_private_data_dir(instance, **kwargs) + + # Fetch "cached" fact data from prior runs and put on the disk + # where ansible expects to find it + if getattr(instance, 'use_fact_cache', False) and not kwargs.get('isolated'): + instance.start_job_fact_cache( + os.path.join(kwargs['private_data_dir']), + kwargs.setdefault('fact_modification_times', {}) + ) + # May have to serialize the value kwargs['private_data_files'] = self.build_private_data_files(instance, **kwargs) kwargs['passwords'] = self.build_passwords(instance, **kwargs) @@ -1032,10 +1041,8 @@ def build_env(self, job, **kwargs): env['INVENTORY_ID'] = str(job.inventory.pk) if job.use_fact_cache and not kwargs.get('isolated'): env['ANSIBLE_LIBRARY'] = self.get_path_to('..', 'plugins', 'library') - env['ANSIBLE_CACHE_PLUGINS'] = self.get_path_to('..', 'plugins', 'fact_caching') - env['ANSIBLE_CACHE_PLUGIN'] = "awx" - env['ANSIBLE_CACHE_PLUGIN_TIMEOUT'] = str(settings.ANSIBLE_FACT_CACHE_TIMEOUT) - env['ANSIBLE_CACHE_PLUGIN_CONNECTION'] = settings.CACHES['default']['LOCATION'] if 'LOCATION' in settings.CACHES['default'] else '' + env['ANSIBLE_CACHE_PLUGIN'] = "jsonfile" + env['ANSIBLE_CACHE_PLUGIN_CONNECTION'] = os.path.join(kwargs['private_data_dir'], 'facts') if job.project: env['PROJECT_REVISION'] = job.project.scm_revision env['ANSIBLE_RETRY_FILES_ENABLED'] = "False" @@ -1286,14 +1293,14 @@ def pre_run_hook(self, job, **kwargs): ('project_update', local_project_sync.name, local_project_sync.id))) raise - if job.use_fact_cache and not kwargs.get('isolated'): - job.start_job_fact_cache() - def final_run_hook(self, job, status, **kwargs): super(RunJob, self).final_run_hook(job, status, **kwargs) if job.use_fact_cache and not kwargs.get('isolated'): - job.finish_job_fact_cache() + job.finish_job_fact_cache( + kwargs['private_data_dir'], + kwargs['fact_modification_times'] + ) try: inventory = job.inventory except 
Inventory.DoesNotExist: diff --git a/awx/main/tests/unit/models/test_jobs.py b/awx/main/tests/unit/models/test_jobs.py index e60b775066d9..f8949a033375 100644 --- a/awx/main/tests/unit/models/test_jobs.py +++ b/awx/main/tests/unit/models/test_jobs.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +import json +import os +import time import pytest @@ -8,51 +11,14 @@ Host, ) -import datetime -import json -import base64 -from dateutil.tz import tzutc - - -class CacheMock(object): - def __init__(self): - self.d = dict() - - def get(self, key): - if key not in self.d: - return None - return self.d[key] - - def set(self, key, val): - self.d[key] = val - - def delete(self, key): - del self.d[key] - @pytest.fixture -def old_time(): - return (datetime.datetime.now(tzutc()) - datetime.timedelta(minutes=60)) - - -@pytest.fixture() -def new_time(): - return (datetime.datetime.now(tzutc())) - - -@pytest.fixture -def hosts(old_time, inventory): - return [ - Host(name='host1', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=old_time, inventory=inventory), - Host(name='host2', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=old_time, inventory=inventory), - Host(name='host3', ansible_facts={"a": 1, "b": 2}, ansible_facts_modified=old_time, inventory=inventory), - ] - - -@pytest.fixture -def hosts2(inventory): +def hosts(inventory): return [ - Host(name='host2', ansible_facts="foobar", ansible_facts_modified=old_time, inventory=inventory), + Host(name='host1', ansible_facts={"a": 1, "b": 2}, inventory=inventory), + Host(name='host2', ansible_facts={"a": 1, "b": 2}, inventory=inventory), + Host(name='host3', ansible_facts={"a": 1, "b": 2}, inventory=inventory), + Host(name=u'Iñtërnâtiônàlizætiøn', ansible_facts={"a": 1, "b": 2}, inventory=inventory), ] @@ -62,87 +28,92 @@ def inventory(): @pytest.fixture -def mock_cache(mocker): - cache = CacheMock() - mocker.patch.object(cache, 'set', wraps=cache.set) - mocker.patch.object(cache, 'get', wraps=cache.get) - 
mocker.patch.object(cache, 'delete', wraps=cache.delete) - return cache - - -@pytest.fixture -def job(mocker, hosts, inventory, mock_cache): +def job(mocker, hosts, inventory): j = Job(inventory=inventory, id=2) j._get_inventory_hosts = mocker.Mock(return_value=hosts) - j._get_memcache_connection = mocker.Mock(return_value=mock_cache) return j -@pytest.fixture -def job2(mocker, hosts2, inventory, mock_cache): - j = Job(inventory=inventory, id=3) - j._get_inventory_hosts = mocker.Mock(return_value=hosts2) - j._get_memcache_connection = mocker.Mock(return_value=mock_cache) - return j - - -def test_start_job_fact_cache(hosts, job, inventory, mocker): - - job.start_job_fact_cache() - - job._get_memcache_connection().set.assert_any_call('5', [h.name for h in hosts]) - for host in hosts: - job._get_memcache_connection().set.assert_any_call('{}-{}'.format(5, base64.b64encode(host.name)), json.dumps(host.ansible_facts)) - job._get_memcache_connection().set.assert_any_call('{}-{}-modified'.format(5, base64.b64encode(host.name)), host.ansible_facts_modified.isoformat()) +def test_start_job_fact_cache(hosts, job, inventory, tmpdir): + fact_cache = str(tmpdir) + modified_times = {} + job.start_job_fact_cache(fact_cache, modified_times, 0) + for host in hosts: + filepath = os.path.join(fact_cache, 'facts', host.name) + assert os.path.exists(filepath) + with open(filepath, 'r') as f: + assert f.read() == json.dumps(host.ansible_facts) + assert filepath in modified_times -def test_start_job_fact_cache_existing_host(hosts, hosts2, job, job2, inventory, mocker): - job.start_job_fact_cache() +def test_finish_job_fact_cache_with_existing_data(job, hosts, inventory, mocker, tmpdir): + fact_cache = str(tmpdir) + modified_times = {} + job.start_job_fact_cache(fact_cache, modified_times, 0) - for host in hosts: - job._get_memcache_connection().set.assert_any_call('{}-{}'.format(5, base64.b64encode(host.name)), json.dumps(host.ansible_facts)) - 
job._get_memcache_connection().set.assert_any_call('{}-{}-modified'.format(5, base64.b64encode(host.name)), host.ansible_facts_modified.isoformat()) + for h in hosts: + h.save = mocker.Mock() - job._get_memcache_connection().set.reset_mock() + ansible_facts_new = {"foo": "bar", "insights": {"system_id": "updated_by_scan"}} + filepath = os.path.join(fact_cache, 'facts', hosts[1].name) + with open(filepath, 'w') as f: + f.write(json.dumps(ansible_facts_new)) + f.flush() + # I feel kind of gross about calling `os.utime` by hand, but I noticed + # that in our container-based dev environment, the resolution for + # `os.stat()` after a file write was over a second, and I don't want to put + # a sleep() in this test + new_modification_time = time.time() + 3600 + os.utime(filepath, (new_modification_time, new_modification_time)) + + job.finish_job_fact_cache(fact_cache, modified_times) + + for host in (hosts[0], hosts[2], hosts[3]): + host.save.assert_not_called() + assert host.ansible_facts == {"a": 1, "b": 2} + assert host.ansible_facts_modified is None + assert hosts[1].ansible_facts == ansible_facts_new + assert hosts[1].insights_system_id == "updated_by_scan" + hosts[1].save.assert_called_once_with() - job2.start_job_fact_cache() - # Ensure hosts2 ansible_facts didn't overwrite hosts ansible_facts - ansible_facts_cached = job._get_memcache_connection().get('{}-{}'.format(5, base64.b64encode(hosts2[0].name))) - assert ansible_facts_cached == json.dumps(hosts[1].ansible_facts) +def test_finish_job_fact_cache_with_bad_data(job, hosts, inventory, mocker, tmpdir): + fact_cache = str(tmpdir) + modified_times = {} + job.start_job_fact_cache(fact_cache, modified_times, 0) + for h in hosts: + h.save = mocker.Mock() -def test_memcached_fact_host_key_unicode(job): - host_name = u'Iñtërnâtiônàlizætiøn' - host_key = job.memcached_fact_host_key(host_name) - assert host_key == '5-ScOxdMOrcm7DonRpw7Ruw6BsaXrDpnRpw7hu' + for h in hosts: + filepath = os.path.join(fact_cache, 'facts', 
h.name) + with open(filepath, 'w') as f: + f.write('not valid json!') + f.flush() + new_modification_time = time.time() + 3600 + os.utime(filepath, (new_modification_time, new_modification_time)) + job.finish_job_fact_cache(fact_cache, modified_times) -def test_memcached_fact_modified_key_unicode(job): - host_name = u'Iñtërnâtiônàlizætiøn' - host_key = job.memcached_fact_modified_key(host_name) - assert host_key == '5-ScOxdMOrcm7DonRpw7Ruw6BsaXrDpnRpw7hu-modified' + for h in hosts: + h.save.assert_not_called() -def test_finish_job_fact_cache(job, hosts, inventory, mocker, new_time): +def test_finish_job_fact_cache_clear(job, hosts, inventory, mocker, tmpdir): + fact_cache = str(tmpdir) + modified_times = {} + job.start_job_fact_cache(fact_cache, modified_times, 0) - job.start_job_fact_cache() for h in hosts: h.save = mocker.Mock() - host_key = job.memcached_fact_host_key(hosts[1].name) - modified_key = job.memcached_fact_modified_key(hosts[1].name) + os.remove(os.path.join(fact_cache, 'facts', hosts[1].name)) + job.finish_job_fact_cache(fact_cache, modified_times) - ansible_facts_new = {"foo": "bar", "insights": {"system_id": "updated_by_scan"}} - job._get_memcache_connection().set(host_key, json.dumps(ansible_facts_new)) - job._get_memcache_connection().set(modified_key, new_time.isoformat()) - - job.finish_job_fact_cache() - - hosts[0].save.assert_not_called() - hosts[2].save.assert_not_called() - assert hosts[1].ansible_facts == ansible_facts_new - assert hosts[1].insights_system_id == "updated_by_scan" + for host in (hosts[0], hosts[2], hosts[3]): + host.save.assert_not_called() + assert host.ansible_facts == {"a": 1, "b": 2} + assert host.ansible_facts_modified is None + assert hosts[1].ansible_facts == {} hosts[1].save.assert_called_once_with() - diff --git a/awx/main/tests/unit/test_tasks.py b/awx/main/tests/unit/test_tasks.py index f5a8c309b63d..963ebc87ae2c 100644 --- a/awx/main/tests/unit/test_tasks.py +++ b/awx/main/tests/unit/test_tasks.py @@ -331,6 
+331,23 @@ def test_awx_task_env(self): args, cwd, env, stdout = call_args assert env['FOO'] == 'BAR' + def test_fact_cache_usage(self): + self.instance.use_fact_cache = True + + start_mock = mock.Mock() + patch = mock.patch.object(Job, 'start_job_fact_cache', start_mock) + self.patches.append(patch) + patch.start() + + self.task.run(self.pk) + call_args, _ = self.run_pexpect.call_args_list[0] + args, cwd, env, stdout = call_args + start_mock.assert_called_once() + tmpdir, _ = start_mock.call_args[0] + + assert env['ANSIBLE_CACHE_PLUGIN'] == 'jsonfile' + assert env['ANSIBLE_CACHE_PLUGIN_CONNECTION'] == os.path.join(tmpdir, 'facts') + class TestAdhocRun(TestJobExecution): diff --git a/awx/plugins/fact_caching/awx.py b/awx/plugins/fact_caching/awx.py deleted file mode 100755 index 9b929a2728cd..000000000000 --- a/awx/plugins/fact_caching/awx.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2015 Ansible, Inc. -# This file is a utility Ansible plugin that is not part of the AWX or Ansible -# packages. It does not import any code from either package, nor does its -# license apply to Ansible or AWX. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# Neither the name of the nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - -import os -import memcache -import json -import datetime -import base64 -from dateutil import parser -from dateutil.tz import tzutc - -from ansible import constants as C - -try: - from ansible.cache.base import BaseCacheModule -except Exception: - from ansible.plugins.cache.base import BaseCacheModule - - -class CacheModule(BaseCacheModule): - - def __init__(self, *args, **kwargs): - self.mc = memcache.Client([C.CACHE_PLUGIN_CONNECTION], debug=0) - self._timeout = int(C.CACHE_PLUGIN_TIMEOUT) - self._inventory_id = os.environ['INVENTORY_ID'] - - @property - def host_names_key(self): - return '{}'.format(self._inventory_id) - - def translate_host_key(self, host_name): - return '{}-{}'.format(self._inventory_id, base64.b64encode(host_name.encode('utf-8'))) - - def translate_modified_key(self, host_name): - return '{}-{}-modified'.format(self._inventory_id, base64.b64encode(host_name.encode('utf-8'))) - - def get(self, key): - host_key = self.translate_host_key(key) - modified_key = self.translate_modified_key(key) - - ''' - Cache entry expired - ''' - modified = self.mc.get(modified_key) - if modified is None: - raise KeyError - modified = parser.parse(modified).replace(tzinfo=tzutc()) - 
now_utc = datetime.datetime.now(tzutc()) - if self._timeout != 0 and (modified + datetime.timedelta(seconds=self._timeout)) < now_utc: - raise KeyError - - value_json = self.mc.get(host_key) - if value_json is None: - raise KeyError - try: - return json.loads(value_json) - # If cache entry is corrupt or bad, fail gracefully. - except (TypeError, ValueError): - self.delete(key) - raise KeyError - - def set(self, key, value): - host_key = self.translate_host_key(key) - modified_key = self.translate_modified_key(key) - - self.mc.set(host_key, json.dumps(value)) - self.mc.set(modified_key, datetime.datetime.now(tzutc()).isoformat()) - - def keys(self): - return self.mc.get(self.host_names_key) - - def contains(self, key): - try: - self.get(key) - return True - except KeyError: - return False - - def delete(self, key): - self.set(key, {}) - - def flush(self): - host_names = self.mc.get(self.host_names_key) - if not host_names: - return - - for k in host_names: - self.mc.delete(self.translate_host_key(k)) - self.mc.delete(self.translate_modified_key(k)) - - def copy(self): - ret = dict() - host_names = self.mc.get(self.host_names_key) - if not host_names: - return - - for k in host_names: - ret[k] = self.mc.get(self.translate_host_key(k)) - return ret - diff --git a/docs/fact_cache.md b/docs/fact_cache.md index d1716d8aec2d..72ad41b7597f 100644 --- a/docs/fact_cache.md +++ b/docs/fact_cache.md @@ -1,21 +1,40 @@ -# Tower as an Ansible Fact Cache -Tower can store and retrieve per-host facts via an Ansible Fact Cache Plugin. This behavior is configurable on a per-job-template basis. When enabled, Tower will serve fact requests for all Hosts in an Inventory related to the Job running. This allows users to use Job Templates with `--limit` while still having access to the entire Inventory of Host facts. The Tower Ansible Fact Cache supports a global timeout settings that it enforces per-host. 
The setting is available in the CTiT interface under the Jobs category with the name `ANSIBLE_FACT_CACHE_TIMEOUT` and is in seconds. +# AWX as an Ansible Fact Cache +AWX can store and retrieve per-host facts via an Ansible Fact Cache Plugin. +This behavior is configurable on a per-job-template basis. When enabled, AWX +will serve fact requests for all Hosts in an Inventory related to the Job +running. This allows users to use Job Templates with `--limit` while still +having access to the entire Inventory of Host facts. -## Tower Fact Cache Implementation Details -### Tower Injection -In order to understand the behavior of Tower as a fact cache you will need to understand how fact caching is achieved in Tower. Upon a Job invocation with `use_fact_cache=True`, Tower will inject, into memcached, all `ansible_facts` associated with each Host in the Inventory associated with the Job. Jobs invoked with `use_fact_cache=False` will not inject `ansible_facts` into memcached. The cache key is of the form `inventory_id-host_name` so that hosts with the same name in different inventories do not clash. A list of all hosts in the inventory is also injected into memcached with key `inventory_id` and value `[host_name1, host_name2, ..., host_name3]`. +## AWX Fact Cache Implementation Details +### AWX Injection +In order to understand the behavior of AWX as a fact cache you will need to +understand how fact caching is achieved in AWX. When a Job launches with +`use_fact_cache=True`, AWX will write all `ansible_facts` associated with +each Host in the associated Inventory as JSON files on the local file system +(one JSON file per host). Jobs invoked with `use_fact_cache=False` will not +write `ansible_facts` files. ### Ansible plugin usage -The Ansible fact cache plugin that ships with Tower will only be enabled on Jobs that have fact cache enabled, `use_fact_cache=True`. The fact cache plugin running in Ansible will connect to the same memcached instance. 
A `get()` call to the fact cache interface in Ansible will result in a looked into memcached for the host-specific set of facts. A `set()` call to the fact cache will result in an update to memcached record along with the modified time. +When `use_fact_cache=True`, Ansible will be configured to use the `jsonfile` +cache plugin. Any `get()` call to the fact cache interface in Ansible will +result in a JSON file lookup for the host-specific set of facts. Any `set()` +call to the fact cache will result in a JSON file being written to the local +file system. -### Tower Cache to DB -When a Job finishes running that has `use_fact_cache=True` enabled, Tower will go through memcached and get all records for the hosts in the Inventory. Any records with update times newer than the database per-host `ansible_facts_modified` value will result in the `ansible_facts`, `ansible_facts_modified` from memcached being saved to the database. Note that the last value of the Ansible fact cache is retained in `ansible_facts`. The globla timeout and/or individual job template `use_fact_cache` setting will not clear the per-host `ansible_facts`. +### AWX Cache to DB +When a Job with `use_fact_cache=True` finishes running, AWX will look at all +of the local JSON files that represent the fact data. Any records with file +modification times that have increased (because Ansible updated the file via +`cache.set()`) will result in the latest value being saved to the database. On +subsequent playbook runs, AWX will _only_ inject cached facts that are _newer_ +than `settings.ANSIBLE_FACT_CACHE_TIMEOUT` seconds. -### Caching Behavior -Tower will always inject the host `ansible_facts` into memcached. The Ansible Tower Fact Cache Plugin will choose to present the cached values to the user or not based on the per-host `ansible_facts_modified` time and the global `ANSIBLE_FACT_CACHE_TIMEOUT`. - -## Tower Fact Logging -New and changed facts will be logged via Tower's logging facility. 
Specifically, to the `system_tracking` namespace or logger. The logging payload will include the fields: `host_name`, `inventory_id`, and `ansible_facts`. Where `ansible_facts` is a dictionary of all ansible facts for `host_name` in Tower Inventory `inventory_id`. +## AWX Fact Logging +New and changed facts will be logged via AWX's logging facility. Specifically, +to the `system_tracking` namespace or logger. The logging payload will include +the fields: `host_name`, `inventory_id`, and `ansible_facts`. Where +`ansible_facts` is a dictionary of all ansible facts for `host_name` in AWX +Inventory `inventory_id`. ## Integration Testing * ensure `clear_facts` sets `hosts//ansible_facts` to `{}`