From d4b3cf5921740aa8a228868090017a7d5561aca4 Mon Sep 17 00:00:00 2001 From: Vallish Vaidyeshwara Date: Wed, 13 Feb 2019 18:54:31 +0000 Subject: [PATCH 01/10] Initial build of Autotune Signed-off-by: Vallish Vaidyeshwara Reviewed-by: Balbir Singh Reviewed-by: Frederick Lefebvre Reviewed-by: Frank van der Linden --- MANIFEST.in | 1 + Makefile | 17 + README.md | 130 +++- config/ec2sys-autotune.cfg | 23 + config/user.ini | 34 + ec2sys_autotune.spec | 55 ++ scripts/autotune | 622 +++++++++++++++++ scripts/ec2sys_autotune_start | 32 + scripts/ec2sys_autotune_stop | 28 + setup.py | 38 ++ src/ec2sys_autotune/__init__.py | 1 + src/ec2sys_autotune/ec2_autotune_lock.py | 36 + src/ec2sys_autotune/ec2_autotune_start.py | 133 ++++ src/ec2sys_autotune/ec2_autotune_stop.py | 67 ++ src/ec2sys_autotune/ec2_autotune_utils.py | 80 +++ .../ec2_instance_cfg_engine.py | 625 ++++++++++++++++++ src/ec2sys_autotune/ec2_instance_cfg_gen.py | 321 +++++++++ src/ec2sys_autotune/ec2_instance_exception.py | 25 + src/ec2sys_autotune/ec2_instance_fetch_cfg.py | 96 +++ .../ec2_instance_kernel_cfg_gen.py | 93 +++ .../ec2_instance_network_cfg_gen.py | 173 +++++ .../ec2_instance_pm_cfg_gen.py | 64 ++ .../ec2_instance_services_cfg_gen.py | 33 + .../ec2_instance_storage_cfg_gen.py | 78 +++ src/ec2sys_autotune/ec2_instance_types.py | 110 +++ .../ec2_instance_vm_cfg_gen.py | 116 ++++ .../placement_group_cfg_gen.py | 62 ++ src/ec2sys_autotune/udp_server_cfg_gen.py | 32 + unit/autotune.service | 15 + 29 files changed, 3136 insertions(+), 4 deletions(-) create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 config/ec2sys-autotune.cfg create mode 100644 config/user.ini create mode 100644 ec2sys_autotune.spec create mode 100755 scripts/autotune create mode 100755 scripts/ec2sys_autotune_start create mode 100755 scripts/ec2sys_autotune_stop create mode 100644 setup.py create mode 100644 src/ec2sys_autotune/__init__.py create mode 100644 src/ec2sys_autotune/ec2_autotune_lock.py create mode 
100644 src/ec2sys_autotune/ec2_autotune_start.py create mode 100644 src/ec2sys_autotune/ec2_autotune_stop.py create mode 100644 src/ec2sys_autotune/ec2_autotune_utils.py create mode 100644 src/ec2sys_autotune/ec2_instance_cfg_engine.py create mode 100644 src/ec2sys_autotune/ec2_instance_cfg_gen.py create mode 100644 src/ec2sys_autotune/ec2_instance_exception.py create mode 100644 src/ec2sys_autotune/ec2_instance_fetch_cfg.py create mode 100644 src/ec2sys_autotune/ec2_instance_kernel_cfg_gen.py create mode 100644 src/ec2sys_autotune/ec2_instance_network_cfg_gen.py create mode 100644 src/ec2sys_autotune/ec2_instance_pm_cfg_gen.py create mode 100644 src/ec2sys_autotune/ec2_instance_services_cfg_gen.py create mode 100644 src/ec2sys_autotune/ec2_instance_storage_cfg_gen.py create mode 100644 src/ec2sys_autotune/ec2_instance_types.py create mode 100644 src/ec2sys_autotune/ec2_instance_vm_cfg_gen.py create mode 100644 src/ec2sys_autotune/placement_group_cfg_gen.py create mode 100644 src/ec2sys_autotune/udp_server_cfg_gen.py create mode 100644 unit/autotune.service diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..64ad321 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include README.md LICENSE diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7eab7b5 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +# Create a source distribution in gztar format +sources: + python setup.py sdist --formats=gztar + cp dist/*.gz . 
+ +# Install everything from build directory +install: + python setup.py install + +# Create an RPM distribution +rpm: + python setup.py bdist_rpm + +# Clean up temporary files from 'build' command +clean: + python setup.py clean + rm -rf *.gz dist ec2sys_autotune.egg-info build diff --git a/README.md b/README.md index 08f7e63..4ca9642 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,129 @@ -## Autotune +Amazon Linux Kernel Autotuning: +------------------------------- -EC2 Amazon Linux Kernel Autotuning +License: +-------- +The code is released under GNU GENERAL PUBLIC LICENSE 2.0. See LICENSE +for details. -## License +Description: +------------ +Amazon Web Services EC2 fleet hosts a wide selection of instance types +optimized to fit different customer use cases. Instance types are carved +out for varying combinations of CPU, Memory, Storage and Networking capacity +to allow customers to pick the correct instance type for their workload. -This library is licensed under the GPL 2.0 Only. +Amazon Linux Kernel provides a stable, secure and high performance execution +environment for customer applications running on Amazon EC2. Default values +for kernel tunables are not the optimal values for all instance +types on EC2. To provide the best customer experience for Amazon Linux Kernel +consumers on EC2, this package optimizes kernel tunables for different +instance types. 
+ +Architecture: +------------- +Following are the main blocks of ec2sys-autotune: + + ------------- + | workload | + | specific | + | generator | + ------------- + ^ + | Extends base generator + | + ------------- ------------- + | base | | Instance | + | class | <----| metadata | + | generator | | properties| + ------------- ------------- + | + | Auto generated config file + v + ------------- + | Config | + | file | + | | + ------------- + | + | Consumed by engine + v + ------------- ---------- + | Config | | Saved | + | engine | <----| system | + | | | state | + ------------- ---------- + +ec2sys-autotune is controlled by /bin/autotune CLI. + +Base class generator: This block identifies the EC2 instance type that it +is running on and fetches all public properties of this EC2 instance type +from the instance metadata file. Based on the public properties of EC2 instances +and the instance type it is running on, it generates a config file that is +appropriate for the running instance. As of today, this block can generate +a config file to control services controlled from systemd, sysctl tunables, +sysfs tunables and CPU settings (C-state and P-state). + +Config file: This is an auto generated file from the base class generator. +However an end user is free to modify this config file as deemed appropriate +for ec2sys-autotune to apply the changes on the running instance. +These config files reside in the /etc/ec2sys-autotune.d directory. + +Config engine: This block is transparent to the type of running EC2 instance. +The engine reads the config file and applies all settings onto the running +EC2 instance. It also saves the state of the system. Upon rolling back the +tunables, the engine restores the system to the default state that was present +prior to tuning the system. + +Workload specific generator: Any workload specific tuning needs to inherit +from the base class generator and generate system tunables. This can also be +used to stack or override tunables in the base class generator. 
+ +Building: +--------- +The code can be built using the Makefile script provided in the source. + +1) Create a source distribution in gztar format +make sources + +2) Install everything from build directory +make install + +3) Create an RPM distribution +make rpm + +4) Clean up temporary files from 'build' command +make clean + +Usage: +------ +As of this build, there are three profiles packaged: +1) base: This is general tuning of the system and the tunables + are workload agnostic. +2) udp-server: This profile also demonstrates how the framework + can be extended to add workload specific tuning. As of now, + this profile has a few tunables that are required especially in + docker or containers workload. +3) placement-group: This profile is for instances configured + with EC2 placement group for low latency network connectivity. + +Display usage of the CLI: +/usr/bin/autotune --help + +Set udp-server as profile: +/usr/bin/autotune profile udp-server + +Enable the tunables: +/usr/bin/autotune apply + +Disable the tunables: +/usr/bin/autotune rollback + +Override a tunable in autotune profile: +autotune override sysctl:vm.swappiness:40 + +Exclude a tunable from autotune profile: +autotune exclude sysctl:vm.swappiness + +Delete customized tunable from autotune profile: +autotune delete sysctl:vm.swappiness diff --git a/config/ec2sys-autotune.cfg b/config/ec2sys-autotune.cfg new file mode 100644 index 0000000..611d4de --- /dev/null +++ b/config/ec2sys-autotune.cfg @@ -0,0 +1,23 @@ +[DEFAULT] +# Configuration directory. This directory holds the configuration +# tunables file. All profiles which are auto generated and user +# customization of tunables reside in this directory. +CONFIG_DIR = /etc/ec2sys-autotune.d + +# State directory. This directory holds all state information +# related to autotune including the log file which is used +# to revert the system to earlier state. 
+STATE_DIR = /var/lib/ec2sys-autotune + +# Log file to save the system state in ${STATE_DIR}. This file +# is used to revert system tunables to earlier default system +# state when autotune service is stopped. This file is protected +# by appropriate permissions from other system users. +# This file should never be modified by any user. +LOG_FILE = system-state.json + +# User customized configuration file in ${CONFIG_DIR} +USER_CONFIG = user.ini + +# Current service status file +STATUS = /var/run/ec2sys-autotune diff --git a/config/user.ini b/config/user.ini new file mode 100644 index 0000000..5e2876b --- /dev/null +++ b/config/user.ini @@ -0,0 +1,34 @@ +# Do not edit this config file by hand, instead customize tuning in +# user.ini as per your requirements by using /bin/autotune. + +# Profile to configure the system with +[profile] +PROFILE = base + +# System automatically generates list of tunables based +# on PROFILE. This auto generated list can be customized +# by end user by: +# 1) overriding auto generated tunable with a different value +# Format: TUNABLE = VALUE +# 2) excluding the tunable from being tweaked by autotune +# Format: TUNABLE = (exclude) +# Use of "(exclude)" as tunable value makes the engine +# ignore and not tweak this tunable +# An example comment is provided in each section to +# help end user read entries in this config file. 
+ +[service] +# irqbalance = stop +# irqbalance = (exclude) + +[sysctl] +# vm.swappiness = 10 +# vm.swappiness = (exclude) + +[sysfs] +# /sys/devices/system/clocksource/clocksource0/current_clocksource = tsc +# /sys/devices/system/clocksource/clocksource0/current_clocksource = (exclude) + +[cpu] +# c-state = C1 +# c-state = (exclude) diff --git a/ec2sys_autotune.spec b/ec2sys_autotune.spec new file mode 100644 index 0000000..d7d53db --- /dev/null +++ b/ec2sys_autotune.spec @@ -0,0 +1,55 @@ +Name: ec2sys-autotune +Version: 1.0.0 +Release: 1%{?dist} +Summary: AWS EC2 instance autotuning + +Group: Applications/Engineering +License: GNU GENERAL PUBLIC LICENSE 2.0 +URL: https://github.com/aws/ec2sys-autotune +Source0: %{name}-%{version}.tar.gz +BuildArch: noarch +Requires(pre): systemd +Requires: kernel-tools irqbalance procps-ng +Requires: util-linux ec2-utils grep coreutils +BuildRequires: python2 python2-devel + +%description +An AWS EC2 agent that tunes guest instances automatically based on their instance type + +%prep +%setup -q -n %{name}-%{version} + +%build +%py2_build + +%install +%py2_install + +%files +%defattr(-,root,root) +%doc README.md +%license LICENSE +%{_unitdir}/autotune.service +%{_bindir}/ec2sys_autotune_start +%{_bindir}/ec2sys_autotune_stop +%{_bindir}/autotune +%config(noreplace) %{_sysconfdir}/ec2sys-autotune.cfg +%config(noreplace) %{_sysconfdir}/%{name}.d/user.ini +%dir "%{_var}/lib/%{name}" +%{python2_sitelib}/* + +%clean +rm -rf $RPM_BUILD_ROOT + +%post +%systemd_post autotune.service + +%preun +%systemd_preun autotune.service + +%postun +%systemd_postun_with_restart autotune.service + +%changelog +* Mon Feb 4 2019 Vallish Vaidyeshwara - 1.0.0 +- Initial build diff --git a/scripts/autotune b/scripts/autotune new file mode 100755 index 0000000..afa1da5 --- /dev/null +++ b/scripts/autotune @@ -0,0 +1,622 @@ +#!/usr/bin/python + +''' + +EC2 Amazon Linux Kernel Autotuning + +This tool is a front end CLI to administer autotuning. 
+autotune CLI can do following: + - List available profiles for tuning + - Switch profiles + - Show current active profile + - Override tunables in user config + - Exclude tunables in user config + - Delete tunables in user config + - List all config values + - Enable autotuning + - Disable autotuning + - Show current status +''' + +import os +import sys +import textwrap +try: + from configparser import RawConfigParser +except ImportError: + # Backward compatibility with python versions earlier to 3.0 + from ConfigParser import RawConfigParser +import argparse +from ec2sys_autotune.ec2_autotune_start import Ec2AutotuneStart +from ec2sys_autotune.ec2_autotune_stop import Ec2AutotuneStop +from ec2sys_autotune.ec2_instance_fetch_cfg import fetch_configuration +from ec2sys_autotune.ec2_instance_fetch_cfg import EXCLUDE_TAG +from ec2sys_autotune.ec2_autotune_utils import exec_cmds +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + +# Config file location to autotune +CONFIG_FILE = "/etc/ec2sys-autotune.cfg" + +# Current available profiles compatible with autotune +AVAILABLE_PROFILES = { + "base": "Tunables which are generic and workload agnostic", + "udp-server": "Tunables for UDP server in docker or containers workload", + "placement-group": "Tunables to configure instances for high " + + "network speed within EC2 placement group"} + + +def autogenerate_profile(profile, config_dir): + ''' + + Make sure autogenerated profile exists. + autotune.start() is optimized to reuse existing profiles + once generated. 
+ ''' + autotune = Ec2AutotuneStart(genconfigonly=True) + try: + autotune.start() + except Ec2AutotuneError, e: + raise (e) + + # Validate auto generated profile config path + auto_profile = "{0}/{1}.ini".format(config_dir, profile) + if (os.path.isfile(auto_profile) is False): + raise Ec2AutotuneError("Failed to generate config file {0}, " + "operation failed.".format(auto_profile)) + + +def is_valid_tunable(profile, config_dir, section, tunable): + ''' + + Validate the tunable before adding it as user customized tunable. + ''' + autogenerate_profile(profile, config_dir) + + # Read auto generated config file (ini format) + autocfg_object = RawConfigParser(allow_no_value=True) + autocfg_object.read("{0}/{1}.ini".format(config_dir, profile)) + if (autocfg_object.has_section(section) is True): + options = autocfg_object.options(section) + for item in options: + if (item == tunable): + return True + return False + + +def get_all_defaults_from_config(config): + ''' + + Read in all defaults + ''' + cfg_object = RawConfigParser(allow_no_value=True) + cfg_object.read(config) + return cfg_object.defaults() + + +def get_options_from_config(config, options_needed): + ''' + + Read in the list of options requested + ''' + cfg_object = RawConfigParser(allow_no_value=True) + cfg_object.read(config) + config_options = {} + for section, option in options_needed.iteritems(): + if (cfg_object.has_option(section, option) is False): + continue + config_options[option] = cfg_object.get(section, option) + return config_options + + +def customize_user_config(user_data, remove_op=False): + ''' + + Parse the user passed in data + Override format: :: + Exclude format: : + Delete format: : + ''' + first_delim = user_data.find(":") + second_delim = user_data.find(":", first_delim+1) + section = user_data[:first_delim] + tunable = user_data[first_delim+1:second_delim] + value = user_data[second_delim+1:] + if (section is None or tunable is None or value is None): + raise 
Ec2AutotuneError("\nInvalid format, " + "please refer to help section.\n") + + config_defaults = get_all_defaults_from_config(CONFIG_FILE) + try: + config_dir = config_defaults["config_dir"] + user_config = config_defaults["user_config"] + options_needed = {"profile": "PROFILE", + section: tunable} + config_options = get_options_from_config( + "{0}/{1}".format(config_dir, user_config), + options_needed) + profile = config_options["PROFILE"] + except KeyError, e: + raise Ec2AutotuneError("\nInvalid key {0}".format(str(e))) + + if (section != "profile" and + is_valid_tunable(profile, config_dir, section, tunable) is False): + raise Ec2AutotuneError("\nInvalid tunable, " + "please use a valid tunable.\n") + + ''' + comment_prefixes is not available in python 2, + use sed to retain comments in configuartion file. + INIConfig retains comments, but does not provide any + interface to add a new key pair of name=value nor + provides an interface to remove key pair of name=value. + ''' + + # Remove the current customization + cur_val = None + try: + cur_val = config_options[tunable] + except KeyError, e: + # No customization to remove + pass + if (cur_val is not None): + cmd = "/bin/sed -i \'/^{0} = /d\' {1}" \ + .format(tunable, + "{0}/{1}".format(config_dir, user_config)) + exec_cmds([cmd]) + print("\nRemoved earlier user customization of {0}\n".format(tunable)) + + if (remove_op is True): + return + + # Add the new customization + cmd = "/bin/sed -i \'/^\[{0}\]/ a {1} = {2}\' {3}" \ + .format(section, tunable, value, + "{0}/{1}".format(config_dir, user_config)) + exec_cmds([cmd]) + return + + +def apply_tunables(): + ''' + + Apply autotune tunables + ''' + + autotune = Ec2AutotuneStart(CONFIG_FILE, False) + try: + autotune.start() + except Ec2AutotuneError, e: + raise (e) + return + + +def rollback_tunables(): + ''' + + Rollback autotune tunables + ''' + + autotune = Ec2AutotuneStop(CONFIG_FILE) + try: + autotune.stop() + except Ec2AutotuneError, e: + raise (e) + return 
+ + +def reapply_tunables(): + options_needed = {"DEFAULT": "STATUS"} + config_options = get_options_from_config(CONFIG_FILE, options_needed) + try: + status = config_options["STATUS"] + except KeyError, e: + raise Ec2AutotuneError("\nInvalid key {0}".format(str(e))) + if (os.path.isfile(status) is True): + # Autotune is active, reapply to pickup new configuration + print("\nReapplying autotune tunables to " + "activate new configuration...\n") + rollback_tunables() + apply_tunables() + return + + +def process_list(args): + ''' + + List available profiles to end user + ''' + + print("\nAvailable profiles for autotune:\n") + count = 1 + for profile, description in AVAILABLE_PROFILES.items(): + print("{0:2} {1:15} - {2:60}".format(str(count), + profile, + description)) + count = count + 1 + print("\n") + return + + +def process_profile(args): + ''' + + Switch profile to a new profile + ''' + + config_defaults = get_all_defaults_from_config(CONFIG_FILE) + try: + config_dir = config_defaults["config_dir"] + user_config = config_defaults["user_config"] + options_needed = {"profile": "PROFILE"} + config_options = get_options_from_config( + "{0}/{1}".format(config_dir, user_config), + options_needed) + profile = config_options["PROFILE"] + except KeyError, e: + raise Ec2AutotuneError("\nInvalid key {0}".format(str(e))) + + try: + # Validate input profile before setting it + description = AVAILABLE_PROFILES[args.profile] + except KeyError, e: + raise Ec2AutotuneError("\nInvalid profile name {0}, " + "please use a valid profile.\n".format(str(e))) + + if (args.secured is True): + new_profile = "{0}-secured".format(args.profile) + else: + new_profile = args.profile + if (profile != new_profile): + customize_user_config("profile:PROFILE:{0}".format(new_profile)) + + ''' + After the profile is set, reapply the tunables if it has already + been applied to pick new profile. 
+ ''' + reapply_tunables() + print("\nSuccessfully changed the profile to: {0}\n".format( + new_profile)) + return + + +def process_active(args): + ''' + + Show current active profile + ''' + + config_defaults = get_all_defaults_from_config(CONFIG_FILE) + try: + config_dir = config_defaults["config_dir"] + user_config = config_defaults["user_config"] + options_needed = {"profile": "PROFILE"} + config_options = get_options_from_config( + "{0}/{1}".format(config_dir, user_config), + options_needed) + profile = config_options["PROFILE"] + except KeyError, e: + raise Ec2AutotuneError("\nInvalid key {0}".format(str(e))) + print("\nCurrent active profile: {0}\n".format(profile)) + return + + +def process_apply(args): + ''' + + Apply the tunables + ''' + + apply_tunables() + print("\nEnabled autotune tunables\n") + return + + +def process_rollback(args): + ''' + + Rollback the tunables + ''' + + rollback_tunables() + print("\nDisabled autotune tunables\n") + return + + +def process_status(args): + ''' + + Get the current status + ''' + + options_needed = {"DEFAULT": "STATUS"} + config_options = get_options_from_config(CONFIG_FILE, options_needed) + try: + status = config_options["STATUS"] + except KeyError, e: + raise Ec2AutotuneError("\nInvalid key {0}".format(str(e))) + if (os.path.isfile(status) is True): + print("\nAutotune is active\n") + else: + print("\nAutotune is not active\n") + return + + +def print_user_overridden(item, value): + print("{0} = {1} [user overridden]".format(item, value)) + return + + +def print_user_excluded(item): + print("{0} = [user excluded]".format(item)) + return + + +def print_auto_generated(item, value): + print("{0} = {1}".format(item, value)) + return + + +def process_showconfig(args): + ''' + + Show proposed configurations for the active profile + ''' + + config_defaults = get_all_defaults_from_config(CONFIG_FILE) + try: + config_dir = config_defaults["config_dir"] + user_config = config_defaults["user_config"] + options_needed = 
{"profile": "PROFILE"} + config_options = get_options_from_config( + "{0}/{1}".format(config_dir, user_config), + options_needed) + profile = config_options["PROFILE"] + except KeyError, e: + raise Ec2AutotuneError("\nInvalid key {0}".format(str(e))) + autogenerate_profile(profile, config_dir) + + # Read auto generated config file (ini format) + autocfg_object = RawConfigParser(allow_no_value=True) + autocfg_object.read("{0}/{1}.ini".format(config_dir, profile)) + + print("\nAutotune configurations for profile: {0}".format(profile)) + + sections = autocfg_object.sections() + for section in sections: + # Ignore PROFILE header section + if (section == "profile"): + continue + print("\n[{0}]".format(section)) + fetch_configuration("{0}/{1}.ini".format(config_dir, profile), + "{0}/{1}".format(config_dir, user_config), + section, + print_user_overridden, + print_user_excluded, + print_auto_generated, + None) + print("\n") + return + + +def process_override(args): + ''' + + Override auto generated configuration + ''' + + # Input format: :: + if(args.override.count(":") != 2): + raise Ec2AutotuneError("\nInvalid override format, " + "please use a valid format.\n") + + customize_user_config(args.override) + ''' + After overriding tunable, reapply the tunables if it has + already been applied + ''' + reapply_tunables() + print("\nSuccessfully overrided: {0}\n".format(args.override)) + return + + +def process_exclude(args): + ''' + + Exclude auto generated configuration + ''' + + # Input format: : + if(args.exclude.count(":") != 1): + raise Ec2AutotuneError("\nInvalid exclude format, " + "please use a valid format.\n") + + customize_user_config("{0}:{1}".format(args.exclude, EXCLUDE_TAG)) + ''' + After excluding tunable, reapply the tunables if it has + already been applied + ''' + reapply_tunables() + print("\nSuccessfully excluded: {0}\n".format(args.exclude)) + return + + +def process_delete(args): + ''' + + Delete previous user customized tunable + ''' + + # Input 
format: : + if(args.delete.count(":") != 1): + raise Ec2AutotuneError("\nInvalid delete format, " + "please use a valid format.\n") + + customize_user_config("{0}:value".format(args.delete), + True) + ''' + After deleting tunable, reapply the tunables + if it has already been applied + ''' + reapply_tunables() + print("\nSuccessfully deleted: {0}\n".format(args.delete)) + return + + +def add_arguments_and_parse(): + ''' + + Autotune CLI + ''' + + desc = textwrap.dedent('''\ + +EC2 System Autotune + +Amazon Web Services EC2 fleet hosts a wide selection of instance types +optimized to fit different customer use cases. Instance types are carved +out for varying combinations of CPU, Memory, Storage and Networking capacity +to allow customers to pick the correct instance type for their workload. + +Amazon Linux Kernel provides a stable, secure and high performance execution +environment for customer applications running on Amazon EC2. Default values +for kernel tunables is not the best and optimized value for all instance +types on EC2. To provide best customer experience for Amazon Linux Kernel +consumers on EC2, EC2 System Autotune optimizes kernel tunable for different +instance types. + +Usage: autotune SUBCMD + +SUBCMD Following are sub-commands to autotune: + list: List available profiles + active: Show current active profile + apply: Enable autotune tunables with current active profile + rollback: Disable autotune tunables and restore system defaults + status: Show autotune status + showconfig: Show configuration for active profile + profile: Switch profile + --secured: An optional flag to include + security hardening tunables + override: Override auto generated config + exclude: Exclude auto generated config + delete: Delete user modified config + E.g. 
autotune showconfig + autotune apply + autotune rollback + + Following is the format of argument passed to autotune: + Only sub-commands override, exclude and delete need argument + autotune override :: + autotune exclude : + autotune delete : + : Class is one of service, sysctl, sysfs, cpu + : Name of the tunable, e.g vm.swappiness + : Value for the tunable, e.g 10 + E.g. autotune override sysctl:vm.swappiness:10 + autotune exclude sysctl:vm.swappiness + autotune delete sysctl:vm.swappiness + + ''') + + parser = argparse.ArgumentParser( + description=desc, + formatter_class=argparse.RawDescriptionHelpFormatter, + prog="autotune") + subparsers = parser.add_subparsers() + + # List available profiles + parser_list = subparsers.add_parser("list") + parser_list.add_argument("list", + help="List available profiles", + action="store_true") + parser_list.set_defaults(func=process_list) + + # Switch profile to a different profile + parser_profile = subparsers.add_parser("profile") + parser_profile.add_argument("profile", + help="Switch profile", + type=str) + parser_profile.add_argument("--secured", + help="Add hardening tunables to this profile ", + action="store_true") + parser_profile.set_defaults(func=process_profile) + + # Show the current profile that is set + parser_active = subparsers.add_parser("active") + parser_active.add_argument("active", + help="Show current active profile", + action="store_true") + parser_active.set_defaults(func=process_active) + + # Start autotune service + parser_apply = subparsers.add_parser("apply") + parser_apply.add_argument("apply", + help="Apply autotune tunables with current " + "active profile", + action="store_true") + parser_apply.set_defaults(func=process_apply) + + # Stop autotune service + parser_rollback = subparsers.add_parser("rollback") + parser_rollback.add_argument("rollback", + help="Rollback autotune tunables and restore " + "system defaults", + action="store_true") + 
parser_rollback.set_defaults(func=process_rollback) + + # Show current status of autotune on the running system + parser_status = subparsers.add_parser("status") + parser_status.add_argument("status", + help="Show autotune status", + action="store_true") + parser_status.set_defaults(func=process_status) + + # List all tunables that are associated with current set profile + parser_showconfig = subparsers.add_parser("showconfig") + parser_showconfig.add_argument("showconfig", + help="Show configuration for active " + "profile", + action="store_true") + parser_showconfig.set_defaults(func=process_showconfig) + + # User can override autotune's default value as per their requirement + parser_override = subparsers.add_parser("override") + override_help = \ + "Override auto generated config. Format: ::" + parser_override.add_argument("override", + help=override_help, + type=str) + parser_override.set_defaults(func=process_override) + + # User can exclude autotune from not modifying a tunable + parser_exclude = subparsers.add_parser("exclude") + exclude_help = \ + "Exclude auto generated config. Format: :" + parser_exclude.add_argument("exclude", + help=exclude_help, + type=str) + parser_exclude.set_defaults(func=process_exclude) + + # Remove overridden/excluded tunable and fall back to autotune defaults + parser_delete = subparsers.add_parser("delete") + delete_help = \ + "Delete user modified config. 
Format: :" + parser_delete.add_argument("delete", + help=delete_help, + type=str) + parser_delete.set_defaults(func=process_delete) + + return (parser.parse_args()) + + +def main(): + args = add_arguments_and_parse() + + try: + args.func(args) + sys.exit(0) + except Ec2AutotuneError, e: + print(e.msg) + sys.exit(-1) + +if __name__ == '__main__': + main() diff --git a/scripts/ec2sys_autotune_start b/scripts/ec2sys_autotune_start new file mode 100755 index 0000000..b2e4d24 --- /dev/null +++ b/scripts/ec2sys_autotune_start @@ -0,0 +1,32 @@ +#!/usr/bin/python + +import os +import sys +from syslog import syslog +from ec2sys_autotune.ec2_autotune_start import Ec2AutotuneStart +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError +import argparse + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--config', + default="/etc/ec2sys-autotune.cfg", + help='Configuration file to use', + type=str) + parser.add_argument('--genconfigonly', + default=False, + help='Generate configuration file for active profile', + action="store_true") + + args = parser.parse_args() + autotune = Ec2AutotuneStart(args.config, args.genconfigonly) + try: + autotune.start() + except Ec2AutotuneError, e: + syslog(e.msg) + sys.exit(-1) + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/scripts/ec2sys_autotune_stop b/scripts/ec2sys_autotune_stop new file mode 100755 index 0000000..904eb26 --- /dev/null +++ b/scripts/ec2sys_autotune_stop @@ -0,0 +1,28 @@ +#!/usr/bin/python + +import os +import sys +from syslog import syslog +from ec2sys_autotune.ec2_autotune_stop import Ec2AutotuneStop +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError +import argparse + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--config', + default="/etc/ec2sys-autotune.cfg", + help='Configuration file to use', + type=str) + + args = parser.parse_args() + autotune = Ec2AutotuneStop(args.config) + try: + autotune.stop() + except 
Ec2AutotuneError, e: + syslog(e.msg) + sys.exit(-1) + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..cb91ef2 --- /dev/null +++ b/setup.py @@ -0,0 +1,38 @@ +#!/usr/bin/python + +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +ec2sys_autotune_classifiers = [ + "Development Status :: Preview", + "Environment :: Amazon Linux 2", + "Intended Audience :: AWS EC2 AL2 consumers", + "Operating System :: Amazon Linux 2", + "Programming Language :: Python", + "License :: GNU GENERAL PUBLIC LICENSE 2.0", + "Topic :: Utilities", +] + +with open("README.md", "r") as fp: + ec2sys_autotune_long_description = fp.read() + +setup(name="ec2sys-autotune", + version='1.0.0', + author="Vallish Vaidyeshwara", + author_email="vallish@amazon.com", + url="https://github.com/awslabs/ec2sys-autotune", + scripts=["scripts/ec2sys_autotune_start", + "scripts/ec2sys_autotune_stop", + "scripts/autotune"], + data_files=[("/usr/lib/systemd/system", ["unit/autotune.service"]), + ("/etc", ["config/ec2sys-autotune.cfg"]), + ("/etc/ec2sys-autotune.d", ["config/user.ini"]), + ("/var/lib/ec2sys-autotune", [])], + packages=["ec2sys_autotune"], + package_dir={"ec2sys_autotune": "src/ec2sys_autotune"}, + description="Amazon Linux Kernel Autotuning", + long_description=ec2sys_autotune_long_description, + license="GNU GENERAL PUBLIC LICENSE 2.0", + classifiers=ec2sys_autotune_classifiers) diff --git a/src/ec2sys_autotune/__init__.py b/src/ec2sys_autotune/__init__.py new file mode 100644 index 0000000..dcf2c80 --- /dev/null +++ b/src/ec2sys_autotune/__init__.py @@ -0,0 +1 @@ +# Placeholder diff --git a/src/ec2sys_autotune/ec2_autotune_lock.py b/src/ec2sys_autotune/ec2_autotune_lock.py new file mode 100644 index 0000000..8b8f3ce --- /dev/null +++ b/src/ec2sys_autotune/ec2_autotune_lock.py @@ -0,0 +1,36 @@ +import os +import sys +import fcntl +import time +from syslog import syslog +from 
ec2sys_autotune.ec2_instance_exception \ + import Ec2AutotuneError + + +class Lock(object): + ''' + + This is a blocking lock implementation. However this will block + and retry only MAX_TRIES for every SLEEP_TIME seconds before failing. + ''' + def __init__(self, lock): + SLEEP_TIME = 3 + MAX_TRIES = 10 + for count in range(0, MAX_TRIES): + try: + self.lock = open(lock, "r+") + fcntl.lockf(self.lock, fcntl.LOCK_EX | fcntl.LOCK_NB) + syslog("Acquired lock") + return + except IOError, e: + syslog("Retrying lock on error {0}".format(e.errno)) + self.lock.close() + time.sleep(SLEEP_TIME) + continue + raise Ec2AutotuneError( + "Couldn't acquire the lock during service startup.") + + def __del__(self): + fcntl.lockf(self.lock, fcntl.LOCK_UN) + self.lock.close() + syslog("Released lock") diff --git a/src/ec2sys_autotune/ec2_autotune_start.py b/src/ec2sys_autotune/ec2_autotune_start.py new file mode 100644 index 0000000..f6b21d9 --- /dev/null +++ b/src/ec2sys_autotune/ec2_autotune_start.py @@ -0,0 +1,133 @@ +import os +import sys +import fcntl +from syslog import syslog +from ec2sys_autotune.ec2_instance_cfg_gen import Ec2InstanceCfgGen +from ec2sys_autotune.udp_server_cfg_gen import UdpServerCfgGen +from ec2sys_autotune.placement_group_cfg_gen import PlacementGroupCfgGen +from ec2sys_autotune.ec2_instance_cfg_engine import Ec2InstanceCfgEngine +try: + from configparser import RawConfigParser +except ImportError: + # Backward compatibility with python versions earlier to 3.0 + from ConfigParser import RawConfigParser +from ec2sys_autotune.ec2_autotune_lock import Lock + +# Exceptions +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneEexists + + +class Ec2AutotuneStart(object): + ''' + + Generate config file for a particular profile + ''' + + def __init__(self, config="/etc/ec2sys-autotune.cfg", + genconfigonly=False): + self.config = config + self.genconfigonly = genconfigonly + 
self.cfg_file = RawConfigParser(allow_no_value=True) + self.cfg_file.read(self.config) + + # Read in the config directory location + if (self.cfg_file.has_option("DEFAULT", "CONFIG_DIR") is False): + raise Ec2AutotuneError("Missing CONFIG_DIR in config file.") + self.config_dir = self.cfg_file.get("DEFAULT", "CONFIG_DIR") + + # Read in the user config file + if (self.cfg_file.has_option("DEFAULT", "USER_CONFIG") is False): + raise Ec2AutotuneError("Missing USER_CONFIG in config file.") + self.user_config = self.cfg_file.get("DEFAULT", "USER_CONFIG") + self.usercfg_log = RawConfigParser(allow_no_value=True) + self.usercfg_log.read("{0}/{1}".format(self.config_dir, + self.user_config)) + + return + + def start(self): + ''' + + Start Autotune service + ''' + # Disallow multiple instances of the same service + try: + lock_obj = Lock(self.config) + except Ec2AutotuneError, e: + raise (e) + + # Profile to tune the system with + if (self.usercfg_log.has_option("profile", "PROFILE") is False): + raise Ec2AutotuneError("Missing PROFILE in user config file.") + PROFILE = self.usercfg_log.get("profile", "PROFILE") + + # State dir + if (self.cfg_file.has_option("DEFAULT", "STATE_DIR") is False): + raise Ec2AutotuneError("Missing STATE_DIR in config file.") + STATE_DIR = self.cfg_file.get("DEFAULT", "STATE_DIR") + + # Log file + if (self.cfg_file.has_option("DEFAULT", "LOG_FILE") is False): + raise Ec2AutotuneError("Missing LOG_FILE in config file.") + LOG_FILE = self.cfg_file.get("DEFAULT", "LOG_FILE") + + # Status file + if (self.cfg_file.has_option("DEFAULT", "STATUS") is False): + raise Ec2AutotuneError("Missing STATUS in config file.") + STATUS = self.cfg_file.get("DEFAULT", "STATUS") + if (os.path.isfile(STATUS) is True and self.genconfigonly is False): + raise Ec2AutotuneError("Autotune profile {0} is already active" + .format(PROFILE)) + + try: + instance = None + # Instantiate appropriate object for the profile + if ("base" in PROFILE): + instance = 
Ec2InstanceCfgGen(self.config_dir, PROFILE) + elif ("udp-server" in PROFILE): + instance = UdpServerCfgGen(self.config_dir, PROFILE) + elif ("placement-group" in PROFILE): + instance = PlacementGroupCfgGen(self.config_dir, PROFILE) + else: + raise Ec2AutotuneError("Invalid role {0} specified." + .format(PROFILE)) + + # Generate config tunables for sub systems + instance.tune() + + syslog("Configuration {0} role has been generated." + .format(PROFILE)) + except Ec2AutotuneEexists, e: + # Config file exists from previous instance, reuse the same + syslog(e.msg) + except Ec2AutotuneError, e: + # Fatal error, config file could not be generated + raise (e) + + # If the request was to generate config only, return now + if (self.genconfigonly is True): + return + + # Configure system with generated config file + try: + instance = None + instance = Ec2InstanceCfgEngine("{0}/{1}".format(STATE_DIR, + LOG_FILE), + self.config_dir, + PROFILE, + "{0}/{1}".format(self.config_dir, + self.user_config)) + # Core function of configuration engine + instance.configure_system_settings() + + # Mark the service as started + with open(STATUS, "w") as status_file: + pass + + syslog("System has now been configured with EC2 " + "AWS Autotune {0} profile.".format(PROFILE)) + except Ec2AutotuneError, e: + raise (e) + + return diff --git a/src/ec2sys_autotune/ec2_autotune_stop.py b/src/ec2sys_autotune/ec2_autotune_stop.py new file mode 100644 index 0000000..ef65df4 --- /dev/null +++ b/src/ec2sys_autotune/ec2_autotune_stop.py @@ -0,0 +1,67 @@ +import os +import sys +from syslog import syslog +from ec2sys_autotune.ec2_instance_cfg_engine import Ec2InstanceCfgEngine +try: + from configparser import RawConfigParser +except ImportError: + # Backward compatibility with python versions earlier to 3.0 + from ConfigParser import RawConfigParser +from ec2sys_autotune.ec2_autotune_lock import Lock + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +class 
Ec2AutotuneStop(object): + ''' + + Restore system to earlier default state + ''' + + def __init__(self, config="/etc/ec2sys-autotune.cfg"): + self.config = config + self.cfg_file = RawConfigParser(allow_no_value=True) + self.cfg_file.read(self.config) + return + + def stop(self): + # Disallow multiple instances of the same service + try: + lock_obj = Lock(self.config) + except Ec2AutotuneError, e: + raise (e) + + # State directory - mandatory option in config file to stop service + if (self.cfg_file.has_option("DEFAULT", "STATE_DIR") is False): + raise Ec2AutotuneError("Missing STATE_DIR in config file.") + STATE_DIR = self.cfg_file.get("DEFAULT", "STATE_DIR") + + # Log file - mandatory option in config file to stop service + if (self.cfg_file.has_option("DEFAULT", "LOG_FILE") is False): + raise Ec2AutotuneError("Missing LOG_FILE in config file.") + LOG_FILE = self.cfg_file.get("DEFAULT", "LOG_FILE") + + # Status file - mandatory option in config file to stop service + if (self.cfg_file.has_option("DEFAULT", "STATUS") is False): + raise Ec2AutotuneError("Missing STATUS in config file.") + STATUS = self.cfg_file.get("DEFAULT", "STATUS") + if (os.path.isfile(STATUS) is False): + raise Ec2AutotuneError("Autotune is not active") + + try: + instance = None + # Restore the system settings to earlier state + instance = Ec2InstanceCfgEngine("{0}/{1}".format(STATE_DIR, + LOG_FILE)) + instance.restore_system_settings() + + # Clear the service status + os.remove(STATUS) + + syslog("EC2 AWS Autotune has restored original system settings " + "after clean up.") + except Ec2AutotuneError, e: + raise (e) + + return diff --git a/src/ec2sys_autotune/ec2_autotune_utils.py b/src/ec2sys_autotune/ec2_autotune_utils.py new file mode 100644 index 0000000..dbecd3a --- /dev/null +++ b/src/ec2sys_autotune/ec2_autotune_utils.py @@ -0,0 +1,80 @@ +import os +import sys +import shlex +from subprocess import call +from subprocess import CalledProcessError, check_output +from subprocess import 
Popen, PIPE + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +def exec_cmds(cmds): + ''' + + Execute passed in command + ''' + devnull = open(os.devnull, 'wb') + for cmd in cmds: + try: + retcode = call(shlex.split(cmd), shell=False, + stderr=devnull, stdout=devnull) + if (retcode != 0): + raise Ec2AutotuneError("Failed to execute: {0}".format(cmd)) + except OSError, e: + raise Ec2AutotuneError("Exception encountered while trying to " + "execute: {0} error: {1}".format(cmd, e)) + return + + +def get_cmd_output(cmd): + ''' + + Execute passed in command and return its output + ''' + try: + # Strip trailing newline feed + output = check_output(shlex.split(cmd), shell=False)[:-1] + return output + except CalledProcessError, e: + raise Ec2AutotuneError("Exception encountered while trying to execute:" + " {0} error: {1}".format(cmd, e.output)) + + +def get_piped_cmd_output(cmd1, cmd2): + try: + p1 = Popen(shlex.split(cmd1), stdout=PIPE, shell=False) + p2 = Popen(shlex.split(cmd2), stdin=p1.stdout, + stdout=PIPE, shell=False) + p1.stdout.close() + # Strip trailing newline feed + output = (p2.communicate()[0])[:-1] + return output + except CalledProcessError, e: + raise Ec2AutotuneError("Exception encountered while trying to execute:" + " {0} | {1} error: {2}".format(cmd1, + cmd2, + e.output)) + + +def read_sysfs_file(sysfs_file): + try: + with open(sysfs_file) as fd: + # Strip trailing newline feed + output = fd.read()[:-1] + return output + except IOError, e: + raise Ec2AutotuneError( + "Error while trying to query {0}, error {1}.".format(sysfs_file, + e.errno)) + + +def write_sysfs_file(sysfs_file, value): + try: + with open(sysfs_file, "wb") as fd: + fd.write(value) + return + except IOError, e: + raise Ec2AutotuneError( + "Error while trying to write {0}, error {1}.".format(sysfs_file, + e.errno)) diff --git a/src/ec2sys_autotune/ec2_instance_cfg_engine.py b/src/ec2sys_autotune/ec2_instance_cfg_engine.py new file mode 100644 
index 0000000..93b2ca0 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_cfg_engine.py @@ -0,0 +1,625 @@ +''' + +EC2 Amazon Linux Kernel Autotuning configuration engine. + +Configuration engine picks up the configuration and applies +these tunables onto the running instance. Configurations have +to be generated by invoking ec2_instance_cfg_gen before +calling ec2_instance_cfg_engine. If a valid configuration does +not exist, then engine bails out and fails. + +Configuration engine saves system state and upon shutdown of +ec2sys-autotune service, all earlier system settings are +restored. +''' + +import os +import sys +import stat +import json +import glob +from syslog import syslog +try: + from configparser import RawConfigParser +except ImportError: + # Backward compatibility with python versions earlier to 3.0 + from ConfigParser import RawConfigParser + +from ec2sys_autotune.ec2_instance_fetch_cfg import fetch_configuration +from ec2sys_autotune.ec2_autotune_utils import exec_cmds +from ec2sys_autotune.ec2_autotune_utils import get_cmd_output +from ec2sys_autotune.ec2_autotune_utils import get_piped_cmd_output +from ec2sys_autotune.ec2_autotune_utils import read_sysfs_file +from ec2sys_autotune.ec2_autotune_utils import write_sysfs_file + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + +# Types of tunables being tuned by autotune +SERVICE = "service" +SYSCTL = "sysctl" +SYSFS = "sysfs" +CPU = "cpu" + + +class Ec2InstanceCfgEngine(object): + ''' + + Core configuration engine class to configure + and restore system settings. + + Need to instantiate by passing in log file, config_dir, profile and + user_profile. Recovery instances need only log file to replay from. 
+ ''' + def __init__(self, log_file, config_dir=None, profile=None, + user_profile=None): + # In-memory log object to save system settings + self.log_object = None + + # Dictionary of types of tunables and their service functions + self.tunables = { # tunable : service functions + SERVICE: {"get": self.get_service_state, + "set": self.set_service_state}, + SYSCTL: {"get": self.get_sysctl_value, + "set": self.set_sysctl_value}, + SYSFS: {"get": self.get_sysfs_value, + "set": self.set_sysfs_value}, + CPU: {"get": self.get_cpu_value, + "set": self.set_cpu_value}} + + ''' + Recovery instance do not need config_dir and profiles. + However if this instance is going to configure tunables, + then log file, config_dir, profile and user_profile + need to be passed in. Validate passed in params. + ''' + self.recovery_instance = (config_dir is None and + profile is None and + user_profile is None) + if (self.recovery_instance is False): # Instance to configure tunable + # Bail out if auto generated profile doesn't exist + self.auto_profile = "{0}/{1}.ini".format(config_dir, profile) + if (os.path.isfile(self.auto_profile) is False): + # Fatal error, auto profile doesn't exist + raise Ec2AutotuneError( + "Auto generated tunables file {0} does not " + "exist.".format(self.auto_profile)) + + # Bail out if user config profile doesn't exist + self.user_profile = user_profile + if (os.path.isfile(self.user_profile) is False): + # Fatal error, user profile doesn't exist + raise Ec2AutotuneError( + "User customized tunables file {0} " + "does not exist.".format(self.user_profile)) + + # Validate the tunables to be set (dry run) + try: + self.validate_tunables_to_set() + except Ec2AutotuneError, e: + raise Ec2AutotuneError( + "Validation of tunables to be set " + "failed: {0}".format(e.msg)) + + # Read in entries from /etc/sysctl.d/*.conf + tmp_lines = [] + self.sysctl_conf = [] + for filename in glob.glob("/etc/sysctl.d/*.conf"): + tmp_lines = [ln.rstrip('\n') for ln in 
open(filename)] + for ln in tmp_lines: + # Strip comments and blank lines + ln = ln.lstrip() + if (len(ln) == 0 or ln[:1] == "#"): + continue + self.sysctl_conf.append(ln) + + self.log_file = log_file + + return + + def start_recovery_logging(self): + ''' + + Start logging all tuned values for recovery at service shutdown + ''' + self.log_object = { + SERVICE: [], + SYSCTL: [], + SYSFS: [], + CPU: []} + return + + def stop_recovery_logging(self): + ''' + + Stop and commit all logs + ''' + # Save the log in json format on disk + json_log = json.dumps(self.log_object, indent=4, + separators=(',', ': ')) + log_file = open(self.log_file, "wb") + # Make sure log file is not accessed by anybody + os.chmod(self.log_file, 0600) + log_file.write(json_log) + log_file.close() + + self.log_object = None + return + + def set_tunable(self, tunable, cmd, + get_tunable, log_index, + new_value, orig_value): + ''' + + Core function that actually does the job of setting tunable. + orig_value will be passed only during recovery phase. orig_value + param will be NULL during system configuration phase. + ''' + # This is the action of current function invocation + action = "{0} = {1}".format(tunable, new_value) + + # Fetch current tunable value in the system + try: + current_value = get_tunable(tunable) + except Ec2AutotuneError, e: + syslog(e.msg) + return + + ''' + Current value is different from expected original value, then + this tunable is being tweaked by system user and should be left + untouched. 
+ ''' + if (orig_value is not None and orig_value != current_value): + return + + # If new value to be set is same as current value, nothing to set + if (new_value == current_value): + return + + try: + # Set new value & log the new value for recovery + exec_cmds(cmd) + syslog("set {0}".format(action)) + if (self.recovery_instance is False and current_value is not None): + assert(orig_value is None) + # Configuring system: log system changes + tmp_log_object = {"Name": tunable, + "Original": current_value, + "Changed": new_value} + self.log_object[log_index].append(tmp_log_object) + except Ec2AutotuneError, e: + syslog(e.msg) + return + + def safe_set_tunable(self, tunable, data, + get_tunable, log_index, + new_value, orig_value): + ''' + + This function has the same functionality as set_tunable. + This version uses python's internal write library call + to set the tunable instead of forking to shell. We should + deprecate calling set_tunable as much as possible in future + to avoid forking to shell. + ''' + + ''' + Core function that actually does the job of setting tunable. + orig_value will be passed only during recovery phase. orig_value + param will be NULL during system configuration phase. + ''' + # This is the action of current function invocation + action = "{0} = {1}".format(tunable, new_value) + + # Fetch current tunable value in the system + try: + current_value = get_tunable(tunable) + except Ec2AutotuneError, e: + syslog(e.msg) + return + + ''' + Current value is different from expected original value, then + this tunable is being tweaked by system user and should be left + untouched. 
+ ''' + if (orig_value is not None and orig_value != current_value): + return + + # If new value to be set is same as current value, nothing to set + if (new_value == current_value): + return + + try: + # Set new value & log the new value for recovery + write_sysfs_file(tunable, data) + syslog("set {0}".format(action)) + if (self.recovery_instance is False and current_value is not None): + assert(orig_value is None) + # Configuring system: log system changes + tmp_log_object = {"Name": tunable, + "Original": current_value, + "Changed": new_value} + self.log_object[log_index].append(tmp_log_object) + except Ec2AutotuneError, e: + syslog(e.msg) + return + + def parse_tunable_output(self, output): + ''' + + Tunables values are in four formats (for tunables we are + working): + 1: number + 2: [foo] bar + 3: foo + 4: number1\tnumber2\tnumber2 + Second format above is tricky as the value set is the one enclosed + within []. In cases like these, we need to extract the string + enclosed between [] and return it to caller. 
+ For all format, convert the output into a list and return + ''' + if (output is None or len(output) == 0): + return None + output = output.strip() + start = output.find("[") + if (start != -1): + end = output.find("]") + if (end == -1): # Should never happen + raise Ec2AutotuneError( + "Parsing error of {0}".format(output)) + output = output[start+1:end] + output = output.split() + if (output[0].isdigit() is True): + output = map(int, output) + return output + + def convert_input_value(self, value): + ''' + + value will be a list, return string representation + ''' + if (isinstance(value, list) is False): + raise Ec2AutotuneError( + "input value is in an invalid format.") + + length = len(value) + value = " ".join(map(str, value)) + # Embed multiple values inside strings + if (length > 1): + value = "\"{0}\"".format(value) + return value + + def get_service_state(self, service): + ''' + + Returns the current status of service in the system + ''' + try: + output = get_piped_cmd_output( + "/bin/systemctl status {0}".format(service), + "/bin/grep Active:") + if ("running" in output): + return (["start"]) + elif ("dead" in output): + return (["stop"]) + else: + raise Ec2AutotuneError( + "{0} package not installed.".format(service)) + except Ec2AutotuneError, e: + raise e + + def set_service_state(self, service, new_state, orig_state=None): + ''' + + Set the passed in service state. 
+ ''' + try: + tmp_new_state = self.convert_input_value(new_state) + except Ec2AutotuneError, e: + syslog("Failed to set {0} = {1}".format(service, e.msg)) + return + cmd = ["/bin/systemctl {0} {1}".format(tmp_new_state, service)] + return self.set_tunable(service, cmd, + self.get_service_state, SERVICE, + new_state, orig_state) + + def get_sysctl_value(self, sysctl_setting): + ''' + + Get value of a particular kernel setting + ''' + try: + output = get_cmd_output("/sbin/sysctl {0}".format(sysctl_setting)) + if (len(output) == 0): + return None + output = output.split("=")[1] + return self.parse_tunable_output(output) + except Ec2AutotuneError, e: + raise e + + def set_sysctl_value(self, sysctl_setting, new_value, orig_value=None): + ''' + + Set value of a particular kernel setting. + ''' + + # If the value to be configured is also being modified as part of + # /etc/sysctl.d/*.conf, then this is a conflict. + for ln in self.sysctl_conf: + if (sysctl_setting in ln): + syslog("Skipping {0} as it conflicts with " + "/etc/sysctl.d framework.".format(sysctl_setting)) + return + + try: + tmp_new_value = self.convert_input_value(new_value) + except Ec2AutotuneError, e: + syslog("Failed to set {0} = {1}".format(sysctl_setting, e.msg)) + return + cmd = ["/sbin/sysctl -q -w {0}={1}".format(sysctl_setting, + tmp_new_value)] + + return self.set_tunable(sysctl_setting, cmd, + self.get_sysctl_value, SYSCTL, + new_value, orig_value) + + def get_sysfs_value(self, sysfs_file): + ''' + + Get value of a particular sysfs setting + ''' + try: + output = read_sysfs_file(sysfs_file) + if (len(output) == 0): + return None + return self.parse_tunable_output(output) + except Ec2AutotuneError, e: + raise e + + def set_sysfs_value(self, sysfs_file, new_value, orig_value=None): + ''' + + Set value of a particular sysfs setting. + orig_value will be passed only during recovery phase. orig_value + param will be NULL during system configuring phase. 
+ ''' + if (os.path.isfile(sysfs_file) is False): + syslog("invalid sysfs_file={0}".format(sysfs_file)) + return + + try: + tmp_new_value = self.convert_input_value(new_value) + except Ec2AutotuneError, e: + syslog("Failed to set {0} = {1}".format(sysfs_file, e.msg)) + return + + return self.safe_set_tunable(sysfs_file, tmp_new_value, + self.get_sysfs_value, SYSFS, + new_value, orig_value) + + def get_cpu_value(self, cpu_state): + ''' + + Get value of a particular CPU state + ''' + try: + retcode = 0 + output = None + # Query frequency governor + if (cpu_state == "p-state"): + # Work only with intel_pstate driver + driver = get_piped_cmd_output( + "/bin/cpupower frequency-info --driver", + "/bin/grep \"driver: intel_pstate\"") + + if(len(driver) > 0): + # Return current governor being used + output = get_piped_cmd_output( + "/bin/cpupower frequency-info", + "/bin/grep \"The governor\"") + start = output.find("\"") + if (start == -1): + raise Ec2AutotuneError( + "Parsing error of current " + "frequency-info governor") + end = output.find("\"", start+1) + if (end == -1): + raise Ec2AutotuneError( + "Parsing error of current " + "frequency-info governor") + return ([output[start+1:end]]) + # Query CPU idle state + elif (cpu_state == "c-state"): + # Work only with intel_idle driver + driver = get_piped_cmd_output( + "/bin/cpupower idle-info --silent", + "/bin/grep \"driver: intel_idle\"") + + if(len(driver) > 0): + # Number of idle states + output = get_piped_cmd_output( + "/bin/cpupower idle-info", + "/bin/grep \"Number of idle states:\"") + max_states = int(output[(output.index("states: ") + 8):]) + + # Available idle states + idle_states = [] + output = get_piped_cmd_output( + "/bin/cpupower idle-info", + "/bin/grep \"Available idle states:\"") + beg = 0 + end = len(output) + for state in range(max_states): + try: + idx = output.rindex(" ", beg, end) + 1 + except: + raise Ec2AutotuneError( + "Parsing error of available idle states") + if (idx == -1): + raise 
Ec2AutotuneError( + "Parsing error of available idle states") + idle_states.append(output[idx:end]) + end = idx - 1 + idle_states.reverse() + + # Return deepest enabled state + output = get_piped_cmd_output( + "/bin/cpupower idle-info", + "/bin/grep DISABLED") + if (len(output) == 0): + # No state is disabled, return deepest state + return ([idle_states[max_states - 1]]) + else: + index = output.index(" ") + output = output[:index] + return ([idle_states[idle_states.index(output) - 1]]) + else: + # State should always be either p-state or c-state + raise Ec2AutotuneError( + "Invalid {0} state".format(cpu_state)) + + except Ec2AutotuneError, e: + raise e + + def set_cpu_value(self, cpu_state, new_value, orig_value=None): + ''' + + Set value of a particular CPU state. + orig_value will be passed only during recovery phase. orig_value + param will be NULL during system configuring phase. + We set this only on systems which has intel drivers (no acpi driver). + ''' + try: + tmp_new_value = self.convert_input_value(new_value) + except Ec2AutotuneError, e: + syslog("Failed to set {0} = {1}".format(cpu_state, e.msg)) + return + + if (cpu_state == "p-state"): + ''' + P-state: value has to be one of the supported governors + by intel_pstate driver. 
+ ''' + cmd = ["/bin/cpupower frequency-set -g {0}".format(tmp_new_value)] + elif (cpu_state == "c-state"): + ''' + C-state: value has to be one of the supported idle + states by intel_idle driver + ''' + idle_states = { # state : latency + "POLL": 0, + "C1": 2, + "C1E": 10, + "C3": 40, + "C6": 133} + if (not(tmp_new_value in idle_states)): + raise Ec2AutotuneError( + "Invalid value for c-state = " + "{0}".format(tmp_new_value)) + cmd = (["/bin/cpupower idle-set --enable-all", + "/bin/cpupower idle-set --disable-by-latency {0}" + .format(str(idle_states[tmp_new_value] + 1))]) + else: + raise Ec2AutotuneError( + "Invalid CPU state {0}".format(cpu_state)) + return self.set_tunable(cpu_state, cmd, + self.get_cpu_value, CPU, + new_value, orig_value) + + def configure_system(self, section, configure, dry_run=False): + if (dry_run is True): + fetch_configuration(self.auto_profile, + self.user_profile, + section, + None, + None, + None, + configure) + else: + fetch_configuration(self.auto_profile, + self.user_profile, + section, + configure, + None, + configure, + None) + + return + + def validate_tunables_to_set(self): + ''' + + Function that validates the tunable before applying + ''' + # Recovery instance should not be trying to configure + if (self.recovery_instance is True): + raise Ec2AutotuneError( + "Incorrect instantiation and object usage.") + + # Validate tunables + for tunable, functions in self.tunables.items(): + try: + self.configure_system(tunable, + functions["get"], + True) + except Ec2AutotuneError, e: + raise e + + return + + def configure_system_settings(self): + ''' + + Main function that configures the system + ''' + self.start_recovery_logging() + # Recovery instance should not be trying to configure + if (self.recovery_instance is True): + raise Ec2AutotuneError( + "Incorrect instantiation and object usage.") + + # Configure tunables and save their defaults + for tunable, functions in self.tunables.items(): + self.configure_system(tunable, + 
functions["set"]) + self.tunables = None + + self.stop_recovery_logging() + return + + def restore_system(self, list_log, tunable, restore): + ''' + + Core function used by restore_system_settings() to read-in + individual log entries and restore. + ''' + tunable_list = list_log[tunable] + for item in tunable_list: + restore(item["Name"], item["Original"], item["Changed"]) + return + + def restore_system_settings(self): + ''' + + Function called during shutdown to restore system to eariler default + ''' + # Read log file for restoring to earlier state (json format) + if (os.path.isfile(self.log_file) is False): + raise Ec2AutotuneError( + "Recovery log file does not exist, " + "failed to revert original settings.") + + log_file = open(self.log_file, "rb") + list_log = json.load(log_file) + log_file.close() + + # Restore tunables to their earlier defaults + for tunable, functions in self.tunables.items(): + self.restore_system(list_log, + tunable, + functions["set"]) + self.tunables = None + + return diff --git a/src/ec2sys_autotune/ec2_instance_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_cfg_gen.py new file mode 100644 index 0000000..947c0f9 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_cfg_gen.py @@ -0,0 +1,321 @@ +''' + +EC2 Amazon Linux Kernel Autotuning config generator for base instance role. + +Amazon Web Services EC2 fleet hosts a wide selection of instance types +optimized to fit different customer use cases. Instance types are carved +out for varying combinations of CPU, Memory, Storage and Networking capacity +to allow customers to pick the correct instance type for their workload. + +Amazon Linux Kernel provides a stable, secure and high performance execution +environment for customer applications running on Amazon EC2. Default values +for kernel tunables is not the best and optimized value for all instance +types on EC2. 
To provide best customer experience for Amazon Linux Kernel +consumers on EC2, this script generates kernel tunable for different +instance types. This config generator is workload agnostic and the configs +generated are for base instance role. + +Workload specific tunable config generator needs to inherit this base +class and then generate tunable configs. +''' + +import os +import sys +import stat +import json +import math +import requests +from syslog import syslog +try: + from configparser import RawConfigParser +except ImportError: + # Backward compatibility with python versions earlier to 3.0 + from ConfigParser import RawConfigParser +from ec2sys_autotune.ec2_autotune_utils import get_cmd_output +from ec2sys_autotune.ec2_instance_types import general_purpose +from ec2sys_autotune.ec2_instance_types import compute_optimized +from ec2sys_autotune.ec2_instance_types import memory_optimized +from ec2sys_autotune.ec2_instance_types import accelerated_computing +from ec2sys_autotune.ec2_instance_types import storage_optimized +from ec2sys_autotune.ec2_instance_types import ec2_instance_types + +# Composition classes +from ec2sys_autotune.ec2_instance_services_cfg_gen \ + import CfgGenPerfOptimizingServices +from ec2sys_autotune.ec2_instance_vm_cfg_gen \ + import CfgGenVirtualMemorySettings +from ec2sys_autotune.ec2_instance_network_cfg_gen \ + import CfgGenNetworkSettings +from ec2sys_autotune.ec2_instance_kernel_cfg_gen import CfgGenKernelSettings +from ec2sys_autotune.ec2_instance_storage_cfg_gen \ + import CfgGenStorageSettings +from ec2sys_autotune.ec2_instance_pm_cfg_gen \ + import CfgGenPowerManagementSettings + +# Exceptions +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneEexists + + +# Config file message intended for end user +COMMENT = "# Do not modify this auto-generated config file, instead " \ + "customize tuning in user.ini as per your requirements." 
+PROFILE = "profile" +NAME = "name" +INSTANCE = "instance" +VERSION = "version" +# Bump this number for every new release +RELEASE = "1.0.0" + +# Types of tunables being tuned by autotune +SERVICE = "service" +SYSCTL = "sysctl" +SYSFS = "sysfs" +CPU = "cpu" + +# EC2 instance classes +GENERAL_PURPOSE = "general_purpose" +COMPUTE_OPTIMIZED = "compute_optimized" +MEMORY_OPTIMIZED = "memory_optimized" +ACCELERATED_COMPUTING = "accelerated_computing" +STORAGE_OPTIMIZED = "storage_optimized" + + +class Ec2InstanceCfgGen(object): + ''' + + Common base class config generator for all EC2 instances + with no role (no workload specific tuning). + ''' + + def __init__(self, config_dir, profile): + ''' + + Need to instantiate by passing in config file and profile name + ''' + # In-memory config object to generate system settings + self.cfg_object = None + + self.profile = None + self.auto_profile = "{0}/{1}.ini".format(config_dir, profile) + + # Query the instance type and instance class + try: + self.inst_type = self.get_instance_type() + self.inst_class = self.get_instance_class() + except Ec2AutotuneError, e: + raise e + + # Existing config file is for same profile, skip generation + if (os.path.isfile(self.auto_profile) is True): + cfg_file = RawConfigParser(allow_no_value=True) + cfg_file.read(self.auto_profile) + if (cfg_file.has_section(PROFILE) and + # Check for profile name to match with generated profile + cfg_file.has_option(PROFILE, NAME) and + cfg_file.get(PROFILE, NAME) == profile and + # Check current instance to match with generated profile + cfg_file.has_option(PROFILE, INSTANCE) and + cfg_file.get(PROFILE, INSTANCE) == + self.get_instance_data("Instance Type") and + # Check version to match with generated profiles version + cfg_file.has_option(PROFILE, VERSION) and + cfg_file.get(PROFILE, VERSION) == RELEASE): + raise Ec2AutotuneEexists( + "Configuration file {0} already " + "exists.".format(self.auto_profile)) + + self.profile = profile + + return + + def 
start_cfg_logging(self): + ''' + + Start logging tunables for generating configuration file + ''' + self.cfg_object = RawConfigParser(allow_no_value=True) + + # Header section + self.cfg_object.add_section(PROFILE) + self.cfg_object.set(PROFILE, COMMENT) + self.cfg_object.set(PROFILE, NAME, self.profile) + self.cfg_object.set(PROFILE, INSTANCE, + self.get_instance_data("Instance Type")) + self.cfg_object.set(PROFILE, VERSION, RELEASE) + + # List of services + self.cfg_object.add_section(SERVICE) + + # List of sysctl tunables + self.cfg_object.add_section(SYSCTL) + + # List of sysfs tunables + self.cfg_object.add_section(SYSFS) + + # List of CPU tunables + self.cfg_object.add_section(CPU) + return + + def stop_cfg_logging(self): + ''' + + Stop and commit all configuration + ''' + if (self.cfg_object is None): + return + + cfg_file = open(self.auto_profile, "wb") + self.cfg_object.write(cfg_file) + cfg_file.close() + # Set appropriate permission on config file + os.chmod(self.auto_profile, 0744) + self.cfg_object = None + return + + def get_instance_data(self, name): + ''' + + Return instance data for a particular property + ''' + try: + index = self.inst_type[0].index(name) + return self.inst_type[1][index] + except ValueError: + raise Ec2AutotuneError( + "Error retrieving {0} from local EC2 " + "instance metadata.".format(name)) + + def get_instance_class(self): + ''' + + Return the instance class for which this instance belongs + ''' + for instance_class in ec2_instance_types: + if (self.inst_type[1] in instance_class): + if (instance_class == general_purpose): + return GENERAL_PURPOSE + elif (instance_class == compute_optimized): + return COMPUTE_OPTIMIZED + elif (instance_class == memory_optimized): + return MEMORY_OPTIMIZED + elif (instance_class == accelerated_computing): + return ACCELERATED_COMPUTING + elif (instance_class == storage_optimized): + return STORAGE_OPTIMIZED + raise Ec2AutotuneError("Error identifying instance class.") + + def 
get_instance_type(self): + ''' + + Query EC2 Metadata server and identify instance type + ''' + try: + output = get_cmd_output("/bin/ec2-metadata -t") + output = output.split(":")[1] + output = output.strip() + except Ec2AutotuneError, e: + raise e + + for instance_class in ec2_instance_types: + for instance_type in instance_class[1:]: + if (output == instance_type[0]): + return (instance_class[0], instance_type) + raise Ec2AutotuneError( + "{0} is not a supported instance type for " + "autotune.".format(output)) + + def write_config_entry(self, section, name, value): + ''' + + Worker function to check for stacking entries & to write + config in memory + ''' + assert (self.cfg_object is not None) + if (section is None or name is None or value is None): + return + + assert (self.cfg_object.has_section(section)) + if (self.cfg_object.has_option(section, name)): + syslog("Stacking entry for {0}.".format(name)) + self.cfg_object.remove_option(section, name) + if (isinstance(value, list) is False): + value = [value] + self.cfg_object.set(section, name, value) + return + + def set_service_config(self, service, state): + ''' + + Set the passed in service state in config + ''' + return self.write_config_entry(SERVICE, service, state) + + def set_sysctl_config(self, sysctl_setting, value): + ''' + + Set the passed in sysctl kernel value in config + ''' + return self.write_config_entry(SYSCTL, sysctl_setting, value) + + def set_sysfs_config(self, sysfs_file, value): + ''' + + Set the passed in sysfs value in config + ''' + return self.write_config_entry(SYSFS, sysfs_file, value) + + def set_cpupower_config(self, state, value): + ''' + + Set the passed in CPU power state's value in config + ''' + return self.write_config_entry(CPU, state, value) + + def setup_objects_composition(self): + self.set_config = {"service": self.set_service_config, + "sysctl": self.set_sysctl_config, + "sysfs": self.set_sysfs_config, + "cpu": self.set_cpupower_config} + self.composition_classes = 
["CfgGenPerfOptimizingServices", + "CfgGenVirtualMemorySettings", + "CfgGenNetworkSettings", + "CfgGenKernelSettings", + "CfgGenStorageSettings", + "CfgGenPowerManagementSettings"] + self.composition_objects = [] + + if ("-secured" in self.profile): + add_secured_config = True + else: + add_secured_config = False + for cl in self.composition_classes: + ns = globals()[cl] + self.composition_objects.append( + ns(self.set_config, + self.get_instance_data, + self.inst_class, + add_secured_config)) + return + + def teardown_objects_composition(self): + for obj in self.composition_objects: + del obj + obj = None + self.composition_objects = None + self.composition_classes = None + self.set_config = None + return + + def _tune(self): + self.setup_objects_composition() + for obj in self.composition_objects: + obj.tune() + self.teardown_objects_composition() + return + + def tune(self): + self.start_cfg_logging() + self._tune() + self.stop_cfg_logging() + return diff --git a/src/ec2sys_autotune/ec2_instance_exception.py b/src/ec2sys_autotune/ec2_instance_exception.py new file mode 100644 index 0000000..ffa0c63 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_exception.py @@ -0,0 +1,25 @@ +import os +import sys + + +class Ec2AutotuneError(Exception): + ''' + + Exception class to indicate fatal configuration + errors in autotune. This class can be extended + to include more debugging information. + ''' + def __init__(self, msg): + self.msg = msg + + +class Ec2AutotuneEexists(Exception): + ''' + + Exception class to indicate EEXISTS error + in autotune. This class can be extended + to include more debugging information. 
+ ''' + + def __init__(self, msg): + self.msg = msg diff --git a/src/ec2sys_autotune/ec2_instance_fetch_cfg.py b/src/ec2sys_autotune/ec2_instance_fetch_cfg.py new file mode 100644 index 0000000..38a9a7e --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_fetch_cfg.py @@ -0,0 +1,96 @@ +import os +import sys +import ast +try: + from configparser import RawConfigParser +except ImportError: + # Backward compatibility with python versions earlier to 3.0 + from ConfigParser import RawConfigParser + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + +EXCLUDE_TAG = "(exclude)" + + +def fetch_configuration(auto_profile, user_profile, section, + cb_user_overridden, cb_user_excluded, + cb_auto_generated, cb_dry_run): + ''' + + Core function used by configure_system() to + read-in configurations and configure the system. + process_showconfig() also uses this function to + show the configuration to end user. + + The implementation of this library function is + generic in that this function accepts call backs + for each of overridden, excluded, autogenerated and + dry run. A consumer is free to pick select or all + of the tunables. + + There are 2 profiles - auto_profile and user_profile. + auto_profiles are auto generated profiles by system. + user_profile are profiles customized by end user. 
Users + can opt to either: + 1) Override auto generated tunables + 2) Disable auto generated tunables + ''' + # Read auto generated config file (ini format) + autocfg_log = RawConfigParser(allow_no_value=True) + autocfg_log.read(auto_profile) + + # Read user configured config file (ini format) + usercfg_log = RawConfigParser(allow_no_value=True) + usercfg_log.read(user_profile) + + override = [] + exclude = [] + + # Fetch configuration for the system + if (autocfg_log.has_section(section) is True): + options = autocfg_log.options(section) + if (usercfg_log.has_section(section) is True): + user_options = usercfg_log.options(section) + for item in user_options: + value = usercfg_log.get(section, item) + if (value == EXCLUDE_TAG): + # Tunables that are excluded by user + exclude.append(item) + else: + # Tunables that are overridden by user + override.append(item) + user_options = None + for item in options: + if (cb_dry_run is not None): + # Dry run to validate tunables + try: + cb_dry_run(item) + except Ec2AutotuneError, e: + raise e + if (item in exclude): + # Excluded tunable + if (cb_user_excluded is not None): + cb_user_excluded(item) + continue + else: + value = None + if (item in override): + # Configure user configured value + value = usercfg_log.get(section, item) + value = value.split() + if (value[0].isdigit() is True): + value = map(int, value) + if (cb_user_overridden is not None): + cb_user_overridden(item, value) + else: + # Configure autotune configured value + value = autocfg_log.get(section, item) + if (value.find("[") != -1): + value = ast.literal_eval(value) + if (cb_auto_generated is not None): + cb_auto_generated(item, value) + else: + # Always generated as list + continue + return diff --git a/src/ec2sys_autotune/ec2_instance_kernel_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_kernel_cfg_gen.py new file mode 100644 index 0000000..c9efad2 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_kernel_cfg_gen.py @@ -0,0 +1,93 @@ +''' + +EC2 instance 
HasA kernel. +This class needs to be a composition of base class. +''' + +import os +import sys +from syslog import syslog +from ec2sys_autotune.ec2_autotune_utils import read_sysfs_file + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +class CfgGenKernelSettings(object): + ''' + + Generate config for kernel settings + ''' + def __init__(self, set_config, get_instance_data, + inst_class, add_secured_config): + self.set_sysctl_config = set_config["sysctl"] + self.set_sysfs_config = set_config["sysfs"] + self.get_instance_data = get_instance_data + self.add_secured_config = add_secured_config + return + + def __del__(self): + self.set_sysctl_config = None + self.set_sysfs_config = None + self.get_instance_data = None + return + + def tune(self): + ''' + + ##### Kernel Settings ##### + ''' + + # Controls whether core dumps will append the PID to the core filename. + self.set_sysctl_config("kernel.core_uses_pid", 1) + + # Controls the default maximum size of a message queue, in bytes + # XXX: Future work, should this be based on memory size? + self.set_sysctl_config("kernel.msgmnb", 1024 * 64) + + # Controls the maximum size of a message, in bytes + # XXX: Future work, should this be based on memory size? + self.set_sysctl_config("kernel.msgmax", 1024 * 64) + + # Controls the maximum shared segment size, in bytes + # XXX: Future work, should this be based on memory size? + self.set_sysctl_config("kernel.shmmax", 1024 * 1024 * 1024 * 64) + + # Controls the maximum total amount of shared memory, in pages + # XXX: Future work, should this be based on memory size? + self.set_sysctl_config("kernel.shmall", 1024 * 1024 * 1024 * 4) + + ''' + Set TSC as clock source for Xen based instances only. + KVM based instances have kvm-clock and are left untouched. + Few classic instance types like m1, early m3 and hs1 where physical + processors may be shared with dom0 have problems with TSC as source.
+ None of these systems are present in current generation of instance + types and can be safely ignored. + Setting TSC on t2 instances is bad as these instances can be live + migrated underneath which can result in TSC's value getting + bumped on the new droplet. + ''' + try: + instance_type = self.get_instance_data("Instance Type") + if (instance_type.find("t2.", 0, 3) == -1): + if (os.path.isfile("/sys/hypervisor/type") is True): + output = read_sysfs_file("/sys/hypervisor/type") + if (output == "xen"): + self.set_sysfs_config( + "/sys/devices/system/clocksource/clocksource0/" + "current_clocksource", "tsc") + except Ec2AutotuneError, e: + syslog(e.msg) + + if (self.add_secured_config is True): + # Controls the System Request debugging functionality of the kernel + self.set_sysctl_config("kernel.sysrq", 0) + + # Restrict access to kernel logs + self.set_sysctl_config("kernel.dmesg_restrict", 1) + + # Restrict access to kernel pointers in proc filesystem + self.set_sysctl_config("kernel.kptr_restrict", 1) + + return diff --git a/src/ec2sys_autotune/ec2_instance_network_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_network_cfg_gen.py new file mode 100644 index 0000000..a568999 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_network_cfg_gen.py @@ -0,0 +1,173 @@ +''' +EC2 instance HasA network. +This class needs to be a composition of base class. 
+''' + +import os +import sys +from syslog import syslog + +# Exceptions +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +class CfgGenNetworkSettings(object): + ''' + + Generate config for network settings + ''' + def __init__(self, set_config, get_instance_data, + inst_class, add_secured_config): + self.set_sysctl_config = set_config["sysctl"] + self.get_instance_data = get_instance_data + self.add_secured_config = add_secured_config + return + + def __del__(self): + self.set_sysctl_config = None + self.get_instance_data = None + return + + def tune(self): + ''' + + ##### Net Settings ##### + ''' + + ''' + net.core.somaxconn + Increase maximum connections + ''' + self.set_sysctl_config("net.core.somaxconn", 1024) + + ''' + netdev_max_backlog + Maximum number of packets, queued on the INPUT side, + when the interface receives packets faster than kernel can + process them. + ''' + self.set_sysctl_config("net.core.netdev_max_backlog", 1024 * 4) + + try: + networking_performance = self.get_instance_data( + "Networking Performance") + + if (networking_performance == "25 Gigabit" or + networking_performance == "10 Gigabit"): + ''' + busy_read: + Low latency busy poll timeout for socket reads. + busy_poll: + Low latency busy poll timeout for poll and select. + ''' + self.set_sysctl_config("net.core.busy_read", 50) + self.set_sysctl_config("net.core.busy_poll", 50) + else: + self.set_sysctl_config("net.core.busy_read", 0) + self.set_sysctl_config("net.core.busy_poll", 0) + + if (networking_performance == "25 Gigabit"): + ''' + Internal tests showed a latency of 100 ms as RTT. + To avoid bloating buffers in WAN, use only 20% of + bandwidth delay product. + Max bandwidth in WAN is limited by slowest link + in the path. + + rmem_max + The maximum receive socket buffer size in bytes. + ''' + self.set_sysctl_config("net.core.rmem_max", 1024 * 1024 * 60) + + ''' + wmem_max + The maximum send socket buffer size in bytes. 
+ ''' + self.set_sysctl_config("net.core.wmem_max", 1024 * 1024 * 60) + + ''' + tcp_rmem used by auto tuning + ''' + self.set_sysctl_config("net.ipv4.tcp_rmem", + [1024 * 4, + 1024 * 1024 * 30, + 1024 * 1024 * 60]) + + ''' + tcp_wmem used by auto tuning + ''' + self.set_sysctl_config("net.ipv4.tcp_wmem", + [1024 * 4, + 1024 * 1024 * 30, + 1024 * 1024 * 60]) + elif (networking_performance == "10 Gigabit"): + self.set_sysctl_config("net.core.rmem_max", 1024 * 1024 * 24) + self.set_sysctl_config("net.core.wmem_max", 1024 * 1024 * 24) + self.set_sysctl_config("net.ipv4.tcp_rmem", + [1024 * 4, + 1024 * 1024 * 12, + 1024 * 1024 * 24]) + self.set_sysctl_config("net.ipv4.tcp_wmem", + [1024 * 4, + 1024 * 1024 * 12, + 1024 * 1024 * 24]) + + if (networking_performance == "25 Gigabit" or + networking_performance == "10 Gigabit"): + # Good for fixed speed network + self.set_sysctl_config("net.ipv4.tcp_slow_start_after_idle", + 0) + # High speed networks can bloat buffer + self.set_sysctl_config("net.core.default_qdisc", + "fq_codel") + # Do not cache ssthresh from previous connection + self.set_sysctl_config("net.ipv4.tcp_no_metrics_save", + 1) + + except Ec2AutotuneError, e: + syslog(e.msg) + syslog("Failed to generate configuration specific to " + "network performance") + + # MTU discovery + self.set_sysctl_config("net.ipv4.tcp_mtu_probing", 1) + + # Make sure following defaults are not modified + self.set_sysctl_config("net.ipv4.tcp_moderate_rcvbuf", 1) + self.set_sysctl_config("net.ipv4.tcp_timestamps", 1) + self.set_sysctl_config("net.ipv4.tcp_window_scaling", 1) + self.set_sysctl_config("net.ipv4.tcp_sack", 1) + + # TCP keepalive parameters + self.set_sysctl_config("net.ipv4.tcp_keepalive_time", 90) + self.set_sysctl_config("net.ipv4.tcp_keepalive_intvl", 10) + self.set_sysctl_config("net.ipv4.tcp_keepalive_probes", 9) + + # Controls IP packet forwarding + self.set_sysctl_config("net.ipv4.ip_forward", 0) + + # Do not accept source routing + 
self.set_sysctl_config("net.ipv4.conf.default.accept_source_route", + 0) + + # Controls the use of TCP syncookies + self.set_sysctl_config("net.ipv4.tcp_syncookies", 1) + + # Ignore echo broadcast requests + self.set_sysctl_config("net.ipv4.icmp_echo_ignore_broadcasts", 1) + + if (self.add_secured_config is True): + # Enable kernel reverse path filtering to do source validation of + # the packets received from all the interfaces on the machine + self.set_sysctl_config("net.ipv4.conf.default.rp_filter", 1) + self.set_sysctl_config("net.ipv4.conf.all.rp_filter", 1) + + # Log martian packets + self.set_sysctl_config("net.ipv4.conf.default.log_martians", 1) + self.set_sysctl_config("net.ipv4.conf.all.log_martians", 1) + + # Ensure network settings if any are used immediately. + self.set_sysctl_config("net.ipv4.route.flush", 1) + self.set_sysctl_config("net.ipv6.route.flush", 1) + + return diff --git a/src/ec2sys_autotune/ec2_instance_pm_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_pm_cfg_gen.py new file mode 100644 index 0000000..17ca497 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_pm_cfg_gen.py @@ -0,0 +1,64 @@ +''' + +EC2 instance HasA CPU. +This class needs to be a composition of base class. 
+''' +import os +import sys +from syslog import syslog +from ec2sys_autotune.ec2_autotune_utils import get_piped_cmd_output + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +class CfgGenPowerManagementSettings(object): + ''' + + Generate config for power management settings + ''' + def __init__(self, set_config, get_instance_data, + inst_class, add_secured_config): + self.set_sysfs_config = set_config["sysfs"] + self.set_cpupower_config = set_config["cpu"] + self.get_instance_data = get_instance_data + return + + def __del__(self): + self.set_sysfs_config = None + self.set_cpupower_config = None + self.get_instance_data = None + return + + def tune(self): + ''' + ##### Power Management ##### + CPU Power (P-state) + ''' + try: + # Set if only intel_pstate driver + driver = get_piped_cmd_output( + "/bin/cpupower frequency-info --driver", + "/bin/grep \"driver: intel_pstate\"") + if(len(driver) > 0): + self.set_cpupower_config("p-state", "performance") + # Intel Turbo Boost + if (self.get_instance_data("Intel Turbo") == "True"): + self.set_sysfs_config( + "/sys/devices/system/cpu/intel_pstate/no_turbo", + 0) + except Ec2AutotuneError, e: + syslog(e.msg) + + # CPU Sleep (C-state) + try: + # Set if only intel_idle driver + driver = get_piped_cmd_output( + "/bin/cpupower idle-info --silent", + "/bin/grep \"driver: intel_idle\"") + if(len(driver) > 0): + self.set_cpupower_config("c-state", "C1E") + except Ec2AutotuneError, e: + syslog(e.msg) + + return diff --git a/src/ec2sys_autotune/ec2_instance_services_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_services_cfg_gen.py new file mode 100644 index 0000000..e5be737 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_services_cfg_gen.py @@ -0,0 +1,33 @@ +''' + +EC2 instance HasA service. +This class needs to be a composition of base class. 
+''' + +import os +import sys +from syslog import syslog + + +class CfgGenPerfOptimizingServices(object): + ''' + + Generate config for performance optimizing services + ''' + def __init__(self, set_config, get_instance_data, + inst_class, add_secured_config): + self.set_service_config = set_config["service"] + return + + def __del__(self): + self.set_service_config = None + return + + def tune(self): + ''' + ##### IRQ Balance ##### + irqbalance is a tool that distributes hardware interrupts across + processors. Start this service if not already started. + ''' + self.set_service_config("irqbalance", "start") + return diff --git a/src/ec2sys_autotune/ec2_instance_storage_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_storage_cfg_gen.py new file mode 100644 index 0000000..43efef2 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_storage_cfg_gen.py @@ -0,0 +1,78 @@ +''' + +EC2 instance HasA storage. +This class needs to be a composition of base class. +''' +import os +import sys +from syslog import syslog +from ec2sys_autotune.ec2_autotune_utils import exec_cmds +from ec2sys_autotune.ec2_autotune_utils import get_cmd_output +from ec2sys_autotune.ec2_autotune_utils import get_piped_cmd_output + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +class CfgGenStorageSettings(object): + ''' + Generate config for storage settings + ''' + def __init__(self, set_config, get_instance_data, + inst_class, add_secured_config): + self.set_sysfs_config = set_config["sysfs"] + return + + def __del__(self): + self.set_sysfs_config = None + return + + def tune(self): + ''' + ##### I/O scheduler ##### + Set I/O scheduler on following criteria: + Ephemeral store HDD - deadline + Instance store NVME - kyber + Default is noop for EBS backed HDD and none for EBS backed NVME.
+ + Also set max_retries for NVME devices to 10 (default is 5) + ''' + try: + output = get_piped_cmd_output("/bin/ec2-metadata -b", + "/bin/grep ephemeral") + for hdd_link in output.split(): + if ("ephemeral" in hdd_link): + continue + hdd_device = get_cmd_output("/bin/readlink /dev/{0}" + .format(hdd_link)) + self.set_sysfs_config( + "/sys/block/{0}/queue/scheduler".format(hdd_device), + "deadline") + except Ec2AutotuneError, e: + syslog(e.msg) + + try: + nvme_present = False + output = get_piped_cmd_output( + "/bin/lsblk -l -d --output NAME,TRAN", + "/bin/grep -e nvme") + for nvme_device in output.split(): + if ("nvme" == nvme_device): + continue + nvme_present = True + try: + exec_cmds(["/sbin/ebsnvme-id /dev/{0}" + .format(nvme_device)]) + except Ec2AutotuneError, e: + # Instance store NVME + self.set_sysfs_config( + "/sys/block/{0}/queue/scheduler".format(nvme_device), + "kyber") + if (nvme_present): + self.set_sysfs_config( + "/sys/module/nvme_core/parameters/max_retries", + 10) + except Ec2AutotuneError, e: + syslog(e.msg) + + return diff --git a/src/ec2sys_autotune/ec2_instance_types.py b/src/ec2sys_autotune/ec2_instance_types.py new file mode 100644 index 0000000..b5763f7 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_types.py @@ -0,0 +1,110 @@ +''' + +EC2 instance types for current generation. This script does not optimize +older generations. Following matrix is replicated from: + https://aws.amazon.com/ec2/instance-types/#instance-type-matrix +XXX: Is it worth to optimize this table? +This file is not pep8 compliant intentionally. 
+''' +general_purpose = ( +("Instance Type", "vCPU", "Mem (GiB)", "Instance Storage (GiB)", "Networking Performance", "CPU", "Clock Speed (GHz)", "Intel AVX", "Intel AVX2", "Intel Turbo", "EBS OPT", "Enhanced Networking"), +("t2.nano", 1, 0.5, "EBS-Only", "Low", "Intel Xeon family", "up to 3.3", "Yes", "-", "Yes", "-", "-"), +("t2.micro", 1, 1, "EBS-Only", "Low to Moderate", "Intel Xeon family", "Up to 3.3", "Yes", "-", "Yes", "-", "-"), +("t2.small", 1, 2, "EBS-Only", "Low to Moderate", "Intel Xeon family", "Up to 3.3", "Yes", "-", "Yes", "-", "-"), +("t2.medium", 2, 4, "EBS-Only", "Low to Moderate", "Intel Xeon family", "Up to 3.3", "Yes", "-", "Yes", "-", "-"), +("t2.large", 2, 8, "EBS-Only", "Low to Moderate", "Intel Xeon family", "Up to 3.0", "Yes", "-", "Yes", "-", "-"), +("t2.xlarge", 4, 16, "EBS-Only", "Moderate", "Intel Xeon family", "Up to 3.0", "Yes", "-", "Yes", "-", "-"), +("t2.2xlarge", 8, 32, "EBS-Only", "Moderate", "Intel Xeon family", "Up to 3.0", "Yes", "-", "Yes", "-", "-"), +("m5.large", 2, 8, "EBS-Only", "High", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5.xlarge", 4, 16, "EBS-Only", "High", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5.2xlarge", 8, 32, "EBS-Only", "High", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5.4xlarge", 16, 64, "EBS-Only", "High", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5.12xlarge", 48, 192, "EBS-Only", "10 Gigabit", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5.24xlarge", 96, 384, "EBS-Only", "25 Gigabit", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5d.large", 2, 8, "1 x 75 NVMe SSD", "High", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5d.xlarge", 4, 16, "1 x 150 NVMe SSD", "High", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5d.2xlarge", 8, 32, "1 x 300 NVMe SSD", "High", "Intel Xeon Platinum", "2.5", "Yes", 
"Yes", "Yes", "Yes", "Yes"), +("m5d.4xlarge", 16, 64, "2 x 300 NVMe SSD", "High", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5d.12xlarge", 48, 192, "2 x 900 NVMe SSD", "10 Gigabit", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m5d.24xlarge", 96, 384, "4 x 900 NVMe SSD", "25 Gigabit", "Intel Xeon Platinum", "2.5", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m4.large", 2, 8, "EBS-Only", "Moderate", "Intel Xeon E5-2676 v3**", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m4.xlarge", 4, 16, "EBS-Only", "High", "Intel Xeon E5-2676 v3**", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m4.2xlarge", 8, 32, "EBS-Only", "High", "Intel Xeon E5-2676 v3**", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m4.4xlarge", 16, 64, "EBS-Only", "High", "Intel Xeon E5-2676 v3**", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m4.10xlarge", 40, 160, "EBS-Only", "10 Gigabit", "Intel Xeon E5-2676 v3", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("m4.16xlarge", 64, 256, "EBS-Only", "25 Gigabit", "Intel Xeon E5-2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes")) + +compute_optimized = ( +("Instance Type", "vCPU", "Mem (GiB)", "Instance Storage (GiB)", "Networking Performance", "CPU", "Clock Speed (GHz)", "Intel AVX", "Intel AVX2", "Intel AVX-512", "Intel Turbo", "EBS OPT", "Enhanced Networking"), +("c5.large", 2, 4, "EBS-Only", "Up to 10 Gbps", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5.xlarge", 4, 8, "EBS-Only", "Up to 10 Gbps", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5.2xlarge", 8, 16, "EBS-Only", "Up to 10 Gbps", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5.4xlarge", 16, 32, "EBS-Only", "Up to 10 Gpbs", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5.9xlarge", 36, 72, "EBS-Only", "10 Gigabit", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5.18xlarge", 72, 144, "EBS-Only", "25 
Gigabit", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5d.large", 2, 4, "1 x 50 NVMe SSD", "Up to 10 Gbps", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5d.xlarge", 4, 8, "1 x 100 NVMe SSD", "Up to 10 Gbps", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5d.2xlarge", 8, 16, "1 x 200 NVMe SSD", "Up to 10 Gbps", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5d.4xlarge", 16, 32, "1 x 400 NVMe SSD", "Up to 10 Gbps", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5d.9xlarge", 36, 72, "1 x 900 NVMe SSD", "10 Gigabit", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c5d.18xlarge", 72, 144, "2 x 900 NVMe SSD", "25 Gigabit", "Intel Xeon Platinum", "3.0", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), +("c4.large", 2, 3.75, "EBS-Only", "Moderate", "Intel Xeon E5-2666 v3", "2.9", "Yes", "Yes", "-", "Yes", "Yes", "Yes"), +("c4.xlarge", 4, 7.5, "EBS-Only", "High", "Intel Xeon E5-2666 v3", "2.9", "Yes", "Yes", "-", "Yes", "Yes", "Yes"), +("c4.2xlarge", 8, 15, "EBS-Only", "High", "Intel Xeon E5-2666 v3", "2.9", "Yes", "Yes", "-", "Yes", "Yes", "Yes"), +("c4.4xlarge", 16, 30, "EBS-Only", "High", "Intel Xeon E5-2666 v3", "2.9", "Yes", "Yes", "-", "Yes", "Yes", "Yes"), +("c4.8xlarge", 36, 60, "EBS-Only", "10 Gigabit", "Intel Xeon E5-2666 v3", "2.9", "Yes", "Yes", "-", "Yes", "Yes", "Yes")) + +memory_optimized = ( +("Instance Type", "vCPU", "Mem (GiB)", "Instance Storage (GiB)", "Networking Performance", "CPU", "Clock Speed (GHz)", "Intel AVX", "Intel AVX2", "Intel Turbo", "EBS OPT", "Enhanced Networking"), +("x1.16xlarge", 64, 976, "1 x 1920 SSD", "10 Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("x1.32xlarge", 128, 1952, "2 x 1920 SSD", "25 Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("x1e.xlarge", 4, 122, "1 x 120 SSD", "Up to 10 
Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "No", "Yes", "Yes"), +("x1e.2xlarge", 8, 244, "1 x 240 SSD", "Up to 10 Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "No", "Yes", "Yes"), +("x1e.4xlarge", 16, 488, "1 x 480 SSD", "Up to 10 Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "No", "Yes", "Yes"), +("x1e.8xlarge", 32, 976, "1 x 960 SSD", "Up to 10 Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("x1e.16xlarge", 64, 1952, "1 x 1920 SSD", "10 Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("x1e.32xlarge", 128, 3904, "2 x 1920 SSD", "25 Gigabit", "Intel Xeon E7-8880 v3", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("r4.large", 2, 15.25, "-", "Up to 10 Gigabit", "Intel Xeon E5-2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("r4.xlarge", 4, 30.5, "-", "Up to 10 Gigabit", "Intel Xeon E5-2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("r4.2xlarge", 8, 61, "-", "Up to 10 Gigabit", "Intel Xeon E5-2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("r4.4xlarge", 16, 122, "-", "Up to 10 Gigabit", "Intel Xeon E5-2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("r4.8xlarge", 32, 244, "-", "10 Gigabit", "Intel Xeon E5-2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("r4.16xlarge", 64, 488, "-", "25 Gigabit", "Intel Xeon E5-2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes")) + +accelerated_computing = ( +("Instance Type", "vCPU", "Mem (GiB)", "Instance Storage (GiB)", "Networking Performance", "CPU", "Clock Speed (GHz)", "Intel AVX", "Intel AVX2", "Intel Turbo", "EBS OPT", "Enhanced Networking"), +("p3.2xlarge", 8, 61, "EBS only", "Up to 10 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("p3.8xlarge", 32, 244, "EBS only", "10 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("p3.16xlarge", 64, 488, "EBS only", "25 Gigabit", "Intel Xeon E5-2686 v4", "2.3 
(base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("p2.xlarge", 4, 61, "EBS Only", "High", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("p2.8xlarge", 32, 488, "EBS Only", "10 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("p2.16xlarge", 64, 732, "EBS Only", "25 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("g3.4xlarge", 16, 122, "EBS Only", "Up to 10 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("g3.8xlarge", 32, 244, "EBS Only", "10 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("g3.16xlarge", 64, 488, "EBS Only", "25 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("f1.2xlarge", 8, 122, "1 X 480 SSD", "Up to 10 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes"), +("f1.16xlarge", 64, 976, "4 x 960", "25 Gigabit", "Intel Xeon E5-2686 v4", "2.3 (base) 2.7 (turbo)", "Yes", "Yes", "Yes", "Yes", "Yes")) + +storage_optimized = ( +("Instance Type", "vCPU", "Mem (GiB)", "Instance Storage (GiB)", "Networking Performance", "CPU", "Clock Speed (GHz)", "Intel AVX", "Intel AVX2", "Intel Turbo", "EBS OPT", "Enhanced Networking"), +("h1.2xlarge", 8, 32, "1 x 2,000 HDD", "Up to 10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("h1.4xlarge", 16, 64, "2 x 2,000 HDD", "Up to 10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("h1.8xlarge", 32, 128, "4 x 2,000 HDD", "10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("h1.16xlarge", 64, 256, "8 x 2,000 HDD", "25 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("i3.large", 2, 15.25, "1 x 475 NVMe SSD", "Up to 10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", 
"Yes", "Yes", "Yes", "Yes"), +("i3.xlarge", 4, 30.5, "1 x 950 NVMe SSD", "Up to 10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("i3.2xlarge", 8, 61, "1 x 1,900 NVMe SSD", "Up to 10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("i3.4xlarge", 16, 122, "2 x 1,900 NVMe SSD", "Up to 10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("i3.8xlarge", 32, 244, "4 x 1,900 NVMe SSD", "10 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("i3.16xlarge", 64, 488, "8 x 1,900 NVMe SSD", "25 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("i3.metal", 72, 512, "8 x 1,900 NVMe SSD", "25 Gigabit", "Intel Xeon E5 2686 v4", "2.3", "Yes", "Yes", "Yes", "Yes", "Yes"), +("d2.xlarge", 4, 30.5, "3 x 2000", "Moderate", "Intel Xeon E5-2676 v3", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("d2.2xlarge", 8, 61, "6 x 2000", "High", "Intel Xeon E5-2676 v3", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("d2.4xlarge", 16, 122, "12 x 2000", "High", "Intel Xeon E5-2676 v3", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes"), +("d2.8xlarge", 36, 244, "24 x 2000", "10 Gigabit", "Intel Xeon E5-2676 v3", "2.4", "Yes", "Yes", "Yes", "Yes", "Yes")) + +ec2_instance_types = (general_purpose, + compute_optimized, + memory_optimized, + accelerated_computing, + storage_optimized) diff --git a/src/ec2sys_autotune/ec2_instance_vm_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_vm_cfg_gen.py new file mode 100644 index 0000000..17eded3 --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_vm_cfg_gen.py @@ -0,0 +1,116 @@ +''' + +EC2 instance HasA memory. +This class needs to be a composition of base class. 
+''' + +import os +import sys +from syslog import syslog + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + +# EC2 instance classes +MEMORY_OPTIMIZED = "memory_optimized" + + +class CfgGenVirtualMemorySettings(object): + ''' + + Generate config for virtual memory settings + ''' + def __init__(self, set_config, get_instance_data, + inst_class, add_secured_config): + self.set_sysctl_config = set_config["sysctl"] + self.get_instance_data = get_instance_data + self.inst_class = inst_class + return + + def __del__(self): + self.set_sysctl_config = None + self.get_instance_data = None + self.inst_class = None + return + + def tune(self): + ''' + + ##### VM Settings ##### + ''' + + ''' + vm.swappiness + This control is used to define how aggressive the kernel will swap + memory pages. get_scan_count() skips scanning if there are is no + swap space setup. However we do not want swappiness to interfere + with hibernation swap space. Most of the databases want to + manage their own pages and lock them in memory and do not want + interference from swap management. + ''' + self.set_sysctl_config("vm.swappiness", 0) + + try: + if (self.get_instance_data("EBS OPT") == "Yes" and + self.get_instance_data("Mem (GiB)") > 28): + ''' + vm.dirty_expire_centisecs + Controls time dirty data can be in cache before it needs + to be written + ''' + self.set_sysctl_config("vm.dirty_expire_centisecs", 500) + + ''' + vm.dirty_writeback_centisecs + Controls how often the flusher need to be woken up + ''' + self.set_sysctl_config("vm.dirty_writeback_centisecs", 100) + + ''' + vm.dirty_background_ratio + Contains, as a percentage of total available memory that + contains free pages and reclaimable pages, the number of + pages at which the background kernel flusher threads will + start writing out dirty data. 
+ ''' + self.set_sysctl_config("vm.dirty_background_ratio", 0) + self.set_sysctl_config("vm.dirty_background_bytes", + 1024 * 1024 * 1750 / 2) + + ''' + vm.dirty_ratio + Contains, as a percentage of total available memory that + contains free pages and reclaimable pages, the number of + pages at which a process which is generating disk writes + will itself start writing out dirty data. + ''' + self.set_sysctl_config("vm.dirty_ratio", 0) + self.set_sysctl_config("vm.dirty_bytes", + 1024 * 1024 * 1750 * 3) + + except Ec2AutotuneError, e: + syslog(e.msg) + syslog("Couldn't configure write back tunables of dirty pages") + + ''' + ##### Transparent Huge Pages (THP) ##### + Performance critical computing applications dealing with large memory + working sets are already running on top of libhugetlbfs and in turn + hugetlbfs. Transparent Hugepage Support is an alternative means of + using huge pages for the backing of virtual memory with huge pages + that supports the automatic promotion and demotion of page sizes and + without the shortcomings of hugetlbfs. + + Most of the databases manage their own memory allocation using mmap + interface. Though huge pages provide the opportunity of fewer TLB + entries and fewer TLB misses, the case of internal page fragmentation + and hugepage daemon trying to defragment these pages can result in + delay of allocation of page. This delay is not acceptable to database + workload where lot of threads are trying to allocate and free memory. 
+ ''' + if (self.inst_class == MEMORY_OPTIMIZED): + self.set_sysfs_config( + "/sys/kernel/mm/transparent_hugepage/enabled", "never") + self.set_sysfs_config( + "/sys/kernel/mm/transparent_hugepage/defrag", "never") + return diff --git a/src/ec2sys_autotune/placement_group_cfg_gen.py b/src/ec2sys_autotune/placement_group_cfg_gen.py new file mode 100644 index 0000000..ccceaf3 --- /dev/null +++ b/src/ec2sys_autotune/placement_group_cfg_gen.py @@ -0,0 +1,62 @@ +''' + +EC2 placement groups allow instances to interact with low network latency. +Care should be taken to not use this configuration on WAN to avoid +bloating buffer on the internet. There is more memory buffer for each +socket now to achieve maximum network bandwidth on LAN. +''' + +import os +import sys +from syslog import syslog +from ec2sys_autotune.ec2_instance_cfg_gen import Ec2InstanceCfgGen + +# Exception +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +class PlacementGroupCfgGen(Ec2InstanceCfgGen): + def tune(self): + # Call super class as well to inherit base class network settings + super(PlacementGroupCfgGen, self).start_cfg_logging() + super(PlacementGroupCfgGen, self)._tune() + try: + networking_performance = self.get_instance_data( + "Networking Performance") + + if (networking_performance == "25 Gigabit"): + ''' + netdev_budget + Maximum number of packets taken from all interfaces in + one polling cycle (NAPI poll). 
+ ''' + self.set_sysctl_config("net.core.netdev_budget", 400) + + self.set_sysctl_config("net.core.rmem_max", 1024 * 1024 * 299) + self.set_sysctl_config("net.core.wmem_max", 1024 * 1024 * 299) + self.set_sysctl_config("net.ipv4.tcp_rmem", + [1024 * 4, + 1024 * 1024 * 149, + 1024 * 1024 * 299]) + self.set_sysctl_config("net.ipv4.tcp_wmem", + [1024 * 4, + 1024 * 1024 * 149, + 1024 * 1024 * 299]) + elif (networking_performance == "10 Gigabit"): + self.set_sysctl_config("net.core.netdev_budget", 450) + self.set_sysctl_config("net.core.rmem_max", 1024 * 1024 * 120) + self.set_sysctl_config("net.core.wmem_max", 1024 * 1024 * 120) + self.set_sysctl_config("net.ipv4.tcp_rmem", + [1024 * 4, + 1024 * 1024 * 60, + 1024 * 1024 * 120]) + self.set_sysctl_config("net.ipv4.tcp_wmem", + [1024 * 4, + 1024 * 1024 * 60, + 1024 * 1024 * 120]) + except Ec2AutotuneError, e: + syslog(e.msg) + syslog("Failed to generate configuration specific to " + + "EC2 placement gorup") + super(PlacementGroupCfgGen, self).stop_cfg_logging() + return diff --git a/src/ec2sys_autotune/udp_server_cfg_gen.py b/src/ec2sys_autotune/udp_server_cfg_gen.py new file mode 100644 index 0000000..9321dde --- /dev/null +++ b/src/ec2sys_autotune/udp_server_cfg_gen.py @@ -0,0 +1,32 @@ +''' + +There have been many instances of ARP cache table not being able to accommodate +entries which can make the applications fail mysteriously. Most of these issues +have been reported with dockers and containers workload. Following links +discuss this issue extensively: +https://github.com/hashicorp/serf/issues/263 +https://github.com/moby/moby/issues/29992 +https://github.com/docker/libnetwork/issues/1522 +https://github.com/hashicorp/serf/issues/269 +This config generator helps in tuning kernel values to avoid above reported +problems. 
+''' + +import os +import sys +from ec2sys_autotune.ec2_instance_cfg_gen import Ec2InstanceCfgGen + + +class UdpServerCfgGen(Ec2InstanceCfgGen): + def tune(self): + # Call super class as well to inherit base class network settings + super(UdpServerCfgGen, self).start_cfg_logging() + super(UdpServerCfgGen, self)._tune() + self.set_sysctl_config("net.ipv4.neigh.default.gc_thresh1", 30000) + self.set_sysctl_config("net.ipv4.neigh.default.gc_thresh2", 32000) + self.set_sysctl_config("net.ipv4.neigh.default.gc_thresh3", 32768) + self.set_sysctl_config("net.ipv6.neigh.default.gc_thresh1", 30000) + self.set_sysctl_config("net.ipv6.neigh.default.gc_thresh2", 32000) + self.set_sysctl_config("net.ipv6.neigh.default.gc_thresh3", 32768) + super(UdpServerCfgGen, self).stop_cfg_logging() + return diff --git a/unit/autotune.service b/unit/autotune.service new file mode 100644 index 0000000..a43beb0 --- /dev/null +++ b/unit/autotune.service @@ -0,0 +1,15 @@ +[Unit] +Description=EC2 System Autotuning +After=network.target syslog.target +DefaultDependencies=no +Conflicts=shutdown.target + +[Service] +ExecStart=/usr/bin/ec2sys_autotune_start --config /etc/ec2sys-autotune.cfg +ExecStop=/usr/bin/ec2sys_autotune_stop --config /etc/ec2sys-autotune.cfg +Type=oneshot +RemainAfterExit=yes +Restart=no + +[Install] +WantedBy=multi-user.target From 25a7afb123f745290f429deac5a6a2e5d5d6fffd Mon Sep 17 00:00:00 2001 From: iliana destroyer of worlds Date: Thu, 21 Feb 2019 16:30:56 -0800 Subject: [PATCH 02/10] Fix License tag in spec file --- ec2sys_autotune.spec => ec2sys-autotune.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename ec2sys_autotune.spec => ec2sys-autotune.spec (96%) diff --git a/ec2sys_autotune.spec b/ec2sys-autotune.spec similarity index 96% rename from ec2sys_autotune.spec rename to ec2sys-autotune.spec index d7d53db..674774d 100644 --- a/ec2sys_autotune.spec +++ b/ec2sys-autotune.spec @@ -4,7 +4,7 @@ Release: 1%{?dist} Summary: AWS EC2 instance autotuning Group: 
Applications/Engineering -License: GNU GENERAL PUBLIC LICENSE 2.0 +License: GPLv2 URL: https://github.com/aws/ec2sys-autotune Source0: %{name}-%{version}.tar.gz BuildArch: noarch From 4848812c7d4a898de379dfd96ec7a9f229a086c4 Mon Sep 17 00:00:00 2001 From: Vaidyeshwara Date: Thu, 21 Feb 2019 16:54:18 -0800 Subject: [PATCH 03/10] Fix URL tag in spec file --- ec2sys-autotune.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ec2sys-autotune.spec b/ec2sys-autotune.spec index 674774d..aa8f2bd 100644 --- a/ec2sys-autotune.spec +++ b/ec2sys-autotune.spec @@ -5,7 +5,7 @@ Summary: AWS EC2 instance autotuning Group: Applications/Engineering License: GPLv2 -URL: https://github.com/aws/ec2sys-autotune +URL: https://github.com/awslabs/autotune Source0: %{name}-%{version}.tar.gz BuildArch: noarch Requires(pre): systemd From 716975773f82e99e7f650339db5c3b9b7646cce6 Mon Sep 17 00:00:00 2001 From: Vaidyeshwara Date: Fri, 1 Mar 2019 19:20:19 -0800 Subject: [PATCH 04/10] Enforce root to execute autotune Non-root users executing autotune fail trying to grab the service lock. However this code flow is not user friendly. Root check needs to be more gracious and user friendly. 
Signed-off-by: Vallish Vaidyeshwara Reviewed-by: Balbir Singh --- ec2sys-autotune.spec | 2 +- scripts/autotune | 4 ++++ scripts/ec2sys_autotune_start | 4 ++++ scripts/ec2sys_autotune_stop | 4 ++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ec2sys-autotune.spec b/ec2sys-autotune.spec index aa8f2bd..cb2c8ce 100644 --- a/ec2sys-autotune.spec +++ b/ec2sys-autotune.spec @@ -1,5 +1,5 @@ Name: ec2sys-autotune -Version: 1.0.0 +Version: 1.0.3 Release: 1%{?dist} Summary: AWS EC2 instance autotuning diff --git a/scripts/autotune b/scripts/autotune index afa1da5..2872ad8 100755 --- a/scripts/autotune +++ b/scripts/autotune @@ -609,6 +609,10 @@ SUBCMD Following are sub-commands to autotune: def main(): + if os.geteuid() != 0: + print("\nNeed to be root to run this script.\n") + sys.exit(-1) + args = add_arguments_and_parse() try: diff --git a/scripts/ec2sys_autotune_start b/scripts/ec2sys_autotune_start index b2e4d24..e7ff338 100755 --- a/scripts/ec2sys_autotune_start +++ b/scripts/ec2sys_autotune_start @@ -9,6 +9,10 @@ import argparse def main(): + if os.geteuid() != 0: + syslog("Need to be root to run this script.") + sys.exit(-1) + parser = argparse.ArgumentParser() parser.add_argument('--config', default="/etc/ec2sys-autotune.cfg", diff --git a/scripts/ec2sys_autotune_stop b/scripts/ec2sys_autotune_stop index 904eb26..18a0b96 100755 --- a/scripts/ec2sys_autotune_stop +++ b/scripts/ec2sys_autotune_stop @@ -9,6 +9,10 @@ import argparse def main(): + if os.geteuid() != 0: + syslog("Need to be root to run this script.") + sys.exit(-1) + parser = argparse.ArgumentParser() parser.add_argument('--config', default="/etc/ec2sys-autotune.cfg", From 726da7840c43e7c2bd060b58f1fd6a730d52bbe3 Mon Sep 17 00:00:00 2001 From: Vallish Vaidyeshwara Date: Fri, 1 Mar 2019 20:04:19 -0800 Subject: [PATCH 05/10] Fix version in spec and setup files Signed-off-by: Vallish Vaidyeshwara Reviewed-by: Balbir Singh --- ec2sys-autotune.spec | 2 +- setup.py | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/ec2sys-autotune.spec b/ec2sys-autotune.spec index cb2c8ce..8106745 100644 --- a/ec2sys-autotune.spec +++ b/ec2sys-autotune.spec @@ -1,5 +1,5 @@ Name: ec2sys-autotune -Version: 1.0.3 +Version: 1.0.4 Release: 1%{?dist} Summary: AWS EC2 instance autotuning diff --git a/setup.py b/setup.py index cb91ef2..5bbfedd 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ ec2sys_autotune_long_description = fp.read() setup(name="ec2sys-autotune", - version='1.0.0', + version='1.0.4', author="Vallish Vaidyeshwara", author_email="vallish@amazon.com", url="https://github.com/awslabs/ec2sys-autotune", From 54f5ae8395b04b19ee8b4df8078d7dad7a79d47f Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Sun, 3 Mar 2019 21:42:57 -0800 Subject: [PATCH 06/10] Simplify section code a bit, use an array of sections, this makes it easier to add new sections Signed-off-by: Balbir Singh --- src/ec2sys_autotune/ec2_instance_cfg_gen.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/ec2sys_autotune/ec2_instance_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_cfg_gen.py index 947c0f9..85e61ca 100644 --- a/src/ec2sys_autotune/ec2_instance_cfg_gen.py +++ b/src/ec2sys_autotune/ec2_instance_cfg_gen.py @@ -73,6 +73,8 @@ class and then generate tunable configs. 
SYSFS = "sysfs" CPU = "cpu" +Sections = [SERVICE, SYSCTL, SYSFS, CPU] + # EC2 instance classes GENERAL_PURPOSE = "general_purpose" COMPUTE_OPTIMIZED = "compute_optimized" @@ -144,17 +146,8 @@ def start_cfg_logging(self): self.get_instance_data("Instance Type")) self.cfg_object.set(PROFILE, VERSION, RELEASE) - # List of services - self.cfg_object.add_section(SERVICE) - - # List of sysctl tunables - self.cfg_object.add_section(SYSCTL) - - # List of sysfs tunables - self.cfg_object.add_section(SYSFS) - - # List of CPU tunables - self.cfg_object.add_section(CPU) + for section in Sections: + self.cfg_object.add_section(section) return def stop_cfg_logging(self): From 5b937b07dea5135beb137575d9f6056b57eeea9b Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Mon, 4 Mar 2019 12:00:31 -0800 Subject: [PATCH 07/10] Add support (front end config file) for blacklisting Add support for blacklisting un-needed modules. This support today does not take instance type into account, but in the future we would want to consider it. Signed-off-by: Balbir Singh --- .../ec2_instance_blacklist_modules_cfg_gen.py | 38 +++++++++++++++++++ src/ec2sys_autotune/ec2_instance_cfg_gen.py | 19 ++++++++-- 2 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 src/ec2sys_autotune/ec2_instance_blacklist_modules_cfg_gen.py diff --git a/src/ec2sys_autotune/ec2_instance_blacklist_modules_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_blacklist_modules_cfg_gen.py new file mode 100644 index 0000000..05e35ad --- /dev/null +++ b/src/ec2sys_autotune/ec2_instance_blacklist_modules_cfg_gen.py @@ -0,0 +1,38 @@ +''' +EC2 instance tuning for blacklisting modules +This class needs to be a composition of base class. 
+''' + +import os +import sys +from syslog import syslog + +# Exceptions +from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError + + +class CfgGenBlacklistModules(object): + ''' + + Generate config for modules to blacklist + ''' + def __init__(self, set_config, get_instance_data, + inst_class, add_secured_config): + self.set_blacklist_mod_config = set_config["blacklist_mod"] + self.get_instance_data = get_instance_data + self.add_secured_config = add_secured_config + + def __del__(self): + self.set_blacklist_mod_config = None + self.get_instance_data = None + + def tune(self): + ''' + + We need to disable bluetooth and USB modules + ''' + self.set_blacklist_mod_config("usb_common") + self.set_blacklist_mod_config("usbcore") + self.set_blacklist_mod_config("uhid") + self.set_blacklist_mod_config("hid_generic") + self.set_blacklist_mod_config("bluetooth") diff --git a/src/ec2sys_autotune/ec2_instance_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_cfg_gen.py index 85e61ca..2ef8fea 100644 --- a/src/ec2sys_autotune/ec2_instance_cfg_gen.py +++ b/src/ec2sys_autotune/ec2_instance_cfg_gen.py @@ -51,6 +51,8 @@ class and then generate tunable configs. import CfgGenStorageSettings from ec2sys_autotune.ec2_instance_pm_cfg_gen \ import CfgGenPowerManagementSettings +from ec2sys_autotune.ec2_instance_blacklist_modules_cfg_gen \ + import CfgGenBlacklistModules # Exceptions from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError @@ -72,8 +74,9 @@ class and then generate tunable configs. 
SYSCTL = "sysctl" SYSFS = "sysfs" CPU = "cpu" +BLACKLIST = "blacklist_modules" -Sections = [SERVICE, SYSCTL, SYSFS, CPU] +Sections = [SERVICE, SYSCTL, SYSFS, CPU, BLACKLIST] # EC2 instance classes GENERAL_PURPOSE = "general_purpose" @@ -265,17 +268,27 @@ def set_cpupower_config(self, state, value): ''' return self.write_config_entry(CPU, state, value) + def set_blacklist_mod_config(self, module): + ''' + Set the passed modules to be added to blacklist + of modules, eventually causing those modules + to be not loaded + ''' + return self.write_config_entry(BLACKLIST, module, 1) + def setup_objects_composition(self): self.set_config = {"service": self.set_service_config, "sysctl": self.set_sysctl_config, "sysfs": self.set_sysfs_config, - "cpu": self.set_cpupower_config} + "cpu": self.set_cpupower_config, + "blacklist_mod": self.set_blacklist_mod_config} self.composition_classes = ["CfgGenPerfOptimizingServices", "CfgGenVirtualMemorySettings", "CfgGenNetworkSettings", "CfgGenKernelSettings", "CfgGenStorageSettings", - "CfgGenPowerManagementSettings"] + "CfgGenPowerManagementSettings", + "CfgGenBlacklistModules"] self.composition_objects = [] if ("-secured" in self.profile): From 34ec45db25781822e2ab4e6a19087445db968c76 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Mon, 4 Mar 2019 23:08:46 -0800 Subject: [PATCH 08/10] Add backend configuration generation for blacklist modules This patch adds support for generation of actual changes to /etc/modprobe.d/blacklist.conf. The code checks to see if the blacklist already exists in any of the files in /etc/modprobe.d and if so, skips it. 
Support is also added for rollback of the config via autotune rollback Signed-off-by: Balbir Singh --- .../ec2_instance_cfg_engine.py | 61 ++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/src/ec2sys_autotune/ec2_instance_cfg_engine.py b/src/ec2sys_autotune/ec2_instance_cfg_engine.py index 93b2ca0..c6edfc7 100644 --- a/src/ec2sys_autotune/ec2_instance_cfg_engine.py +++ b/src/ec2sys_autotune/ec2_instance_cfg_engine.py @@ -18,6 +18,7 @@ import stat import json import glob +import fileinput from syslog import syslog try: from configparser import RawConfigParser @@ -40,6 +41,9 @@ SYSCTL = "sysctl" SYSFS = "sysfs" CPU = "cpu" +BLACKLIST = "blacklist_modules" +BLACKLIST_MOD_FILE = "/etc/modprobe.d/blacklist.conf" +MODPROBE_D = "/etc/modprobe.d/" class Ec2InstanceCfgEngine(object): @@ -65,7 +69,9 @@ def __init__(self, log_file, config_dir=None, profile=None, SYSFS: {"get": self.get_sysfs_value, "set": self.set_sysfs_value}, CPU: {"get": self.get_cpu_value, - "set": self.set_cpu_value}} + "set": self.set_cpu_value}, + BLACKLIST: {"get": self.get_blacklist_modules_value, + "set": self.set_blacklist_modules_value}} ''' Recovery instance do not need config_dir and profiles. 
@@ -126,7 +132,8 @@ def start_recovery_logging(self): SERVICE: [], SYSCTL: [], SYSFS: [], - CPU: []} + CPU: [], + BLACKLIST: []} return def stop_recovery_logging(self): @@ -528,6 +535,56 @@ def set_cpu_value(self, cpu_state, new_value, orig_value=None): self.get_cpu_value, CPU, new_value, orig_value) + def get_blacklist_modules_value(self, module): + module_files = MODPROBE_D + '/*' + for f in glob.glob(module_files): + for line in open(f): + if (((line.rfind(module) != -1) and (line.rfind("install") != -1)) or + ((line.rfind(module) != -1) and line.rfind("blacklist") != -1)): + # we found blacklist , return 1 + return 1 + return None + + def set_blacklist_modules_value(self, module, new_value, orig_value=None): + ''' + Blacklist ignores value of 1, when present, blacklist implies we + should added it to the blacklist file. set values has an oddity + by design, when setting it passes an array of values, when resetting + just the value. + ''' + module_files = MODPROBE_D + '/*' + + for filename in glob.glob(module_files): + for line in open(filename): + if (((line.rfind(module) != -1) and (line.rfind("install") != -1)) or + ((line.rfind(module) != -1) and line.rfind("blacklist") != -1)): + # we found blacklist + if orig_value != None and new_value == 0: + # remove the entry + # Do we need recovery logging, I guess not + for line in fileinput.input(files=(filename), inplace=1): + if line.rfind(module) != -1 and line.rfind("install") != -1: + print("") # remove the line + return + + if (new_value == [0] or new_value == 0): # oddity between set (for restore and actual setting) + return + try: + if (os.path.isfile(BLACKLIST_MOD_FILE) is False): + f = open(BLACKLIST_MOD_FILE, "w+") + syslog("Created {0}".format(BLACKLIST_MOD_FILE)) + f.close() + + with open(BLACKLIST_MOD_FILE, "a+") as f: + tmp_log_object = {"Name": module, + "Original": 0, + "Changed": 1} + self.log_object[BLACKLIST].append(tmp_log_object) + f.write("install {0} /bin/false\n".format(module)) + except 
Exception, e: + raise Ec2AutotuneError( + "Couldn't update module blacklist {0}".format(e)) + def configure_system(self, section, configure, dry_run=False): if (dry_run is True): fetch_configuration(self.auto_profile, From 6a7b4257feb49cac7b76935b778f1e94b6ad79f8 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 5 Mar 2019 20:30:31 -0800 Subject: [PATCH 09/10] Update config and release versions Signed-off-by: Balbir Singh --- setup.py | 2 +- src/ec2sys_autotune/ec2_instance_cfg_gen.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5bbfedd..bc4f7e8 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ ec2sys_autotune_long_description = fp.read() setup(name="ec2sys-autotune", - version='1.0.4', + version='1.0.5', author="Vallish Vaidyeshwara", author_email="vallish@amazon.com", url="https://github.com/awslabs/ec2sys-autotune", diff --git a/src/ec2sys_autotune/ec2_instance_cfg_gen.py b/src/ec2sys_autotune/ec2_instance_cfg_gen.py index 2ef8fea..e5b50d5 100644 --- a/src/ec2sys_autotune/ec2_instance_cfg_gen.py +++ b/src/ec2sys_autotune/ec2_instance_cfg_gen.py @@ -67,7 +67,7 @@ class and then generate tunable configs. INSTANCE = "instance" VERSION = "version" # Bump this number for every new release -RELEASE = "1.0.0" +RELEASE = "1.0.5" # Types of tunables being tuned by autotune SERVICE = "service" From cca88ba673fa7b7b5f0e9bcc2dc5c202abf28efc Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 5 Mar 2019 20:31:38 -0800 Subject: [PATCH 10/10] Add note about on-prem images Signed-off-by: Balbir Singh --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 4ca9642..addc0ca 100644 --- a/README.md +++ b/README.md @@ -127,3 +127,8 @@ autotune exclude sysctl:vm.swappiness Delete customized tunable from autotune profile: autotune delete sysctl:vm.swappiness + +NOTE: +----- +autotune is currently designed to work with EC2 instances; support for on-prem images +is not currently available.