From 204d93ee886b7ee7eee91c0118275a9842c45cb6 Mon Sep 17 00:00:00 2001 From: garimoo Date: Sat, 11 Nov 2023 16:09:19 +0900 Subject: [PATCH] Release rks version 1.0.0 --- .gitignore | 164 +++++++++++++++++++++++++++++++++++++++++++++ README.md | 112 ++++++++++++++++++++++++++++++- requirements.txt | 3 + rks/__init__.py | 0 rks/analysis.py | 40 +++++++++++ rks/display.py | 57 ++++++++++++++++ rks/main.py | 91 +++++++++++++++++++++++++ rks/redis_utils.py | 147 ++++++++++++++++++++++++++++++++++++++++ setup.py | 26 +++++++ 9 files changed, 638 insertions(+), 2 deletions(-) create mode 100644 .gitignore create mode 100644 requirements.txt create mode 100644 rks/__init__.py create mode 100644 rks/analysis.py create mode 100644 rks/display.py create mode 100644 rks/main.py create mode 100644 rks/redis_utils.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7d930d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +.pypirc +.DS_Store diff --git a/README.md b/README.md index 976e083..369368b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,110 @@ -# redis-keys-statistics -rks(redis-keys-statistics) is a Python tool for analyzing and reporting key usage statistics in Redis databases, including memory usage and type distribution, created by garimoo and is under the copyright of Woowabros +# Redis Keys Statistics + +## Overview +`redis-keys-statistics` is a Python tool, designed for analyzing and reporting key usage statistics in Redis databases with exceptional speed and efficiency. By leveraging Lua scripting to reduce network I/O, it dramatically outperforms other open-source tools — tasks that typically take hours are completed in just minutes. + +## Features +- **High Performance**: Utilizes Lua scripting to minimize network I/O, delivering results significantly faster than traditional methods. Where some tools might take hours, `redis-keys-statistics` can complete the same task in a fraction of the time. 
+- **Efficient Key Scanning**: Uses the Redis `SCAN` command for batch scanning of keys, further optimizing performance. +- **Memory Usage Statistics**: Reports memory usage for keys in various units (B, KB, MB, GB). +- **Top N Largest Keys**: Quickly identifies the largest keys for memory optimization. +- **Key Count by Type**: Provides a breakdown of keys by type for better data structure insights. +- **Prefix-Based Analysis**: Analyzes keys based on prefixes to understand namespace usage. +- **Cluster Support**: Compatible with Redis clusters, including replica-only analysis. +- **Readable Output**: Formatted and easy-to-read statistical tables using `prettytable`. + +## Installation +```bash +pip install rks +``` + +## Usage +Execute the script with the necessary Redis connection parameters: +```bash +rks --host <host> --port <port> [--password <password>] +``` +For detailed usage options: +```bash +rks --help +``` + +## Command Line Interface +``` +usage: rks [-h] --host HOST --port PORT [--password PASSWORD] [--cluster] [--batch_size BATCH_SIZE] [--replica_only] [--pretty_format] + +Analyze Redis Instance Key Statistics. 
+ +optional arguments: + -h, --help show this help message and exit + --host HOST Redis host + --port PORT Redis port + --password PASSWORD Redis password + --cluster Enable cluster mode + --batch_size BATCH_SIZE + Batch size for SCAN command + --replica_only Execute only on replica instances + --pretty_format Display output in a human-readable format +``` + +## Example Output +Here is an example of what the output from `redis-keys-statistics` might look like: + +### Top 20 Largest Keys in Redis +``` ++-------------------+------+---------+------------+-------+ +| Key | Type | Size | Size Ratio | TTL | ++-------------------+------+---------+------------+-------+ +| user_sessions:123 | hash | 1.2 MB | 150% ↑ | 360 | +| cache:page:001 | zset | 900 KB | 120% ↑ | -1 | +| config:app | hash | 800 KB | 100% ↑ | 86400 | +| queue:jobs | list | 600 KB | 80% ↑ | -1 | +| temp:data:456 | set | 500 KB | 60% ↑ | 1800 | +| ... | ... | ... | ... | ... | ++-------------------+------+---------+------------+-------+ + +``` + +### Key Count by Type +``` ++--------+-------+ +| Type | Count | ++--------+-------+ +| hash | 250 | +| zset | 150 | +| list | 100 | +| set | 75 | +| string | 200 | ++--------+-------+ + +``` + +### Detailed Prefix Statistics +``` ++-------------+-------+--------------+---------+-----------------+ +| Prefix Name | Count | Average Size | Max TTL | Types | ++-------------+-------+--------------+---------+-----------------+ +| user | 100 | 200 KB | 3600 | - Type: hash | +| | | | | Count: 50 | +| | | | | - Type: string | +| | | | | Count: 50 | +| cache | 80 | 150 KB | -1 | - Type: zset | +| | | | | Count: 80 | +| config | 20 | 100 KB | 86400 | - Type: hash | +| | | | | Count: 20 | +| temp | 150 | 50 KB | 1800 | - Type: set | +| | | | | Count: 100 | +| | | | | - Type: list | +| | | | | Count: 50 | ++-------------+-------+--------------+---------+-----------------+ + +``` + +## Requirements +- Python 3.x +- Redis server or cluster + +## Contributing +Contributions are 
def format_memory_size(size):
    """Render a byte count as a human-readable string with two decimals.

    Units are binary (1 KB = 1024 B); GB is the largest unit used.

    Args:
        size: Size in bytes (int or float).

    Returns:
        A string such as ``"512.00 B"``, ``"1.50 KB"``, ``"2.00 MB"`` or
        ``"3.25 GB"``.
    """
    # The bytes branch previously used the format spec " .2f"; its space flag
    # produced a stray leading blank (" 512.00 B") unlike every other unit.
    if size < 1024:
        return f"{size:.2f} B"
    if size < 1024 ** 2:
        return f"{size / 1024:.2f} KB"
    if size < 1024 ** 3:
        return f"{size / 1024 ** 2:.2f} MB"
    return f"{size / 1024 ** 3:.2f} GB"


def update_topk_heap(item, min_heap, heap_size):
    """Offer *item* to a bounded min-heap that keeps the largest entries.

    Args:
        item: Sequence whose element at index 2 is the value to rank by
            (the key's memory usage as used by the callers in this package).
        min_heap: List maintained with ``heapq``; mutated in place. Entries
            are ``(item[2], item)`` tuples.
        heap_size: Maximum number of entries to retain.

    Returns:
        None.
    """
    entry = (item[2], item)
    if len(min_heap) < heap_size:
        heapq.heappush(min_heap, entry)
    else:
        # Equivalent to push followed by pop of the smallest entry, but done
        # in a single sift; the heap never grows past its current length.
        heapq.heappushpop(min_heap, entry)


def update_statistics(item, prefix_statistics_map):
    """Fold one scanned key into the per-prefix statistics.

    Only keys containing ``b':'`` are counted; the prefix is the part of the
    key before the first colon.

    Args:
        item: Sequence ``(key, key_type, memory, ttl)`` where ``key`` is a
            bytes object and ``memory`` is convertible with ``int()``.
        prefix_statistics_map: Dict mutated in place, mapping each prefix to
            a dict with the entries ``'count'``, ``'total_size'``,
            ``'max_ttl'`` (starting at -1) and ``'type_count'`` (a dict of
            key type to number of keys).

    Returns:
        None.
    """
    key = item[0]
    key_type = item[1]
    memory = int(item[2])
    ttl = item[3]

    if b':' not in key:
        return

    key_prefix = key.split(b':', 1)[0]
    stats = prefix_statistics_map.get(key_prefix)
    if stats is None:
        stats = {'count': 0, 'total_size': 0, 'max_ttl': -1, 'type_count': {}}
        prefix_statistics_map[key_prefix] = stats

    stats['count'] += 1
    stats['total_size'] += memory
    stats['max_ttl'] = max(stats['max_ttl'], ttl)

    type_count = stats['type_count']
    type_count[key_type] = type_count.get(key_type, 0) + 1
def _size_ratio_label(memory, average_key_size):
    """Format how far *memory* deviates from the average key size, in percent."""
    if not average_key_size:
        # No meaningful average to compare against; the unguarded division
        # would raise ZeroDivisionError here.
        return "N/A"
    deviation = (memory - average_key_size) / average_key_size * 100
    # Keys smaller than the average get a down arrow instead of the
    # previously unconditional (and then misleading) up arrow.
    arrow = "↑" if deviation >= 0 else "↓"
    return "{:.1f}% {}".format(deviation, arrow)


def _top_keys_table(min_heap, average_key_size, use_pretty):
    """Build the table of the largest keys, biggest first."""
    table = PrettyTable()
    table.title = "Top 20 largest keys in Redis"
    table.field_names = ["Key", "Type", "Size", "Size Ratio", "TTL"]

    for _, item in sorted(min_heap, key=lambda entry: entry[0], reverse=True):
        key, key_type, ttl = item[0], item[1], item[3]
        memory = int(item[2])
        size_ratio = _size_ratio_label(memory, average_key_size)
        size_cell = format_memory_size(memory) if use_pretty else memory
        table.add_row([key.decode('utf-8', errors='ignore'), key_type.decode('utf-8'),
                       size_cell, size_ratio, ttl])
    return table


def _type_count_table(key_count_by_type):
    """Build the table listing how many keys exist per Redis type."""
    table = PrettyTable()
    table.title = "Key Count by Type"
    table.field_names = ["Type", "Count"]

    for key_type, count in key_count_by_type.items():
        table.add_row([key_type.decode('utf-8', errors='ignore'), count])
    return table


def _prefix_table(prefix_statistics_map, use_pretty):
    """Build the table of per-prefix count, average size, max TTL and types."""
    table = PrettyTable()
    table.title = "Detailed Prefix Statistics"
    table.field_names = ["Prefix Name", "Count", "Average Size", "Max TTL", "Types"]

    for prefix, stats in prefix_statistics_map.items():
        count = stats['count']
        average_size = round(stats['total_size'] / count, 2) if count != 0 else 0
        if use_pretty:
            average_size = format_memory_size(average_size)

        types = "".join(
            " - Type: {}, Count: {}\n".format(key_type.decode('utf-8'), type_count)
            for key_type, type_count in stats['type_count'].items()
        )
        table.add_row([prefix.decode('utf-8', errors='ignore'), count,
                       average_size, stats['max_ttl'], types])
    return table


def analyze_redis_keys(min_heap, prefix_statistics_map, total_key_size, total_key_count, key_count_by_type, db_num, use_pretty):
    """Print the three report tables for one scanned database.

    Args:
        min_heap: Iterable of ``(memory, item)`` tuples where ``item`` is
            ``(key, key_type, memory, ttl)`` with bytes ``key``/``key_type``.
        prefix_statistics_map: Dict of bytes prefix to a stats dict holding
            ``'count'``, ``'total_size'``, ``'max_ttl'`` and ``'type_count'``.
        total_key_size: Sum of the memory usage of all scanned keys.
        total_key_count: Number of scanned keys.
        key_count_by_type: Dict of bytes key type to number of keys.
        db_num: Database number shown in the report header.
        use_pretty: When true, sizes are rendered via ``format_memory_size``
            instead of as raw numbers.

    Returns:
        None; the report is written to standard output.
    """
    average_key_size = total_key_size / total_key_count if total_key_count != 0 else 0

    print("\n" + "-"*25 + f"\nAnalyzing DB {db_num}\n" + "-"*25 + "\n")

    print(_top_keys_table(min_heap, average_key_size, use_pretty))
    print(_type_count_table(key_count_by_type))
    print(_prefix_table(prefix_statistics_map, use_pretty))
# Socket timeout, in seconds, applied to the connections opened by main().
_CONNECTION_TIMEOUT = 10


def _print_elapsed(process_start_time):
    """Print the wall-clock time elapsed since *process_start_time*."""
    process_taken_time = str(datetime.now() - process_start_time).split(".")[0]
    print(f'Process completed in {process_taken_time} (HH:MM:SS)')


def _analyze_cluster(args):
    """Connect to a Redis cluster and report its key statistics."""
    rc = None
    try:
        rc = rediscluster.RedisCluster(startup_nodes=[{"host": args.host, "port": args.port}],
                                       skip_full_coverage_check=True,
                                       password=args.password,
                                       socket_timeout=_CONNECTION_TIMEOUT)

        redis_info = rc.info()

        # The cluster client's info() result is treated as one entry per
        # node; the first entry is inspected.
        if list(redis_info.values())[0].get('cluster_enabled') != 1:
            print("Error: Specified Redis instance is not running in cluster mode but --cluster flag was provided.")
            return

        process_start_time = datetime.now()

        if get_redis_cluster_keys(rc, args.batch_size, args.replica_only, args.pretty_format) == -1:
            print(f"Aborted the operation on non-readonly redis at host: {args.host}")
            return

        _print_elapsed(process_start_time)

    except rediscluster.exceptions.RedisClusterError as e:
        print(f"Error connecting to Redis cluster: {args.host}. Error message: {str(e)}")
    except rediscluster.exceptions.RedisClusterException as e:
        if "ERROR sending 'cluster slots' command to redis server" in str(e):
            print("Error: Specified Redis instance is not running in cluster mode but --cluster flag was provided.")
        else:
            print(f"Failed to connect to Redis cluster: {args.host}. Error message: {str(e)}")
    except redis.exceptions.RedisError as e:
        # Errors raised by the underlying redis client (timeouts, refused
        # connections, command errors) were previously left uncaught here.
        print(f"Redis error while analyzing {args.host}. Error message: {str(e)}")
    finally:
        # Close on every path; the early returns above used to skip it.
        if rc is not None:
            rc.close()


def _analyze_standalone(args):
    """Connect to a single Redis instance and report every non-empty database."""
    r = None
    try:
        r = redis.Redis(host=args.host, port=args.port,
                        password=args.password,
                        socket_timeout=_CONNECTION_TIMEOUT)

        redis_info = r.info()

        if redis_info.get('cluster_enabled') != 0:
            print("Error: Specified Redis instance is running in cluster mode but --cluster flag was not provided.")
            return

        if redis_info.get('role') != 'slave' and args.replica_only:
            print(f"Aborted the operation on non-readonly redis at host: {args.host}")
            return

        process_start_time = datetime.now()

        # INFO entries named "db<N>" identify the databases to scan.
        for section in redis_info:
            if section.startswith('db') and section[2:].isdigit():
                get_redis_keys(r, args.batch_size, int(section[2:]), args.pretty_format)

        _print_elapsed(process_start_time)

    except redis.exceptions.TimeoutError as e:
        print(f"Timeout connecting to Redis: {args.host}. Error message: {str(e)}")
    except redis.exceptions.ConnectionError as e:
        print(f"Failed to connect to Redis: {args.host}. Error message: {str(e)}")
    except redis.exceptions.RedisError as e:
        print(f"Redis error while analyzing {args.host}. Error message: {str(e)}")
    finally:
        if r is not None:
            r.close()


def main():
    """Command-line entry point: parse arguments and run the key analysis.

    Options: ``--host`` and ``--port`` (required), ``--password``,
    ``--cluster`` (use the cluster client), ``--batch_size`` (SCAN batch
    size, default 1000), ``--replica_only`` (refuse to run against a
    non-replica) and ``--pretty_format`` (human-readable sizes).

    Connection and Redis errors are reported on standard output rather than
    raised. Invalid ``--port`` or ``--batch_size`` values are rejected by the
    argument parser.
    """
    parser = argparse.ArgumentParser(description='Analyze Redis Instance Key Statistics.')
    parser.add_argument('--host', required=True, help='Redis host')
    # type=int makes argparse report a bad port instead of a later ValueError.
    parser.add_argument('--port', required=True, type=int, help='Redis port')
    parser.add_argument('--password', default=None, help='Redis password')
    parser.add_argument('--cluster', action='store_true', help='Enable cluster mode')
    parser.add_argument('--batch_size', type=int, default=1000, help='Batch size for SCAN command')
    parser.add_argument('--replica_only', action='store_true', help='Execute only on replica instances')
    parser.add_argument('--pretty_format', action='store_true', help='Display output in a human-readable format')

    args = parser.parse_args()

    if args.batch_size <= 0:
        parser.error('--batch_size must be a positive integer')

    if args.cluster:
        _analyze_cluster(args)
    else:
        _analyze_standalone(args)
# Number of largest keys retained for the report.
_TOP_KEYS = 20

# Lua script executed once per SCAN batch so that TYPE, MEMORY USAGE and TTL
# for the whole batch cost a single round trip.
# ARGV[1] is the SCAN cursor, ARGV[2] the COUNT hint.
_SCAN_BATCH_SCRIPT = """
local scanResult = redis.call('SCAN', ARGV[1], 'COUNT', ARGV[2])
local result = {}

for i, key in ipairs(scanResult[2]) do
    local key_type = redis.call('TYPE', key)['ok']
    -- a nil MEMORY USAGE reply is reported as 0 instead of breaking int()
    local memory = redis.call('MEMORY', 'USAGE', key) or 0
    local ttl = redis.call('TTL', key)

    table.insert(result, {key, key_type, memory, ttl})
end

return {scanResult[1], result}
"""


def _new_scan_state():
    """Return empty accumulators shared by all scanned nodes of one report."""
    return {
        'min_heap': [],
        'prefix_statistics_map': {},
        'total_key_count': 0,
        'total_key_size': 0,
        'key_count_by_type': {},
    }


def _scan_into(r, batch_size, state):
    """Scan every key reachable through *r* and fold it into *state*."""
    key_count_by_type = state['key_count_by_type']
    cursor = b'0'
    while True:
        cursor, result = r.eval(_SCAN_BATCH_SCRIPT, 0, cursor, batch_size)

        for item in result:
            key_type = item[1]
            state['total_key_count'] += 1
            state['total_key_size'] += int(item[2])
            key_count_by_type[key_type] = key_count_by_type.get(key_type, 0) + 1

            update_topk_heap(item, state['min_heap'], _TOP_KEYS)
            update_statistics(item, state['prefix_statistics_map'])

        # A zero cursor ends the iteration; int() accepts bytes and str.
        if int(cursor) == 0:
            break

        # Short pause between batches, kept from the original scan loop.
        time.sleep(0.01)


def _report(state, db_num, use_pretty):
    """Print the accumulated statistics via analyze_redis_keys."""
    return analyze_redis_keys(state['min_heap'], state['prefix_statistics_map'],
                              state['total_key_size'], state['total_key_count'],
                              state['key_count_by_type'], db_num, use_pretty)


def _pick_scan_targets(nodes, replica_only):
    """Choose one node per master to scan, or None when replicas are required but missing."""
    replicas_of = {}
    for node in nodes:
        if 'slave' in node['flags']:
            replicas_of.setdefault(node['master'], []).append(node)

    if replica_only and not replicas_of:
        return None

    targets = []
    for node in nodes:
        if 'master' not in node['flags']:
            continue
        replicas = replicas_of.get(node['id'])
        if replicas:
            # Prefer the first replica, as the original selection did.
            targets.append(replicas[0])
        elif replica_only:
            # This shard has no replica; scanning its master would break the
            # replica-only guarantee.
            return None
        else:
            # Previously an IndexError when other masters had replicas.
            targets.append(node)
    return targets


def _node_connection_kwargs(rc):
    """Collect password/timeout settings from the cluster client for per-node connections."""
    # NOTE(review): assumes the cluster client's pool exposes
    # `connection_kwargs` (as redis-py-cluster's pool appears to); when it
    # does not, no extra settings are passed and behavior is unchanged.
    pool = getattr(rc, 'connection_pool', None)
    source = getattr(pool, 'connection_kwargs', None) or {}
    return {name: source[name]
            for name in ('password', 'socket_timeout')
            if source.get(name) is not None}


def get_redis_keys(r, batch_size, db_num, use_pretty):
    """Scan one database of a standalone Redis instance and print its report.

    Args:
        r: Redis client; must support ``execute_command`` and ``eval``.
        batch_size: COUNT hint passed to each SCAN call.
        db_num: Database number to SELECT before scanning.
        use_pretty: Forwarded to the report to render human-readable sizes.

    Returns:
        Whatever ``analyze_redis_keys`` returns.
    """
    # NOTE(review): SELECT affects one connection; presumably the client
    # keeps using that same connection for the scan — verify if pooling
    # behavior changes.
    r.execute_command('SELECT', db_num)

    state = _new_scan_state()
    _scan_into(r, batch_size, state)
    return _report(state, db_num, use_pretty)


def get_redis_cluster_keys(rc, batch_size, replica_only, use_pretty):
    """Scan one node per cluster shard and print a combined report.

    For each master listed by ``rc.cluster_nodes()`` its first replica is
    scanned when it has one, otherwise the master itself.

    Args:
        rc: Cluster client providing ``cluster_nodes()``.
        batch_size: COUNT hint passed to each SCAN call.
        replica_only: When true, abort instead of scanning any master.
        use_pretty: Forwarded to the report to render human-readable sizes.

    Returns:
        -1 when ``replica_only`` is set and some shard has no replica;
        otherwise whatever ``analyze_redis_keys`` returns.
    """
    targets = _pick_scan_targets(rc.cluster_nodes(), replica_only)
    if targets is None:
        return -1

    connection_kwargs = _node_connection_kwargs(rc)
    state = _new_scan_state()

    for node in targets:
        # Connect to the node directly; the password and timeout of the
        # cluster client are reused when available (previously dropped).
        r = redis.Redis(host=node['host'], port=node['port'], **connection_kwargs)
        try:
            r.execute_command('READONLY')
            _scan_into(r, batch_size, state)
        finally:
            # Release the connection even when the scan fails part-way.
            r.close()

    return _report(state, 0, use_pretty)
from setuptools import setup, find_packages

# Package metadata, kept in named constants so the setup() call stays short.
_DESCRIPTION = (
    'rks(redis-keys-statistics) is a Python tool for analyzing and reporting '
    'key usage statistics in Redis databases, including memory usage and type '
    'distribution, created by garimoo and is under the copyright of Woowabros'
)

_REQUIREMENTS = ['redis', 'redis-py-cluster', 'prettytable']

_KEYWORDS = [
    'redis',
    'pypi',
    'redis keys statistics',
    'rks',
    'redis statistics',
    'garim',
    'garimoo',
]

# One trove classifier per listed Python minor version.
_CLASSIFIERS = [
    'Programming Language :: Python :: 3.{}'.format(minor)
    for minor in (6, 7, 8, 9)
]

# Installs the `rks` command, which invokes rks.main:main.
_ENTRY_POINTS = {'console_scripts': ['rks = rks.main:main']}

setup(
    name='rks',
    version='1.0.0',
    description=_DESCRIPTION,
    author='garimoo',
    author_email='garimoo.kim@gmail.com',
    install_requires=_REQUIREMENTS,
    packages=find_packages(),
    entry_points=_ENTRY_POINTS,
    keywords=_KEYWORDS,
    python_requires='>=3.6',
    package_data={},
    zip_safe=False,
    classifiers=_CLASSIFIERS,
)