Skip to content

Commit 75e5635

Browse files
authored
Add a tool to view percentage of mmaps cache (#6310)
* Add a tool to view percentage of mmaps cache * review fixes
1 parent fe42d41 commit 75e5635

1 file changed

Lines changed: 194 additions & 0 deletions

File tree

tools/smaps-view.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""
2+
Usage examples:
3+
# Basic usage - show aggregated cache percentages for all vector index files
4+
python smaps-view.py $(pidof qdrant) '.*/vector_index/.*'
5+
6+
# Verbose mode - show individual files larger than 1MB
7+
python smaps-view.py $(pidof qdrant) '.*/vector_storage/.*' -v
8+
9+
Example output:
10+
Cache percentages for pattern '.*/vector_storage/.*':
11+
r--s: 0.00%
12+
rw-s: 0.06%
13+
"""
14+
15+
import re
16+
import sys
17+
import argparse
18+
from typing import Dict, Tuple, List
19+
from dataclasses import dataclass
20+
21+
22+
# Example of a single mapping entry in /proc/<pid>/smaps:
23+
#
24+
#
25+
# 7de883800000-7de885800000 r--s 00000000 00:33 26610006 /qdrant/storage/collections/benchmark/0/segments/6250c760-ccaf-468e-a96f-8e2b03f8c524/vector_storage/vectors/chunk_2.mmap
26+
# Size: 32768 kB
27+
# KernelPageSize: 4 kB
28+
# MMUPageSize: 4 kB
29+
# Rss: 0 kB
30+
# Pss: 0 kB
31+
# Pss_Dirty: 0 kB
32+
# Shared_Clean: 0 kB
33+
# Shared_Dirty: 0 kB
34+
# Private_Clean: 0 kB
35+
# Private_Dirty: 0 kB
36+
# Referenced: 0 kB
37+
# Anonymous: 0 kB
38+
# KSM: 0 kB
39+
# LazyFree: 0 kB
40+
# AnonHugePages: 0 kB
41+
# ShmemPmdMapped: 0 kB
42+
# FilePmdMapped: 0 kB
43+
# Shared_Hugetlb: 0 kB
44+
# Private_Hugetlb: 0 kB
45+
# Swap: 0 kB
46+
# SwapPss: 0 kB
47+
# Locked: 0 kB
48+
# THPeligible: 0
49+
# ProtectionKey: 0
50+
# VmFlags: rd sh mr mw me ms sr sd
51+
#
52+
53+
54+
55+
@dataclass
class MapStats:
    """Accumulated totals for one (file path, permissions) key, in kB."""

    # Sum of the "Size:" values of every mapping folded into this entry.
    size: int = 0
    # Sum of the "Rss:" values of every mapping folded into this entry.
    rss: int = 0


# A mapping entry starts with a header line that begins with a hexadecimal
# address range, e.g. "7de883800000-7de885800000 r--s 00000000 00:33 ...".
_MAP_HEADER_RE = re.compile(r'^[0-9a-f]+-[0-9a-f]+')

# Verbose mode only prints mappings strictly larger than this (1024 kB = 1MB).
_VERBOSE_MIN_SIZE_KB = 1024


def _record_mapping(
    file_stats: Dict[Tuple[str, str], MapStats],
    path: str,
    permissions: str,
    size: int,
    rss: int,
    verbose: bool,
) -> None:
    """Add one finished mapping to file_stats and print it in verbose mode."""
    stats = file_stats.setdefault((path, permissions), MapStats())
    stats.size += size
    stats.rss += rss

    # The size threshold also guarantees a non-zero divisor below.
    if verbose and size > _VERBOSE_MIN_SIZE_KB:
        percentage = (rss / size) * 100
        print(f"File: {path} ({permissions})")
        print(f" Size: {size} kB")
        print(f" RSS: {rss} kB")
        print(f" Cache percentage: {percentage:.2f}%")
        print()


def parse_smaps(smaps_content: str, pattern: str, verbose: bool = False) -> Dict[Tuple[str, str], MapStats]:
    """
    Parse smaps content and calculate size and RSS for files matching the pattern.
    Optionally prints individual mappings larger than 1MB.

    Mappings that share the same path and permissions are summed into a single
    entry. Paths are taken as everything after the fifth header field, so
    paths containing spaces (including a trailing " (deleted)") are preserved
    in full.

    Args:
        smaps_content: Content of /proc/{pid}/smaps file
        pattern: Regular expression searched for (re.search) in each file path
        verbose: Whether to print per-mapping information

    Returns:
        Dictionary mapping (file_path, permissions) tuples to their stats

    Raises:
        re.error: If pattern is not a valid regular expression
        ValueError: If a "Size:" or "Rss:" value is not an integer
    """
    regex = re.compile(pattern)
    file_stats: Dict[Tuple[str, str], MapStats] = {}

    # State of the mapping entry currently being read.
    current_file = None
    current_permissions = ""
    current_size = 0
    current_rss = 0

    for line in smaps_content.splitlines():
        if _MAP_HEADER_RE.match(line):
            # A new header closes the previous entry; record it if it matched.
            if current_file and regex.search(current_file):
                _record_mapping(
                    file_stats, current_file, current_permissions,
                    current_size, current_rss, verbose,
                )

            # Header fields: address, permissions, offset, device, inode and
            # an optional path. Splitting at most five times keeps a path
            # that contains whitespace in one piece.
            fields = line.split(maxsplit=5)
            current_file = fields[5].rstrip() if len(fields) > 5 else None
            current_permissions = fields[1] if len(fields) > 1 else ""
            current_size = 0
            current_rss = 0
        elif line.startswith('Size:'):
            current_size = int(line.split()[1])
        elif line.startswith('Rss:'):
            current_rss = int(line.split()[1])

    # The last entry has no following header to trigger its recording.
    if current_file and regex.search(current_file):
        _record_mapping(
            file_stats, current_file, current_permissions,
            current_size, current_rss, verbose,
        )

    return file_stats
136+
137+
def calculate_cache_percentage(pid: int, pattern: str, verbose: bool = False) -> Dict[str, float]:
    """
    Calculate the percentage of memory that is resident for files matching the pattern.
    Results are grouped by the permission string of the mappings.

    Args:
        pid: Process ID to analyze
        pattern: Regular expression pattern to match against file paths
        verbose: Whether to print per-file information

    Returns:
        Dictionary mapping permission strings to their cache percentages
        (RSS / size * 100). Empty if nothing matched or if any error occurred;
        errors are reported on stderr rather than raised.
    """
    try:
        # Mapped paths are arbitrary bytes on disk; decode leniently so one
        # undecodable file name does not abort the whole analysis.
        with open(f'/proc/{pid}/smaps', 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()

        file_stats = parse_smaps(content, pattern, verbose)

        # Sum (size, rss) over all matching files, per permission string.
        totals: Dict[str, Tuple[int, int]] = {}
        for (_, perms), stats in file_stats.items():
            size, rss = totals.get(perms, (0, 0))
            totals[perms] = (size + stats.size, rss + stats.rss)

        # A zero total size yields 0.0 instead of dividing by zero.
        return {
            perms: (rss / size) * 100 if size > 0 else 0.0
            for perms, (size, rss) in totals.items()
        }
    except FileNotFoundError:
        print(f"Error: Could not find /proc/{pid}/smaps", file=sys.stderr)
        return {}
    except Exception as e:
        # Deliberate best-effort: report any other failure and return nothing.
        print(f"Error processing smaps: {e}", file=sys.stderr)
        return {}
182+
183+
if __name__ == '__main__':
    # Command-line entry point: read the PID, the path pattern and the
    # optional verbose flag, then print one percentage line per permission
    # string returned by calculate_cache_percentage.
    cli = argparse.ArgumentParser(description='Analyze memory maps and calculate cache percentages')
    cli.add_argument('pid', type=int, help='Process ID to analyze')
    cli.add_argument('pattern', type=str, help='Regular expression pattern to match against file paths')
    cli.add_argument('-v', '--verbose', action='store_true', help='Print per-file information')
    options = cli.parse_args()

    report = calculate_cache_percentage(options.pid, options.pattern, options.verbose)

    # The header is printed even when the report is empty.
    print(f"Cache percentages for pattern '{options.pattern}':")
    for permission, value in report.items():
        print(f" {permission}: {value:.2f}%")

0 commit comments

Comments
 (0)