Skip to content

Commit 852f55e

Browse files
committed
merge bitcoin#24932: Convert lint-locale-dependence.sh to Python
1 parent f745b7f commit 852f55e

File tree

2 files changed

+264
-246
lines changed

2 files changed

+264
-246
lines changed
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2018-2022 The Bitcoin Core developers
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
#
6+
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
7+
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
8+
# whereas no such call is made in bitcoind.
9+
#
10+
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
11+
# specified by the user's LC_ALL (or LC_*) environment variable as the new
12+
# C locale.
13+
#
14+
# In contrast, bitcoind does not opt in to localization -- no call to
15+
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
16+
# thus ignored.
17+
#
18+
# This results in situations where bitcoind is guaranteed to be running
19+
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
20+
# depending on the user's environment variables.
21+
#
22+
# An example: Assuming the environment variable LC_ALL=de_DE then the
23+
# call std::to_string(1.23) will return "1.230000" in bitcoind but
24+
# "1,230000" in bitcoin-qt.
25+
#
26+
# From the Qt documentation:
27+
# "On Unix/Linux Qt is configured to use the system locale settings by default.
28+
# This can cause a conflict when using POSIX functions, for instance, when
29+
# converting between data types such as floats and strings, since the notation
30+
# may differ between locales. To get around this problem, call the POSIX function
31+
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
32+
# or QCoreApplication to reset the locale that is used for number formatting to
33+
# "C"-locale."
34+
#
35+
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
36+
# https://stackoverflow.com/a/34878283 for more details.
37+
#
38+
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent stoul/strtol with locale
39+
# independent ToIntegral<T>(...).
40+
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
41+
42+
import re
43+
import sys
44+
45+
from subprocess import check_output, CalledProcessError
46+
47+
48+
# Regular expressions that main() joins with "|" and searches for in each
# `git grep` output line; a line matching any of them is not reported.
KNOWN_VIOLATIONS = [
    "src/bitcoin-tx.cpp.*stoul",
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/dbwrapper_tests.cpp:.*snprintf",
    "src/test/fuzz/locale.cpp",
    "src/test/fuzz/string.cpp",
    "src/util/strencodings.cpp:.*strtoll",
    "src/util/system.cpp:.*fprintf",
]
57+
58+
# Paths that are left out of the search. Despite the REGEXP_ prefix, each entry
# is passed to `git grep` as a ":(exclude)<path>" pathspec, not as a regexp.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
    "src/univalue/",
    "src/dashbls/",
    "src/immer/",
]
68+
69+
# Names of the functions whose use is reported. Each name is inserted verbatim
# into the regular expressions built by find_locale_dependent_function_uses()
# and main(), where an optional "_r" or "_s" suffix is also accepted.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    #"strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf",
]
221+
222+
223+
def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that mention a locale dependent function.

    Searches the tracked "*.cpp" and "*.h" files, minus the paths listed in
    REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS, for any name from
    LOCALE_DEPENDENT_FUNCTIONS (optionally followed by "_r" or "_s") that is
    delimited on both sides by a character that is not an identifier
    character, backslash, backtick, quote or angle bracket.

    Returns:
        The output of `git grep` split into lines; an empty list when
        `git grep` exits with status 1, which this script treats as
        "nothing matched".

    Raises:
        CalledProcessError: if `git grep` ends with any status other than
            0 or 1, including termination by a signal (negative status).
    """
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = [
        "git", "grep", "-E",
        "[^a-zA-Z0-9_\\`'\"<>](" + regexp_locale_dependent_functions + "(_r|_s)?)[^a-zA-Z0-9_\\`'\"<>]",
        "--", "*.cpp", "*.h",
    ] + exclude_args

    try:
        # Passing `encoding` already puts the pipe in text mode.
        return check_output(git_grep_command, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Only status 1 means "no matches". Anything else is a real failure
        # and must not be mistaken for a clean result; this includes negative
        # codes, which subprocess uses for a child killed by a signal.
        if e.returncode != 1:
            raise
        return []
236+
237+
238+
def main():
    """Print every unexpected use of a locale dependent function, then exit.

    Takes the lines returned by find_locale_dependent_function_uses() and,
    for each name in LOCALE_DEPENDENT_FUNCTIONS, reports the lines that

    * mention the name (optionally suffixed with "_r" or "_s") delimited on
      both sides by a non-identifier character,
    * do not have the name preceded by "//", "*", "/*" or a double quote at
      the start of the line content of a .c/.cpp/.h file, and
    * do not match any entry of KNOWN_VIOLATIONS.

    Exits the process with status 1 if anything was reported, otherwise 0.
    """
    exit_code = 0

    # Whether a line is a known violation does not depend on the function being
    # checked, so filter once up front. Matching pattern by pattern (instead of
    # joining them with "|") also keeps an empty KNOWN_VIOLATIONS from turning
    # into the empty regexp, which would match, and thereby ignore, every line.
    known_violations = [re.compile(pattern) for pattern in KNOWN_VIOLATIONS]
    candidate_lines = [line for line in find_locale_dependent_function_uses()
                       if not any(pattern.search(line) for pattern in known_violations)]

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        use_pattern = re.compile("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]")
        # Skips mentions that follow a comment opener or an opening double quote.
        comment_pattern = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)
        matches = [line for line in candidate_lines
                   if use_pattern.search(line) and not comment_pattern.search(line)]
        if not matches:
            continue

        print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
        for match in matches:
            print(match)
        print("")
        exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
261+
262+
263+
# Run the lint check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)