Skip to content

Commit 4a5866c

Browse files
vubvub
authored and committed
Added byte array literal support
1 parent 04619d4 commit 4a5866c

File tree

4 files changed

+110
-8
lines changed

4 files changed

+110
-8
lines changed

README.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,22 @@ Note that not all programs that satisfy the following are valid; for example, th
3737
OR <var>.<membername>
3838
OR <var>[<expr>]
3939
varname = <str>
40-
expr = <int>
40+
expr = <literal>
4141
OR <expr> <binop> <expr>
4242
OR <expr> <boolop> <expr>
4343
OR <expr> <compareop> <expr>
4444
OR not <expr>
4545
OR <var>
4646
OR <expr>.balance
47-
OR <literal>
47+
OR <system_var>
4848
OR <basetype>(<expr>) (only some type conversions allowed)
4949
OR floor(<expr>)
50-
literal = (block.timestamp, block.coinbase, block.number, block.difficulty, tx.origin, tx.gasprice, msg.gas, self)
50+
literal = <integer>
51+
OR <fixed point number>
52+
OR <address, in the form 0x12cd2f...3fe>
53+
OR <bytes32, in the form 0x414db52e5....2a7d>
54+
OR <bytes, in the form "cow">
55+
system_var = (block.timestamp, block.coinbase, block.number, block.difficulty, tx.origin, tx.gasprice, msg.gas, self)
5156
basetype = (num, decimal, bool, address, bytes32)
5257
unit = <baseunit>
5358
OR <baseunit> * <positive integer>
@@ -89,6 +94,8 @@ Arithmetic is overflow-checked, meaning that if a number is out of range then an
8994

9095
In all three cases, it's possible to statically determine the maximum runtime of a loop. Jumping out of a loop before it ends can be done with either `break` or `return`.
9196

97+
Regarding byte array literals, unicode strings like "这个傻老外不懂中文" or "Я очень умный" are illegal, though strings whose characters all have Unicode code points in the 0...255 range, like "¡très bien!", are fine; each such character is stored as the single byte equal to its code point.
98+
9299
Code examples can be found in the `test_parser.py` file.
93100

94101
### Planned future features
@@ -98,7 +105,6 @@ Code examples can be found in the `test_parser.py` file.
98105
* A mini-language for handling num256 and signed256 values and directly / unsafely using opcodes; will be useful for high-performance code segments
99106
* Support for sha3, sha256, ecrecover, etc
100107
* Smart optimizations, including compile-time computation of arithmetic and clamps, intelligently computing realistic variable ranges, etc
101-
* Basic byte array slicing, splicing and byte access
102108

103109
### Code example
104110

tests/test_invalids.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,3 +1076,39 @@ def cat(i1: bytes <= 10, i2: bytes <= 30) -> bytes <= 40:
10761076
def cat(i1: bytes <= 10, i2: bytes <= 30) -> bytes <= 40:
10771077
return concat(i1, 5)
10781078
""", TypeMismatchException)
1079+
1080+
must_succeed("""
1081+
def foo() -> bytes <= 10:
1082+
return "badminton"
1083+
""")
1084+
1085+
must_fail("""
1086+
def foo() -> bytes <= 10:
1087+
return "badmintonzz"
1088+
""", TypeMismatchException)
1089+
1090+
must_succeed("""
1091+
def foo() -> bytes <= 10:
1092+
return slice("badmintonzzz", start=1, len=10)
1093+
""")
1094+
1095+
must_fail("""
1096+
def foo() -> bytes <= 10:
1097+
x = '0x1234567890123456789012345678901234567890'
1098+
x = 0x1234567890123456789012345678901234567890
1099+
""", TypeMismatchException)
1100+
1101+
must_fail("""
1102+
def foo():
1103+
x = "these bytes are nо gооd because the o's are from the Russian alphabet"
1104+
""", InvalidLiteralException)
1105+
1106+
must_fail("""
1107+
def foo():
1108+
x = "这个傻老外不懂中文"
1109+
""", InvalidLiteralException)
1110+
1111+
must_succeed("""
1112+
def foo():
1113+
x = "¡très bien!"
1114+
""")

tests/test_parser.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
s = t.state()
99
t.languages['viper'] = compiler_plugin.Compiler()
1010

11+
1112
null_code = """
1213
def foo():
1314
pass
@@ -1087,3 +1088,50 @@ def foo(inp: bytes <= 50) -> bytes <= 1000:
10871088
c = s.abi_contract(test_concat2, language='viper')
10881089
assert c.foo("horse" * 9 + "viper") == (b"horse" * 9 + b"viper") * 10
10891090
print('Passed second concat test')
1091+
1092+
string_literal_code = """
1093+
def foo() -> bytes <= 5:
1094+
return "horse"
1095+
1096+
def bar() -> bytes <= 10:
1097+
return concat("b", "a", "d", "m", "i", "", "nton")
1098+
1099+
def baz() -> bytes <= 40:
1100+
return concat("0123456789012345678901234567890", "12")
1101+
1102+
def baz2() -> bytes <= 40:
1103+
return concat("01234567890123456789012345678901", "12")
1104+
1105+
def baz3() -> bytes <= 40:
1106+
return concat("0123456789012345678901234567890", "1")
1107+
1108+
def baz4() -> bytes <= 100:
1109+
return concat("01234567890123456789012345678901234567890123456789",
1110+
"01234567890123456789012345678901234567890123456789")
1111+
"""
1112+
1113+
c = s.abi_contract(string_literal_code, language='viper')
1114+
assert c.foo() == b"horse"
1115+
assert c.bar() == b"badminton"
1116+
assert c.baz() == b"012345678901234567890123456789012"
1117+
assert c.baz2() == b"0123456789012345678901234567890112"
1118+
assert c.baz3() == b"01234567890123456789012345678901"
1119+
assert c.baz4() == b"0123456789" * 10
1120+
1121+
print("String literal test passed")
1122+
1123+
for i in range(95, 96, 97):
1124+
kode = """
1125+
def foo(s: num, L: num) -> bytes <= 100:
1126+
x = 27
1127+
r = slice("%s", start=s, len=L)
1128+
y = 37
1129+
if x * y == 999:
1130+
return r
1131+
""" % ("c" * i)
1132+
c = s.abi_contract(kode, language='viper')
1133+
for e in range(63, 64, 65):
1134+
for _s in range(31, 32, 33):
1135+
assert c.foo(_s, e - _s) == b"c" * (e - _s), (i, _s, e - _s, c.foo(_s, e - _s))
1136+
1137+
print("String literal splicing fuzz-test passed")

viper/parser.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ def fourbytes_to_int(inp):
3636
def hex_to_int(inp):
3737
if inp[:2] == '0x':
3838
inp = inp[2:]
39-
bytez = binascii.unhexlify(inp)
39+
return bytes_to_int(binascii.unhexlify(inp))
40+
41+
# Converts bytes to an integer
42+
def bytes_to_int(bytez):
4043
o = 0
4144
for b in bytez:
4245
o = o * 256 + b
@@ -301,7 +304,7 @@ def mk_full_signature(code):
301304
def parse_tree_to_lll(code, origcode):
302305
_defs, _globals = get_defs_and_globals(code)
303306
if len(set([_def.name for _def in _defs])) < len(_defs):
304-
raise VariableDeclarationException("Duplicate function name!")
307+
raise VariableDeclarationException("Duplicate function name: %s" % [x for x in _defs if _defs.count(x) > 1][0])
305308
# Initialization function
306309
initfunc = [_def for _def in _defs if is_initializer(_def)]
307310
# Regular functions
@@ -319,7 +322,6 @@ def parse_tree_to_lll(code, origcode):
319322

320323
# Parses a function declaration
321324
def parse_func(code, _globals, origcode, _vars=None):
322-
assert isinstance(origcode, str)
323325
name, args, output_type, const, sig, method_id = get_func_details(code)
324326
for arg in args:
325327
if arg[0] in _globals:
@@ -426,7 +428,17 @@ def parse_expr(expr, context):
426428
raise InvalidLiteralException("Cannot read 0x value with length %d. Expecting 40 (address) or 64 (bytes32)" % L)
427429
# Byte array literals
428430
elif isinstance(expr, ast.Str):
429-
raise Exception("Not yet implemented")
431+
bytez = b''
432+
for c in expr.s:
433+
if ord(c) >= 256:
434+
raise InvalidLiteralException("Cannot insert special character %r into byte array" % c)
435+
bytez += bytes([ord(c)])
436+
placeholder = context.new_placeholder(ByteArrayType(len(bytez)))
437+
seq = []
438+
seq.append(['mstore', placeholder, len(bytez)])
439+
for i in range(0, len(bytez), 32):
440+
seq.append(['mstore', ['add', placeholder, i + 32], bytes_to_int((bytez + b'\x00' * 31)[i: i + 32])])
441+
return LLLnode.from_list(['seq'] + seq + [placeholder], typ=ByteArrayType(len(bytez)), location='memory')
430442
# True, False, None constants
431443
elif isinstance(expr, ast.NameConstant):
432444
if expr.value == True:

0 commit comments

Comments
 (0)