Machine Level Programming:
Arrays, Structures and More
Computer Systems Organization (Spring 2016)
CSCI-UA 201, Section 2
Instructor: Joanna Klukowska
Slides adapted from
Randal E. Bryant and David R. OHallaron (CMU)
Mohamed Zahran (NYU)
1D Arrays
Array Allocation
Basic Principle
T A[L];
Array of data type T and length L
Contiguously allocated region of L * sizeof(T) bytes in memory
char string[12];
x
x + 12
int val[5];
x
x+4
x+8
x + 12
x + 16
x + 20
double a[3];
x
x+8
x + 16
x + 24
char *p[3];
3
x+8
x + 16
x + 24
Array Access
Basic Principle
T A[L];
Array of data type T and length L
Identifier A can be used as a pointer to array element 0: Type T*
1
int val[5];
x
Reference
val[4]
val
val+1
&val[2]
val[5]
*(val+1)
val + i
7
x+4
Type
Value
int
int
int
int
int
int
int
3
x
x+4
x+8
??
7
x+4i
*
*
*
0
x+8
5
x + 12
3
x + 16
x + 20
Array Example
#define LEN 5
int zip1[LEN] = { 1, 5, 2, 1, 3 };
int zip2[LEN] = { 0, 2, 1, 3, 9 };
int zip3[LEN] = { 9, 4, 7, 2, 0 };
int[LEN] zip1;
16
20
0
int[LEN] zip2;
36
56
2
24
2
40
int[LEN] zip3;
28
1
44
4
60
1
32
3
48
7
64
9
52
2
68
36
56
0
72
76
Example arrays were allocated in successive 20 byte blocks
Not guaranteed to happen in general
5
Array Accessing Example
1
int[LEN] zip1;
16
5
20
2
24
1
28
3
32
36
int get_zip_digit ( int zip [LEN], int digit ) {
return zip[digit];
}
# %rdi = z
<- it's an int pointer
# %esi = digit
movslq %esi, %rsi
movl
(%rdi,%rsi,4), %eax
ret
Register %rdi contains
starting address of array
Register %rsi contains
array index
Desired digit at
%rdi + 4*%rsi
Use memory reference (%
rdi,%rsi,4)
Array Loop Example
void incr( int zip [] ) {
int i;
for (i = 0; i < LEN; i++)
zip[i]++;
}
#%rdi is zip
movl
$0, %eax
jmp .L3
.L4:
movslq %eax, %rdx
addl
$1, (%rdi,%rdx,4)
addl
$1, %eax
.L3:
cmpl
$4, %eax
jle .L4
rep ret
See p. 208 (Aside) for explanation of the rep instruction.
# i = 0
# goto L3
# extend to %rdx
# z[i] ++
# i++
# compare i to 4
# if <=, goto L4
2D Arrays
Multidimensional (Nested) Arrays
Declaration
T A[R][C];
2D array of data type T
A[0][0]
R rows, C columns
Type T element requires K bytes
A[R-1][0]
Array Size
A[0][C-1]
A[R-1][C-1]
R * C * K bytes
Arrangement
Row-Major Ordering
int A[R][C];
A
[0]
[0]
A
[0]
[C-1]
A
[1]
[0]
A
[1]
[C-1]
4*R*C
Bytes
A
[R-1]
[0]
A
[R-1]
[C-1]
9
Nested Array Example
#define COUNT 4
int zips[LEN][COUNT] =
{{1, 5, 2, 0, 6},
{1, 5, 2, 1, 3 },
{1, 5, 2, 1, 7 },
{1, 5, 2, 2, 1 }};
int zips[4];
1 5 2 0 6 1 5 2 1 3 1 5 2 1 7 1 5 2 2 1
76
96
116
136
156
Variable zips: array of 4 elements, allocated contiguously
Each element is an array of 5 ints, allocated contiguously
Row-Major ordering of all elements in memory
10
Nested Array Row Access
Row Vectors
A[i] is array of C elements
Each element of type T requires K bytes
Starting address A + i * (C * K)
int A[R][C];
A[0]
A
[0]
[0]
A[i]
A
[0]
[C-1]
A
[i]
[0]
A+(i*C*4)
A[R-1]
A
[i]
[C-1]
A
[R-1]
[0]
A
[R-1]
[C-1]
A+((R-1)*C*4)
11
Nested Array Row Access Code
#define ROWS 4
#define COLS 5
int* get_zip ( int zips [][COLS], int ind ) {
return zips[ind];
}
# %rdi
movslq
leaq
salq
addq
ret
= zips %esi = ind
%esi, %rsi
(%rsi,%rsi,4), %rax
$2, %rax
%rdi, %rax
Row Vector
zips[ind] is array of 5 ints
Starting address zips+20*ind
Machine Code
Computes and returns address
Compute as pgh + 4*(index+4*index)
12
Nested Array Element Access
Array Elements
A[i][j] is element of type T, which requires K bytes
Address A + i * (C * K) + j * K = A + (i * C + j)* K
int A[R][C];
A[0]
A
[0]
[0]
A[R1]
A[i]
A
[0]
[C1]
A
[i]
[j]
A+(i*C*4)
A+(i*C*4)+(j*4)
A
[R-1]
[0]
A
[R-1]
[C-1]
A+((R-1)*C*4)
13
Nested Array Element Access Code
#define ROWS 4
#define COLS 5
int get_zips_digit ( int zips [][COLS], int ind, int dig ) {
return zips[ind][dig];
}
movslq
leaq
salq
addq
movslq
movl
%esi, %rsi
(%rsi,%rsi,4), %rax
$2, %rax
%rdi, %rax
%edx, %rdx
(%rax,%rdx,4), %eax
Array Elements
zips[ind][dig] is int
Address: zips + 20*ind + 4*dig
= zips + 4*(5*index + dig)
14
Structures
15
Structure Representation
struct rec {
int a[4];
size_t i;
struct rec *next;
};
r
a
0
i
16
next
24
32
Structure represented as block of memory
Big enough to hold all of the fields
Fields ordered according to declaration
Even if another ordering could yield a more compact representation
Compiler determines overall size + positions of fields
Machine-level program has no understanding of the structures in the
source code
16
Access to Structure Members
struct rec {
int a[4];
size_t i;
struct rec *next;
};
r
a
0
i
16
next
24
32
int * get_a (struct rec *r) {
return r->a;
}
# r in %rdi
movq
%rdi, %rax
ret
17
Access to Structure Members
struct rec {
int a[4];
size_t i;
struct rec *next;
};
r+4*idx
a
0
i
16
next
24
32
int * get_a_element (struct rec *r, int idx ) {
return r->a[idx];
}
# r in %rdi
movslq %esi, %rsi
movl
(%rdi,%rsi,4), %eax
ret
18
Access to Structure Members
struct rec {
int a[4];
size_t i;
struct rec *next;
};
r+16
a
0
i
16
next
24
32
int get_i (struct rec *r ) {
return r->i;
}
# r in %rdi
movl
16(%rdi), %eax
ret
19
Access to Structure Members
struct rec {
int a[4];
size_t i;
struct rec *next;
};
r+24
a
0
i
16
next
24
32
struct rec * get_next (struct rec *r ) {
return r->next;
}
# r in %rdi
movq
24(%rdi), %rax
ret
20
Structures & Alignment
Unaligned Data
c
i[0]
p p+1
i[1]
p+5
v
p+9
p+17
Aligned Data
struct S1 {
char c;
int i[2];
double v;
} *p;
Primitive data type requires K bytes
Address must be multiple of K
3 bytes
p+0
i[0]
p+4
Multiple of 4
Multiple of 8
i[1]
p+8
4 bytes
v
p+16
p+24
Multiple of 8
Multiple of 8
21
Alignment Principles
Aligned Data
Primitive data type requires K bytes
Address must be multiple of K
Required on some machines; advised on x86-64
Motivation for Aligning Data
Memory accessed by (aligned) chunks of 4 or 8 bytes (system dependent)
Inefficient to load or store datum that spans quad word boundaries
Virtual memory trickier when datum spans 2 pages
Compiler
Inserts gaps in structure to ensure correct alignment of fields
22
Specific Cases of Alignment (x86-64)
1 byte: char,
no restrictions on address
2 bytes: short,
lowest 1 bit of address must be 02
4 bytes: int, float,
lowest 2 bits of address must be 002
8 bytes: double, long, char *,
lowest 3 bits of address must be 0002
16 bytes: long double (GCC on Linux)
lowest 4 bits of address must be 00002
23
Satisfying Alignment with Structures
Within structure:
Must satisfy each elements alignment requirement
Overall structure placement
Each structure has alignment requirement K
struct S1 {
char c;
int i[2];
double v;
} *p;
K = Largest alignment of any element
Initial address & structure length must be multiples of K
Example:
K = 8, due to double element
c
3 bytes
p+0
i[0]
p+4
Multiple of 4
Multiple of 8
i[1]
p+8
4 bytes
v
p+16
p+24
Multiple of 8
Multiple of 8
24
Meeting Overall Alignment Requirement
struct S2 {
double v;
int i[2];
char c;
} *p;
For largest alignment requirement K
Overall structure must be multiple of K
v
p+0
i[0]
p+8
i[1]
7 bytes
p+16
p+24
Multiple of K=8
25
Saving Space
Put large data types first
struct S2 {
int n1;
int n2;
char c1;
char c2;
};
struct S1 {
char c1;
int n1;
char c2;
int n2;
};
Effect (K=4)
S1 uses 16 bytes
S2 uses 12 bytes
c1
3 bytes
n1
n1
n2
c2
3 bytes
c1 c2
n2
2 bytes
26
Memory Layout
27
not drawn to scale
x86-64 Linux Memory Layout
00007FFFFFFFFFFF
Stack
Stack
Runtime stack (8MB limit)
E. g., local variables
8MB
Heap
Dynamically allocated as needed
When call malloc(), calloc(), new()
Data
Statically allocated data
E.g., global vars, static vars, string constants
Shared
Libraries
Text / Shared Libraries
Executable machine instructions
Read-only
Hex Address
Heap
400000
000000
Data
Text
28
Memory Allocation Example
not drawn to scale
Stack
char big_array[1L<<24]; /* 16 MB */
char huge_array[1L<<31]; /* 2 GB */
int global = 0;
int useless() { return 0; }
int main ()
{
void *p1, *p2, *p3, *p4;
int local = 0;
p1 = malloc(1L << 28); /*
p2 = malloc(1L << 8); /*
p3 = malloc(1L << 32); /*
p4 = malloc(1L << 8); /*
/* Some print statements ...
}
Where does everything go?
Shared
Libraries
256 MB */
256 B */
4 GB */
256 B */
*/
Heap
Data
Text
not drawn to scale
x86-64 Example Addresses
00007F
Stack
address range ~247
Heap
local
p1
p3
p4
p2
big_array
huge_array
main()
useless()
0x00007ffe4d3be87c
0x00007f7262a1e010
0x00007f7162a1d010
0x000000008359d120
0x000000008359d010
0x0000000080601060
0x0000000000601060
0x000000000040060c
0x0000000000400590
Heap
Data
Text
000000