0% found this document useful (0 votes)
751 views104 pages

Latex Programming

Uploaded by

aimantahmid100
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
751 views104 pages

Latex Programming

Uploaded by

aimantahmid100
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 104

6/15/25, 2:00 AM template.cpp 6/15/25, 2:00 AM template.

cpp
-----------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------- -
- -------------------------------Segment Tree----------------------------------------------
---------------------------------------DSU----------------------------------------------- -
- -----------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------- -
- class segtree {
class dsu { public:
public: struct node {
vector<int> p; // don't forget to set default value (used for leaves)
int n; // not necessarily neutral element!
... a = ...;
dsu(int _n) : n(_n) {
p.resize(n); void apply(int l, int r, ... v) {
iota(p.begin(), p.end(), 0); ...
} }
};
inline int get(int x) {
return (x == p[x] ? x : (p[x] = get(p[x]))); node unite(const node &a, const node &b) const {
} node res;
...
inline bool unite(int x, int y) { return res;
x = get(x); }
y = get(y);
if (x != y) { inline void push(int x, int l, int r) {
p[x] = y; int y = (l + r) >> 1;
return true; int z = x + ((y - l + 1) << 1);
} // push from x into (x + 1) and z
return false; ...
} /*
}; if (tree[x].add != 0) {
tree[x + 1].apply(l, y, tree[x].add);
tree[z].apply(y + 1, r, tree[x].add);
----------------------------------------------------------------------------------------- tree[x].add = 0;
- }
-----------------------------------Hashmap----------------------------------------------- */
- }
-----------------------------------------------------------------------------------------
- inline void pull(int x, int z) {
// #include<bits/extc++.h> tree[x] = unite(tree[x + 1], tree[z]);
#include <ext/pb_ds/assoc_container.hpp> }

struct splitmix64_hash { int n;


static uint64_t splitmix64(uint64_t x) { vector<node> tree;
// http://xorshift.di.unimi.it/splitmix64.c
x += 0x9e3779b97f4a7c15; void build(int x, int l, int r) {
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9; if (l == r) {
x = (x ^ (x >> 27)) * 0x94d049bb133111eb; return;
return x ^ (x >> 31); }
} int y = (l + r) >> 1;
int z = x + ((y - l + 1) << 1);
size_t operator()(uint64_t x) const { build(x + 1, l, y);
static const uint64_t FIXED_RANDOM = build(z, y + 1, r);
std::chrono::steady_clock::now().time_since_epoch().count(); pull(x, z);
return splitmix64(x + FIXED_RANDOM); }
}
}; template <typename M>
void build(int x, int l, int r, const vector<M> &v) {
template <typename K, typename V, typename Hash = splitmix64_hash> if (l == r) {
using HashMap = __gnu_pbds::gp_hash_table<K, V, Hash>; tree[x].apply(l, r, v[l]);
return;
template <typename K, typename Hash = splitmix64_hash> }
using HashSet = HashMap<K, __gnu_pbds::null_type, Hash>; int y = (l + r) >> 1;
int z = x + ((y - l + 1) << 1);
file:///C:/Users/moazz/Desktop/template.cpp 1/10 file:///C:/Users/moazz/Desktop/template.cpp 2/10
6/15/25, 2:00 AM template.cpp 6/15/25, 2:00 AM template.cpp
build(x + 1, l, y, v); if (!f(tree[x])) {
build(z, y + 1, r, v); return -1;
pull(x, z); }
} return find_first_knowingly(x, l, r, f);
}
node get(int x, int l, int r, int ll, int rr) { push(x, l, r);
if (ll <= l && r <= rr) { int y = (l + r) >> 1;
return tree[x]; int z = x + ((y - l + 1) << 1);
} int res = -1;
int y = (l + r) >> 1; if (ll <= y) {
int z = x + ((y - l + 1) << 1); res = find_first(x + 1, l, y, ll, rr, f);
push(x, l, r); }
node res{}; if (rr > y && res == -1) {
if (rr <= y) { res = find_first(z, y + 1, r, ll, rr, f);
res = get(x + 1, l, y, ll, rr); }
} else { pull(x, z);
if (ll > y) { return res;
res = get(z, y + 1, r, ll, rr); }
} else {
res = unite(get(x + 1, l, y, ll, rr), get(z, y + 1, r, ll, rr)); int find_last_knowingly(int x, int l, int r, const function<bool(const node&)> &f) {
} if (l == r) {
} return l;
pull(x, z); }
return res; push(x, l, r);
} int y = (l + r) >> 1;
int z = x + ((y - l + 1) << 1);
template <typename... M> int res;
void modify(int x, int l, int r, int ll, int rr, const M&... v) { if (f(tree[z])) {
if (ll <= l && r <= rr) { res = find_last_knowingly(z, y + 1, r, f);
tree[x].apply(l, r, v...); } else {
return; res = find_last_knowingly(x + 1, l, y, f);
} }
int y = (l + r) >> 1; pull(x, z);
int z = x + ((y - l + 1) << 1); return res;
push(x, l, r); }
if (ll <= y) {
modify(x + 1, l, y, ll, rr, v...); int find_last(int x, int l, int r, int ll, int rr, const function<bool(const node&)>
} &f) {
if (rr > y) { if (ll <= l && r <= rr) {
modify(z, y + 1, r, ll, rr, v...); if (!f(tree[x])) {
} return -1;
pull(x, z); }
} return find_last_knowingly(x, l, r, f);
}
int find_first_knowingly(int x, int l, int r, const function<bool(const node&)> &f) { push(x, l, r);
if (l == r) { int y = (l + r) >> 1;
return l; int z = x + ((y - l + 1) << 1);
} int res = -1;
push(x, l, r); if (rr > y) {
int y = (l + r) >> 1; res = find_last(z, y + 1, r, ll, rr, f);
int z = x + ((y - l + 1) << 1); }
int res; if (ll <= y && res == -1) {
if (f(tree[x + 1])) { res = find_last(x + 1, l, y, ll, rr, f);
res = find_first_knowingly(x + 1, l, y, f); }
} else { pull(x, z);
res = find_first_knowingly(z, y + 1, r, f); return res;
} }
pull(x, z);
return res; segtree(int _n) : n(_n) {
} assert(n > 0);
tree.resize(2 * n - 1);
int find_first(int x, int l, int r, int ll, int rr, const function<bool(const node&)> build(0, 0, n - 1);
&f) { }
if (ll <= l && r <= rr) {
file:///C:/Users/moazz/Desktop/template.cpp 3/10 file:///C:/Users/moazz/Desktop/template.cpp 4/10
6/15/25, 2:00 AM template.cpp 6/15/25, 2:00 AM template.cpp
template <typename M> s.emplace(dist[to], to);
segtree(const vector<M> &v) { }
n = v.size(); }
assert(n > 0); }
tree.resize(2 * n - 1); return dist;
build(0, 0, n - 1, v); // returns numeric_limits<T>::max() if there's no path
} }

node get(int ll, int rr) { -----------------------------------------------------------------------------------------


assert(0 <= ll && ll <= rr && rr <= n - 1); -
return get(0, 0, n - 1, ll, rr); -----------------------------------Graph-------------------------------------------------
} -
-----------------------------------------------------------------------------------------
node get(int p) { -
assert(0 <= p && p <= n - 1);
return get(0, 0, n - 1, p, p); template <typename T>
} class graph {
public:
template <typename... M> struct edge {
void modify(int ll, int rr, const M&... v) { int from;
assert(0 <= ll && ll <= rr && rr <= n - 1); int to;
modify(0, 0, n - 1, ll, rr, v...); T cost;
} };

// find_first and find_last call all FALSE elements vector<edge> edges;


// to the left (right) of the sought position exactly once vector<vector<int>> g;
int n;
int find_first(int ll, int rr, const function<bool(const node&)> &f) {
assert(0 <= ll && ll <= rr && rr <= n - 1); graph(int _n) : n(_n) {
return find_first(0, 0, n - 1, ll, rr, f); g.resize(n);
} }

int find_last(int ll, int rr, const function<bool(const node&)> &f) { virtual int add(int from, int to, T cost) = 0;
assert(0 <= ll && ll <= rr && rr <= n - 1); };
return find_last(0, 0, n - 1, ll, rr, f);
}
}; -----------------------------------------------------------------------------------------
-
-----------------------------------Suffix Array------------------------------------------
----------------------------------------------------------------------------------------- -
- -----------------------------------------------------------------------------------------
-----------------------------------Dijkstra---------------------------------------------- -
-
----------------------------------------------------------------------------------------- template <typename T>
- vector<int> suffix_array(int n, const T &s, int char_bound) {
vector<int> a(n);
template <typename T> if (n == 0) {
vector<T> dijkstra(const graph<T> &g, int start) { return a;
assert(0 <= start && start < g.n); }
vector<T> dist(g.n, numeric_limits<T>::max()); if (char_bound != -1) {
priority_queue<pair<T, int>, vector<pair<T, int>>, greater<>> s; vector<int> aux(char_bound, 0);
dist[start] = 0; for (int i = 0; i < n; i++) {
s.emplace(dist[start], start); aux[s[i]]++;
while (!s.empty()) { }
auto [expected, i] = s.top(); int sum = 0;
s.pop(); for (int i = 0; i < char_bound; i++) {
if (dist[i] != expected) { int add = aux[i];
continue; aux[i] = sum;
} sum += add;
for (int id : g.g[i]) { }
auto &e = g.edges[id]; for (int i = 0; i < n; i++) {
int to = e.from ^ e.to ^ i; a[aux[s[i]]++] = i;
if (dist[i] + e.cost < dist[to]) { }
dist[to] = dist[i] + e.cost; } else {
file:///C:/Users/moazz/Desktop/template.cpp 5/10 file:///C:/Users/moazz/Desktop/template.cpp 6/10
6/15/25, 2:00 AM template.cpp 6/15/25, 2:00 AM template.cpp
iota(a.begin(), a.end(), 0); k = 0;
sort(a.begin(), a.end(), [&s](int i, int j) { return s[i] < s[j]; }); } else {
} int j = sa[pos[i] + 1];
vector<int> sorted_by_second(n); while (i + k < n && j + k < n && s[i + k] == s[j + k]) {
vector<int> ptr_group(n); k++;
vector<int> new_group(n); }
vector<int> group(n); lcp[pos[i]] = k;
group[a[0]] = 0; }
for (int i = 1; i < n; i++) { }
group[a[i]] = group[a[i - 1]] + (!(s[a[i]] == s[a[i - 1]])); return lcp;
} }
int cnt = group[a[n - 1]] + 1;
int step = 1; template <typename T>
while (cnt < n) { vector<int> build_lcp(const T &s, const vector<int> &sa) {
int at = 0; return build_lcp((int) s.size(), s, sa);
for (int i = n - step; i < n; i++) { }
sorted_by_second[at++] = i;
}
for (int i = 0; i < n; i++) {
if (a[i] - step >= 0) {
sorted_by_second[at++] = a[i] - step; -----------------------------------------------------------------------------------------
} -
} -----------------------------------DFS---------------------------------------------------
for (int i = n - 1; i >= 0; i--) { -
ptr_group[group[a[i]]] = i; -----------------------------------------------------------------------------------------
} -
for (int i = 0; i < n; i++) {
int x = sorted_by_second[i]; #include "../c++_template.cpp"
a[ptr_group[group[x]]++] = x;
} // =========================
new_group[a[0]] = 0; // Depth First Search (DFS)
for (int i = 1; i < n; i++) { // =========================
if (group[a[i]] != group[a[i - 1]]) { const int MAXN = 1000;
new_group[a[i]] = new_group[a[i - 1]] + 1; vector<int> g[MAXN];
} else { bool visited[MAXN];
int pre = (a[i - 1] + step >= n ? -1 : group[a[i - 1] + step]); int n;
int cur = (a[i] + step >= n ? -1 : group[a[i] + step]);
new_group[a[i]] = new_group[a[i - 1]] + (pre != cur); //recursive
} void dfs(int u) {
} visited[u] = true;
swap(group, new_group); for(int v : g[u]) {
cnt = group[a[n - 1]] + 1; if(!visited[v]) {
step <<= 1; dfs(v);
} }
return a; }
} }

template <typename T> //recursive, using depth


vector<int> suffix_array(const T &s, int char_bound) { int depth[MAXN];
return suffix_array((int) s.size(), s, char_bound); void dfs(int u, int d) {
} depth[u] = d;
for(int v : g[u]) {
template <typename T> if(depth[v] == -1) { // not visited yet
vector<int> build_lcp(int n, const T &s, const vector<int> &sa) { dfs(v, d+1);
assert((int) sa.size() == n); }
vector<int> pos(n); }
for (int i = 0; i < n; i++) { }
pos[sa[i]] = i;
} //iterative
vector<int> lcp(max(n - 1, 0)); void dfs(int root) {
int k = 0; stack<int> s;
for (int i = 0; i < n; i++) { s.push(root);
k = max(k - 1, 0); visited[root] = true;
if (pos[i] == n - 1) { while (!s.empty()) {
file:///C:/Users/moazz/Desktop/template.cpp 7/10 file:///C:/Users/moazz/Desktop/template.cpp 8/10
6/15/25, 2:00 AM template.cpp 6/15/25, 2:00 AM template.cpp
int u = s.top(); s.pop(); void all_subsets(vector<int> items) {
for (int v : g[u]) { int n = vals.size();
if (!visited[v]) { int times = (1 << n);
visited[u] = true; vector<int> bits(n, 0)
s.push(v); while(times-- > 0) {
} do_something(bits)
} // generate next set's bit representation
} int i = 0, carry = 1;
} while (i < n) {
in[i] += carry;
//----------------------------- if (in[i] <= 1)
// Finding connected components carry = 0;
//----------------------------- else
int count_cc() { in[i] = 0;
int count = 0; i++;
memset(visited, 0, sizeof(bool)*n); }
rep(i,0,n) { }
if (!visited[i]) { }
count++, dfs(i);
} /* ========================================= */
} /* Split n items into k containers optimally */
return count; /* ========================================= */
} int capacities[MAXN];
int N;
//------------------------------ // Return cost of storing n items in i-th container
// Flood Fill storage_cost(int i, int n);
//------------------------------ // Find best way to split n items among containers
// from index i to N-1. For simplicity, the total
//explicit graph // remaining capacity is carried along.
const int DFS_WHITE = -1; int search_splits(int i, int n, int tot_cap) {
vector<int> dfs_num(DFS_WHITE,n); if (i >= N) return 0;
void floodfill(int u, int color) { int min_k = max(0, n - (tot_cap - capacities[i]));
dfs_num[u] = color; int max_k = min(n, capacities[i]);
for (int v : g[u]) { int min_cost = INT_MAX;
if (dfs_num[v] == DFS_WHITE) { rep(k, min_k, max_k) {
floodfill(v, color); min_cost = min(min_cost,
} storage_cost(i, k) +
} search_splits(i+1, n-k, tot_cap - capacities[i]);
} )

//implicit graph }
int dirs[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}; }
const char EMPTY = '*'; int best_split(int n) {
int floodfill(int r, int c, char color) { int tot_cap = 0;
if (r < 0 || r >= R || c < 0 || c >= C) return 0; // outside grid rep(i,0,N-1) tot_cap += capacities[i];
if (grid[r][c] != EMPTY) return 0; // cannot be colored return search_splits(0,n,tot_cap);
grid[r][c] = color; }
int ans = 1;
rep(i,0,4) ans += floodfill(r + dirs[i][0], c + dirs[i][1], color);
return ans;
}

-----------------------------------------------------------------------------------------
-
-----------------------------------Brute Force-------------------------------------------
-
-----------------------------------------------------------------------------------------
-

/* =============================== */
/* Try all 2^n subsets of n items */
/* =============================== */
file:///C:/Users/moazz/Desktop/template.cpp 9/10 file:///C:/Users/moazz/Desktop/template.cpp 10/10
This information is used to place each element into the correct slot immediately, so
there is no need to rearrange lists.

Chapter 3 3.2.2 Stability


Stable sorting algorithms maintain the relative order of records with equal keys (i.e.
values). That is, a sorting algorithm is stable if whenever there are two records R
and S with the same key and with R appearing before S in the original list, R will
appear before S in the sorted list.
Sorting Algorithms
3.2.3 Adaptability
3.1 Problem Statement of Sorting Algorithms It means whether or not the presortedness of the input affects the running time.
Algorithms that take this into account are known to be adaptive.
Problem: Given an array that contains n elements, your task is to sort the elements
in increasing order.
Example. For example, the array 3.2.4 Memory Usage
1 3 8 2 9 2 5 6 Some sorting algorithms are called "in-place". Strictly, an in-place sort needs only
O (1) memory beyond the items being sorted; sometimes O (log n) additional memory
will be as follows after sorting: is considered "in-place".

1 2 2 3 5 6 8 9
3.2.5 Computational Complexity
3.2 Properties of Sorting Algorithms For typical serial sorting algorithms good behaviour is O (n log n), with parallel sort
in $O(\log^2 n)$, and bad behaviour is $O(n^2)$.
3.2.1 Types of Sorting Algorithms
There are two broad types of sorting algorithms: integer sorts and comparison sorts.
3.3 Common $O(n^2)$ Algorithms
Comparison Sorts
These algorithms are arguably easy to implement but generally not used due to high
Comparison sorts compare elements at each step of the algorithm to determine if one
time complexity.
element should be to the left or right of another element.
Comparison sorts are usually more straightforward to implement than integer sorts,
but comparison sorts are limited by a lower bound of $\Omega(n \log n)$ comparisons, meaning that, on
average, no comparison sort can run asymptotically faster than $n \log n$. 3.3.1 Bubble Sort
Bubble sort is a simple sorting algorithm. The algorithm starts at the beginning of
Integer Sorts
the data set. It compares the first two elements, and if the first is greater than the
Integer sorts are sometimes called counting sorts (though there is a specific integer second, it swaps them. It continues doing this for each pair of adjacent elements to
sort algorithm called counting sort). Integer sorts do not make comparisons. Integer the end of the data set. It then starts again with the first two elements, repeating
sorts determine for each element x - how many elements are less than x. For example, until no swaps have occurred on the last pass. It is rarely used to sort large, unordered
if there are 14 elements that are less than x, then x will be placed in the 15th slot. data sets.

11 12
5. Merge the sorted subarrays array[ a . . . k ] and array[k + 1 . . . b] into a sorted
2 2 2 2 4 4 4 4 4 4 subarray array[ a . . . b].
4 4 4 4 2 2 3 3 3 3 Merge sort is an efficient algorithm, because it halves the size of the subarray at
each step. The recursion consists of O (log n) levels, and processing each level takes
1 1 3 3 3 3 2 2 2 2 O(n) time. Merging the subarrays array[ a . . . k] and array[k + 1 . . . b] is possible in
linear time, because they are already sorted.
3 3 1 1 1 1 1 1 1 1 For example, consider sorting the following array:
Example of Bubble Sort
1 3 6 2 8 2 5 9

3.3.2 Insertion Sort


1 3 6 2 8 2 5 9
Insertion sort is a simple sorting algorithm that is relatively efficient for small lists
and mostly sorted lists, and is often used as part of more sophisticated algorithms. It
1 3 6 2 8 2 5 9
works by taking elements from the list one by one and inserting them in their correct
position into a new sorted list. In arrays, the new list and the remaining elements
can share the array’s space, but insertion is expensive, requiring shifting all following 1 3 6 2 8 2 5 9
elements over by one.
MERGING BEGINS
Start 4 3 2 1 Iteration C 2 3 4 1 1 3 6 2 8 2 5 9

Iteration A 4 3 2 1 2 3 1 4 1 3 2 6 2 8 5 9

1 2 3 6 2 5 8 9
Iteration B 3 4 2 1 2 1 3 4

Finished 1 2 3 4 1 2 2 3 5 6 8 9
3 2 4 1
Insertion Sort Example 3.4.2 Quick Sort
Quicksort uses divide and conquer to sort an array. Divide and conquer is a technique
used for breaking algorithms down into subproblems, solving the subproblems, and
3.4 Common O (n log n) Algorithms then combining the results back together to solve the original problem. It can be
helpful to think of this method as divide, conquer, and combine.
Here are the divide, conquer, and combine steps that quicksort uses:
3.4.1 Merge Sort
Divide:
Merge sort sorts a subarray array[ a . . . b] as follows:
1. Pick a pivot element, A[q]. Picking a good pivot is the key for a fast imple-
1. If a = b, do not do anything, because the subarray is already sorted. mentation of quicksort; however, it is difficult to determine what a good pivot
might be.
2. Calculate the position of the middle element: $k = \lfloor (a + b)/2 \rfloor$.
2. Partition, or rearrange, the array into two subarrays: A[ p, ..., q − 1] such that
3. Recursively sort the subarray array[ a . . . k ]. all elements are less than A[q], and A[q + 1, ..., r ] such that all elements are
greater than or equal to A[q].
4. Recursively sort the subarray array[k + 1 . . . b].

13 14
Conquer: 3.4.3 Heap Sort
1. Sort the subarrays A[ p, ..., q − 1] and A[q + 1, ..., r ] recursively with quicksort. Heapsort is a much more efficient version of selection sort. It also works by determining
the largest (or smallest) element of the list, placing that at the end (or beginning) of
Combine: the list, then continuing with the rest of the list, but accomplishes this task efficiently
by using a data structure called a heap, a special type of binary tree.
1. No work is needed to combine the arrays because they are already sorted.
3.4.4 Counting Sort
3 7 8 5 2 1 9 5 4 Counting sort assumes that each of the n input elements in a list has a key value
ranging from 0 to k, for some integer k. For each element in the list, counting sort
3 7 8 5 2 1 9 5 4 determines the number of elements that are less than it. Counting sort can use this
information to place the element directly into the correct slot of the output array.
Counting sort uses three lists: the input list, A[0, 1, ..., n − 1], the output list,
3 5 8 5 2 1 9 4 7
B[0, 1, ..., n − 1], and a list that serves as temporary memory, C[0, 1, ..., k]. Note that A
and B have n slots (a slot for each element), while C contains k + 1 slots (a slot for each
3 9 8 5 2 1 4 5 7 key value, since keys range from 0 to k).

3 1 8 5 2 4 9 5 7
3.5 Comparison of Sorting Algorithms
3 1 2 5 4 8 9 5 7
Algorithm Best Case Worst Case Average Space Usage Stable?
3 1 2 4 5 8 9 5 7 Case
Bubble Sort $O(n)$ $O(n^2)$ $O(n^2)$ $O(1)$ Yes
Insertion $O(n)$ $O(n^2)$ $O(n^2)$ $O(1)$ Yes
Sort
3 1 2 5 8 9 5 7
Merge Sort O (n log n) O (n log n) O (n log n) O (n) Yes
Quicksort $O(n \log n)$ $O(n^2)$ $O(n \log n)$ $O(n)$ Usually Not
1 2 3 5 5 9 7 8 Heapsort O (n log n) O (n log n) O (n log n) O (1) No
Counting O (k + n) O (k + n) O (k + n) O (k + n) Yes
5 5 7 9 8 Sort

5 5 9 8 3.6 Sorting in C++


The C++ standard library contains the function sort that can be easily used for
8 9 sorting arrays and other data structures.
There are many benefits in using a library function. First, it saves time because
there is no need to implement the function. Second, the library implementation is
certainly correct and efficient: it is not probable that a home-made sorting function
1 2 3 4 5 5 7 8 9 would be better.
Quicksort Example In this section we will see how to use the C++ sort function. The following code
sorts a vector in increasing order:

vector<int> v = {4,2,5,3,5,8,3};
sort(v.begin(),v.end());

15 16
After the sorting, the contents of the vector will be [2, 3, 3, 4, 5, 5, 8]. The default
sorting order is increasing, but a reverse order is possible as follows:

sort(v.rbegin(),v.rend());

An ordinary array can be sorted as follows:

int n = 7; // array size


int a[] = {4,2,5,3,5,8,3};
sort(a, a+n);
Chapter 4
The following code sorts the string s: Bit Manipulation
string s = "monkey";
sort(s.begin(), s.end());
4.1 Tricks
Sorting a string means that the characters of the string are sorted. For example, the
string ”monkey” becomes ”ekmnoy”.
Integer containers can be sorted in decreasing order as follows: 4.1.1 Multiply and divide by $2^i$

int n = 7; // array size n = n << i; // Multiply n with 2^i


int a[] = {4,2,5,3,5,8,3}; n = n >> 1; // Divide n by 2^i
sort(a, a+n, greater<int>());// greater<int>() as third parameter
// final array is int a[] = {8,5,5,4,3,3,2};

4.1.2 Checking if a number is odd or even

if (num & 1)
cout << "ODD";
else
cout << "EVEN";

4.1.3 Swapping of 2 numbers using XOR


This method is fast and doesn’t require the use of 3rd variable.

// XOR swap: exchanges the values of a and b without a temporary variable.
// NOTE: a and b must be distinct objects; if both name the same object,
// the first step sets it to 0 and the original value is lost.
a ^= b; // a now holds (original a) XOR (original b)
b ^= a; // b now holds the original a
a ^= b; // a now holds the original b

17 19
4.1.4 Compute XOR from 1 to n (direct method) 4.1.5 Check if a number is a power of 2

bool poweroftwo(int x)
Algorithm: Compute XOR of numbers from 1 to n
{
Input: n return x & (x-1) == 0;
Output: XOR of all numbers from 1 to n }
1 Find the remainder of n when divided by 4 (rem = n mod 4).
2 Check,
(I) If rem = 0, then xor will be same as n.
(II) If rem = 1, then xor will be 1. 4.1.6 Change case of English alphabet
(III) If rem = 2, then xor will be n + 1.
(IV) If rem = 3, then xor will be 0. ch |= ’ ’; //Upper to Lower
ch &= ’_’ ; //Lower to Upper

int computeXOR(int n)
{
4.1.7 Find $\lfloor \log_2 x \rfloor$ of an integer
if (n % 4 == 0)
return n;
if (n % 4 == 1) int logarithm(int x)
return 1; {
if (n % 4 == 2) int res = 0;
return n + 1; while (x >>= 1)
else res++;
return 0; return res;
} }

How does this work?


4.2 Bit Shift
When we do XOR of numbers, we get 0 as XOR value just before a multiple of 4.
This keeps repeating before every multiple of 4. The left bit shift x<<k appends k zero bits to the number, and the right bit shift x>>k
removes the k last bits from the number.
Number Binary-Repr XOR-from-1-to-n
Note that x<<k corresponds to multiplying x by $2^k$, and x>>k corresponds to
1 1 [0001]
2 10 [0011]
dividing x by $2^k$ rounded down to an integer.
3 11 [0000] <----- We get a 0
4 100 [0100] <----- Equals to n 4.2.1 Application
5 101 [0001]
6 110 [0111] Check if kth bit is set
7 111 [0000] <----- We get 0
8 1000 [1000] <----- Equals to n The kth bit of a number is one exactly when x & (1<<k) is not zero. The following
9 1001 [0001] code prints the bit representation of an int number x:
10 1010 [1011]
11 1011 [0000] <------ We get 0 for (int i = 31; i >= 0; i--)
12 1100 [1100] <------ Equals to n {
if (x&(1<<i)) cout << "1"; //check if ith bit is 1
else cout << "0";
}

20 21
Set the kth bit 4.3 C++ Special Functions
x |= (1 < < k) //sets the kth bit of x to one The g++ compiler provides the following functions for counting bits:

• __builtin_clz( x ): the number of zeros at the beginning of the number


Unset the kth bit • __builtin_ctz( x ): the number of zeros at the end of the number

x &= ~(1 << k) //unsets the kth bit of x to zero • __builtin_popcount( x ): the number of ones in the number

• __builtin_parity( x ): the parity (even or odd) of the number of ones


Invert the kth bit The functions can be used as follows:

x ^= (1 << k) //Inverts the kth bit of x int x = 5328; // 00000000000000000001010011010000


cout << __builtin_clz(x) << "\n"; // 19
cout << __builtin_ctz(x) << "\n"; // 4
cout << __builtin_popcount(x) << "\n"; // 5
To get the Least Significant Bit
cout << __builtin_parity(x) << "\n"; // 1

T = (S & (-S)) While the above functions only support int numbers, there are also long long
//T is a power of two with only one bit set which is the LSB. versions of the functions available with the suffix ll like __builtin_popcountll( x ).

To turn on all bits of a number 4.4 Set Representation


Every subset of a set {0, 1, 2, . . . , n − 1} can be represented as an n bit integer whose
~(x & 0) //x&0 is 0 and ~ inverts all bits to 1
one bits indicate which elements belong to the subset. This is an efficient way
to represent sets, because every element requires only one bit of memory, and set
operations can be implemented as bit operations.
To turn on all bits till n
For example, since int is a 32-bit type, an int number can represent any subset
of the set {0, 1, 2, . . . , 31}. The bit representation of the set {1, 3, 4, 8} is
S = (1 << n) - 1 //in case n =3 , s = 7 = 8-1
00000000000000000000000100011010,

Get n mod d where d is a power of 2 which corresponds to the number $2^8 + 2^4 + 2^3 + 2^1 = 282$.

// This function will return n % d. 4.4.1 Set implementation


// d must be one of: 1, 2, 4, 8, 16, 32, ...
unsigned int getModulo(unsigned int n, unsigned int d)
The following code declares an int variable x that can contain a subset of {0, 1, 2, . . . , 31}.
{ After this, the code adds the elements 1, 3, 4 and 8 to the set and prints the size of
return ( n & (d - 1) ); the set.
}
int x = 0;
x |= (1<<1);
x |= (1<<3);
Trivia x |= (1<<4);
The formula x & ( x − 1) sets the last one bit of x to zero. The formula x | ( x − 1) x |= (1<<8);
inverts all the bits after the last one bit. cout << __builtin_popcount(x) << "\n"; // 4

22 23
Then, the following code prints all elements that belong to the set: Solution

for (int i = 0; i < 32; i++) { Optimized solution:


if (x&(1<<i)) cout << i << " "; Divide the grid into blocks of columns such that each block consists of N
} consecutive columns. Then, each row is stored as a list of N-bit numbers that
// output: 1 3 4 8 describe the colors of the squares. Now we can process N columns at the same
time using bit operations. In the following code, color[y][k] represents a block
of N colors as bits.

4.5 Example Problems int count = 0;


for (int i = 0; i <= n/N; i++) {
4.5.1 Counting Grids with Black Corners count += __builtin_popcount(color[a][i]&color[b][i]);
}
Problem: Given an n × n grid in which each square is either black (1) or white (0),
calculate the number of subgrids all of whose corners are black. The time complexity is $O(n^3/N)$.

Example. For example, the grid

contains two such subgrids:

Solution
Naive solution with time complexity $O(n^3)$:
Go through all $O(n^2)$ pairs of rows and for each pair (a, b) calculate the
number of columns that contain a black square in both rows in $O(n)$ time.
The following code assumes that color[y][ x ] denotes the color in row y and
column x:

// Count the columns i in [0, n) where rows a and b both hold a black square (1).
int count = 0;
for (int i = 0; i < n; i++) {
// color[y][x] is the colour at row y, column x; a and b are the two rows being compared.
if (color[a][i] == 1 && color[b][i] == 1) count++;
}

Then, those columns account for count(count − 1)/2 subgrids with black
corners, because we can choose any two of them to form a subgrid.

24 25
5.2 Generating Permutations
Problem: Print all the permutations of a set of size n.

Example. For example, the permutations of {0, 1, 2} are (0, 1, 2), (0, 2, 1), (1, 0, 2),

Chapter 5 (1, 2, 0), (2, 0, 1) and (2, 1, 0).

Solution
We can use the built-in C++ function next_permutation which rearranges the
array into the next lexicographically greater permutation. It returns 1 if this is
Brute-Force Algorithms possible and 0 otherwise.

int arr[n]; //array of n elements


do
{//we use do while loop to also print the original array itself
5.1 Generating Subsets for(int i : arr)
{
Problem: Print all the subsets of a set of size n. cout << i << " ";
}
cout << "\n";
Example. For example, the subsets of {0, 1, 2} are ∅, {0}, {1}, {2}, {0, 1}, {0, 2},
}
{1, 2} and {0, 1, 2}. while(next_permutation(arr, arr+n));

Solution
We can use the bit representation of numbers to generate subsets.
Let’s say the set s has n elements. We will use the bits of a number to show the
presence of each element in a subset: if the x-th bit of the number is set, then the x-th element of s is
present in the current subset. We will loop from 0 to $2^n - 1$, and for each number
we will check which of its first n bits are set and take the corresponding
elements. Each iteration yields one subset.

// Print every subset of arr, one subset per output line.
int arr[n]; //array of n elements


// j is a bitmask: bit i of j set means arr[i] belongs to the current subset.
for(int j = 0; j< (1 << n); j++)
{ // (1<<n) is equal to 2^n, so j takes every value from 0 to 2^n - 1
for(int i = 0; i<n; i++)
{ //checking first n bits
if(j&(1<<i))
{ //print the corresponding element
//the first subset (j == 0) is the empty set, so nothing is printed for it
cout << arr[i] << " ";
}
}
cout << "\n";
}

The time complexity is $O(n \cdot 2^n)$, which is exponential.

27 28
For example, the following code calculates n!, the factorial of n, modulo m:

Chapter 16 long long x = 1;


for (int i = 2; i <= n; i++) {
x = (x*i)%m;
}
Basic Maths cout << x%m << "\n";

16.4 Floating point numbers


16.1 Integer bounds
Printing floating point numbers up to n digits.
• int : $2^{31} - 1 \approx 2 \times 10^9$
float pi = 3.14159;
• long long : $2^{63} - 1 \approx 9 \times 10^{18}$ cout << fixed << setprecision(3) << pi; //3.142

• unsigned int : $2^{32} - 1 \approx 4 \times 10^9$

• unsigned long long : $2^{64} - 1 \approx 1.8 \times 10^{19}$


16.5 Mathematics
16.2 Method for integers > $2^{64} - 1 \approx 1.8 \times 10^{19}$ 16.5.1 Sum formulas
Arbitrary precision data type: We can use any precision with the help of the cpp_int data
type if we are not sure about how much precision is needed in future. It automatically
converts the desired precision at the Run-time.
$$\sum_{x=1}^{n} x = 1 + 2 + 3 + \dots + n = \frac{n(n+1)}{2}$$
and
#include <boost/multiprecision/cpp_int.hpp>
using namespace boost::multiprecision;
$$\sum_{x=1}^{n} x^2 = 1^2 + 2^2 + 3^2 + \dots + n^2 = \frac{n(n+1)(2n+1)}{6}.$$
int main()
{ and
n
cpp_int x; //Can have arbitrary precision n2 ( n + 1)2
} ∑ x3 = 13 + 23 + 33 + . . . + n3 = 4
x =1

16.5.2 Sum of Arithmetic Progression


16.3 Modular arithmetic
$$\underbrace{a + \dots + b}_{n \text{ numbers}} = \frac{n(a + b)}{2}$$
An important property of the remainder is that in addition, subtraction and multiplication,
the remainder can be taken before the operation:

where,
( a + b) (mod m) = ( a (mod m) + b (mod m)) (mod m) a is the first number,
( a − b) (mod m) = ( a (mod m) − b (mod m)) (mod m) b is the last number and
( a · b) (mod m) = ( a (mod m) · b (mod m)) (mod m) n is the amount of numbers.

97 98
16.5.3 Sum of Geometric Progression 17.2 Sieve of Eratosthenes
$$a + ak + ak^2 + \dots + b = \frac{bk - a}{k - 1}$$
The sieve of Eratosthenes is one of the most efficient ways to find all primes smaller
where, than n when n is smaller than 10 million
a is the first number,
Problem: Given a number n, print all primes smaller than or equal to n. It is also
b is the last number and
given that n is a small number.
the ratio between consecutive numbers is k.
A special case of a sum of a geometric progression is the formula
Solution
$1 + 2 + 4 + 8 + \dots + 2^{n-1} = 2^n - 1$. Following is the algorithm to find all the prime numbers less than or equal to a
given integer n by Eratosthenes’s method:

1. The algorithm builds an array sieve whose positions 0, 1, 2, 3, ..., n are


used. The value sieve[k] = 0 means that k is prime, and the value
sieve[k] ≠ 0 means that k is not a prime and one of its prime factors
is sieve[k].

2. The algorithm iterates through the numbers 2...n one by one. Always
when a new prime x is found, the algorithm records that the multiples of
x (2x, 3x, 4x, ...) which are ≥ $x^2$ are not primes, because the number x
divides them.

For example, if n = 20, the array is as follows:

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
0 0 0 0 2 0 2 0 2 3 2 0 3 0 2 3 2 0 3 0 2

int n = 20; //example


// sieve[k] == 0 means k has not been marked as composite; otherwise sieve[k]
// holds a prime x that divides k.
// NOTE(review): n is not a compile-time constant, so this array relies on the
// variable-length-array compiler extension (GCC/Clang).
int sieve[n+1];
memset(sieve, 0, sizeof(sieve)); //initialize to zero
for (int x = 2; x*x <= n; x++)
{ // stop once x*x exceeds n: marking would start beyond the end of the array
if (sieve[x]) continue; // x was marked by a smaller prime, so it is composite
for (int u = x*x; u <= n; u += x)
{ // marking starts at x*x; smaller multiples of x are not visited by this loop
sieve[u] = x; // record x as a prime factor of u (overwrites any earlier mark)
}
}
for(int i = 2; i<=n; i++)
{ //print all prime numbers
if(!sieve[i])
cout << i << "\n";
}

Time Complexity is O (n log (log n)) which is very close to linear O (n)

99 102
17.3 Prime Factorization for very large numbers. In competitive programming, some problems are based on
these common conjectures.
Problem: Given a number n, print all prime factors of n. Conjecture 1 (Goldbach’s conjecture): Each even integer n >2 can be represented
as a sum n = a + b so that both a and b are primes.
Solution
Conjecture 2 (Goldbach’s weak conjecture): Every odd number greater than 5 can
Following are the steps to find all prime factors. be expressed as the sum of three primes. (A prime may be used more than once in
1. While n is divisible by 2, print 2 and divide n by 2. the same sum.) This is trivial to prove if the above conjecture is proved to be true.
√ Conjecture 3 (Twin prime conjecture): There is an infinite number of pairs of the
2. After step 1, n must be odd. Now start a loop from i = 3 to n. While form p, p + 2, where both p and p + 2 are primes.
i divides n, print i and divide n by i, increment i by 2 and continue. Conjecture 4 (Legendre’s conjecture): There is always a prime between numbers
3. If n is a prime number and is greater than 2, then n will not become 1 $n^2$ and $(n + 1)^2$, where n is any positive integer.
by above two steps. So print n if it is greater than 2. Conjecture 5 (Collatz Conjecture): Collatz conjecture states that a number n
converges to 1 on repeatedly performing the following operations:
vector<int> factors(int n)
n → n/2 if n is even
{
vector<int> f; n → 3n + 1 if n is odd
while (n%2 == 0)
This has been verified for numbers up to $5.6 \times 10^{13}$. For example, if n = 3, then:
{
f.push_back(2);
n = n/2; 3 → 10 → 5 → 16 → 8 → 4 → 2 → 1
} Conjecture 6 (Mersenne Prime Conjecture): There are infinitely many positive integers n
for (int i = 3; i <= sqrt(n); i = i+2) for which $2^n - 1$ is a prime number. (There are currently 47 Mersenne primes known)
{
while (n%i == 0)
{
f.push_back(i);
n = n/i;
}
}
// This condition is to handle the case when n
// is a prime number greater than 2
if (n > 2)
f.push_back(n);
return f;
}

Time Complexity is $O(\sqrt{n})$
There is an even more efficient solution which uses the Sieve of Eratosthenes to precompute
prime numbers. After the precomputation, each number can be factorized in $O(\log n)$ time. You can read about
it on GeeksforGeeks or in the CP3 book.

17.4 Common Conjectures


In mathematics, a conjecture is a conclusion or proposition based on incomplete
information, for which no proof has been found. But many of them have been verified

103 104
17.5 Modulo of Big Number 17.7 Euler’s Totient Function
Problem: Given a big number num represented as string and an integer x, find value Euler’s totient function ϕ(n) gives the number of coprime numbers to n between
of num mod x. Output is expected as an integer. 1 and n. For example, ϕ(14) = 6, because 1, 3, 5, 9, 11 and 13 are coprime to 14.
The value of ϕ(n) can be calculated from the prime factorization of n using the
Solution formula

The idea is to process all digits one by one and use the property that
$(xy) \bmod a \equiv ((x \bmod a) \times (y \bmod a)) \bmod a$. Below is the implementation.
$$\varphi(n) = n \prod_{p \mid n} \left(1 - \frac{1}{p}\right)$$
int modulo(string num, int x) Where the product is over the distinct prime numbers dividing n.Note that
{ ϕ(n) = n − 1 if n is prime. The implementation of totient function is shown below.
// Initialize result
int res = 0; int totient(int n)
{
// One by one process all digits of ’num’ int result = n;
for (int i = 0; i < num.length(); i++) for (int p = 2; p * p <= n; ++p)
res = (res*10 + (int)num[i] - ’0’) % x; {
return res; if (n % p == 0)
} {
while (n % p == 0) n /= p;
result -= result / p;
}
}

17.6 Modular Exponentiation if (n > 1) result -= result / n;


return result;
}
Problem: Given x, m, n, find $x^n \bmod m$

Solution
The naive solution would run in O (n) time. Using modular exponentiation we 17.8 Some Common Theorems
can bring down the complexity to O (log n) by using the following algorithm:
Theorem is a mathematical result that has been proved for every input value which
lies in its domain.
$$x^n = \begin{cases} 1 & n = 0 \\ x^{n/2} \cdot x^{n/2} & n \text{ is even} \\ x^{n-1} \cdot x & n \text{ is odd} \end{cases}$$
17.8.1 Lagrange’s Four-Square Theorem
Lagrange’s theorem states that every positive integer can be represented as a sum of
int power(int x, int n, int m) four squares. i.e.,
{ $n = a^2 + b^2 + c^2 + d^2$
if (n == 0) return 1%m;
long long u = power(x,n/2,m); where, n, a, b, c, d ∈ N
u = (u*u)%m;
if (n%2 == 1) u = (u*x)%m; The number of representations of a natural number n as the sum of four squares
return u; is denoted by r4 (n). Jacobi’s four-square theorem states that this is eight times
} the sum of the divisors of n if n is odd and 24 times the sum of the odd divisors
of n if n is even. In particular, for a prime number p we have the explicit formula
r4 ( p ) = 8( p + 1)

105 106
17.8.2 Wilson’s Theorem 17.8.4 Pythagorean Triples
Wilson’s theorem states that a number n is prime exactly when A Pythagorean triple is a triple ( a, b, c) that satisfies the Pythagorean theorem
$a^2 + b^2 = c^2$, which means that there is a right triangle with side lengths a, b and c.
(n − 1)! ≡ −1 (mod n) For example, (3, 4, 5) is a Pythagorean triple.
If ( a, b, c) is a Pythagorean triple, all triples of the form (ka, kb, kc) are also
OR Pythagorean triples where k>1. A Pythagorean triple is primitive if a, b and c are
coprime, and all Pythagorean triples can be constructed from primitive triples using a
( n − 1) ! (mod n) = n − 1 multiplier k.
However the theorem cannot be applied to large values of n, because it is difficult to Euclid’s formula can be used to produce all primitive Pythagorean triples. Each
calculate values of (n − 1)! when n is even as large as 50. such triple is of the form

$(n^2 - m^2, 2nm, n^2 + m^2)$,
17.8.3 Fibonacci Numbers
where 0<m<n, n and m are coprime and at least one of n and m is even. For
The Fibonacci sequence is defined as follows: example, when m = 1 and n = 2, the formula produces the smallest Pythagorean
triple
$F_0 = 0, F_1 = 1, F_n = F_{n-1} + F_{n-2}$ $(2^2 - 1^2, 2 \cdot 2 \cdot 1, 2^2 + 1^2) = (3, 4, 5)$.
The first elements of the sequence are:
17.9 Linear Diophantine Equations
0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, ...
A Diophantine equation is a polynomial equation, usually in two or more unknowns,
Cassini’s identity such that only the integral solutions are required. An Integral solution is a solution
such that all the unknown variables take only integer values.
$F_{n-1} F_{n+1} - F_n^2 = (-1)^n$ Problem: Given three integers a, b, c representing a linear equation of the form :
ax + by = c. Determine if the equation has a solution such that x and y are both
Addition Rule integral values.
Solution
$$F_{n+k} = F_k F_{n+1} + F_{k-1} F_n$$
$$F_{2n} = F_n (F_{n+1} + F_{n-1}), \text{ if } k = n$$
For linear Diophantine equations, integral solutions exist if and only if
the GCD of coefficients of the two variables divides the constant term perfectly.
GCD identity In other words the integral solution exists if, gcd( a, b)|c.
Thus the algorithm to determine if an equation has integral solution is pretty
$\gcd(F_m, F_n) = F_{\gcd(m,n)}$ straightforward.

bool isPossible(int a, int b, int c)


Zeckendorf’s Theorem {
// __gcd() is an inbuilt function in C++ STL to return
Zeckendorf’s theorem states that every positive integer can be represented uniquely
// GCD of two numbers.
as the sum of one or more distinct Fibonacci numbers in such a way that the sum
return (c%__gcd(a,b) == 0);
does not include any two consecutive Fibonacci numbers. More precisely, if N is any }
positive integer, there exist positive integers ci ≥ 2, with ci+1 > ci + 1, such that

k
N = ∑ Fci
i =0

Where Fn is the nth Fibonacci number.

107 108
17.10 Euclid’s Algorithm for GCD 17.11 Modular Inverse
Problem: Given two non-negative integers a and b, we have to find their gcd(greatest The inverse of x (mod m) is a number x −1 such that
common divisor), i.e. the largest number which is a divisor of both a and b. It’s
commonly denoted by gcd( a, b). Mathematically it is defined as: xx −1 (mod m) ≡ 1
Using modular inverses, we can divide numbers modulo m, because division by x
gcd( a, b) = max k.
k=1...∞ : k| a ∧k |b corresponds to multiplication by x −1 . For example, to evaluate the value of 36/6
(mod 17), we can use the formula 2 × 3 (mod 17), because 36 (mod 17) = 2 and
(here the symbol "|" denotes divisibility, i.e. "k | a" means "k divides a")
6−1 (mod 17) = 3.
Solution However, a modular inverse does not always exist. For example, if x = 2 and
m = 4, the equation
The algorithm is extremely simple:
xx −1 (mod m) = 1
a, if b = 0 cannot be solved, because all multiples of 2 are even and the remainder can never be
gcd( a, b) = {
gcd(b, a (mod b)), otherwise. 1 when m = 4. It turns out that the value of x −1 mod m can be calculated exactly
when x and m are coprime.
int gcd (int a, int b) A short one-liner to compute modular inverse when x and m are coprime is shown
{ below
if (b == 0)
return a; long long int inv(long long int x, long long int m)
else {
return gcd (b, a % b); return 1<x ? m - inv(m%x,x)*m/x : 1;
} }

But GCC's standard library has a built-in function to calculate gcd (C++17 also provides std::gcd in <numeric>) If m is prime, the modular inverse can be calculated by: $x^{-1} \equiv x^{m-2} \pmod{m}$. This implementation
is shown below(using modular exponentiation):
__gcd(a,b) //returns gcd(a,b)
int power(int x, int n, int m)
Time Complexity is O (log{min{ a, b}}) {
if (n == 0) return 1%m;
long long u = power(x,n/2,m);
u = (u*u)%m;
if (n%2 == 1) u = (u*x)%m;
return u;
}
// Modular inverse of x modulo a prime m, computed via Fermat's little theorem
// as x^(m-2) mod m. Relies on the power(x, n, m) routine shown above.
// Preconditions (not checked): m is prime, x is not a multiple of m, and both
// x and m fit in an int, because power() takes int arguments.
int inv(long long int x, long long int m)
{
return power(x, m-2, m); //return x^{m-2} mod m
}

Time complexity of both these implementations is O (log m)

109 110
17.12 Chinese Remainder Theorem
Problem: Find x that satisfies the following equations:

x ≡ a1 (mod m1 )
x ≡ a2 (mod m2 )
x ≡ a3 (mod m3 )
···
x ≡ an (mod mn )
where all pairs of m1 , m2 , . . . , mn are coprime.
Solution
Let M = m1 × m2 × m3 × · · · × mn
Let M1 , M2 , M3 , · · · , Mn be such that

M1 = M/m1
M2 = M/m2
M3 = M/m3
···
Mn = M/mn
Let y1 , y2 , y3 , · · · , yn be such that yi is the modular inverse of Mi i.e.,

M1 × y1 ≡ 1 (mod m1 )
M2 × y2 ≡ 1 (mod m2 )
M3 × y3 ≡ 1 (mod m3 )
···
Mn × yn ≡ 1 (mod mn )
Then $x \equiv a_1 M_1 y_1 + a_2 M_2 y_2 + a_3 M_3 y_3 + \dots + a_n M_n y_n \pmod{M}$

Once we have found a solution x, we can create an infinite number of other


solutions, because all numbers of the form

x + kM

where k is any whole number are solutions.

Example.
x ≡ 3 (mod 8)
x ≡ 1 (mod 9)
x ≡ 4 (mod 11)
∴ M = 8 × 9 × 11 = 792

M1 = 792/8 = 99
M2 = 792/9 = 88
M3 = 792/11 = 72

111
Pablo Messina’s ICPC Notebook CONTENTS - CONTENTS Página 1 de 40
Contents 7.6 Lowest Common Ancestor (LCA) . . . . . . . . . . . . . . . . . . . . . . 23
7.7 Diameter of a Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25
1 C++ Template 2 7.8 Articulation Points, Cut Edges, Biconnected Components . . . . . . . . 25
7.9 Strongly Connected Components . . . . . . . . . . . . . . . . . . . . . . 26
2 C++ Cheat Sheet 2 7.10 Max Flow : Dinic . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26
3 Data Structures 4 8 Mathematics 27
3.1 C++ STL . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 8.1 Euclidean Algorithm . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
3.1.1 Pairs & Tuples . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 8.2 Primality Test . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
3.1.2 Array . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 8.3 Prime Factorization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29
3.1.3 Vector . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5 8.4 Binary modular exponentiation . . . . . . . . . . . . . . . . . . . . . . . 30
3.1.4 Queue & Stack . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 8.5 Modular Binomial Coefficient . . . . . . . . . . . . . . . . . . . . . . . . 30
3.1.5 Priority Queue . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 8.6 Modular Multinomial Coefficient . . . . . . . . . . . . . . . . . . . . . . 30
3.1.6 Set & Multiset . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7 8.7 Chinese Remainder Theorem (CRT) . . . . . . . . . . . . . . . . . . . . 31
3.1.7 Map & Multimap . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 8.8 Theorems . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32
3.1.8 Unordered Set & Multiset . . . . . . . . . . . . . . . . . . . . . . 9 8.8.1 Pick’s Theorem . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32
3.1.9 Unordered Map & Multimap . . . . . . . . . . . . . . . . . . . . 10
3.1.10 Deque . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 9 Geometry 32
3.1.11 List . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 9.1 Geometry 2D Utils . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32
3.1.12 Policy based Data Structures: Ordered Set . . . . . . . . . . . . 11 9.2 Geometry 3D Utils . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33
3.1.13 Bitset . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 9.3 Polygon Algorithms . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
3.2 Sparse Tables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 9.4 Trigonometry . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 36
3.3 Fenwick Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 9.5 Convex Hull . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 36
3.4 Fenwick Tree 2D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 9.6 Green’s Theorem . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37
3.5 Segment Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
3.6 Segment Tree Lazy . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 10 Strings 37
3.7 Union-Find . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15 10.1 Suffix Array . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37
10.2 Trie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 38
4 Binary Search 16 10.3 Rolling Hashing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 38
10.4 KMP (Knuth Morris Pratt) . . . . . . . . . . . . . . . . . . . . . . . . . 39
5 Ternary Search 17 10.5 Shortest Repeating Cycle . . . . . . . . . . . . . . . . . . . . . . . . . . 40

6 Dynamic Programming 17
6.1 Longest Increasing Subsequence . . . . . . . . . . . . . . . . . . . . . . . 17
6.2 Travelling Salesman Problem . . . . . . . . . . . . . . . . . . . . . . . . 17
6.3 Knapsack . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 18
6.4 Divide & Conquer Optimization . . . . . . . . . . . . . . . . . . . . . . 19

7 Graphs 20
7.1 BFS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
7.2 DFS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
7.3 TopoSort . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 21
7.4 Dijkstra . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 21
7.5 Minimum Spanning Tree (Kruskal & Prim) . . . . . . . . . . . . . . . . 22
Pablo Messina’s ICPC Notebook 2 C++ CHEAT SHEET - Página 2 de 40
1 C++ Template 29 ss >> y; // y = 12345678910
30
31 // 2) stoi, stoll
1 #pragma GCC optimize("Ofast") 32 string str_dec = "2001, A Space Odyssey";
2 #include <bits/stdc++.h> 33 string str_hex = "40c3";
3 using namespace std; 34 string str_bin = "-10010110001";
4 // defines 35 string str_auto = "0x7f";
5 #define rep(i,a,b) for(int i = a; i <= b; ++i) 36 int sz;
6 #define invrep(i,b,a) for(int i = b; i >= a; --i) 37 int i_dec = stoi(str_dec,&sz);
7 #define umap unordered_map 38 int i_hex = stoi(str_hex,0,16);
8 #define uset unordered_set 39 int i_bin = stoi(str_bin,0,2);
9 // typedefs; 40 int i_auto = stoi(str_auto,0,0);
10 typedef unsigned long long int ull; 41 cout << str_dec << ": " << i_dec << " and [" << str_dec.substr(sz) << "]\n";
11 typedef long long int ll; 42 cout << str_hex << ": " << i_hex << ’\n’;
12 typedef vector<int> vi; 43 cout << str_bin << ": " << i_bin << ’\n’;
13 typedef pair<int,int> ii; 44 cout << str_auto << ": " << i_auto << ’\n’;
14 // ------------------------------- 45 // 2001, A Space Odyssey: 2001 and [, A Space Odyssey]
15 int main() { 46 // 40c3: 16579
16 ios::sync_with_stdio(false); 47 // -10010110001: -1201
17 cin.tie(0); cout.tie(0); 48 // 0x7f: 127
18 return 0; 49 string str = "8246821 0xffff 020";
19 } 50 int sz = 0;
51 while (!str.empty()) {
2 C++ Cheat Sheet 52
53
long long ll = stoll(str,&sz,0);
cout << str.substr(0,sz) << " interpreted as " << ll << ’\n’;
54 str = str.substr(sz);
1 /* ================================= */ 55 }
2 /* Input/Output with C++: cin & cout */ 56 // 8246821 interpreted as 8246821
3 /* ================================= */ 57 // 0xffff interpreted as 65535
4 58 // 020 interpreted as 16
5 // reading many lines of unknown length 59

6 string line; 60 /* ========================== */


7 while(getline(cin, line)) {} 61 /* C STRING UTILITY FUNCTIONS */
8 62 /* ========================== */
9 // reading tokens from a line 63 int strcmp ( const char * str1, const char * str2 ); // (-1,0,1)
10 string token; 64 int memcmp ( const void * ptr1, const void * ptr2, size_t num ); // (-1,0,1)
11 stringstream ss(line); 65 void * memcpy ( void * destination, const void * source, size_t num );
12 while (ss >> token) { /* do something with token */} 66

13 67 /* ============================ */
14 // printing floating with fixed precision 68 /* C++ STRING UTILITY FUNCTIONS */
15 cout << setprecision(6) << fixed; 69 /* ============================ */
16 cout << 12312.12312355; 70

17 71 // split a string by a single char delimiter


18 /* ================================= */ 72 void split(const string &s, char delim, vector<string> &elems) {
19 /* CONVERTING FROM STRING TO NUMBERS */ 73 stringstream ss(s);
20 /* ================================= */ 74 string item;
21 75 while (getline(ss, item, delim))
22 // 1) stringstream 76 elems.push_back(item);
23 string s = "12345"; 77 }
24 stringstream ss(s); 78

25 int x; 79 // find index of string or char within string


26 ss >> x; // x = 12345 80 string str = "random";
27 ss << "12345678910"; 81 size_t pos = str.find("ra");
28 long long y; 82 size_t pos = str.find(’m’);
Pablo Messina’s ICPC Notebook 2 C++ CHEAT SHEET - Página 3 de 40
83 if (pos == string::npos) // not found 137 return a.y < b.y;
84 138 }
85 // substrings 139 bool operator>(const Point& a, const Point& b) {
86 string subs = str.substr(pos, length); 140 if (a.x != b.x) return a.x > b.x;
87 string subs = str.substr(pos); // default: to the end of the string 141 return a.y > b.y;
88 142 }
89 // std::string from cstring’s substring 143 bool operator==(const Point& a, const Point& b) {
90 const char* s = "bla1 bla2"; 144 return a.x == b.x && a.y == b.y;
91 int offset = 5, len = 4; 145 }
92 string subs(s + offset, len); // bla2 146 // Note: if you overload the < operator for a custom struct,
93 147 // then you can use that struct with any library function
94 // ------------------------- 148 // or data structure that requires the < operator
95 // string comparisons 149 // Examples:
96 string str1("green apple"); 150 priority_queue<Point> pq;
97 string str2("red apple"); 151 vector<Point> pts;
98 if (str1.compare(str2) != 0) 152 sort(pts.begin(), pts.end());
99 cout << str1 << " is not " << str2 << ’\n’; 153 lower_bound(pts.begin(), pts.end(), {1,2});
100 if (str1.compare(6,5,"apple") == 0) 154 upper_bound(pts.begin(), pts.end(), {1,2});
101 cout << "still, " << str1 << " is an apple\n"; 155 set<Point> pt_set;
102 if (str2.compare(str2.size()-5,5,"apple") == 0) 156 map<Point, int> pt_map;
103 cout << "and " << str2 << " is also an apple\n"; 157
104 if (str1.compare(6,5,str2,4,5) == 0) 158 /* =============== */
105 cout << "therefore, both are apples\n"; 159 /* RANDOM INTEGERS */
106 // green apple is not red apple 160 /* =============== */
107 // still, green apple is an apple 161 #include <cstdlib>
108 // and red apple is also an apple 162 #include <ctime>
109 // therefore, both are apples 163 srand(time(NULL));
110 164 int x = rand() % 100; // 0-99
111 /* ===================== */ 165 int randBetween(int a, int b) { // a-b
112 /* OPERATOR OVERLOADING */ 166 return a + (rand() % (1 + b - a));
113 /* ===================== */ 167 }
114 168
115 //-------------------------- 169 /* ============== */
116 // method #1: inside struct 170 /* Bitwise Tricks */
117 struct Point { 171 /* ============== */
118 int x, y; 172 // amount of one-bits in number
119 bool operator<(const Point& p) const { 173 int __builtin_popcount(int x);
120 if (x != p.x) return x < p.x; 174 int __builtin_popcountl(long x);
121 return y < p.y; 175 int __builtin_popcountll(long long x);
122 } 176 // amount of leading zeros in number
123 bool operator>(const Point& p) const { 177 int __builtin_clz(int x);
124 if (x != p.x) return x > p.x; 178 int __builtin_clzl(long x);
125 return y > p.y; 179 int __builtin_clzll(ll x);
126 } 180 // binary length of non-negative number
127 bool operator==(const Point& p) const { 181 int bitlen(int x) { return sizeof(x) * 8 - __builtin_clz(x); }
128 return x == p.x && y == p.y; 182 int bitlen(ll x) { return sizeof(x) * 8 - __builtin_clzll(x); }
129 } 183 // index of most significant bit
130 }; 184 int log2(int x) { return sizeof(x) * 8 - __builtin_clz(x) - 1; }
131 185 int log2(ll x) { return sizeof(x) * 8 - __builtin_clzll(x) - 1; }
132 //-------------------------- 186 // reverse the bits of an integer
133 // method #2: outside struct 187 int reverse_bits(int x) {
134 struct Point {int x, y; }; 188 int v = 0;
135 bool operator<(const Point& a, const Point& b) { 189 while (x) v <<= 1, v |= x&1, x >>= 1;
136 if (a.x != b.x) return a.x < b.x; 190 return v;
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - Página 4 de 40
191 } 8 //======
192 // get string binary representation of an integer 9 // Example: pair of ints
193 string bitstring(int x) { 10 typedef pair<int,int> ii; // use ii as abbreviation
194 int len = sizeof(x) * 8 - __builtin_clz(x); 11 // initialization
195 if (len == 0) return "0"; 12 ii p(5,5); // option 1
196 13 ii p = make_pair(5,5) // option 2
197 char buff[len+1]; buff[len] = ’\0’; 14 ii p = {5, 5}; // option 3
198 for (int i = len-1; i >= 0; --i, x >>= 1) 15 // getting values
199 buff[i] = (char)(’0’ + (x&1)); 16 int x = p.first, y = p.second;
200 return string(buff); 17 // modifying values
201 } 18 p.first++, p.second--; // p = {6, 4}
202 19
203 /* ================== */ 20 //=======
204 /* Hexadecimal Tricks */ 21 // TUPLE
205 /* ================== */ 22 //=======
206 // get string hex representation of an integer 23 // Example: tuples of 3 ints
207 string to_hex(int num) { 24 typedef tuple<int,int,int> iii; // use iii as abbreviation
208 static char buff[100]; 25 // initialization
209 static const char* hexdigits = "0123456789abcdef"; 26 iii t(5,5,5); // option 1
210 buff[99] = ’\0’; 27 iii t = make_tuple(5,5,5); // option 2
211 int i = 98; 28 iii t = {5, 5, 5}; // option 3
212 do { 29 // getting values
213 buff[i--] = hexdigits[num & 0xf]; 30 int x,y,z;
214 num >>= 4; 31 x = get<0>(t), y = get<1>(t), z = get<2>(t); // option 1
215 } while (num); 32 tie(x,y,z) = t; // option 2
216 return string(buff+i+1); 33 // modifying values
217 } 34 get<0>(t)++, get<1>(t)--, get<2>(t)+=2; // t = {6, 4, 7}
218 // [’0’-’9’ ’a’-’f’] -> [0 - 15]
219 int char_to_digit(char c) { 3.1.2 Array
220 if (’0’ <= c && c <= ’9’)
221 return c - ’0’; 1 //================
222 return 10 + c - ’a’; 2 // declare arrays
223 } 3 //================
224 4 int arr[10];
225 /* ================= */ 5 int arr[10][10];
226 /* CLIMITS CONSTANTS */ 6 int arr[5] = {1, 2, 3, 4, 5};
227 /* ================= */ 7 int arr[4][2] = {{0,1}, {1,0}, {0,-1}, {-1,0}};
228 INT_MIN INT_MAX UINT_MAX LONG_MIN LONG_MAX ULONG_MAX LLONG_MIN LLONG_MAX ULLONG_MAX 8
9 //============================
3 Data Structures 10
11
// fill array using std::fill
//============================
12 // http://www.cplusplus.com/reference/algorithm/fill/
3.1 C++ STL 13
14 // 1) arrays 1D
3.1.1 Pairs & Tuples 15 int arr[100];
16 fill(arr, arr+4, -5);
1 // references: 17 fill(arr, arr+N, val);
2 // https://www.geeksforgeeks.org/returning-multiple-values-from-a-function-using-tuple- 18 fill(arr + offset, arr + N, val);
and-pair-in-c/ 19 double arr[100];
3 // http://www.cplusplus.com/reference/utility/pair/ 20 fill(arr, arr+7, 0.999);
4 // http://www.cplusplus.com/reference/tuple/ 21
5 22 // 2) arrays 2D or more
6 //====== 23 int arr[100][100];
7 // PAIR 24 fill(&arr[0][0], &arr[0][0] + sizeof(arr) / sizeof(arr[0][0]), -1231);
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 5 de 40
25 28
26 //========================= 29 vector<int> v; // v = {}, v.size() == 0
27 // fill array using memset 30 v.reserve(1000); // reserve 1000 x sizeof(int) bytes of contiguous memory in advance
28 //========================= 31 // ** we use v.reserve(MAXN) when we know the maximum memory we will ever
29 int arr[100][100]; 32 // need to prevent unnecessary memory reallocations
30 memset(arr, -1, sizeof(arr)); 33
31 memset(arr, 0, sizeof(arr)); 34 rep(i, 1, 10) v.push_back(i); // v = { 1, 2, 3, ..., 10 }, v.size() == 10
32 // ** only works with 0 and -1 for arrays of ints/longs 35 int x = v.front(); // x = 1
33 // because memset works on bytes (same value is written on each char) 36 int y = v.back(); // y = 10
34 // sizeof(arr) returns the number of bytes in arr 37 v.pop_back(); // remove last element -> v = { 1, 2, 3, ..., 9 }, v.size() == 9
35 38
36 // in the case of char arrays, we can set any value, since 39 // clearing
37 // sizeof(char) = 1 (each char uses a single byte) 40 v.clear(); // v = {}, v.size() == 0
38 char char_arr[100][100]; 41
39 memset(char_arr, ’k’, sizeof(char_arr)); 42 //========
40 43 // RESIZE
41 // filling with -1/0 the first N ints in arr 44 //========
42 int arr[MAXN]; 45 rep(i,1,10) v.push_back(i); // v = { 1, 2, ..., 10 }
43 memset(arr, -1, sizeof(int) * N); 46 v.resize(5); // v = { 1, 2, 3, 4, 5 }
44 memset(arr, 0, sizeof(int) * N); 47 v.resize(8,100); // v = { 1, 2, 3, 4, 5, 100, 100, 100 }
45 48 v.resize(12); // v = { 1, 2, 3, 4, 5, 100, 100, 100, 0, 0, 0, 0 }
46 // interesting links: 49
47 // https://stackoverflow.com/questions/936687/how-do-i-declare-a-2d-array-in-c-using-new/ 50 //========
48 // https://stackoverflow.com/questions/8767166/passing-a-2d-array-to-a-c-function 51 // ASSIGN
52 //========
3.1.3 Vector 53 v.assign(N, 4); // v = { 4, 4, ..., 4 } (N times)
54

1 // references: 55 vector<int> v2;


2 // http://www.cplusplus.com/reference/vector/vector/ 56 v2.assign(v.begin(), v.end()); // v2 = v
3 // https://www.geeksforgeeks.org/vector-in-cpp-stl/ 57 v2.assign(v.begin() + 1, v.end() - 1); // v2 = v[1:-1]
4 #include <bits/stdc++.h> 58

5 #define rep(i,a,b) for(int i=a; i<=b; i++) 59 int arr[5] = {1, 2, 3, 4, 5};
6 using namespace std; 60 v2.assign(arr, arr + 5); // v2 = {1, 2, 3, 4, 5}
7 61 v2.assign(arr, arr + 3); // v2 = {1, 2, 3}
8 //============================== 62

9 // DECLARATION & INITIALIZATION 63 //===========================


10 //============================== 64 // EMPLACE_BACK VS PUSH_BACK
11 65 //===========================
12 // vector of ints 66 struct CustomData {
13 vector<int> v; // empty 67 int x; double y; string z;
14 vector<int> v(100); // size 100 68 CustomData(int x, double y, string z) : x(x), y(y), z(z) {}
15 vector<int> v(N); // size N, make sure N is defined 69 };
16 vector<int> v(N, 2); // size N filled with 2’s 70 vector<CustomData> v;
17 vector<int> v = { 1, 2, 3, 5, 6 }; // list initialization (since C++11) 71 // option 1: with push_back() -> object is created and then copied
18 v[0] = -8; // v = { -8, 2, 3, 5, 6 } 72 v.push_back(CustomData(1,2.32,"foo")); // using constructor
19 v[1] = 0; // v = { -8, 0, 3, 5, 6 } 73 v.push_back({1, 2.32,"bar"}); // c++11: using curly braces
20 74 // option 2: with emplace_back() -> object is created in its final location ;)
21 // vector of vector of ints 75 v.emplace_back(1, 2.32, "foo");
22 // a matrix of R rows by C columns filled with -1 76 // ** NOTE: for emplace_back() make sure your custom struct/class has a constructor
23 vector<vector<int>> matrix(R, vector<int>(C,-1)); 77 // for push_back(), no need to define a constructor
24 78

25 //===================================================== 79

26 // MODIFYING A VECTOR (capacity, size, adding elements) 80 //========================


27 ///===================================================== 81 // ITERATING OVER VECTORS
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 6 de 40
82 //======================== 14 while (!q.empty()) {
83 // reference: 15 cout << q.front() << ’ ’;
84 // https://stackoverflow.com/questions/15176104/c11-range-based-loop-get-item-by-value-or 16 q.pop();
-reference-to-const 17 } // output: 1 2 3 4 5
85 18
86 // 1) forward direction 19 //===== STACK =====
87 20 stack<int> s;
88 vector<CustomData> v(100); // vector of custom type 21 // adding to stack
89 // option 1: iterate over element copies (slower) 22 rep(i,1,5) s.push(i); // s = {1, 2, 3, 4, 5}
90 for (auto x : v) { /* do something */ } 23 // OR
91 // option 2: iterate over references (faster) 24 rep(i,1,5) s.emplace(i); // s = {1, 2, 3, 4, 5}
92 for (auto& x : v) { /* do something */ } 25 // removing from stack
93 // option 3: iterate over const references (equally fast) 26 while (!s.empty()) {
94 // * the const keyword is just to prevent unintended modifications 27 cout << s.top() << ’ ’;
95 for (const auto& x : v) { /* do something */ } 28 s.pop();
96 29 } // output: 5 4 3 2 1
97 vector<int> v {1, 2, 3, 4, 5, 6}; // vector of ints
98 for (int x : v) { /* do something */ } 3.1.5 Priority Queue
99 for (int& x : v) { /* do something */ }
100 for (const int& x : v) { /* do something */ } 1 // references:
101 2 // http://www.cplusplus.com/reference/queue/priority_queue/
102 // using iterators 3 // https://www.geeksforgeeks.org/priority-queue-in-cpp-stl/
103 for (auto it = v.begin(); it != v.end(); ++it) { 4
104 const auto& x = *it; // use *it to access original element pointed by it 5 //====================
105 /* do something with x */ 6 // 1) MAXHEAP of ints
106 } 7 //====================
107 8 priority_queue<int> q;
108 // 2) backward direction 9 q.push(30);
109 for (auto it = v.rbegin(); it != v.rend(); ++it) { 10 q.push(100);
110 const auto& x = *it; 11 q.push(25);
111 } 12 q.push(40);
112 13 cout << "Popping out elements...";
113 //==================== 14 while (!q.empty()) {
114 // SWAPPING 2 VECTORS 15 cout << ’ ’ << q.top();
115 //==================== 16 q.pop();
116 vector<int> v1 = {1, 1, 1, 1}; 17 }
117 vector<int> v2 = {2, 2, 2}; 18 cout << ’\n’;
118 v1.swap(v2); // v1 = {2, 2, 2}, v2 = {1, 1, 1, 1} 19 // Popping out elements... 100 40 30 25
20
3.1.4 Queue & Stack 21 //====================
22 // 2) MINHEAP of ints
1 // references: 23 //====================
2 // http://www.cplusplus.com/reference/queue/queue/ 24 priority_queue<int, vector<int>, greater<int>> q;
3 // https://www.geeksforgeeks.org/queue-cpp-stl/ 25 q.push(30);
4 // http://www.cplusplus.com/reference/stack/stack/ 26 q.push(100);
5 // https://www.geeksforgeeks.org/stack-in-cpp-stl/ 27 q.push(25);
6 28 q.push(40);
7 //===== QUEUE ===== 29 cout << "Popping out elements...";
8 queue<int> q; 30 while (!q.empty()) {
9 // adding to queue 31 cout << ’ ’ << q.top();
10 rep(i,1,5) q.push(i); // q = {1, 2, 3, 4, 5} 32 q.pop();
11 // OR 33 }
12 rep(i,1,5) q.emplace(i); // q = {1, 2, 3, 4, 5} 34 cout << ’\n’;
13 // removing from queue 35 // Popping out elements... 25 30 40 100
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 7 de 40
36 2 // http://www.cplusplus.com/reference/set/set/
37 //==================================== 3 // http://www.cplusplus.com/reference/set/multiset/
38 // 3) custom data + custom comparator 4 #define rep(i,a,b) for(int i=a; i<=b; i++)
39 //==================================== 5
40 6 //================
41 // option 1: overload operator< inside your struct/class 7 // INITIALIZATION
42 struct Event { 8 //================
43 double time; string name; 9 // set
44 Event (double t, string n) : time(t), name(n) {} 10 set<int> s{1, 2, 3, 4, 4, 5, 5, 5, 2, 2, 2};
45 bool operator<(const Event& rhs) const { 11 for (int x : s) cout << x; // 12345
46 // define your < operator however you want 12 // multiset
47 return time > rhs.time; 13 multiset<int> ms{1, 2, 3, 4, 4, 5, 5, 5, 2, 2, 2};
48 } 14 for (int x : ms) cout << x; // 12222344555
49 }; 15
50 priority_queue<Event> q; 16 //========
51 17 // INSERT
52 // option 2: use a functor 18 //========
53 struct Event { 19 // set
54 double time; string name; 20 set<int> s;
55 Event (double t, string n) : time(t), name(n) {} 21 rep(i,1,5) s.insert(i*10); // 10 20 30 40 50
56 }; 22 auto ret = s.insert(20); // no new element inserted
57 struct EventCmp { 23 auto it = ret.first;
58 bool operator()(const Event& lhs, const Event& rhs) { 24 if (ret.second) cout << "20 inserted for the first time\n";
59 return lhs.time > rhs.time; 25 else cout << "20 already in set\n";
60 } 26 int myints[]= {5,10,15}; // 10 already in set, not inserted
61 }; 27 s.insert(myints,myints+3);
62 priority_queue<Event, vector<Event>, EventCmp> q; 28 cout << "s contains:";
63 29 for (int x : s) cout << ’ ’ << x;
64 // option 3: use a lambda function 30 cout << ’\n’; // 5 10 15 20 30 40 50
65 struct Event { 31 // multiset
66 double time; string name; 32 // ** same as set, but allows duplicates, so insert returns an iterator
67 Event (double t, string n) : time(t), name(n) {} 33 // not a pair
68 }; 34
69 auto cmp = [](const Event& lhs, const Event& rhs) { 35 //=======
70 return lhs.time > rhs.time; 36 // ERASE
71 }; 37 //=======
72 priority_queue<Event, vector<Event>, decltype(cmp)> q(cmp); 38 // -- set
73 39 set<int> s;
74 // usage example 40 rep(i,1,9) s.insert(i*10); // 10 20 30 40 50 60 70 80 90
75 q.emplace(10.2, "Peter"); 41 auto it = s.begin();
76 q.emplace(2.7, "Mary"); 42 ++it; // "it" points now to 20
77 q.emplace(5.3, "John"); 43 s.erase(it); // erase by pointer
78 q.emplace(0.3, "Bob"); 44 s.erase(40); // erase by value
79 cout << "Events:"; 45 it = s.find(60); // iterator pointing to 60
80 while (!q.empty()) { 46 s.erase(it, s.end()); // erase everything in range [it, s.end())
81 const Event& e = q.top(); 47 // s = 10 30 50
82 cout << " (" << e.time << ’,’ << e.name << ")"; 48 // -- multiset
83 q.pop(); 49 multiset<int> ms;
84 } 50 ms.insert (40); // 40
85 // Events: (0.3,Bob) (2.7,Mary) (5.3,John) (10.2,Peter) 51 rep(i,1,6) ms.insert(i*10); // 10 20 30 40 40 50 60
52 auto it=ms.begin();
3.1.6 Set & Multiset 53 it++; // ^
54 ms.erase(it); // 10 30 40 40 50 60
1 // references: 55 ms.erase(40); // 10 30 50 60
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 8 de 40
56 it=ms.find(50); 110 //=============================
57 ms.erase(it, ms.end()); // 10 30 111 // SET/MULTISET of Custom Data
58 112 //=============================
59 //======= 113 struct CustomData {
60 // FIND 114 int x; string name;
61 //======= 115 CustomData(int x, string n) : x(x), name(n) {}
62 // -- set 116 // define operator <
63 set<int> s; 117 bool operator<(const CustomData& rhs) const {
64 rep(i,1,5) s.insert(i*10); // 10 20 30 40 50 118 return x < rhs.x;
65 auto it=s.find(20); 119 }
66 s.erase(it); // 10 30 40 50 120 };
67 s.erase(s.find(40)); // 10 30 50 121 set<CustomData> s;
68 // -- multiset 122 multiset<CustomData> ms;
69 // ** same as set 123 s.emplace(1, "foo");
70 124 s.emplace(2, "bar");
71 //=============================== 125 ms.emplace(-12, "bla");
72 // lower_bound() & upper_bound()
73 //=============================== 3.1.7 Map & Multimap
74 // -- set
75 set<int> s; 1 // references:
76 rep(i,1,9) s.insert(i*10); // 10 20 30 40 50 60 70 80 90 2 // http://www.cplusplus.com/reference/map/map/
77 auto itlow=s.lower_bound(30); // ^ 3 // http://www.cplusplus.com/reference/map/multimap/
78 auto itup=s.upper_bound(60); // ^ 4 // ** SUMMARY **
79 s.erase(itlow,itup); // 10 20 70 80 90 5 // same as set and multiset, except that for each key
80 // -- multiset 6 // now there is a value associated to it (if we only consider
81 multiset<int> ms{30, 10, 10, 40, 30, 90}; // 10 10 30 30 40 90 7 // the keys, it is the same as set/multiset)
82 auto itlow = ms.lower_bound(30); // ^ 8
83 auto itup = ms.upper_bound(40); // ^ 9 //================
84 ms.erase(itlow,itup); // 10 10 90 10 // INITIALIZATION
85 11 //================
86 //======================= 12 // --- map
87 // multiset::equal_range 13 map<string,float> m {{"a",1.50}, {"b",2.10}, {"c",1.40}};
88 //======================= 14 // or
89 int myints[] = {77,30,16,2,30,30}; 15 map<string,float> m;
90 multiset<int> ms(myints, myints+6); // 2 16 30 30 30 77 16 m.emplace("a", 1.50);
91 auto ret = ms.equal_range(30); // ^ ^ 17 m.emplace("b", 2.10);
92 // ret.first -> first 30 (same as ms.lower_bound(30)) 18 m.emplace("c", 1.40);
93 // ret.second -> 77 (same as ms.upper_bound(30)) 19 // --- multimap
94 ms.erase(ret.first, ret.second); // 2 16 77 20 // ** same as map
95 21
96 //======= 22 //=========
97 // COUNT 23 // INSERT
98 //======= 24 //=========
99 // --- set 25 // --- map
100 set<int> s{3, 6, 9, 12}; 26 map<char,int> m;
101 rep(i,0,9) { 27 // first insert function version (single parameter):
102 cout << i; 28 m.insert( pair<char,int>(’a’,100) );
103 if (s.count(i) > 0) cout << " is an element of s.\n"; 29 m.insert( pair<char,int>(’z’,200) );
104 else cout << " is not an element of s.\n"; 30 auto ret = m.insert ( pair<char,int>(’z’,500) );
105 } 31 if (ret.second==false) {
106 // --- multiset 32 cout << "element ’z’ already existed";
107 multiset<int> ms{10,73,12,22,73,73,12}; 33 cout << " with a value of " << ret.first->second << ’\n’;
108 cout << ms.count(73); // 3 34 }
109 35 // second insert function version (with hint position):
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 9 de 40
36 auto it = m.begin(); 90 first=map<char,int>(); // and first is now empty
37 m.insert(it, pair<char,int>(’b’,300)); // max efficiency inserting 91 cout << "Size of first: " << first.size() << ’\n’;
38 m.insert(it, pair<char,int>(’c’,400)); // no max efficiency inserting 92 cout << "Size of second: " << second.size() << ’\n’;
39 // third insert function version (range insertion): 93
40 map<char,int> m2; 94 //=========================
41 m2.insert(m.begin(), m.find(’c’)); 95 // generating ids with map
42 // showing contents: 96 //=========================
43 cout << "m contains:\n"; 97 int get_id(string& name) {
44 for (auto& kv : m) cout << kv.first << " => " << kv.second << ’\n’; 98 static int id = 0;
45 cout << "m2 contains:\n"; 99 static map<string,int> name2id;
46 for (auto& kv : m2) cout << kv.first << " => " << kv.second << ’\n’; 100 auto it = name2id.find(name);
47 /* 101 if (it == name2id.end())
48 element ’z’ already existed with a value of 200 102 return name2id[name] = id++;
49 m contains: 103 return it->second;
50 a => 100 104 }
51 b => 300
52 c => 400 3.1.8 Unordered Set & Multiset
53 z => 200
54 m2 contains: 1 // references:
55 a => 100 2 // http://www.cplusplus.com/reference/unordered_set/unordered_set/
56 b => 300 3 // http://www.cplusplus.com/reference/unordered_set/unordered_multiset/
57 */ 4 // ** unordered_multiset is basically the same as unordered_set
58 // --- multimap 5 // except that unordered_multiset allows duplicate elements
59 // ** same as map 6
60 7 //=========
61 //================= 8 // RESERVE
62 // map::operator[] 9 //=========
63 //================= 10 unordered_set<string> s;
64 map<char,string> m; 11 s.reserve(5);
65 m[’a’]="an element"; 12 s.insert("office");
66 m[’b’]="another element"; 13 s.insert("house");
67 m[’c’]=m[’b’]; 14 s.insert("gym");
68 cout << "m[’a’] is " << m[’a’] << ’\n’; 15 s.insert("parking");
69 cout << "m[’b’] is " << m[’b’] << ’\n’; 16 s.insert("highway");
70 cout << "m[’c’] is " << m[’c’] << ’\n’; 17 cout << "s contains:";
71 cout << "m[’d’] is " << m[’d’] << ’\n’; // (’d’ -> "") is created by default 18 for (const string& x: s) cout << " " << x;
72 cout << "m now contains " << m.size() << " elements.\n"; 19 cout << ’\n’; // s contains: highway house office gym parking
73 /* 20 // By calling reserve with the size we expected for the unordered_set
74 m[’a’] is an element 21 // container we avoided the multiple rehashes that the increases in container
75 m[’b’] is another element 22 // size could have produced and optimized the size of the hash table.
76 m[’c’] is another element 23
77 m[’d’] is 24 //========
78 m now contains 4 elements. 25 // INSERT
79 */ 26 //========
80 27 unordered_set<string> s = {"yellow","green","blue"};
81 //================= 28 array<string,2> arr = {"black","white"};
82 // map::operator= 29 string mystring = "red";
83 //================= 30 s.insert(mystring); // copy insertion
84 map<char,int> first; 31 s.insert(mystring+"dish"); // move insertion
85 map<char,int> second; 32 s.insert(arr.begin(), arr.end()); // range insertion
86 first[’x’]=8; 33 s.insert( {"purple","orange"} ); // initializer list insertion
87 first[’y’]=16; 34 cout << "s contains:";
88 first[’z’]=32; 35 for (const string& x: s) cout << " " << x;
89 second=first; // second now contains 3 ints 36 cout << ’\n’;
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 10 de 40
37 //s contains: green blue reddish white yellow black red orange purple 18 m.emplace("c", 1.40);
38 19 // --- unordered_multimap
39 //======= 20 // ** same as unordered_map
40 // ERASE 21
41 //======= 22 //=========
42 unordered_set<string> s = 23 // INSERT
43 {"USA","Canada","France","UK","Japan","Germany","Italy"}; 24 //=========
44 s.erase( s.begin() ); // erasing by iterator 25 // --- unordered_map
45 s.erase( "France" ); // erasing by key 26 unordered_map<string,double>
46 s.erase( s.find("Japan"), s.end() ); // erasing by range 27 myrecipe,
47 cout << "s contains:"; 28 mypantry = {{"milk",2.0},{"flour",1.5}};
48 for ( const string& x: s ) cout << " " << x; 29 pair<string,double> myshopping("baking powder",0.3);
49 cout << ’\n’; // s contains: Canada USA Italy 30 myrecipe.insert(myshopping); // copy insertion
50 31 myrecipe.insert(make_pair("eggs",6.0)); // move insertion
51 //====== 32 myrecipe.insert(mypantry.begin(), mypantry.end()); // range insertion
52 // FIND 33 myrecipe.insert( {{"sugar",0.8},{"salt",0.1}} ); // initializer list insertion
53 //====== 34 cout << "myrecipe contains:" << ’\n’;
54 unordered_set<string> s{"red","green","blue"}; 35 for (auto& x: myrecipe) cout << x.first << ": " << x.second << ’\n’;
55 auto it = s.find("black"); 36 cout << ’\n’;/*
56 assert (it == s.end()); 37 myrecipe contains:
57 assert (s.find("red") != s.end()); 38 salt: 0.1
58 39 eggs: 6
59 //======= 40 sugar: 0.8
60 // COUNT 41 baking powder: 0.3
61 //======= 42 flour: 1.5
62 unordered_set<string> s { "hat", "umbrella", "suit" }; 43 milk: 2 */
63 for (auto& x: {"hat","sunglasses","suit","t-shirt"}) { 44 // --- unordered_multimap
64 if (s.count(x) > 0) cout << "s has " << x << ’\n’; 45 // ** same as unordered_map
65 else cout << "s has no " << x << ’\n’; 46
66 } /* 47 //==========================
67 s has hat 48 // unordered_map::operator[]
68 s has no sunglasses 49 //===========================
69 s has suit 50 unordered_map<string,string> m;
70 s has no t-shirt */ 51 m["Bakery"]="Barbara"; // new element inserted
52 m["Seafood"]="Lisa"; // new element inserted
3.1.9 Unordered Map & Multimap 53 m["Produce"]="John"; // new element inserted
54 string name = m["Bakery"]; // existing element accessed (read)
1 // references: 55 m["Seafood"] = name; // existing element accessed (written)
2 // http://www.cplusplus.com/reference/unordered_map/unordered_map/ 56 m["Bakery"] = m["Produce"]; // existing elements accessed (read/written)
3 // http://www.cplusplus.com/reference/unordered_map/unordered_multimap/ 57 name = m["Deli"]; // non-existing element: new element "Deli" inserted!
4 // ** SUMMARY ** 58 m["Produce"] = m["Gifts"]; // new element "Gifts" inserted, "Produce" written
5 // same as unordered_set and unordered_multiset, except that for each key 59 for (auto& x: m) cout << x.first << ": " << x.second << ’\n’;
6 // now there is a value associated to it (if we only consider 60 /*
7 // the keys, it is the same as unordered_set/unordered_multiset) 62 Deli:
8 62 Deli:
9 //================ 63 Bakery: John
10 // INITIALIZATION 64 Gifts:
11 //================ 65 Produce:
12 // --- unordered_map 66 */
13 unordered_map<string,float> m {{"a",1.50}, {"b",2.10}, {"c",1.40}}; 67

14 // or 68 //==========================
15 unordered_map<string,float> m; 69 // unordered_map::operator=
16 m.emplace("a", 1.50); 70 //==========================
17 m.emplace("b", 2.10); 71 typedef unordered_map<string,string> stringmap;
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 11 de 40
72 stringmap merge (stringmap a,stringmap b) { 22 // 1 10 20 30 30 20 2 3 4 5
73 stringmap temp(a); temp.insert(b.begin(),b.end()); return temp; 23 // ^
74 } 24 cout << "mylist contains:";
75 int main() { 25 for (int x : mylist) cout << ’ ’ << x;
76 stringmap first, second, third; 26 cout << ’\n’;
77 first = {{"AAPL","Apple"},{"MSFT","Microsoft"}}; // init list 27 // mylist contains: 1 10 20 30 30 20 2 3 4 5
78 second = {{"GOOG","Google"},{"ORCL","Oracle"}}; // init list 28
79 third = merge(first,second); // move 29 //=======
80 first = third; // copy 30 // ERASE
81 cout << "first contains:"; 31 //=======
82 for (auto& x: first) cout << " " << x.first << ":" << x.second; 32 // http://www.cplusplus.com/reference/list/list/erase/
83 cout << ’\n’; 33
84 return 0; 34 list<int> mylist;
85 } 35 list<int>::iterator it1,it2;
86 // first contains: MSFT:Microsoft AAPL:Apple GOOG:Google ORCL:Oracle 36 // set some values:
37 rep(i,1,9) mylist.push_back(i*10);
3.1.10 Deque 38 // 10 20 30 40 50 60 70 80 90
39 it1 = it2 = mylist.begin(); // ^^
1 // references: 40 advance (it2,6); // ^ ^
2 // http://www.cplusplus.com/reference/deque/deque/ 41 ++it1; // ^ ^
3 // https://www.geeksforgeeks.org/deque-cpp-stl/ 42

4 // SUMMARY: deque can do the same things as vector 43 it1 = mylist.erase(it1); // 10 30 40 50 60 70 80 90


5 // + push_front() + emplace_front() 44 // ^ ^
6 // - contiguous memory allocation is not guaranteed 45

7 // (elements may be stored in fragmented chunks of memory) 46 it2 = mylist.erase(it2); // 10 30 40 50 60 80 90


8 deque<int> dq = { 1, 2, 3 }; 47 // ^ ^
9 dq.push_back(8); // { 1, 2, 3, 8 } 48 ++it1; // ^ ^
10 dq.push_front(100); // { 100, 1, 2, 3, 8 } 49 --it2; // ^ ^
11 dq.pop_back(); // { 100, 1, 2, 3 } 50 mylist.erase(it1,it2); // 10 30 60 80 90
12 dq.pop_front(); // { 1, 2, 3} 51 // ^
52 cout << "mylist contains:";
3.1.11 List 53 for (int x : mylist) cout << ’ ’ << x;
54 cout << ’\n’;
1 // full documentation: 55 // mylist contains: 10 30 60 80 90
2 // http://www.cplusplus.com/reference/list/list/
3 // https://www.geeksforgeeks.org/list-cpp-stl/ 3.1.12 Policy based Data Structures: Ordered Set
4
5 //======== 1 // references:
6 // INSERT 2 // https://www.geeksforgeeks.org/ordered-set-gnu-c-pbds/
7 //======== 3 // https://www.geeksforgeeks.org/policy-based-data-structures-g/
8 // http://www.cplusplus.com/reference/list/list/insert/ 4 // https://codeforces.com/blog/entry/11080
9 5 #include <bits/stdc++.h>
10 list<int> mylist; 6 using namespace std;
11 list<int>::iterator it; 7 #include <ext/pb_ds/assoc_container.hpp>
12 // set some initial values: 8 #include <ext/pb_ds/tree_policy.hpp>
13 rep(i,1,5) mylist.push_back(i); // 1 2 3 4 5 9 using namespace __gnu_pbds;
14 it = mylist.begin(); 10
15 ++it; // it points now to number 2 ^ 11 typedef tree<
16 mylist.insert(it,10); // 1 10 2 3 4 5 12 int,
17 // "it" still points to number 2 ^ 13 null_type,
18 mylist.insert (it,2,20); // 1 10 20 20 2 3 4 5 14 less<int>,
19 --it; // it points now to the second 20 ^ 15 rb_tree_tag,
20 vector<int> myvector (2,30); 16 tree_order_statistics_node_update
21 mylist.insert (it,myvector.begin(),myvector.end()); 17 > ordered_set;
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.2 Sparse Tables Página 12 de 40
18 14 n = arr->size();
19 int main() { 15 int maxlog = 31 - __builtin_clz(n);
20 ordered_set o_set; 16 memo.assign(n * (maxlog + 1), -1);
21 o_set.insert(5); 17 }
22 o_set.insert(1); 18 // dp(i,e) = min { arr[j] } for j in {i, i+1, ..., i+2^e-1}
23 o_set.insert(2); 19 int dp(int i, int e) {
24 // Finding the second smallest element 20 int& ans = memo[e * n + i];
25 // in the set using * because 21 if (ans != -1) return ans;
26 // find_by_order returns an iterator 22 if (e == 0) return ans = (*arr)[i];
27 cout << *(o_set.find_by_order(1)) << ’\n’; 23 return ans = min(dp(i, e-1), dp(i+(1<<(e-1)), e-1));
28 // Finding the number of elements 24 }
29 // strictly less than k=4 25
30 cout << o_set.order_of_key(4) << ’\n’; 26 // ---- RMQ = Range Minimum Query ----
31 // Finding the count of elements less 27 // rmq(l,r) = min { arr[j] } for j in {l, l+1, ..., r}
32 // than or equal to 4 i.e. strictly less 28
33 // than 5 if integers are present 29 // option 1: complexity O(1)
34 cout << o_set.order_of_key(5) << ’\n’; 30 int rmq_O1(int l, int r) {
35 // Deleting 2 from the set if it exists 31 int e = 31 - __builtin_clz(r - l + 1);
36 if (o_set.find(2) != o_set.end()) 32 return min(dp(l,e), dp(r - (1 << e) + 1, e));
37 o_set.erase(o_set.find(2)); 33 }
38 // Now after deleting 2 from the set 34
39 // Finding the second smallest element in the set 35 // option 2: complexity O(log N)
40 cout << *(o_set.find_by_order(1)) << ’\n’; 36 int rmq_Ologn(int l, int r) {
41 // Finding the number of 37 int ans = INT_MAX;
42 // elements strictly less than k=4 38 int d = r-l+1;
43 cout << o_set.order_of_key(4) << ’\n’; 39 for (int e = 0; d; e++, d>>=1) {
44 return 0; 40 if (d & 1) {
45 } 41 ans = min(ans, dp(l, e));
42 l += 1 << e;
3.1.13 Bitset 43 }
44 }
1 bitset<4> foo; // 0000 45 return ans;
2 foo.size(); // 4 46 }
3 foo.set(); // 1111 47 };
4 foo.set(1,0); // 1101 48
5 foo.test(1); // false 49 // example of usage
6 foo.set(1); // 1111 50 int main() {
7 foo.test(1); // true 51 vector<int> arr = {1, 3, 4, 3, 1, 6, 7, 4, 8, 9};
52 SparseTable st(arr);
3.2 Sparse Tables 53 while (true) {
54 int l, r; cin >> l >> r; // read query
1 #include <bits/stdc++.h> 55 cout << st.rmq_O1(l,r) << ’\n’; // print minimum
2 using namespace std; 56 }
3 57 return 0;
4 // time complexity: 58 }
5 // - filling DP table: O(N log N)
6 // - answering queries: O(1) / O(log N) 3.3 Fenwick Tree
7
8 struct SparseTable { 1 struct BIT { // BIT = binary indexed tree (a.k.a. Fenwick Tree)
9 int n; 2 vector<int> bit;
10 vector<int> memo; 3 BIT(int n) { bit.assign(n+1, 0); }
11 vector<int>* arr; 4 // prefix sum query (sum in range 1 .. k)
12 SparseTable(vector<int>& _arr) { 5 int psq(int k) {
13 arr = &_arr; 6 int sum = 0;
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.4 Fenwick Tree 2D Página 13 de 40
7 for (; k; k -= (k & -k)) sum += bit[k]; 4 //=============================
8 return sum; 5 // 1) Segment Tree - ITERATIVE
9 } 6 //=============================
10 // range sum query (sum in range a .. b) 7 // source: https://docs.google.com/document/d/1rcex_saP4tExbbU62qGUjR3eenxOh-50
11 int rsq(int a, int b) { i9Y45WtHkc4/
12 return psq(b) - psq(a-1); 8 /*
13 } 9 Se requiere un struct para el nodo (ej: prodsgn).
14 // increment k’th value by v (and propagate) 10 Un nodo debe tener tres constructores:
15 void add(int k, int v) { 11 Aridad 0: Construye el neutro de la operacion
16 for (; k < bit.size(); k += (k & -k)) bit[k] += v; 12 Aridad 1: Construye un nodo hoja a partir del input
17 } 13 Aridad 2: Construye un nodo segun sus dos hijos
18 }; 14
15 Construccion del segment tree:
3.4 Fenwick Tree 2D 16 Hacer un arreglo de nodos (usar ctor de aridad 1).
17 ST<miStructNodo> miSegmentTree(arregloDeNodos);
18 Update:
1 struct BIT2D { // BIT = binary indexed tree (a.k.a. Fenwick Tree) 19 miSegmentTree.set_point(indice, miStructNodo(input));
2 vector<int> bit; 20 Query:
3 int R, C; 21 miSegmentTree.query(l, r) es inclusivo exclusivo y da un nodo. Usar la info del nodo
4 BIT2D(int _R, int _C) : R(_R+1), C(_C+1) { para obtener la respuesta.
5 bit.assign(R*C, 0); 22 */
6 } 23 template<class node> struct ST {
7 void add(int r, int c, int value) { 24 vector<node> t; int n;
8 for (int i = r; i < R; i += (i&-i)) 25 ST(vector<node> &arr) {
9 for (int j = c; j < C; j += (j&-j)) 26 n = arr.size();
10 bit[i * C + j] += value; 27 t.resize(n*2);
11 } 28 copy(arr.begin(), arr.end(), t.begin() + n);
12 // sum[(1, 1), (r, c)] 29 for (int i = n-1; i > 0; --i)
13 int sum(int r, int c) { 30 t[i] = node(t[i<<1], t[i<<1|1]);
14 int res = 0; 31 }
15 for (int i = r; i; i -= (i&-i)) 32 // 0-indexed
16 for (int j = c; j; j -= (j&-j)) 33 void set_point(int p, const node &value) {
17 res += bit[i * C + j]; 34 for (t[p += n] = value; p > 1; p >>= 1)
18 return res; 35 t[p>>1] = node(t[p], t[p^1]);
19 } 36 }
20 // sum[(r1, c1), (r2, c2)] 37 // inclusive exclusive, 0-indexed
21 int sum(int r1, int c1, int r2, int c2) { 38 node query(int l, int r) {
22 return sum(r2, c2) - sum(r1-1, c2) - sum(r2, c1-1) + sum(r1-1, c1-1); 39 node ansl, ansr;
23 } 40 for (l += n, r += n; l < r; l >>= 1, r >>= 1) {
24 // get value at cell (r,c) 41 if (l&1) ansl = node(ansl, t[l++]);
25 int get(int r, int c) { 42 if (r&1) ansr = node(t[--r], ansr);
26 return sum(r, c, r, c); 43 }
27 } 44 return node(ansl, ansr);
28 // set value to cell (r,c) 45 }
29 void set(int r, int c, int value) { 45 }
30 add(r, c, -get(r, c) + value); 47
31 } 48 // Interval Product (LiveArchive)
32 }; 49 struct prodsgn {
50 int sgn;
3.5 Segment Tree 51 prodsgn() {sgn = 1;}
52 prodsgn(int x) { sgn = (x > 0) - (x < 0); }
1 #include <bits/stdc++.h> 53 prodsgn(const prodsgn &a, const prodsgn &b) { sgn = a.sgn*b.sgn; }
2 using namespace std; 54 };
3 55
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.6 Segment Tree Lazy Página 14 de 40
56 // Maximum Sum (SPOJ) 110 if (j < a or a < i) return;
57 struct maxsum { 111 if (i == j) st[a] += value;
58 int first, second; 112 else {
59 maxsum() {first = second = -1;} 113 int m = (i+j)/2, l = u*2+1, r = u*2+2;
60 maxsum(int x) { first = x; second = -1; } 114 update(a, value, l, i, m);
61 maxsum(const maxsum &a, const maxsum &b) { 115 update(a, value, r, m+1, j);
62 if (a.first > b.first) { 116 st[u] = t::merge_op(st[l], st[r]);
63 first = a.first; 117 }
64 second = max(a.second, b.first); 118 }
65 } else { 119
66 first = b.first; second = max(a.first, b.second); 120 public:
67 } 121 ST(vector<ll>& v) {
68 } 122 arr = &v;
69 int answer() { return first + second; } 123 n = v.size();
70 }; 124 st.resize(n*4+5);
71 125 build(0, 0, n-1);
72 // Range Minimum Query 126 }
73 struct rminq { 127
74 int value; 128 ll query(int a, int b) {
75 rminq() {value = INT_MAX;} 129 return query(a, b, 0, 0, n-1);
76 rminq(int x) {value = x;} 130 }
77 rminq(const rminq &a, const rminq &b) { 131
78 value = min(a.value, b.value); 132 void update(int a, ll value) {
79 } 133 update(a, value, 0, 0, n-1);
80 }; 134 }
81 135 };
82 //============================= 136
83 // 2) Segment Tree - RECURSIVE 137 struct RSQ { // range sum query
84 //============================= 138 static ll const neutro = 0;
85 139 static ll merge_op(ll x, ll y) { return x + y; }
86 template<class t> class ST { 140 };
87 vector<ll> *arr, st; int n; 141
88 142 struct RMinQ { // range minimum query
89 void build(int u, int i, int j) { 143 static ll const neutro = LLONG_MAX;
90 if (i == j) { 144 static ll merge_op(ll x, ll y) { return min(x, y); }
91 st[u] = (*arr)[i]; 145 };
92 return; 146
93 } 147 struct RMaxQ { // range maximum query
94 int m = (i+j)/2, l = u*2+1, r = u*2+2; 148 static ll const neutro = LLONG_MIN;
95 build(l, i, m); 149 static ll merge_op(ll x, ll y) { return max(x, y); }
96 build(r, m+1, j); 150 };
97 st[u] = t::merge_op(st[l], st[r]); 151
98 } 152 // usage
99 153 int main() {
100 ll query(int a, int b, int u, int i, int j) { 154 vector<ll> A = { 18, 17, 13, 19, 15, 11, 20 };
101 if (j < a or b < i) return t::neutro; 155 ST<RSQ> stl(A);
102 if (a <= i and j <= b) return st[u]; 156 stl.update(2, 100);
103 int m = (i+j)/2, l = u*2+1, r = u*2+2; 157 stl.query(1, 3);
104 ll x = query(a, b, l, i, m); 158 return 0;
105 ll y = query(a, b, r, m+1, j); 159 }
106 return t::merge_op(x, y);
107 } 3.6 Segment Tree Lazy
108
109 void update(int a, ll value, int u, int i, int j) { 1 #include <bits/stdc++.h>
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.7 Union-Find Página 15 de 40
using namespace std;
typedef long long int ll;

// Segment tree with lazy propagation over ll values: range update + range query.
//
// The policy class `t` supplies the algebra used by the tree:
//   t::neutro                    value returned for a node that misses the query range
//   t::merge_op(x, y)            combines the answers of two adjacent ranges
//   t::range_op(st_u, i, j, x)   answer of the node covering [i, j] after update x
//   t::prop_left_op(lz, x)       folds update x into the left child's pending update
//   t::prop_right_op(lz, x)      folds update x into the right child's pending update
// A pending update of 0 means "nothing pending".
//
// Layout: node u covers [i, j]; with m = (i+j)/2 its children are
// 2u+1 -> [i, m] and 2u+2 -> [m+1, j]. The root is node 0 over [0, n-1].
template<class t> class SegTreeLazy {
    vector<ll> *arr, st, lazy; int n;   // arr is only dereferenced while building

    // fill st[] bottom-up from *arr for the node u covering [i, j]
    void build(int u, int i, int j) {
        if (i == j) {
            st[u] = (*arr)[i];
            return;
        }
        int m = (i+j)/2, l = u*2+1, r = u*2+2;
        build(l, i, m);
        build(r, m+1, j);
        st[u] = t::merge_op(st[l], st[r]);
    }

    // apply update x to node u (covering [i, j]) and hand it down to its
    // children as pending work; clears u's own pending update
    void propagate(int u, int i, int j, ll x) {
        st[u] = t::range_op(st[u], i, j, x);
        if (i != j) {
            lazy[u*2+1] = t::prop_left_op(lazy[u*2+1], x);
            lazy[u*2+2] = t::prop_right_op(lazy[u*2+2], x);
        }
        lazy[u] = 0;
    }

    ll query(int a, int b, int u, int i, int j) {
        if (j < a or b < i) return t::neutro;          // disjoint
        if (lazy[u]) propagate(u, i, j, lazy[u]);      // settle pending work first
        if (a <= i and j <= b) return st[u];           // fully covered
        int m = (i+j)/2, l = u*2+1, r = u*2+2;
        ll x = query(a, b, l, i, m);
        ll y = query(a, b, r, m+1, j);
        return t::merge_op(x, y);
    }

    void update(int a, int b, ll value, int u, int i, int j) {
        // pending work is settled even on disjoint nodes, so that the parent's
        // merge_op below always reads an up-to-date st[] for both children
        if (lazy[u]) propagate(u, i, j, lazy[u]);
        if (a <= i and j <= b) propagate(u, i, j, value);
        else if (j < a or b < i) return; else {
            int m = (i+j)/2, l = u*2+1, r = u*2+2;
            update(a, b, value, l, i, m);
            update(a, b, value, r, m+1, j);
            st[u] = t::merge_op(st[l], st[r]);
        }
    }

    // common constructor tail: size the node arrays and build from src
    void init(vector<ll>& src) {
        n = static_cast<int>(src.size());
        st.assign(n*4+5, 0);
        lazy.assign(n*4+5, 0);
        if (n > 0) {            // build(0, 0, -1) would never terminate
            arr = &src;
            build(0, 0, n-1);
        }
        arr = nullptr;          // src is not needed (and may not live) after the build
    }

public:
    // build the tree over the values of v (v is only read during construction)
    SegTreeLazy(vector<ll>& v) {
        init(v);
    }

    // build the tree over n zero-initialised positions; n <= 0 gives an empty tree
    SegTreeLazy(int64_t n) {
        // a local buffer replaces the former `new vector<ll>(4 * n)`, which was
        // never freed and four times larger than what build() reads
        vector<ll> zeros(n > 0 ? static_cast<size_t>(n) : size_t(0));
        init(zeros);
    }

    // answer for the inclusive index range [a, b]; t::neutro on an empty tree
    ll query(int a, int b) {
        if (n == 0) return t::neutro;
        return query(a, b, 0, 0, n-1);
    }

    // apply `value` to every position of the inclusive index range [a, b]
    void update(int a, int b, ll value) {
        if (n == 0) return;
        update(a, b, value, 0, 0, n-1);
    }
};

struct RSQ { // range sum query, update = "add x to every position"
    static ll const neutro = 0;
    static ll merge_op(ll x, ll y) { return x + y; }
    // adding x to each of the (j - i + 1) positions raises the sum by that many x
    static ll range_op(ll st_u, int i, int j, ll x) { return st_u + (j - i + 1) * x; }
    static ll prop_left_op(ll left_child, ll x) { return left_child + x; }
    static ll prop_right_op(ll right_child, ll x) { return right_child + x; }
};

struct RMinQ { // range minimum query, update = "add x to every position"
    static ll const neutro = LLONG_MAX;
    static ll merge_op(ll x, ll y) { return min(x, y); }
    static ll range_op(ll st_u, int a, int b, ll x) { return st_u + x; }
    static ll prop_left_op(ll left_child, ll x) { return left_child + x; }
    static ll prop_right_op(ll right_child, ll x) { return right_child + x; }
};

struct RMaxQ { // range maximum query, update = "add x to every position"
    static ll const neutro = LLONG_MIN;
    static ll merge_op(ll x, ll y) { return max(x, y); }
    static ll range_op(ll st_u, int a, int b, ll x) { return st_u + x; }
    static ll prop_left_op(ll left_child, ll x) { return left_child + x; }
    static ll prop_right_op(ll right_child, ll x) { return right_child + x; }
};

// usage
// Guarded so that pasting this listing next to a solution's own main() does
// not produce a second entry point; build with -DSEGTREE_LAZY_DEMO to run it.
#ifdef SEGTREE_LAZY_DEMO
int main() {
    vector<ll> A = { 18, 17, 13, 19, 15, 11, 20 };
    SegTreeLazy<RSQ> stl(A);
    stl.update(1, 5, 100);
    stl.query(1, 3);
    return 0;
}
#endif
3.7 Union-Find
Pablo Messina’s ICPC Notebook 4 BINARY SEARCH - Página 16 de 40
1 #include <bits/stdc++.h> 23 if (a[m] >= key)
2 using namespace std; 24 j = m;
3 25 else
4 struct UnionFind { 26 i = m + 1;
5 vector<int> p, rank, setSize; 27 }
6 int numSets; 28 return i;
7 UnionFind(int n) { 29 }
8 numSets = n; setSize.assign(n, 1); rank.assign(n, 0); p.resize(n); 30
9 rep(i,0,n-1) p[i] = i; 31 // -----------------------------
10 } 32 // EXAMPLE 2: Integer Upperbound
11 int findSet(int i) { return (p[i] == i) ? i : (p[i] = findSet(p[i])); } 33 // predicate(a, i, key) = (a[i] > key)
12 bool isSameSet(int i, int j) { return findSet(i) == findSet(j); } 34 // i.e. "first element > key"
13 void unionSet(int i, int j) { 35 int upperbound(vector<int>& a, int key, int i, int j) {
14 if (!isSameSet(i, j)) { 36 while (i < j) {
15 numSets--; 37 int m = (i + j) / 2;
16 int x = findSet(i), y = findSet(j); 38 if (a[m] > key)
17 // rank is used to keep the tree short 39 j = m;
18 if (rank[x] > rank[y]) { 40 else
19 p[y] = x; setSize[x] += setSize[y]; 41 i = m + 1;
20 } else { 42 }
21 p[x] = y; setSize[y] += setSize[x]; 43 return i
22 if (rank[x] == rank[y]) rank[y]++; 44 }
23 } 45
24 } 46 /* ======================================= */
25 } 47 /* std::upper_bound(), std::lower_bound() */
26 int numDisjointSets() { return numSets; } 48 /* ======================================= */
27 int sizeOfSet(int i) { return setSize[findSet(i)]; } 49
28 }; 50 // search between [first, last)
51 // if no value is >= key (lb) / > key (ub), return last
4 Binary Search 52
53 #include <bits/stdc++.h>
54

1 // Find the index of the first item that satisfies a predicate 55 int main () {
2 // over a range [i,j), i.e., from i to j-1 56 vector<int> v{10,20,30,30,20,10,10,20}; // 10 20 30 30 20 10 10 20
3 // If no such index exists, j is returned 57 sort (v.begin(), v.end()); // 10 10 10 20 20 20 30 30
4 function binsearch(array, i, j) { 58 auto low = lower_bound (v.begin(), v.end(), 20); // ^
5 assert(i < j) // since the range is [i,j), then j must be > i 59 auto up = upper_bound (v.begin(), v.end(), 20); // ^
6 while (i < j) { 60 cout << "lower_bound at position " << (low- v.begin()) << ’\n’;
7 m = (i+j) >> 1; // m = (i+j) / 2; 61 cout << "upper_bound at position " << (up - v.begin()) << ’\n’;
8 if (predicate(array[m])) 62 return 0;
9 j = m 63 }
10 else 64

11 i = m + 1 65 // ------------------------------------------------
12 } 66 // Query: how many items are LESS THAN (<) value x
13 return i; // notice that i == j if the predicate is false for the whole range 67

14 } 68 lower_bound(v.begin(), v.end(), x) - v.begin();


15 69

16 // ----------------------------- 70 // ------------------------------------------------
17 // EXAMPLE 1: Integer Lowerbound 71 // Query: how many items are GREATER THAN (>) value x
18 // predicate(a, i, key) = (a[i] >= key) 72

19 // i.e. "first element >= key" 73 v.end() - upper_bound(v.begin(), v.end(), x);


20 int lowerbound(vector<int>& a, int key, int i, int j) { 74

21 while (i < j) { 75 //======================


22 int m = (i + j) / 2; 76 // std::binary_search()
Pablo Messina’s ICPC Notebook 6 DYNAMIC PROGRAMMING - Página 17 de 40
77 //====================== 17 left = m1, right = m2;
78 bool myfunction (int i,int j) { return (i<j); } 18 }
79 std::vector<int> v{1,2,3,4,5,4,3,2,1}; 19
80 sort(v.begin(), v.end()); 20 ans = (v1 + v2) * 0.5;
81 bool found = std::binary_search (v.begin(), v.end(), 6, myfunction)
82
83
6 Dynamic Programming
84 /* ======================= */
85 /* Discrete Ternary Search */ 6.1 Longest Increasing Subsequence
86 /* ======================= */
87
88 int min_search(int i, int j) { 1 // =====================================
89 while (i < j) { 2 // LIS (Longest Increasing Subsequence)
90 int m = (i+j)/2; 3 // =====================================
91 int slope = eval(m+1) - eval(m); 4 // references:
92 if (slope >= 0) 5 // https://stackoverflow.com/questions/2631726/how-to-determine-the-longest-increasing-
93 j = m; subsequence-using-dynamic-programming
94 else 6 const int MAXLEN = 1000000;
95 i = m+1; 7 // return the length of the longest increasing (non-decreasing)
96 } 8 // subsequence in values
97 return i; 9 int LIS(vector<int>& values) {
98 } 10 static int q[MAXLEN+1];
99
11 int len = 0;
100 int max_search(int i, int j) { 12 q[0] = -INT_MAX; // make sure it’s strictly smallest
101 while (i < j) { 13 for (int val : values) {
102 int m = (i+j)/2; 14 if (q[len] < val) { // use <= if non-decreasing
103 int slope = eval(m+1) - eval(m); 15 q[++len] = val;
104 if (slope <= 0) 16 } else {
105 j = m; 17 int l=1, r=len;
106 else 18 while (l<r) {
107 i = m+1; 19 int m = (l+r)>>1;
108 } 20 if (q[m] >= val) { // use > if non-decreasing
109 return i; 21 r = m;
110 } 22 } else {
23 l = m+1;
24 }
5 Ternary Search 25 }
26 q[l] = val;
27 }
1 int times = 100; 28 }
2 double left = 0.0; 29 return len;
3 double right = 1000.0; 30 }
4 double ans, m1, m2, v1, v2, third;
5
6 while (times--) {
6.2 Travelling Salesman Problem
7 third = (right - left) / 3.0;
8 m1 = left + third; 1 // ----------------------------------
9 m2 = right - third; 2 // Travelling Salesman Problem (TSP)
10 v1 = eval(m1); 3 // ----------------------------------
11 v2 = eval(m2); 4 // complexity: O(2^N * N^2) time (2^N * N states, up to N transitions each), O(2^N * N) memory
12 if (v1 < v2) 5
13 left = m1; 6 const int MAXN = 14; // maximum number of nodes in the problem statement
14 else if(v2 < v1) 7 int cost[MAXN][MAXN]; // cost[i][j]: cost to travel from node i to node j
15 right = m2; 8 // make sure cost[i][j] >= 0
16 else 9 int start_index; // OPTIONAL: if you need to remember the start node
Pablo Messina’s ICPC Notebook 6 DYNAMIC PROGRAMMING - 6.3 Knapsack Página 18 de 40
10 12 function DP(i, c)
11 // dp(mask, i): find the minimum cost of visiting all nodes indicated by ’mask’ 13 if i == first
12 // starting from node ’i’. 14 if c >= weight[i] && value[i] > 0 // enough space and worth it
13 // *** OPTIONAL VARIANT: include the cost of returning back to the start node at the end 15 return value[i]
14 // 16 else
15 // * mask: an int whose bits indicate the nodes we want to visit 17 return 0
16 // ** if j-th bit in mask is 1, the j-th node should be visited 18 else
17 // else, the j-th node should be ignored 19 ans = DP(i-1, c)
18 // 20 if c >= weight[i] && value[i] > 0 // enough space and worth it
19 // * i: node we are starting the travel from (i’th bit should be 1 in mask) 21 ans = max(ans, value[i] + DP(i-1, c - weight[i]))
20 int memo[1 << MAXN][MAXN]; // 2^MAXN x MAXN 22 return ans
21 int dp(int mask, int i) { 23
22 // base case 1: problem already solved 24 // -----------
23 int& ans = memo[mask][i]; 25 // BOTTOM-UP
24 if (ans != -1) return ans; 26
25 27 #define MAXN 1000 // max num items
26 // mark i-th node as visited 28 #define MAXC 500 // max capacity
27 int mask2 = mask & ~(1 << i); 29 int value[MAXN];
28 30 int weight[MAXN];
29 // base case 2: nothing else to visit 31 int memo[MAXC+1]; // 0 ... MAXC
30 if (mask2 == 0) return ans = 0; 32 int N, C;
31 // if (mask2 == 0) return ans = cost[i][start_index]; // <--- if returning back to 33
start 34 int dp() {
32 35 // first item (i = 0)
33 // general case: try all possible next nodes 36 memset(memo, 0, sizeof(memo[0]) * (C+1));
34 ans = INT_MAX; 37 if (value[0] > 0) { // worth it
35 for (int j = 0, tmp=mask2; tmp; ++j, tmp>>=1) { 38 rep (c, weight[0], C) {
36 if (tmp & 1) ans = min(ans, cost[i][j] + dp(mask2, j)); 39 memo[c] = value[0];
37 } 40 }
38 41 }
39 // return answer 42 // other items (i = 1 .. N-1)
40 return ans; 43 rep (i, 1, N-1) {
41 } 44 if (value[i] > 0) { // worth it
42 45 invrep(c, C, weight[i]) { // <--- REVERSE ORDER !!
43 int main() { // usage 46 memo[c] = max(memo[c], value[i] + memo[c - weight[i]]);
44 memset(memo, -1, sizeof memo); 47 }
45 start_index = 0; 48 }
46 cout << dp((1 << N)-1, start_index); // <-- mincost of visiting all N nodes starting 49 }
from 0 50 return memo[C];
47 } 51 }
52
6.3 Knapsack 53 // --------------------------------------
54 // VARIANT 2: with repetition of items (each item may be taken any number of times)
1 /* ===================== */ 55 // -------------------------------------
2 /* Knapsack problem : DP */ 56

3 /* ===================== */ 57 // ---------------------------------
4 58 // TOP-DOWN RECURSION (pseudo-code)
5 // -------------------------------------- 59

6 // VARIANT 1: without repetition of items (each item is taken at most once) 60 function DP(i, c)


7 // --------------------------------------- 61 if i == first
8 62 if c >= weight[i] && value[i] > 0 // enough space and worth it
9 // --------------------------------- 63 return value[i]
10 // TOP-DOWN RECURSION (pseudo-code) 64 else
11 65 return 0
Pablo Messina’s ICPC Notebook 6 DYNAMIC PROGRAMMING - 6.4 Divide & Conquer Optimization Página 19 de 40
66 else 8 int G,L;
67 ans = DP(i-1, c) 9 ll DP[MAXG+1][MAXL+1];
68 if c >= weight[i] && value[i] > 0 // enough space and worth it 10
69 ans = max(ans, value[i] + DP(i, c - weight[i])) // << i instead of i-1 11 // return cost of forming a group with items in the range i .. j
70 return ans 12 ll group_cost(int i, int j) { ... }
71 13
72 // ----------- 14 /**
73 // BOTTOM-UP 15 Calculates the values of DP[g][l] for l1 <= l <= l2 (a range of cells in row ’g’)
74 16 using divide & conquer optimization
75 #define MAXN 1000 // max num items 17
76 #define MAXC 500 // max capacity 18 DP[g][l] means: given a list of the first ’l’ items, partition them into ’g’ groups,
77 int value[MAXN]; 19 each group consisting of consecutive items (left to right), so that the total
78 int weight[MAXN]; 20 cost of forming those groups is the minimum possible.
79 int memo[2][MAXC + 1]; // 0 .. MAXC 21
80 int N, C; 22 If we form one group at a time, from right to left, this leads to the following
81 23 recursion:
82 int dp() { 24
83 // first item (i = 0) 25 DP[g][l] = min { DP[g-1][k] + group_cost(k,l-1) for k = g-1 .. l-1 }
84 memset(memo, 0, sizeof(memo[0]) * (C+1)); 26 DP[1][l] = group_cost(0, l-1)
85 if (value[0] > 0) { // worth it 27
86 rep (c, weight[0], C) { 28 in other words:
87 memo[0][c] = value[0] * (c / weight[0]); // collect it as many times as you 29
can 30 DP[g][l] = DP[g-1][best_k] + group_cost(best_k,l-1)
88 } 31 where best_k is the left most value of k where the minimum is reached
89 } 32
90 // other items (i = 1 .. N-1) 33 Now, for a given ’g’:
91 int prev = 0, curr = 1; 34
92 rep (i, 1, N-1) { 35 If best_k(g,0) <= best_k(g,1) <= best_k(g,2) <= ... <= best_k(g,L-1) holds
93 rep(c, 0, C) { // <--- INCREASING ORDER !! 36
94 if (c >= weight[i] && value[i] > 0) { // if fits in && worth it 37 Then, we can propagate those best_k’s recursively to reduce the range of
95 memo[curr][c] = max( 38 candidate k’s for each DP[g][l] problem we solve.
96 memo[prev][c], // option 1: don’t take it 39 Using Divide & Conquer, we fill the whole row ’g’ recursively with
97 value[i] + memo[curr][c - weight[i]] // option 2: take it 40 recursion depth O(log(L)), and each recursion layer taking O(L) time.
98 ); 41
99 } else { 42 Doing this for G groups, the total computation cost is O(G*L*log(L))
100 memo[curr][c] = memo[prev][c]; // only option is to skip it 43
101 } 44 */
102 } 45 void fill_row(int g, int l1, int l2, int k1, int k2) {
103 // update prev, curr 46 if (l1 > l2) return; // ensure valid range
104 prev = curr; 47 int lm = (l1+l2)/2; // solve middle case
105 curr = 1-curr; 48 int kmin = max(g-1, k1);
106 } 49 int kmax = min(lm-1, k2);
107 return memo[(N-1)&1][C]; // last item + full capacity 50 int best_k = -1;
108 } 51 ll mincost = LLONG_MAX;
52 rep(k,kmin,kmax) {
6.4 Divide & Conquer Optimization 53 ll tmp = DP[g-1][k] + group_cost(k, lm-1);
54 if (mincost > tmp) mincost = tmp, best_k = k;
1 #include <bits/stdc++.h> 55 }
2 using namespace std; 56 DP[g][lm] = mincost;
3 #define rep(i,a,b) for(int i=a;i<=b;++i) 57 fill_row(g, l1, lm-1, k1, best_k); // solve left cases
4 typedef long long int ll; 58 fill_row(g, lm+1, l2, best_k, k2); // solve right cases
5 59 }
6 #define MAXG 1000 60

7 #define MAXL 1000 61 void fill_dp() {


Pablo Messina’s ICPC Notebook 7 GRAPHS - Página 20 de 40
62 // base: g = 1 44 }
63 rep(l,1,L) DP[1][l] = group_cost(0,l-1); 45 }
64 // other: g >= 2 46 return count;
65 rep(g,2,G) fill_row(g,g,L,0,L); 47 }
66 }
7.2 DFS
7 Graphs
1 // =========================
2 // Depth First Search (DFS)
7.1 BFS 3 // =========================
4 const int MAXN = 1000;
1 const int MAXN = 1000; 5 vector<int> g[MAXN];
2 vector<int> g[MAXN]; // graph 6 bool visited[MAXN];
3 int depth[MAXN]; // bfs depth per node 7 int n;
4 int n; // number of nodes 8
5 9 //recursive
6 void bfs(int s) { 10 void dfs(int u) {
7 memset(depth, -1, sizeof(int) * n); // init depth with -1 11 visited[u] = true;
8 queue<int> q; q.push(s); // init queue and add ’s’ (starting node) 12 for(int v : g[u]) {
9 depth[s] = 0; // s will have depth 0 13 if(!visited[v]) {
10 while (!q.empty()) { // while there are nodes in the queue 14 dfs(v);
11 int u = q.front(); q.pop(); // extract the first node ’u’ from the queue 15 }
12 for (int v : g[u]) { // for each neighbor ’v’ of ’u’ 16 }
13 if (depth[v] == -1) { // if ’v’ has not been visited yet -> visit it 17 }
14 depth[v] = depth[u] + 1; 18
15 q.push(v); 19 //recursive, using depth
16 } 20 int depth[MAXN];
17 } 21 void dfs(int u, int d) {
18 } 22 depth[u] = d;
19 } 23 for(int v : g[u]) {
20 24 if(depth[v] == -1) { // not visited yet
21 //----------------------------- 25 dfs(v, d+1);
22 // Finding connected components 26 }
23 //----------------------------- 27 }
24 28 }
25 int count_cc() { 29
26 static bool visited[MAXN]; 30 //iterative
27 int count = 0; 31 void dfs(int root) {
28 memset(visited, 0, sizeof(bool)*n); 32 stack<int> s;
29 queue<int> q; 33 s.push(root);
30 rep(i,0,n-1) { 34 visited[root] = true;
31 if (!visited[i]) { 35 while (!s.empty()) {
32 count++; 36 int u = s.top(); s.pop();
33 visited[i] = true; 37 for (int v : g[u]) {
34 q.push(i); 38 if (!visited[v]) {
35 while (!q.empty()) { 39 visited[u] = true;
36 int u = q.front(); q.pop(); 40 s.push(v);
37 for (int v : g[u]) { 41 }
38 if (!visited[v]) { 42 }
39 visited[v] = true; 43 }
40 q.push(v); 44 }
41 } 45
42 } 46 //-----------------------------
43 } 47 // Finding connected components
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.3 TopoSort Página 21 de 40
48 //----------------------------- 13 void dfs(int u) {
49 int count_cc() { 14 visited[u] = true;
50 int count = 0; 15 for (int v : g[u]) {
51 memset(visited, 0, sizeof(bool)*n); 16 if (!visited[v])
52 rep(i,0,n-1) { 17 dfs(v);
53 if (!visited[i]) { 18 }
54 count++, dfs(i); 19 sorted.push_back(u);
55 } 20 }
56 } 21
57 return count; 22 void topo_sort() {
58 } 23 memset(visited, false, sizeof(bool) * n);
59 24 sorted.clear();
60 //------------------------------ 25 rep(i,0,n-1)
61 // Flood Fill 26 if (!visited[i])
62 //------------------------------ 27 dfs(i);
63 28 }
64 //explicit graph 29
65 const int DFS_WHITE = -1; 30 // ---------------------------
66 vector<int> dfs_num(DFS_WHITE,n); 31 // option 2: Kahn’s algorithm
67 void floodfill(int u, int color) { 32 // ---------------------------
68 dfs_num[u] = color; 33
69 for (int v : g[u]) { 34 vector<vi> g;
70 if (dfs_num[v] == DFS_WHITE) { 35 int n;
71 floodfill(v, color); 36 vi indegree;
72 } 37 vi sorted;
73 } 38
74 } 39 void compute_indegree() {
75 40 indegree.assign(n, 0);
76 //implicit graph 41 rep(u,0,n-1)
77 int dirs[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}; 42 rep(int v : g[u])
78 const char EMPTY = ’*’; 43 indegree[v]++;
79 int floodfill(int r, int c, char color) { 44 }
80 if (r < 0 || r >= R || c < 0 || c >= C) return 0; // outside grid 45
81 if (grid[r][c] != EMPTY) return 0; // cannot be colored 46 void topoSort() {
82 grid[r][c] = color; 47 sorted.clear();
83 int ans = 1; 48 compute_indegree();
84 rep(i,0,3) ans += floodfill(r + dirs[i][0], c + dirs[i][1], color); 49
85 return ans; 50 queue<int> q;
86 } 51 rep(i,0,n-1)
52 if (indegree[i] == 0)
7.3 TopoSort 53 q.push(i);
54

1 typedef vector<int> vi; 55 while(!q.empty()) {


2 56 int u = q.front(); q.pop();
3 // ---------------------------- 57 sorted.push_back(u);
4 // option 1: tarjan’s algorithm 58 for (int v : g[u]) {
5 // ---------------------------- 59 if(--indegree[v] == 0)
6 // Note: nodes are sorted in reversed order 60 q.push(v);
7 61 }
8 vector<vi> g; // graph 62 }
9 int n; // num of nodes 63 }
10 bool visited[MAXN]; // track visited nodes
11 vi sorted; 7.4 Dijkstra
12
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.5 Minimum Spanning Tree (Kruskal & Prim) Página 22 de 40
1 // complexity: (|E| + |V|) * log |V| 22 rep(i,0,n-1) p[i] = i;
2 #include <bits/stdc++.h> 23 }
3 using namespace std; 24 int findSet(int i) { return (p[i] == i) ? i : (p[i] = findSet(p[i])); }
4 typedef pair<int, int> ii; // (weight, node), in that order 25 bool isSameSet(int i, int j) { return findSet(i) == findSet(j); }
5 26 void unionSet(int i, int j) {
6 vector<vector<ii>> g; // graph 27 if (!isSameSet(i, j)) {
7 int N; // number of nodes 28 int x = findSet(i), y = findSet(j);
8 vector<int> mindist; // min distance from source to each node 29 if (rank[x] > rank[y]) { p[y] = x; }
9 vector<int> parent; // parent of each node in shortest path from source 30 else { p[x] = y; if (rank[x] == rank[y]) rank[y]++; }
10 31 }
11 void dijkstra(int source) { 32 }
12 parent.assign(N, -1); 33 };
13 mindist.assign(N, INT_MAX); 34 int find_mst(int n_nodes, vector<Edge>& edges, vector<vector<ii>>& mst) {
14 mindist[source] = 0; 35 sort(edges.begin(), edges.end());
15 priority_queue<ii, vector<ii>, greater<ii>> q; // minheap 36 UnionFind uf(n_nodes);
16 q.emplace(0, source); 37 mst.assign(n_nodes, vector<ii>());
17 while (!q.empty()) { 38 int mstcost = 0;
18 ii p = q.top(); q.pop(); 39 int count = 1;
19 int u = p.second, dist = p.first; // u = node, dist = mindist from source to u 40 for (auto& e : edges) {
20 if (mindist[u] < dist) continue; // skip outdated improvements 41 int u = e.u, v = e.v, cost = e.cost;
21 for (ii& e : g[u]) { 42 if (!uf.isSameSet(u, v)) {
22 int v = e.second, w = e.first; 43 mstcost += cost;
23 if (mindist[v] > dist + w) { 44 uf.unionSet(u, v);
24 mindist[v] = dist + w; 45 mst[u].emplace_back(v, cost);
25 parent[v] = u; 46 mst[v].emplace_back(u, cost);
26 q.emplace(mindist[v], v); 47 if (++count == n_nodes) break;
27 } 48 }
28 } 49 }
29 } 50 return mstcost;
30 } 51 }
52 }
7.5 Minimum Spanning Tree (Kruskal & Prim) 53
54 /* ============== */
1 #include <bits/stdc++.h> 55 /* METHOD 2: PRIM */
2 #define rep(i,a,b) for (int i=a; i<=b; ++i) 56 /* ============== */
3 using namespace std; 57

4 typedef pair<int,int> ii; 58 struct Edge {


5 59 int u, v, cost;
6 /* ================= */ 60 bool operator<(const Edge& o) const {
7 /* METHOD 1: KRUSKAL */ 61 return cost > o.cost; // we use ’>’ instead of ’<’ so that
8 /* ================= */ 62 // priority_queue<Edge> works as a minheap
9 63 }
10 struct Edge { 64 };
11 int u, v, cost; 65 namespace Prim {
12 bool operator<(const Edge& o) const { 66 bool visited[MAXN];
13 return cost < o.cost; 67 int find_mst(vector<vector<ii>>& g, vector<vector<ii>>& mst) {
14 } 68 int n_nodes = g.size();
15 }; 69 memset(visited, false, sizeof(bool) * n_nodes);
16 namespace Kruskal { 70 mst.assign(n_nodes, vector<ii>());
17 struct UnionFind { 71 priority_queue<Edge> q;
18 vector<int> p, rank; 72 int total_cost = 0;
19 UnionFind(int n) { 73 visited[0] = true;
20 rank.assign(n,0); 74 for (ii& p : g[0]) q.push({0, p.first, p.second});
21 p.resize(n); 75 int count = 1;
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.6 Lowest Common Ancestor (LCA) Página 23 de 40
76 while (!q.empty()) { 33 // dfs to record direct parents and depths
77 Edge edge = q.top(); q.pop(); 34 void dfs(int u, int p, int depth) {
78 if (visited[edge.v]) continue; 35 anc(u,0) = p;
79 int u = edge.u; 36 D[u] = depth;
80 int v = edge.v; 37 for (int v : (*g)[u]) {
81 int cost = edge.cost; 38 if (D[v] == -1) {
82 visited[v] = true; 39 dfs(v, u, depth + 1);
83 total_cost += cost; 40 }
84 mst[u].emplace_back(v, cost); 41 }
85 mst[v].emplace_back(u, cost); 42 }
86 if (++count == N) break; 43
87 for (ii p : g[v]) { 44 LCA(vector<vector<int>>& _g, int root) {
88 if (visited[p.first]) continue; 45 g = &_g;
89 q.push({v, p.first, p.second}); 46 n = _g.size();
90 } 47 maxe = log2(n);
91 } 48 D.assign(n, -1);
92 return total_cost; 49 A.resize(n * (maxe + 1));
93 } 50 dfs(root, -1, 0);
94 } 51 rep(e, 1, maxe) {
52 rep (u, 0, n-1) {
7.6 Lowest Common Ancestor (LCA)
54 // u’s 2^(e-1) th ancestor’s 2^(e-1) th ancestor
1 /* ============================ */ 55 int a = anc(u,e-1);
2 /* LCA (Lowest Common Ancestor) */ 56 anc(u,e) = (a == -1 ? -1 : anc(a,e-1));
3 /* ============================ */ 57 }
4 #include <bits/stdc++.h> 58 }
5 using namespace std; 59 }
6 #define rep(i,a,b) for (int i=a; i<=b; ++i) 60

7 #define invrep(i,b,a) for (int i=b; i>=a; --i) 61 // move node u "k" levels up towards the root
8 62 // i.e. find the k-th ancestor of u
9 // General comments: 63 int raise(int u, int k) {
10 // * Both of these methods assume that we are working with a connected 64 for (int e = 0; k; e++, k>>=1) if (k&1) u = anc(u,e);
11 // graph ’g’ of ’n’ nodes, and that nodes are compactly indexed from 0 to n-1. 65 return u;
12 // In case you have a forest of trees, a simple trick is to create a fake 66 }
13 // root and connect all the trees to it (make sure to re-index all your nodes) 67

14 // * 'g' need not be a 'tree', DFS will implicitly find a tree for you 68 int lca(int u, int v) {
15 // in case you don't care about the specific tree (e.g. if cycles are not important) 69 if (D[u] < D[v]) swap(u, v);
16 70 u = raise(u, D[u] - D[v]); // raise lowest to same level
17 // ------------------------------------------------------------ 71 if (u == v) return u; // same node, we are done
18 // METHOD 1: SPARSE TABLE - BINARY LIFTING (aka JUMP POINTERS) 72 // raise u and v to their highest ancestors below the LCA
19 // ------------------------------------------------------------ 73 invrep (e, maxe, 0) {
20 // construction: O(|V| log |V|) 74 // greedily take the biggest 2^e jump possible as long as
21 // query: O(log|V|) 75 // u and v still remain BELOW the LCA
22 // ** advantages: 76 if (anc(u,e) != anc(v,e)) {
23 // - the lca query can be modified to compute queries over the path between 2 nodes 77 u = anc(u,e), v = anc(v,e);
24 // - it’s possible to append new leaf nodes to the tree 78 }
25 79 }
26 struct LCA { 80 // the direct parent of u (or v) is lca(u,v)
27 vector<int> A, D; // ancestors, depths 81 return anc(u,0);
28 vector<vector<int>> *g; // pointer to graph 82 }
29 int n, maxe; // num nodes, max exponent 83

30 int& anc(int u, int e) { return A[e * n + u]; } 84 // distance between ’u’ and ’v’
31 int inline log2(int x) { return 31 - __builtin_clz(x); } 85 int dist(int u, int v) {
32 86 return D[u] + D[v] - 2 * D[lca(u,v)];
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.6 Lowest Common Ancestor (LCA) Página 24 de 40
87 } 140 D[idx++] = depth;
88 // optimized version (in case you already computed lca(u,v)) 141 }
89 int dist(int u, int v, int lca_uv) { 142 }
90 return D[u] + D[v] - 2 * D[lca_uv]; 143 }
91 } 144
92 // get the node located k steps from ’u’ walking towards ’v’ 145 LCA(vector<vector<int>>& _g, int root) {
93 int kth_node_in_path(int u, int v, int k) { 146 g = &_g;
94 int lca_uv = lca(u,v); 147 n = _g.size();
95 if (D[u] - D[lca_uv] >= k) return raise(u, k); 148 H.assign(n, -1);
96 return raise(v, dist(u,v,lca_uv) - k); 149 E.resize(2*n);
97 } 150 D.resize(2*n);
98 151 idx = 0;
99 int add_child(int p, int u) { // optional 152 dfs(root, 0); // euler tour
100 // add to graph 153 int nn = idx; // <-- make sure you use the correct number
101 (*g)[p].push_back(u); 154 int maxe = log2(nn);
102 // update depth 155 DP.resize(nn * (maxe+1));
103 D[u] = D[p] + 1; 156 // build sparse table with bottom-up DP
104 // update ancestors 157 rep(i,0,nn-1) rmq(i,0) = i; // base case
105 anc(u,0) = p; 158 rep(e,1,maxe) { // general cases
106 rep (e, 1, maxe){ 159 rep(i, 0, nn - (1 << e)) {
107 p = anc(p,e-1); 160 // i ... i + 2 ^ (e-1) - 1
108 if (p == -1) break; 161 int i1 = rmq(i,e-1);
109 anc(u,e) = p; 162 // i + 2 ^ (e-1) ... i + 2 ^ e - 1
110 } 163 int i2 = rmq(i + (1 << (e-1)), e-1);
111 } 164 // choose index with minimum depth
112 }; 165 rmq(i,e) = D[i1] < D[i2] ? i1 : i2;
113 166 }
114 // ------------------------------------------ 167 }
115 // METHOD 2: SPARSE TABLE - EULER TOUR + RMQ 168 }
116 // ------------------------------------------ 169
117 // construction: O(2|V| log 2|V|) = O(|V| log |V|) 170 int lca(int u, int v) {
118 // query: O(1) (** assuming that __builtin_clz is mapped to an 171 // get ocurrence indexes in increasing order
119 // efficient processor instruction) 172 int l = H[u], r = H[v];
120 173 if (l > r) swap(l, r);
121 174 // get node with minimum depth in range [l .. r] in O(1)
122 struct LCA { 175 int len = r - l + 1;
123 vector<int> E, D, H; // E = euler tour, D = depth, H = first index of node in euler 176 int e = log2(len);
tour 177 int i1 = rmq(l,e);
124 vector<int> DP // memo for range minimun query 178 int i2 = rmq(r - ((1 << e) - 1), e);
125 vector<vector<int>> *g; // pointer to graph 179 return D[i1] < D[i2] ? E[i1] : E[i2];
126 int idx; // tracks node ocurrences 180 }
127 int n; // number of nodes 181
128 182 int dist(int u, int v) {
129 int& rmq(int i, int e) { return DP[e * idx + i]; } 183 // make sure you use H to retrieve the indexes of u and v
130 inline int log2(int x) { return 31 - __builtin_clz(x); } 184 // within the Euler Tour sequence before using D
131 185 return D[H[u]] + D[H[v]] - 2 * D[H[lca(u,v)]];
132 void dfs(int u, int depth) { 186 }
133 H[u] = idx; // index of first u’s ocurrence 187 }
134 E[idx] = u; // record node ocurrence 188
135 D[idx++] = depth; // record depth 189 // -----------------
136 for (int v : (*g)[u]) { 190 // EXAMPLE OF USAGE
137 if (H[v] == -1) { 191 // -----------------
138 dfs(v, depth + 1); // explore v’s subtree and come back to u 192 int main() {
139 E[idx] = u; // new ocurrence of u 193 // build graph
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.7 Diameter of a Tree Página 25 de 40
194 int n, m; 2 // Tarjan’s Algorithm
195 scanf("%d%d", &n, &m); 3 // -------------------
196 vector<vector<int>> g(n); 4 //references:
197 while (m--) { 5 //https://www.youtube.com/watch?v=jFZsDDB0-vo
198 int u, v; scanf("%d%d", &u, &v); 6 //https://www.hackerearth.com/practice/algorithms/graphs/articulation-points-and-bridges/
199 g[u].push_back(v); tutorial/
200 g[v].push_back(u); 7 //https://www.hackerearth.com/practice/algorithms/graphs/biconnected-components/tutorial/
201 } 8 //http://web.iitd.ac.in/~bspanda/biconnectedMTL776.pdf
202 // init LCA 9 typedef pair<int,int> ii;
203 LCA lca(g,0); 10 const int MAXN = 1000;
204 // answer queries 11 int depth[MAXN];
205 int q; scanf("%d", &q); 12 int low[MAXN];
206 while (q--) { 13 vector<int> g[MAXN];
207 int u, v; scanf("%d%d", &u, &v); 14 stack<ii> edge_stack;
208 printf("LCA(%d,%d) = %d\n", u, v, lca.lca(u,v)); 15
209 printf("dist(%d,%d) = %d\n", u, v, lca.dist(u,v)); 16 void print_and_remove_bicomp(int u, int v) {
210 } 17 puts("biconnected component found:");
211 }; 18 ii uv(u,v);
19 while (true) {
7.7 Diameter of a Tree 20 ii top = edge_stack.top();
21 edge_stack.pop();
1 // ========================== 22 printf("(%d, %d)\n", top.first, top.second);
2 // Find Tree’s Diameter Ends 23 if (top == uv) break;
3 // ========================== 24 }
4 const int MAXN = 10000; 25 }
5 26

6 int farthest_from(vector<vi>& g, int s) { // find farthest node from ’s’ with BFS 27 void dfs(int u, int p, int d) { // (node, parent, depth)
7 static int dist[MAXN]; 28 static num_root_children = 0;
8 memset(dist, -1, sizeof(int) * g.size()); 29 depth[u] = d;
9 int farthest = s; 30 low[u] = d; // u at least can reach itself (ignoring u-p edge)
10 queue<int> q; 31 for(int v : g[u]) {
11 q.push(s); 32 if (v == p) continue; // direct edge to parent -> ignore
12 dist[s] = 0; 33 if (depth[v] == -1) { // exploring a new, unvisited child node
13 while (!q.empty()) { 34 edge_stack.emplace(u,v); // add edge to stack
14 int u = q.front(); q.pop(); 35 dfs(v, u, d + 1); // explore recursively v’s subtree
15 for (int v : g[u]) { 36 // 1) detect articulation points and biconnected components
16 if (dist[v] == -1) { 37 if (p == -1) { // 1.1) special case: if u is root
17 dist[v] = dist[u] + 1; 38 if (++num_root_children == 2) {
18 q.push(v); 39 // we detected that root has AT LEAST 2 children
19 if (dist[v] > dist[farthest]) farthest = v; 40 // therefore root is an articulation point
20 } 41 printf("root = %d is articulation point\n", root);
21 } 42 }
22 } 43 // whenever we come back to the root, we just finished
23 return farthest; 44 // exploring a whole biconnected component
24 } 45 print_and_remove_bicomp(u,v);
25 46 } else if (low[v] >= d) { // 1.2) general case: non-root
26 void find_diameter(vector<vi>& g, int& e1, int& e2) { 47 printf("u = %d is articulation point\n", u);
27 e1 = farthest_from(g, 0); 48 // we entered through and came back to an AP,
28 e2 = farthest_from(g, e1); 49 // so we just finished exploring a whole biconnected component
29 } 50 print_and_remove_bicomp(u,v);
51 }
7.8 Articulation Points, Cut Edges, Biconnected Components 52 // 2) detect cut edges (a.k.a. bridges)
53 if (low[v] > depth[u]) {
1 // ------------------- 54 printf("(u,v) = (%d, %d) is cut edge\n", u, v);
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.9 Strongly Connected Components Página 26 de 40
55 } 40 ids.assign(n, -1);
56 // propagate low 41 low.resize(n);
57 low[u] = min(low[u], low[v]); 42 instack.assign(n, 0);
58 } else if (depth[v] < d) { // back-edge to proper ancestor 43 ID = 0;
59 edge_stack.emplace(u,v); // add edge to stack 44 rep(u, 0, n-1) if (ids[u] == -1) dfs(u);
60 low[u] = min(low[u], depth[v]); // propagate low 45 }
61 } else { // forward-edge to an already visited descendant 46 };
62 // => do nothing, because this edge was already considered as a 47
63 // back-edge from v -> u 48 // example of usage
64 } 49 int main() {
65 } 50 // read and build graph from standard input
66 } 51 int n, m; cin >> n >> m;
52 vector<vector<int>> g(n);
7.9 Strongly Connected Components 53 while(m--) {
54 int u, v; cin >> u >> v; u--, v--;
1 #include <bits/stdc++.h> 55 g[u].push_back(v);
2 #define rep(i,a,b) for(int i=a; i<=b; ++i) 56 }
3 using namespace std; 57 // find SCCs
4 // ----------------------------------------- 58 tarjanSCC tscc(g);
5 // implementation of Tarjan’s SCC algorithm 59 return 0;
6 struct tarjanSCC { 60 }
7 vector<int> _stack, ids, low;
8 vector<bool> instack; 7.10 Max Flow : Dinic
9 vector<vector<int>>* g;
10 int n, ID; 1 // Time Complexity:
11 void dfs(int u) { 2 // - general worst case: O (|E| * |V|^2)
12 ids[u] = low[u] = ID++; 3 // - unit capacities: O( min(V^(2/3), sqrt(E)) * E)
13 instack[u] = true; 4 // - Bipartite graph (unit capacities) + source & sink (any capacities): O(E sqrt V)
14 _stack.push_back(u); 5
15 for (int v : (*g)[u]) { 6 #include <bits/stdc++.h>
16 if (ids[v] == -1) { 7 using namespace std;
17 dfs(v); 8 typedef long long int ll;
18 low[u] = min(low[v], low[u]); 9
19 } else if (instack[v]) { 10 struct Dinic {
20 low[u] = min(low[v], low[u]); 11 struct edge {
21 } 12 int to, rev;
22 } 13 ll f, cap;
23 if (low[u] == ids[u]) { 14 };
24 // u is the root of a SCC 15
25 // ** here you can do whatever you want 16 vector<vector<edge>> g;
26 // with the SCC just found 17 vector<ll> dist;
27 cout << "SCC found!\n"; 18 vector<int> q, work;
28 // remove SCC from top of the stack 19 int n, sink;
29 while (1) { 20
30 int x = _stack.back(); _stack.pop_back(); 21 bool bfs(int start, int finish) {
31 instack[x] = false; 22 dist.assign(n, -1);
32 if (x == u) break; 23 dist[start] = 0;
33 } 24 int head = 0, tail = 0;
34 } 25 q[tail++] = start;
35 } 26 while (head < tail) {
36 tarjanSCC(vector<vector<int>>& _g) { 27 int u = q[head++];
37 g = &_g; 28 for (const edge &e : g[u]) {
38 n = _g.size(); 29 int v = e.to;
39 _stack.reserve(n); 30 if (dist[v] == -1 and e.f < e.cap) {
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - Página 27 de 40
31 dist[v] = dist[u] + 1; 84 int main() {
32 q[tail++] = v; 85 Dinic din(2);
33 } 86 din.add_edge(0,1,10);
34 } 87 ll mf = din.max_flow(0,1);
35 } 88 }
36 return dist[finish] != -1;
37
38
}
8 Mathematics
39 ll dfs(int u, ll f) {
40 if (u == sink) 8.1 Euclidean Algorithm
41 return f;
42 for (int &i = work[u]; i < (int)g[u].size(); ++i) { 1 typedef long long int ll;
43 edge &e = g[u][i]; 2
44 int v = e.to; 3 inline ll mod(ll x, ll m) { return ((x %= m) < 0) ? x+m : x; }
45 if (e.cap <= e.f or dist[v] != dist[u] + 1) 4
46 continue; 5 /* ============================= */
47 ll df = dfs(v, min(f, e.cap - e.f)); 6 /* GCD (greatest common divisor) */
48 if (df > 0) { 7 /* ============================= */
49 e.f += df; 8 // OPTION 1: using C++ builtin function __gcd
50 g[v][e.rev].f -= df; 9 __gcd(a,b)
51 return df; 10 // OPTION 2: manually using Euclid’s algorithm
52 } 11 int gcd (ll a, ll b) {
53 } 12 while (b) { a %= b; swap(a,b); }
54 return 0; 13 return a;
55 } 14 }
56 15
57 Dinic(int n) { 16 /* ============ */
58 this->n = n; 17 /* extended GCD */
59 g.resize(n); 18 /* ============ */
60 dist.resize(n); 19 // extended euclid’s algorithm: find g, x, y such that
61 q.resize(n); 20 // a * x + b * y = g = gcd(a, b)
62 } 21 // The algorithm finds a solution (x0,y0) but there are infinitely many more:
63 22 // x = x0 + n * (b/g)
64 void add_edge(int u, int v, ll cap) { 23 // y = y0 - n * (a/g)
65 edge a = {v, (int)g[v].size(), 0, cap}; 24 // where n is integer, are the set of all solutions
66 edge b = {u, (int)g[u].size(), 0, 0}; //Poner cap en vez de 0 si la arista es 25
bidireccional 26 // --- version 1: iterative
67 g[u].push_back(a); 27 ll gcdext(ll a, ll b, ll& x, ll& y) {
68 g[v].push_back(b); 28 ll r2, x2, y2, r1, x1, y1, r0, x0, y0, q;
69 } 29 r2 = a, x2 = 1, y2 = 0;
70 30 r1 = b, x1 = 0, y1 = 1;
71 ll max_flow(int source, int dest) { 31 while (r1) {
72 sink = dest; 32 q = r2 / r1;
73 ll ans = 0; 33 r0 = r2 % r1;
74 while (bfs(source, dest)) { 34 x0 = x2 - q * x1;
75 work.assign(n, 0); 35 y0 = y2 - q * y1;
76 while (ll delta = dfs(source, LLONG_MAX)) 36 r2 = r1, x2 = x1, y2 = y1;
77 ans += delta; 37 r1 = r0, x1 = x0, y1 = y0;
78 } 38 }
79 return ans; 39 ll g = r2; x = x2, y = y2;
80 } 40 if (g < 0) g = -g, x = -x, y = -y; // make sure g > 0
81 }; 41 // for debugging (in case you think you might have bugs)
82 42 // assert (g == a * x + b * y);
83 // usage 43 // assert (g == __gcd(abs(a),abs(b)));
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.2 Primality Test Página 28 de 40
44 return g; 97 /* Linear Congruence Equation */
45 } 98 /* ========================== */
46 99 // recommended reading:
47 // --- version 2: recursive 100 // http://gauss.math.luc.edu/greicius/Math201/Fall2012/Lectures/linear-congruences.
48 ll gcdext(ll a, ll b, ll& x, ll& y) { article.pdf
49 if (a == 0) { 101
50 x = 0, y = 1; 102 // find smallest integer x (mod m) that solves the equation
51 return b; 103 // a * x = b (mod m)
52 } 104 bool lincongeq(ll a, ll b, ll m, ll& x) {
53 ll x1, y1; 105 assert (m > 0);
54 ll g = gcdext(b % a, a, x1, y1); 106 a = mod(a,m);
55 x = y1 - (b / a) * x1; 107 b = mod(b,m);
56 y = x1; 108 ll s, t;
57 return g; 109 ll g = gcdext(a,m,s,t);
58 } 110 if (b % g == 0) {
59 111 ll bb = b/g;
60 /* ====================== */ 112 ll mm = m/g;
61 /* multiplicative inverse */ 113 ll n = -s*bb/mm;
62 /* ====================== */ 114 x = s*bb + n*mm;
63 // find x such that a * x = 1 (mod m) 115 if (x < 0) x += mm;
64 // this is the same as finding x, y such that 116 // for debugging
65 // a * x + m * y = 1, which can be done with gcdext 117 // assert (0 <= x and x < m);
66 // and then returning x (mod m) 118 // assert (mod(a*x,m) == b);
67 ll mulinv(ll a, ll m) { 119 return true;
68 ll x, y; 120 }
69 if (gcdext(a, m, x, y) == 1) return mod(x, m); // make sure 0 <= x < m 121 return false;
70 return -1; // no inverse exists 122 }
71 }
72 8.2 Primality Test
73 /* =========================== */
74 /* Linear Diophantine Equation */ 1 // ===============
75 /* =========================== */ 2 // trial division
76 // recommended readings: 3 //=================
77 // http://gauss.math.luc.edu/greicius/Math201/Fall2012/Lectures/linear-diophantine. 4 // complexity: ~O( sqrt(x) )
article.pdf 5 bool isPrime(int x) {
78 // http://mathonline.wikidot.com/solutions-to-linear-diophantine-equations 6 for (int d = 2; d * d <= x; d++) {
79 7 if (x % d == 0)
80 // find integers x and y such that a * x + b * y = c 8 return false;
81 bool lindiopeq(ll a, ll b, ll c, ll& x, ll& y) { 9 }
82 if (a == 0 and b == 0) { // special case 10 return true;
83 if (c == 0) { x = y = 0; return true; } 11 }
84 return false; 12
85 } 13 // =======================================
86 // general case 14 // trial division with precomputed primes
87 ll s, t; 15 // =======================================
88 ll g = gcdext(a,b,s,t); 16 // complexity: ~O( sqrt(x)/log(sqrt(x)) )
89 if (c % g == 0) { 17 // + time of precomputing primes
90 x = s*(c/g), y = t*(c/g); 18 bool isPrime(int x, vector<int>& primes) {
91 return true; 19 for (int p : primes) {
92 } 20 if (p*p > x) break;
93 return false; 21 if (p % x == 0)
94 } 22 return false;
95 23 }
96 /* ========================== */ 24 return true;
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.3 Prime Factorization Página 29 de 40
25 } 79 }
26 80
27 81 bool MillerRabin(u64 n) { // returns true if n is prime, else returns false.
28 // ============= 82 if (n < 2)
29 // Miller-Rabin 83 return false;
30 // ============= 84
31 // complexity: O (k * log^3(n)) 85 int r = 0;
32 // references: 86 u64 d = n - 1;
33 // https://cp-algorithms.com/algebra/primality_tests.html 87 while ((d & 1) == 0) {
34 // https://en.wikipedia.org/wiki/Miller%E2%80%93Rabin_primality_test#Complexity 88 d >>= 1;
35 using u64 = uint64_t; 89 r++;
36 using u128 = __uint128_t; 90 }
37 91
38 u64 binpower(u64 base, u64 e, u64 mod) { 92 for (int a : {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}) {
39 u64 result = 1; 93 if (n == a)
40 base %= mod; 94 return true;
41 while (e) { 95 if (check_composite(n, a, d, r))
42 if (e & 1) 96 return false;
43 result = (u128)result * base % mod; 97 }
44 base = (u128)base * base % mod; 98 return true;
45 e >>= 1; 99 }
46 }
47 return result; 8.3 Prime Factorization
48 }
49 1 //=====================
50 bool check_composite(u64 n, u64 a, u64 d, int s) { 2 // Prime Factorization
51 u64 x = binpower(a, d, n); 3 //=====================
52 if (x == 1 || x == n - 1) 4 // reference: https://cp-algorithms.com/algebra/factorization.html
53 return false; 5
54 for (int r = 1; r < s; r++) { 6 // method 1: trial division
55 x = (u128)x * x % n; 7 // complexity: ~ O( sqrt(n) + log_2(n) )
56 if (x == n - 1) 8 vector<int> trial_division(int n) {
57 return false; 9 vector<int> factors;
58 } 10 for (int d = 2; d*d <= n; d++) {
59 return true; 11 while (n % d == 0) {
60 }; 12 factors.push_back(d);
61 13 if ((n /= d) == 1) return factors;
62 bool MillerRabin(u64 n) { // returns true if n is probably prime, else returns false. 14 }
63 if (n < 4) 15 }
64 return n == 2 || n == 3; 16 if (n > 1) factors.push_back(n);
65 17 return factors;
66 int s = 0; 18 }
67 u64 d = n - 1; 19
68 while ((d & 1) == 0) { 20 // method 2: precomputed primes
69 d >>= 1; 21 // complexity: ~ O( sqrt(n) / log(sqrt(n)) + log_2(n) )
70 s++; 22 // + time of precomputing primes
71 } 23 vector<int> trial_division_precomp(int n, vector<int>& primes) {
72 24 vector<int> factors;
73 for (int i = 0; i < iter; i++) { 25 for (int d : primes) {
74 int a = 2 + rand() % (n - 3); 26 if (d*d > n) break;
75 if (check_composite(n, a, d, s)) 27 while (n % d == 0) {
76 return false; 28 factors.push_back(d);
77 } 29 if ((n /= d) == 1) return factors;
78 return true; 30 }
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.4 Binary modular exponentiation Página 30 de 40
31 } 14 // choose(n,0) = choose(n,n) = 1
32 if (n > 1) factors.push_back(n); 15
33 return factors; 16 // 1.1) DP top-down
34 } 17 ll memo[MAXN+1][MAXN+1];
35 18 ll choose(int n, int k) {
36 //==================================== 19 ll& ans = memo[n][k];
37 // Prime Factorization of Factorials 20 if (ans != -1) return ans;
38 //==================================== 21 if (k == 0) return ans = 1;
39 // references: 22 if (n == k) return ans = 1;
40 // http://mathforum.org/library/drmath/view/67291.html 23 if (n < k) return ans = 0;
41 // https://janmr.com/blog/2010/10/prime-factors-of-factorial-numbers/ 24 return ans = (choose(n-1,k) + choose(n-1,k-1)) % MOD;
42 #define umap unordered_map 25 }
43 umap<int,int> factorial_prime_factorization(int n, vector<int>& primes) { 26
44 umap<int,int> prime2exp; 27 // 1.2) DP bottom-up
45 for (int p : primes) { 28 ll choose[MAXN+1][MAXN+1];
46 if (p > n) break; 29 rep(m,1,MAXN) {
47 int e = 0; 30 choose[m][0] = choose[m][m] = 1;
48 int tmp = n; 31 rep(k,1,m-1) choose[m][k] = (choose[m-1][k] + choose[m-1][k-1]) % MOD;
49 while ((tmp /= p) > 0) e += tmp; 32 }
50 if (e > 0) prime2exp[p] = e; 33
51 } 34 // -------------------------------------------------
52 return prime2exp; 35 // method 3: factorials and multiplicative inverse
53 } 36 // n! / (k! * (n-k)!) = n! * (k! * (n-k)!)^-1 (MOD N)
37 // we need to find the multiplicative inverse of (k! * (n-k)!) MOD N
8.4 Binary modular exponentiation 38
39 ll fac[MAXN+1];
40 ll choose_memo[MAXN+1][MAXN+1];
1 // compute a^b (mod m) 41 void init() {
2 int binary_exp(int a, int b, int m) { 42 fac[0] = 1;
3 a %= m; 43 rep(i,1,MAXN) fac[i] = (i * fac[i-1]) % MOD;
4 int res = 1; 44 memset(choose_memo, -1, sizeof choose_memo);
5 while (b > 0) { 45 }
6 if (b&1) res = (res * a) % m; 46 ll choose_mod(int n, int k) {
7 a = (a * a) % m; 47 if (choose_memo[n][k] != -1) return choose_memo[n][k];
8 b >>= 1; 48 return choose_memo[n][k] = mul(fac[n], mulinv(mul(fac[k], fac[n-k])));
9 } 49 }
10 return res;
11 } 8.6 Modular Multinomial Coefficient
8.5 Modular Binomial Coefficient 1 typedef long long int ll;
2 const ll MOD = 1000000007ll; // a prime number
1 #define rep(i,a,b) for(int i = a; i <= b; ++i) 3 const int MAXN = 1000;
2 typedef long long int ll; 4
3 const ll MOD = 1000000007ll; // a prime number 5 /* ===================== */
4 const int MAXN = 1000; 6 /* MODULAR MULTINOMIAL */
5 7 /* ===================== */
6 /* ================== */ 8
7 /* MODULAR BINOMIAL */ 9 ll memo[MAXN+1][MAXN+1];
8 /* ================== */ 10 ll choose(int n, int k) {
9 // choose_mod(n,k) = n! / (k! * (n-k)!) % MOD 11 ll& ans = memo[n][k];
10 12 if (ans != -1) return ans;
11 // --------------------- 13 if (k == 0) return ans = 1;
12 // method 1: DP 14 if (n == k) return ans = 1;
13 // choose(n,k) = (choose(n-1,k-1) + choose(n-1,k)) % MOD 15 if (n < k) return ans = 0;
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.7 Chinese Remainder Theorem (CRT) Página 31 de 40
16 return ans = (choose(n-1,k) + choose(n-1,k-1)) % MOD; 39 // sol = r1 + m1 * (r2-r1)/g * x’ (mod LCM(m1,m2))
17 } 40 // where x’ comes from
18 41 // m1 * x’ + m2 * y’ = g = GCD(m1,m2)
19 // reference: https://math.stackexchange.com/a/204209/503889 42 // where x’ and y’ are the values found by extended euclidean algorithm (gcdext)
20 ll multinomial(vector<int> ks) { 43 // Useful references:
21 int n = 0; 44 // https://codeforces.com/blog/entry/61290
22 ll ans = 1; 45 // https://forthright48.com/chinese-remainder-theorem-part-1-coprime-moduli
23 for (int k : ks) { 46 // https://forthright48.com/chinese-remainder-theorem-part-2-non-coprime-moduli
24 n += k; 47 // ** Note: this solution works if lcm(m1,m2) fits in a long long (64 bits)
25 ans = (ans * choose(n,k)) % MOD; 48 pair<ll,ll> CRT(ll r1, ll m1, ll r2, ll m2) {
26 } 49 ll g, x, y; g = gcdext(m1, m2, x, y);
27 return ans; 50 if ((r1 - r2) % g != 0) return {-1, -1}; // no solution
28 } 51 ll z = m2/g;
52 ll lcm = m1 * z;
8.7 Chinese Remainder Theorem (CRT) 53 ll sol = add(mod(r1, lcm), m1*mul(mod(x,z),mod((r2-r1)/g,z),z), lcm);
54 // for debugging (in case you think you might have bugs)
1 #include <bits/stdc++.h> 55 // assert (0 <= sol and sol < lcm);
2 typedef long long int ll; 56 // assert (sol % m1 == r1 % m1);
3 using namespace std; 57 // assert (sol % m2 == r2 % m2);
4 58 return {sol, lcm}; // solution + lcm(m1,m2)
5 inline ll mod(ll x, ll m) { return ((x %= m) < 0) ? x+m : x; } 59 }
6 inline ll mul(ll x, ll y, ll m) { return (x * y) % m; } 60

7 inline ll add(ll x, ll y, ll m) { return (x + y) % m; } 61 // ==============================================


8 62 // CRT for a system of N modular linear equations
9 // extended euclidean algorithm 63 // ==============================================
10 // finds g, x, y such that 64 // Args:
11 // a * x + b * y = g = GCD(a,b) 65 // r = array of remainders
12 ll gcdext(ll a, ll b, ll& x, ll& y) { 66 // m = array of moduli
13 ll r2, x2, y2, r1, x1, y1, r0, x0, y0, q; 67 // n = length of both arrays
14 r2 = a, x2 = 1, y2 = 0; 68 // Output:
15 r1 = b, x1 = 0, y1 = 1; 69 // a pair {X, lcm} where X is the solution of the sytemm
16 while (r1) { 69 // a pair {X, lcm} where X is the solution of the system
17 q = r2 / r1; 71 // and lcm = LCM(m[0], m[1], ..., m[n-1])
18 r0 = r2 % r1; 72 // if there is no solution, the output is {-1, -1}
19 x0 = x2 - q * x1; 73 // ** Note: this solution works if LCM(m[0],...,m[n-1]) fits in a long long (64 bits)
20 y0 = y2 - q * y1; 74 pair<ll,ll> CRT(ll* r, ll* m, int n) {
21 r2 = r1, x2 = x1, y2 = y1; 75 ll r1 = r[0], m1 = m[0];
22 r1 = r0, x1 = x0, y1 = y0; 76 rep(i,1,n-1) {
23 } 77 ll r2 = r[i], m2 = m[i];
24 ll g = r2; x = x2, y = y2; 78 ll g, x, y; g = gcdext(m1, m2, x, y);
25 if (g < 0) g = -g, x = -x, y = -y; // make sure g > 0 79 if ((r1 - r2) % g != 0) return {-1, -1}; // no solution
26 // for debugging (in case you think you might have bugs) 80 ll z = m2/g;
27 // assert (g == a * x + b * y); 81 ll lcm = m1 * z;
28 // assert (g == __gcd(abs(a),abs(b))); 82 ll sol = add(mod(r1, lcm), m1*mul(mod(x,z),mod((r2-r1)/g,z),z), lcm);
29 return g; 83 r1 = sol;
30 } 84 m1 = lcm;
31 85 }
32 // ============================================== 86 // for debugging (in case you think you might have bugs)
33 // CRT for a system of 2 modular linear equations 87 // assert (0 <= r1 and r1 < m1);
34 // ============================================== 88 // rep(i,0,n-1) assert (r1 % m[i] == r[i]);
35 // We want to find X such that: 89 return {r1, m1};
36 // 1) x = r1 (mod m1) 90 }
37 // 2) x = r2 (mod m2)
38 // The solution is given by:
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 8.8 Theorems Página 32 de 40
8.8 Theorems 43 }
44
8.8.1 Pick’s Theorem 45 // calculates the cross product (b - a) x (c - a)
46 // and returns orientation:
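$A = I + \frac{P}{2} - 1$, where $A$ is the area of a simple lattice polygon, $I$ the number of interior lattice points and $P$ the number of lattice points on its boundary.
47 // LEFT (1): c is to the left of ray (a -> b)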
48 // RIGHT (-1): c is to the right of ray (a -> b)
49 // COLLINEAR (0): c is collinear to ray (a -> b)
9 Geometry 50
51
// inspired by: https://www.geeksforgeeks.org/orientation-3-ordered-points/
int orientation(Point& a, Point& b, Point& c) {
52 ll tmp = cross(a,b,c);
9.1 Geometry 2D Utils 53 return tmp < 0 ? -1 : tmp == 0 ? 0 : 1; // sign
54 }
1 #include <bits/stdc++.h> 55

2 using namespace std; 56 /* ======================================================= */


3 typedef long long int ll; 57 /* Check if a segment is below another segment (wrt a ray) */
4 // ------------------------------- 58 /* ======================================================= */
5 const double PI = acos(-1); 59 // i.e: check if a segment is intersected by the ray first
6 const double EPS = 1e-8; 60 // Assumptions:
7 61 // 1) for each segment:
8 /* =========================== */ 62 // p1 should be LEFT (or COLLINEAR) and p2 should be RIGHT (or COLLINEAR) wrt ray
9 /* Example of Point Definition */ 63 // 2) segments do not intersect each other
10 /* =========================== */ 64 // 3) segments are not collinear to the ray
11 template<typename T> 65 // 4) the ray intersects all segments
12 struct Point<T> { // 2D 66 struct Segment { Point p1, p2;};
13 T x, y; 67 Segment segments[MAXN]; // array of line segments
14 bool operator==(const Point<T>& p) const { return x==p.x && y == p.y; } 68 bool is_si_below_sj(int i, int j) { // custom comparator based on cross product
15 Point<T> operator+(const Point<T>& p) const { return {x+p.x, y+p.y}; } 69 Segment& si = segments[i];
16 Point<T> operator-(const Point<T>& p) const { return {x-p.x, y-p.y}; } 70 Segment& sj = segments[j];
17 Point<T> operator*(T d) const { return {x*d, y*d}; } 71 return (si.p1.x >= sj.p1.x) ?
18 Point<double> cast() { return {(double)x, (double)y}; } 72 cross(si.p1, sj.p2, sj.p1) > 0:
19 T norm2() { return x*x + y*y; } 73 cross(sj.p1, si.p1, si.p2) > 0;
20 double norm() { return sqrt(norm2()); } 74 }
21 T dot(const Point<T>& p) const { return x*p.x + y*p.y; } 75 // this can be used to keep a set of segments ordered by order of intersection
22 T cross(const Point<T>& p) const { return x*p.y - y*p.x; } 76 // by the ray, for example, active segments during a SWEEP LINE
23 double angle() { 77 set<int, bool(*)(int,int)> active_segments(is_si_below_sj); // ordered set
24 double angle = atan2(y, x); 78

25 if (angle < 0) angle += 2 * PI; 79 /* ======================= */


26 return angle; 80 /* Rectangle Intersection */
27 } 81 /* ======================= */
28 Point<double> unit() { 82 bool do_rectangles_intersect(Point& dl1, Point& ur1, Point& dl2, Point& ur2) {
29 double d = norm(); 83 return max(dl1.x, dl2.x) <= min(ur1.x, ur2.x) && max(dl1.y, dl2.y) <= min(ur1.y, ur2.
30 return {x/d,y/d}; y);
31 } 84 }
32 }; 85

33 86 /* ========================= */
34 /* ========================================================= */ 87 /* Line Segment Intersection */
35 /* Cross Product -> orientation of point with respect to ray */ 88 /* ========================= */
36 /* ========================================================= */ 89 // returns whether segments p1q1 and p2q2 intersect, inspired by:
37 // cross product (b - a) x (c - a) 90 // https://www.geeksforgeeks.org/check-if-two-given-line-segments-intersect/
38 ll cross(Point& a, Point& b, Point& c) { 91 bool do_segments_intersect(Point& p1, Point& q1, Point& p2, Point& q2) {
39 ll dx0 = b.x - a.x, dy0 = b.y - a.y; 92 int o11 = orientation(p1, q1, p2);
40 ll dx1 = c.x - a.x, dy1 = c.y - a.y; 93 int o12 = orientation(p1, q1, q2);
41 return dx0 * dy1 - dx1 * dy0; 94 int o21 = orientation(p2, q2, p1);
42 // return (b - a).cross(c - a); // alternatively, using struct function 95 int o22 = orientation(p2, q2, q1);
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.2 Geometry 3D Utils Página 33 de 40
96 if (o11 != o12 and o21 != o22) // general case -> non-collinear intersection 150 }
97 return true; 151
98 if (o11 == o12 and o11 == 0) { // particular case -> segments are collinear 152 /* ==================================== */
99 Point dl1 = {min(p1.x, q1.x), min(p1.y, q1.y)}; 153 /* Point - Line / Line Segment distance */
100 Point ur1 = {max(p1.x, q1.x), max(p1.y, q1.y)}; 154 /* ==================================== */
101 Point dl2 = {min(p2.x, q2.x), min(p2.y, q2.y)}; 155 // reference: https://stackoverflow.com/questions/849211/shortest-distance-between-a-
102 Point ur2 = {max(p2.x, q2.x), max(p2.y, q2.y)}; point-and-a-line-segment
103 return do_rectangles_intersect(dl1, ur1, dl2, ur2); 156
104 } 157 // get distance between p and projection of p on line <- a - b ->
105 return false; 158 double point_line_dist(Point& p, Point& a, Point& b) {
106 } 159 Point d = b-a;
107 160 double t = d.dot(p-a) / d.norm2();
108 /* ======================== */ 161 return (a + d * t - p).norm();
109 /* Line - Line Intersection */ 162 }
110 /* ======================== */ 163
111 ll det(Point& a, Point& b) { return a.x * b.y - a.y * b.x; } 164 // get distance between p and truncated projection of p on segment a -> b
112 // return whether straight lines <-a1-b1-> and <-a2-b2-> intersect each other 165 double point_segment_dist(Point& p, Point& a, Point& b) {
113 // if they intersect, we assign values to t1 and t2 such that 166 if (a==b) return (p-a).norm(); // segment is a single point
114 // a1 + (b1 - a1) * t1 == a2 + (b2 - a2) * t2 167 Point d = b-a; // direction
115 bool find_line_line_intersection(Point& a1, Point& b1, Point& a2, Point& b2, 168 double t = d.dot(p-a) / d.norm2();
116 double& t1, double& t2) { 169 if (t <= 0) return (p-a).norm(); // truncate left
117 Point d1 = b1 - a1; 170 if (t >= 1) return (p-b).norm(); // truncate right
118 Point d2 = b2 - a2; 171 return (a + d * t - p).norm();
119 Point _d2 = d2 * -1; 172 }
120 ll detA = det(d1, _d2); 173
121 if (detA == 0) return false; // parallel lines 174 /* ====================================== */
122 Point b = a2 - a1; 175 /* Straight Line Hashing (integer coords) */
123 t1 = (double)det(b, _d2)/(double)detA; 176 /* ====================================== */
124 t2 = (double)det(d1, b)/(double)detA; 177 // task: given 2 points p1, p2 with integer coordinates, output a unique
125 return true; 178 // representation {a,b,c} such that a*x + b*y + c = 0 is the equation
126 } 179 // of the straight line defined by p1, p2. This representation must be
127 180 // unique for each straight line, no matter which p1 and p2 are sampled.
128 181 struct Point { ll x, y; };
129 /* =================== */ 182 tuple<ll,ll,ll> hash_line(const Point& p1, const Point& p2) {
130 /* Circle Intersection */ 183 ll a = p1.y - p2.y;
131 /* =================== */ 184 ll b = p2.x - p1.x;
132 struct Circle { double x, y, r; } 185 ll c = p1.x * (p2.y - p1.y) - p1.y * (p2.x - p1.x);
133 bool is_fully_outside(double r1, double r2, double d_sqr) { 186 ll sgn = (a < 0 or (a == 0 and b < 0)) ? -1 : 1;
134 double tmp = r1 + r2; 187 ll g = __gcd(abs(a), __gcd(abs(b), abs(c))) * sgn;
135 return d_sqr > tmp * tmp; 188 return make_tuple(a/g, b/g, c/g);
136 } 189 }
137 bool is_fully_inside(double r1, double r2, double d_sqr) { 190 // task: given 2 points p1 and p2 with integer coords, return a pair {a, b}
138 if (r1 > r2) return false; 191 // which is unique for all straight lines having the same slope as
139 double tmp = r2 - r1; 192 // the straight line that goes through p1 and p2
140 return d_sqr < tmp * tmp; 193 pair<ll,ll> hash_slope(const Point& p1, const Point& p2) {
141 } 194 ll dx = p2.x - p1.x;
142 bool do_circles_intersect(Circle& c1, Circle& c2) { 195 ll dy = p2.y - p1.y;
143 double dx = c1.x - c2.x; 196 ll sgn = (dx < 0 or (dx == 0 and dy < 0)) ? -1 : 1;
144 double dy = c1.y - c2.y; 197 ll g = __gcd(abs(dx), abs(dy)) * sgn;
145 double d_sqr = dx * dx + dy * dy; 198 return {dx/g, dy/g};
146 if (is_fully_inside(c1.r, c2.r, d_sqr)) return false; 199 }
147 if (is_fully_inside(c2.r, c1.r, d_sqr)) return false;
148 if (is_fully_outside(c1.r, c2.r, d_sqr)) return false; 9.2 Geometry 3D Utils
149 return true;
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.3 Polygon Algorithms Página 34 de 40
1 /* =========================== */ 9.3 Polygon Algorithms
2 /* Example of Point Definition */
3 /* =========================== */ 1 #include <bits/stdc++.h>
4 struct Point { // 3D 2 #define rep(i,a,b) for(int i = a; i <= b; ++i)
5 double x, y, z; 3
6 bool operator==(const Point& p) const { return x==p.x and y==p.y and z==p.z; } 4 // ----- Utils ------
7 Point operator+(const Point& p) const { return {x+p.x, y+p.y, z+p.z}; } 5 const double EPS = 1e-8;
8 Point operator-(const Point& p) const { return {x-p.x, y-p.y, z-p.z}; } 6 struct Point {
9 Point operator*(double d) const { return {x*d, y*d, z*d}; } 7 ll x, y;
10 double norm2() { return x*x + y*y + z*z; } 8 Point operator-(const Point& p) const { return {x - p.x, y - p.y}; }
11 double norm() { return sqrt(norm2()); } 9 Point operator+(const Point& p) const { return {x + p.x, y + p.y}; }
12 double dot(const Point& p) { return x*p.x + y*p.y + z*p.z; } 10 ll cross(const Point& p) const { return x*p.y - y*p.x; }
13 Point cross(Point& p) { 11 ll dot(const Point& p) const { return x*p.x + y*p.y; }
14 return { 12 };
15 y*p.z - z*p.y, 13 ll cross(Point& a, Point& b, Point& c) {
16 z*p.x - x*p.z, 14 ll dx0 = b.x - a.x, dy0 = b.y - a.y;
17 x*p.y - y*p.x 15 ll dx1 = c.x - a.x, dy1 = c.y - a.y;
18 }; 16 return dx0 * dy1 - dx1 * dy0;
19 } 17 }
20 Point unit() { 18 int orientation(Point& a, Point& b, Point& c) {
21 double d = norm(); 19 ll tmp = cross(a,b,c);
22 return {x/d, y/d, z/d}; 20 return tmp < 0 ? -1 : tmp == 0 ? 0 : 1; // sign
23 } 21 }
24 static Point from_sphere_coords(double r, double u, double v) { 22
25 return { 23 /* ======================================== */
26 r*cos(u)*cos(v), 24 /* Area of 2D non self intersecting Polygon */
27 r*cos(u)*sin(v), 25 /* ======================================== */
28 r*sin(u) 26 //based on Green’s Theorem:
29 }; 27 //http://math.blogoverflow.com/2014/06/04/greens-theorem-and-area-of-polygons/
30 } 28 // ** points must be sorted ccw or cw
31 }; 29 double polygon_area(vector<Point>& pol) {
32 // compute angle (0 <= angle <= PI) between vectors a and b 30 int n = pol.size()
33 // ** for better performance, the norms can be precomputed 31 double area = 0;
34 // or norms can be omitted altogether if a and b are unit vectors 35 return area * 0.5; // use abs(area * 0.5) if points are cw
35 double angle_between(Point& a, Point& b) { 33 area += (pol[i].x + pol[j].x) * (pol[j].y - pol[i].y);
36 return acos(a.dot(b)/(a.norm() * b.norm())); 34 }
37 } 35 return area * 0.5; // use abs(area * 0.5) if points are cw
38 // check if point p belongs to the sphere arc from a to b. 36 }
39 // ** this assumes that a and b are points on a sphere centered at (0,0,0), 37
40 // and the sphere arc from a to b is the shortest path on the sphere connecting them 38 /* ================ */
41 const double EPS = 1e-8; 39 /* Point in Polygon */
42 bool point_in_arc(Point& a, Point& b, Point& p) { 40 /* ================ */
43 double angle_ab = angle_between(a, b); 41
44 double angle_ap = angle_between(a, p); 42 // -----------------------------
45 if (angle_ap > angle_ab) return false; 43 // 1) Convex Polygons
46 Point n = a.cross(b); 44
47 Point c_hat = n.cross(a).unit(); 45 // 1.1) O(N) method
48 double R = a.norm(); 46 bool point_in_convexhull(Point& p, vector<Point>& ch) {
49 Point a_hat = a * (1./R); 47 int n = ch.size();
50 Point a_rotated = (a_hat * cos(angle_ap) + c_hat * sin(angle_ap)) * R; 48 for (int i=n-1, j=0; j<n; i=j++) {
51 return (p - a_rotated).norm() < EPS; 49 if (cross(ch[i], ch[j], p) < 0) return false;
52 } 50 }
51 return true;
52 }
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.3 Polygon Algorithms Página 35 de 40
53 107 }
54 // 1.2) O(log N) method 108
55 bool point_in_triangle(Point& a, Point& b, Point& c, Point& x) { 109 /* ================================= */
56 return cross(a, b, x) >= 0 and cross(b, c, x) >= 0 and cross(c, a, x) >= 0; 110 /* Find extreme point in Convex Hull */
57 } 111 /* ================================= */
58 bool point_in_convexhull(Point& p, vector<Point>& ch) { 112 // given two points a and b defining a vector a -> b, and given a convex hull with points
59 if (cross(ch[0], ch[1], p) < 0) return false; 113 // sorted ccw, find the index in the convex hull of the extreme point.
60 if (cross(ch[0], ch.back(), p) > 0) return false; 114 // ** the extreme point is the "leftmost" point in the convex hull with respect to the
61 int l = 2, r = ch.size() - 1; 115 // vector a -> b (if there are 2 leftmost points, pick anyone)
62 while (l < r) { 116 int extreme_point_index(Point &a, Point &b, vector<Point> &ch) {
63 int m = (l+r) >> 1; 117 int n = ch.size();
64 if (cross(ch[0], ch[m], p) <= 0) r = m; 118 Point v = b - a;
65 else l = m+1; 119 v = Point(-v.y, v.x); // to find the leftmost point
66 } 120 if (v.dot(ch[0]) >= v.dot(ch[1]) && v.dot(ch[0]) >= v.dot(ch[n - 1])) {
67 return point_in_triangle(ch[0], ch[l-1], ch[l], p); 121 return 0;
68 } 122 }
69 123 int l = 0, r = n;
70 // ---------------------------------------------- 124 while (true) {
71 // 2) General methods: for complex / simple polygons 125 int m = (l + r) / 2;
72 126 if (v.dot(ch[m]) >= v.dot(ch[m + 1]) && v.dot(ch[m]) >= v.dot(ch[m - 1])) {
73 /* Nonzero Rule (winding number) */ 127 return m;
74 bool inPolygon_nonzero(Point p, vector<Point>& pts) { 128 }
75 int wn = 0; // winding number 129 int d1 = v.dot(ch[l + 1] - ch[l]) > 0;
76 Point prev = pts.back(); 130 int d2 = v.dot(ch[m + 1] - ch[m]) > 0;
77 rep (i, 0, (int)pts.size() - 1) { 131 int a = v.dot(ch[m]) > v.dot(ch[l]);
78 Point curr = pts[i]; 132 if (d1) { if (d2 && a) l = m; else r = m; }
79 if (prev.y <= p.y) { 133 else { if (!d2 && a) r = m; else l = m; }
80 if (p.y < curr.y && cross(prev, curr, p) > 0) 134 }
81 ++ wn; // upward & left 135 }
82 } else { 136
83 if (p.y >= curr.y && cross(prev, curr, p) < 0) 137 /* ========================================= */
84 -- wn; // downward & right 138 /* Line Segment and Convex Hull Intersection */
85 } 139 /* ========================================= */
86 prev = curr; 140 pair<int,int> find_crossing_edge(Point& a, Point& b, vector<Point>& ch, int start, int
87 } end) {
88 return wn != 0; // non-zero :) 141 int o_ref = orientation(a, b, ch[start]);
89 } 142 int n = ch.size();
90 143 int l = start, r = start + ((end - start + n) % n);
91 /* EvenOdd Rule (ray casting - crossing number) */ 144 while (l < r) {
92 bool inPolygon_evenodd(Point p, vector<Point>& pts) { 145 int m = (l+r) >> 1;
93 int cn = 0; // crossing number 146 if (orientation(a, b, ch[m % n]) != o_ref) r = m;
94 Point prev = pts.back(); 147 else l = m+1;
95 rep (i, 0, (int)pts.size() - 1) { 148 }
96 Point curr = pts[i]; 149 return {(l-1+n) % n, l%n};
97 if (((prev.y <= p.y) && (p.y < curr.y)) // upward crossing 150 }
98 || ((prev.y > p.y) && (p.y >= curr.y))) { // downward crossing 151 void find_segment_convexhull_intersection(Point& a, Point& b, vector<Point>& ch) {
99 // check intersect’s x-coordinate to the right of p 152 // find rightmost and leftmost points in convex hull wrt vector a -> b
100 double t = (p.y - prev.y) / (curr.y - prev.y); 153 int i1 = extreme_point_index(a, b, ch);
101 if (p.x < prev.x + t * (curr.x - prev.x)) 154 int i2 = extreme_point_index(b, a, ch);
102 ++cn; 155 // make sure the extremes are not to the same side
103 } 156 int o1 = orientation(a, b, ch[i1]);
104 prev = curr; 157 int o2 = orientation(a, b, ch[i2]);
105 } 158 if (o1 == o2) return; // all points are to the right (left) of a -> b (no
106 return (cn & 1); // odd -> in, even -> out intersection)
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.4 Trigonometry Página 36 de 40
159 // find 2 edges in the convex hull intersected by the straight line <- a - b -> 2 using namespace std;
160 pair<int,int> e1 = find_crossing_edge(a, b, ch, i1, i2); // binsearch from i1 to i2 3 #define rep(i,a,b) for(int i = a; i <= b; ++i)
ccw 4 #define invrep(i,b,a) for(int i = b; i >= a; --i)
161 pair<int,int> e2 = find_crossing_edge(a, b, ch, i2, i1); // binsearch from i2 to i1 5 typedef long long int ll;
ccw 6 // ----------------------------------------------
162 // find exact intersection points 7 // Convex Hull: Andrew’s Montone Chain Algorithm
163 double r1, s1, r2, s2; 8 // ----------------------------------------------
164 assert (find_line_line_intersection(a, b, ch[e1.first], ch[e1.second], r1, s1)); 9 struct Point {
165 assert (find_line_line_intersection(a, b, ch[e2.first], ch[e2.second], r2, s2)); 10 ll x, y;
166 // make sure intersections are significant and within line segment range 11 bool operator<(const Point& p) const {
167 if (r1 > 1.0 - EPS and r2 > 1.0 - EPS) return; // intersections above line segment 12 return x < p.x || (x == p.x && y < p.y);
168 if (r1 < EPS and r2 < EPS) return; // intersections below line segment 13 }
169 if (abs(r1 - r2) < EPS) return; // insignificant intersection in a single point 14 };
170 if (r1 > r2) swap(r1, r2), swap(e1, e2), swap(s1, s2); // make sure r1 < r2 15
171 // ** HERE DO WHATEVER YOU WANT WITH INTERSECTIONS FOUND 16 ll cross(Point& a, Point& b, Point& c) {
172 // 1) a + (b-a) * max(r1, 0) <--- first point of segment a -> b inside convex hull 17 ll dx0 = b.x - a.x, dy0 = b.y - a.y;
173 // if r1 < 0, point a is strictly inside the convex hull 18 ll dx1 = c.x - a.x, dy1 = c.y - a.y;
174 // 2) a + (b-a) * min(r2, 1) <--- last point of segment a -> b inside convex hull 19 return dx0 * dy1 - dx1 * dy0;
175 // if r2 > 1, point b is strictly inside the convex hull 20 }
176 cout << "(significant) intersection detected!\n"; 21
177 } 22 vector<Point> upper_hull(vector<Point>& P) {
23 // sort points lexicographically
9.4 Trigonometry 24 int n = P.size(), k = 0;
25 sort(P.begin(), P.end());
1 /* ================= */ 26 // build upper hull
2 /* Angle of a vector */ 27 vector<Point> uh(n);
3 /* ================= */ 28 invrep (i, n-1, 0) {
4 const double PI = acos(-1); 29 while (k >= 2 && cross(uh[k-2], uh[k-1], P[i]) <= 0) k--;
5 const double _2PI = 2 * PI; 30 uh[k++] = P[i];
6 31 }
7 double correct_angle(double angle) { // to ensure 0 <= angle <= 2PI 32 uh.resize(k);
8 while (angle < 0) angle += _2PI; 33 return uh;
9 while (angle > _2PI) angle -= _2PI; 34 }
10 return angle; 35

11 } 36 vector<Point> lower_hull(vector<Point>& P) {
12 double angle(double x, double y) { 37 // sort points lexicographically
13 // atan2 by itself returns an angle in range [-PI, PI] 38 int n = P.size(), k = 0;
14 // no need to "correct it" if that range is ok for you 39 sort(P.begin(), P.end());
15 return correct_angle(atan2(y, x)); 40 // collect lower hull
16 } 41 vector<Point> lh(n);
17 42 rep (i, 0, n-1) {
18 /* ============== */ 43 while (k >= 2 && cross(lh[k-2], lh[k-1], P[i]) <= 0) k--;
19 /* Cosine Theorem */ 44 lh[k++] = P[i];
20 /* ============== */ 45 }
21 // Given triangle with sides a, b and c, returns the angle opposed to side a. 46 lh.resize(k);
22 // a^2 = b^2 + c^2 - 2*b*c*cos(alpha) 47 return lh;
23 // => alpha = acos((b^2 + c^2 - a^2) /(2*b*c)) 48 }
24 double get_angle(double a, double b, double c) { 49

25 return acos((b*b + c*c - a*a)/(2*b*c)); 50 vector<Point> convex_hull(vector<Point>& P) {


26 } 51 int n = P.size(), k = 0;
52 // set initial capacity
9.5 Convex Hull 53 vector<Point> H(2*n);
54 // sort points lexicographically
1 #include <bits/stdc++.h> 55 sort(P.begin(), P.end());
Pablo Messina’s ICPC Notebook 10 STRINGS - 9.6 Green’s Theorem Página 37 de 40
56 // build lower hull 1 // ========================================================
57 for (int i = 0; i < n; ++i) { 2 // Suffix Array Construction : Prefix Doubling + Radix Sort
58 while (k >= 2 && cross(H[k-2], H[k-1], P[i]) <= 0) k--; 3 // ========================================================
59 H[k++] = P[i]; 4 // Complexity: O(N*log(N))
60 } 5 // references: https://www.cs.helsinki.fi/u/tpkarkka/opetus/10s/spa/lecture11.pdf
61 // build upper hull 6 // https://youtu.be/_TUeAdu-U_k
62 for (int i = n-2, t = k+1; i >= 0; i--) { 7 #include <bits/stdc++.h>
63 while (k >= t && cross(H[k-2], H[k-1], P[i]) <= 0) k--; 8 #define rep(i,a,b) for(int i = a; i <= b; ++i)
64 H[k++] = P[i]; 9 #define invrep(i,b,a) for(int i = b; i >= a; --i)
65 } 10 using namespace std;
66 // remove extra space 11
67 H.resize(k-1); 12 // - the input to the suffix array must be a vector of ints
68 return H; 13 // - all values in the vector must be >= 1 (because 0 is used
69 } 14 // as a special value internally)
15 struct SuffixArray {
9.6 Green’s Theorem 16 int n;
17 vector<int> counts, rank, rank_tmp, sa, sa_tmp;
1 #include <bits/stdc++.h> 18 vector<int> lcp; // optional: only if lcp is needed
2 using namespace std; 19 inline int get_rank(int i) { return i < n ? rank[i]: 0; }
3 typedef long long int ll; 20 void counting_sort(int maxv, int k) {
4
21 counts.assign(maxv+1, 0);
5 struct Point { double x, y; }; 22 rep(i,0,n-1) counts[get_rank(i+k)]++;
6
23 rep(i,1,maxv) counts[i] += counts[i-1];
7 // Computes the line integral of the vector field <0,x> over the arc of the circle with 24 invrep(i,n-1,0) sa_tmp[--counts[get_rank(sa[i]+k)]] = sa[i];
radius ’r’ 25 sa.swap(sa_tmp);
8 // and x-coordinate ’x’ from angle ’a’ to angle ’b’. The ’y’ goes away in the integral so 26 }
it 27 void compute_sa(vector<int>& s) {
9 // it doesn’t matter. 28 rep(i,0,n-1) sa[i] = i;
10 // This can be done using a parameterization of the arc in polar coordinates: 29 sort(sa.begin(), sa.end(), [&s](int i, int j) { return s[i] < s[j]; });
11 // x(t) = x + r * cos(t) 30 int r = rank[sa[0]] = 1;
12 // y(t) = y + r * sin(t) 31 rep(i,1,n-1) rank[sa[i]] = (s[sa[i]] != s[sa[i-1]]) ? ++r : r;
13 // a <= t <= b 32 for (int h=1; h < n and r < n; h <<= 1) {
14 // The final integral can be seen here: 33 counting_sort(r, h);
15 // https://www.wolframalpha.com/input/?i=integral((x+%2B+r*cos(t))+*+derivative(y+%2B+r* 34 counting_sort(r, 0);
sin(t))+*+dt,+t%3Da..b) 35 r = rank_tmp[sa[0]] = 1;
16 double arc_integral(double x, double r, double a, double b) { 36 rep(i,1,n-1) {
17 return x * r * (sin(b) - sin(a)) + r * r * 0.5 * (0.5 * (sin(2*b) - sin(2*a)) + b - a 37 if (rank[sa[i]] != rank[sa[i-1]] or
); 38 get_rank(sa[i]+h) != get_rank(sa[i-1]+h)) ++r;
18 } 39 rank_tmp[sa[i]] = r;
19
40 }
20 // Computes the line integral of the vector field <0, x> over the directed segment a -> b 41 rank.swap(rank_tmp);
21 // This can be done using the parameterization: 42 }
22 // x(t) = a.x + (b.x - a.x) * t 43 }
23 // y(t) = a.y + (b.y - a.y) * t 44 // LCP construction in O(N) using Kasai’s algorithm
24 // 0 <= t <= 1 45 // reference: https://codeforces.com/blog/entry/12796?#comment-175287
25 double segment_integral(Point& a, Point& b) { 46 void compute_lcp(vector<int>& s) { // optional: only if lcp array is needed
26 return 0.5 * (a.x + b.x) * (b.y - a.y); 47 lcp.assign(n, 0);
27 } 48 int k = 0;
49 rep(i,0,n-1) {
50 int r = rank[i]-1;
10 Strings 51 if (r == n-1) { k = 0; continue; }
52 int j = sa[r+1];
10.1 Suffix Array 53 while (i+k<n and j+k<n and s[i+k] == s[j+k]) k++;
54 lcp[r] = k;
Pablo Messina’s ICPC Notebook 10 STRINGS - 10.2 Trie Página 38 de 40
55 if (k) k--; 30 int size() { return g.size(); }
56 } 31 };
57 } 32
58 SuffixArray(vector<int>& s) { 33 // example of usage
59 n = s.size(); 34 int main() {
60 rank.resize(n); rank_tmp.resize(n); 35 Trie trie(26);
61 sa.resize(n); sa_tmp.resize(n); 36 for (string s : {"hell", "hello", "hellyeah", "helpzzzz", "abcdefg"}) {
62 compute_sa(s); 37 cout << "inserting " << s << ’\n’;
63 compute_lcp(s); // optional: only if lcp array is needed 38 trie.insert(s);
64 } 39 cout << "\ttrie size = " << trie.size() << ’\n’;
65 }; 40 }
66 41 return 0;
67 int main() { // how to use 42 }
68 string test; cin >> test;
69 vector<int> s; 10.3 Rolling Hashing
70 for (char c : test) s.push_back(c);
71 SuffixArray sa(s); 1 #include <bits/stdc++.h>
72 for (int i : sa.sa) cout << i << ":\t" << test.substr(i) << ’\n’; 2 using namespace std;
73 rep (i, 0, s.size() - 1) { 3 #define rep(i,a,b) for(int i = a; i <= b; ++i)
74 printf("LCP between %d and %d is %d\n", i, i+1, sa.lcp[i]); 4 typedef unsigned long long int ull;
75 } 5 const int MAXLEN = 1e6;
76 } 6
7 // -----------------------------
10.2 Trie 8 // Rolling Hashing: single hash
9
1 #include <bits/stdc++.h> 10 struct RH_single { // rolling hashing
2 using namespace std; 11 static const ull B = 131; // base
3 struct Trie { 12 static const ull P = 1e9 + 21; // prime
4 vector<vector<int>> g; 13 static ull pow[MAXLEN];
5 vector<int> count; 14 static ull add(ull x, ull y) { return (x + y) % P; }
6 int vocab; 15 static ull mul(ull x, ull y) { return (x * y) % P; }
7 Trie(int vocab, int maxdepth = 10000) : vocab(vocab) { 16 static void init() {
8 g.reserve(maxdepth); 17 pow[0] = 1;
9 g.emplace_back(vocab, -1); 18 rep(i, 1, MAXLEN-1) pow[i] = mul(B, pow[i-1]);
10 count.reserve(maxdepth); 19 }
11 count.push_back(0); 20 vector<ull> h;
12 } 21 int len;
13 int move_to(int u, int c) { 22 void init(vector<int>& s) {
14 assert (0 <= c and c < vocab); 23 for (int x : s) assert (x >= 1); // DEBUGGING
15 int& v = g[u][c]; 24 len = s.size();
16 if (v == -1) { 25 h.resize(len);
17 v = g.size(); 26 h[0] = s[0];
18 g.emplace_back(vocab, -1); 27 rep(i,1,len-1) h[i] = add(mul(h[i-1], B), s[i]);
19 count.push_back(0); 28 }
20 } 29 RH_single(vector<int>& s) { init(s); } // from vector<int>
21 count[v]++; 30 RH_single(string& s, char ref) { // from string
22 return v; 31 vector<int> tmp; for(char c : s) tmp.push_back(c - ref + 1);
23 } 32 init(tmp);
24 void insert(const string& s, char ref = ’a’) { // insert string 33 }
25 int u = 0; for (char c : s) u = move_to(u, c - ref); 34 ull hash(int i, int j) {
26 } 35 if (i == 0) return h[j];
27 void insert(vector<int>& s) { // insert vector<int> 36 return add(h[j], P - mul(h[i-1], pow[j - i + 1]));
28 int u = 0; for (int c : s) u = move_to(u, c); 37 }
29 } 38 ull hash() { return h[len-1]; }
Pablo Messina’s ICPC Notebook 10 STRINGS - 10.4 KMP (Knuth Morris Pratt) Página 39 de 40
39 }; 93 ull h1 = rh.hash(l1, r1);
40 ull RH_single::pow[MAXLEN]; // necessary for the code to compile 94 ull h2 = rh.hash(l2, r2);
41 95 string s1 = s.substr(l1, r1-l1+1);
42 // -------------------------------------------- 96 string s2 = s.substr(l2, r2-l2+1);
43 // Rolling Hashing: double hash (extra safety) 97 printf("Strings s1=%s and s2=%s are %s\n", s1.c_str(), s2.c_str(),
44 98 h1 == h2 ? "EQUAL" : "DISTINCT");
45 struct RH_double { // rolling hashing 99 }
46 static const ull B = 131; // base 100 }
47 static const ull P[2]; // primes
48 static ull pow[2][MAXLEN]; 10.4 KMP (Knuth Morris Pratt)
49 static ull add(ull x, ull y, int a) { return (x + y) % P[a]; }
50 static ull mul(ull x, ull y, int a) { return (x * y) % P[a]; } 1 #include <bits/stdc++.h>
51 static void init(int a) { 2 using namespace std;
52 pow[a][0] = 1; 3 #define rep(i,a,b) for(int i=a; i<=b; ++i)
53 rep(i, 1, MAXLEN-1) pow[a][i] = mul(B, pow[a][i-1], a); 4
54 } 5 // Build longest proper prefix/suffix array (lps) for pattern
55 static void init() { init(0); init(1); } 6 // lps[i] = length of the longest proper prefix which is also suffix in pattern[0 .. i]
56 vector<ull> h[2]; 7 void init_lps(string& pattern, int lps[]) {
57 int len; 8 int n = pattern.size();
58 void init(vector<int>& s) { 9 lps[0] = 0; // base case: no proper prefix/suffix for pattern[0 .. 0] (length 1)
59 for (int x : s) assert (x >= 1); // DEBUGGING 10 rep(j, 1, n-1) { // for each pattern[0 .. j]
60 len = s.size(); 11 int i = lps[j-1]; // i points to the char next to lps of previous iteration
61 rep(a,0,1) { 12 while (pattern[i] != pattern[j] and i > 0) i = lps[i-1];
62 h[a].resize(len); 13 lps[j] = pattern[i] == pattern[j] ? i+1 : 0;
63 h[a][0] = s[0]; 14 }
64 rep(i,1,len-1) h[a][i] = add(mul(h[a][i-1], B, a), s[i], a); 15 }
65 } 16
66 } 17 // Count number of matches of pattern string in target string using KMP algorithm
67 RH_double(vector<int>& s) { init(s); } // from vector<int> 18 int count_matches(string& pattern, string& target) {
68 RH_double(string& s, char ref) { // from string 19 int n = pattern.size(), m = target.size();
69 vector<int> tmp; for (char c : s) tmp.push_back(c - ref + 1); 20 int lps[n];
70 init(tmp); 21 init_lps(pattern, lps); // build lps array
71 } 22 int matches = 0;
72 ull hash(int i, int j, int a) { 23 int i = 0; // i tracks current char in pattern to compare
73 if (i == 0) return h[a][j]; 24 rep(j, 0, m-1) { // j tracks each char in target to compare
74 return add(h[a][j], P[a] - mul(h[a][i-1], pow[a][j-i+1], a), a); 25 // try to keep prefix before i as long as possible while ensuring i matches j
75 } 26 while (pattern[i] != target[j] and i > 0) i = lps[i-1];
76 ull hash(int i, int j) { 27 if (pattern[i] == target[j]) {
77 return hash(i,j,0) << 32 | hash(i,j,1); 28 if (++i == n) { // we matched the whole pattern
78 } 29 i = lps[n-1]; // shift the pattern so that the longest proper prefix/
79 ull hash() { return hash(0, len-1); } suffix pair is aligned
80 }; 30 matches++;
81 // these lines are necessary for the code to compile 31 }
82 const ull RH_double::P[2] = {(ull)1e9+21, (ull)1e9+9}; 32 }
83 ull RH_double::pow[2][MAXLEN]; 33 }
84 34 return matches;
85 // ----- usage & testing 35 }
86 int main() { 36
87 RH_double::init(); 37 int main() { // usage
88 while (true) { 38 string target, pattern;
89 string s; cin >> s; 39 while (true) {
90 int l1, r1, l2, r2; cin >> l1 >> r1 >> l2 >> r2; 40 cin >> target >> pattern;
91 char cmin = *min_element(s.begin(), s.end()); 41 cout << count_matches(pattern, target) << " matches\n";
92 RH_double rh(s, cmin); 42 }
Pablo Messina’s ICPC Notebook 10 STRINGS - 10.5 Shortest Repeating Cycle Página 40 de 40
43 return 0;
44 }

10.5 Shortest Repeating Cycle


1 #include <bits/stdc++.h>
2 using namespace std;
3
// Returns the length of the shortest prefix of `seq` that, repeated a whole
// number of times, reproduces `seq` exactly. If no proper prefix does so, the
// full length of `seq` is returned. An empty string yields 0.
//
// Uses the KMP "longest proper prefix that is also a suffix" (lps) table:
// with p = n - lps[n-1], `seq` has period p, and it is a whole-number
// repetition of its first p characters exactly when p divides n.
int shortest_repeating_cycle(std::string& seq) {
    const int n = static_cast<int>(seq.size());
    // Guard: the lps table below needs at least one slot, and lps[n-1] is read
    // at the end, so an empty input must be handled before touching the table.
    if (n == 0) return 0;

    // Heap-backed table instead of a variable-length array: VLAs are not
    // standard C++ and put the whole table on the stack.
    std::vector<int> lps(n, 0);
    for (int j = 1, i = 0; j < n; ++j) {
        // Fall back through shorter borders until seq[i] can extend one,
        // or no border is left (i == 0).
        while (i > 0 && seq[i] != seq[j]) i = lps[i - 1];
        if (seq[i] == seq[j]) ++i;
        lps[j] = i;  // i is 0 here whenever the characters did not match
    }

    const int len = n - lps[n - 1];  // candidate period, always in [1, n]
    return (n % len) ? n : len;
}
22
23 // test driver: reads one token from stdin and prints its prefix of length shortest_repeating_cycle(token)
24 int main() {
25 string line; cin >> line; // operator>> stops at whitespace, so only the first token is read
26 int cycle = shortest_repeating_cycle(line); // number of leading characters to print
27 cout << line.substr(0, cycle) << endl;
28 return 0;
29 }
5.2. AD HOC MATHEMATICAL PROBLEMS c Steven, Felix, Suhendry

5.2 Ad Hoc Mathematical Problems


We start this chapter with something light: the Ad Hoc mathematical problems. These
are programming contest problems that require no more than basic programming skills and
some fundamental mathematics. As there are still too many problems in this category, we
further divide them into sub-categories, as shown below. These problems are not placed in
Chapter 5 Book 1 as they are Ad Hoc problems with (heavier) mathematical flavor. But remember that
many of these Ad Hoc mathematical problems are the easier ones. To do well in the actual
programming contests, contestants must also master the other sections of this chapter.
Mathematics • Finding (Simple) Formula or Pattern
These problems require the problem solver to read the problem description carefully
to get a simplified formula or to spot the pattern. Attacking them directly will usually
We all use math every day; to predict weather, to tell time, to handle money. result in a TLE verdict. The actual solutions are usually short and do not require
Math is more than formulas or equations; it’s logic, it’s rationality, loops or recursions. Example: Let set S be an infinite set of square integers: {1, 4, 9,
it’s using your mind to solve the biggest mysteries we know. 16, 25, . . . }. Given an integer X ($1 \le X \le 10^{18}$), count how many integers in S are
— TV show NUMB3RS less than X. The answer is simply: $\lfloor \sqrt{X-1} \rfloor$. This is an O(1) solution.
Note that in Section 5.4, we will discuss Combinatorics problems that will also end
5.1 Overview and Motivation up with some (not necessarily simple) formula. We also have Section 9.15 where we
discuss a few known but very rare mathematical formulas.
The appearance of mathematics-related problems in programming contests is not surprising
• Base Number Conversion or Variants
since Computer Science is deeply rooted in Mathematics. Many interesting real life problems
These are the mathematical problems involving base numbers. The most frequent type
can be modeled as mathematical problems as you will frequently see in this chapter.
involves the standard conversion problems that can be easily solved manually or with
Recent ICPC problem sets (based on our experience in Asian Regionals) usually contain
C/C++/Python/OCaml (limited) or Java Integer/BigInteger (most generic) library.
one or two mathematical problems. Recent IOIs usually do not contain pure mathematics
tasks, but many tasks do require mathematical insights. This chapter aims to prepare For example, to convert 132 in base 8 (octal) into base 2 (binary), we can use base 10
contestants in dealing with many of these mathematical problems. (decimal) as the intermediate step: $(132)_8$ is $1 \times 8^2 + 3 \times 8^1 + 2 \times 8^0 = 64+24+2 = (90)_{10}$
We are aware that different countries place different emphases in mathematics training and $(90)_{10}$ is $90 \to 45(0) \to 22(1) \to 11(0) \to 5(1) \to 2(1) \to 1(0) \to 0(1) =$
in pre-University education. Thus, some contestants are familiar with the mathematical $(1011010)_2$ (that is, divide by 2 until 0, then read the remainders from backwards).
terms listed in Table 5.1. But for others, these mathematical terms do not ring a bell, However, we can also use built-in libraries:
perhaps because the contestant has not learnt it before, or perhaps the term is different in
the contestant’s native language. In this chapter, we want to make a more level-playing – C/C++:
field for the readers by listing as many common mathematical terminologies, definitions, int v; scanf("%o", &v); // read v in octal
problems, and algorithms that frequently appear in programming contests as possible. bitset<32> bin(v); // use bitset
printf("%s\n", bin.to_string().c_str()); // print in binary
Arithmetic Progression Geometric Progression Polynomial
Algebra Logarithm/Power Big Integer – Python:
Number Theory Prime Number Sieve of Eratosthenes print("{0:b}".format(int(str(input()), 8))) # octal to binary
Miller-Rabin Greatest Common Divisor Lowest Common Multiple
Factorial Euler Phi Modified Sieve – OCaml:
Extended Euclidean Linear Diophantine Modular Inverse Printf.sprintf "%X" (int_of_string "0o374");; # octal to hexa
Combinatorics Fibonacci Golden Ratio
Binet’s Formula Zeckendorf’s Theorem Pisano Period – Java:
If we know Java Integer/BigInteger class, we can actually construct an instance
Binomial Coefficients Fermat’s little theorem Lucas’ Theorem of Integer/BigInteger class in any base (radix) and use its toString(int radix)
Catalan Numbers Inclusion-Exclusion Probability Theory method to print the value of that instance in any base (radix). This is a much more
Cycle-Finding Game Theory Zero-Sum Game flexible library solution than C/C++ or Python solutions earlier that are limited
Decision Tree Perfect Play Minimax to popular bases = 2/8/10/16. See an example below for Kattis - basicremains
Nim Game Sprague-Grundy Theorem Matrix Power (also available at UVa 10551 - Basic Remains). Given a base b and two non-
Table 5.1: List of some mathematical terms discussed in this chapter negative integers p and m—both in base b, compute p % m and print the result
as a base b integer. The solution is as follows:

273 274
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.2. AD HOC MATHEMATICAL PROBLEMS c Steven, Felix, Suhendry

Note that if a certain programming language only has log function in a specific base,
class Main { we can get $\log_b(a)$ (base b) by using the fact that $\log_b(a) = \log(a)/\log(b)$.
public static void main(String[] args) {
A nice feature of the logarithmic function is that it can be used to count the number
Scanner sc = new Scanner(System.in); // a few test cases
of digits of a given decimal a. This formula (int)floor(1 + log10((double)a)) re-
while (true) {
turns the number of digits in decimal number a. To count the number of digits in other
int b = sc.nextInt(); if (b == 0) break;
base b, we can use: (int)floor(1 + log10((double)a) / log10((double)b)).
BigInteger p = new BigInteger(sc.next(), b); // 2nd parameter
BigInteger m = new BigInteger(sc.next(), b); // is the base We are probably aware of the square root function, e.g., sqrt(a), but some of us stum-
System.out.println((p.mod(m)).toString(b)); // print in base b ble when asked to compute $\sqrt[n]{a}$ (the n-th root of a). Fortunately, $\sqrt[n]{a}$ can be rewritten
} as $a^{1/n}$. We can then use built in formula like pow((double)a, 1.0 / (double)n) or
} exp(log((double)a) * 1.0 / (double)n).
}
• Grid
These problems involve grid manipulation. The grid can be complex, but the grid
Source code: ch5/basicremains UVa10551.java follows some primitive rules. The ‘trivial’ 1D/2D grid are not classified here (review
1D/2D array section in Book 1). The solution usually depends on the problem solver’s
• Number Systems or Sequences creativity in finding the patterns to manipulate/navigate the grid or in converting the
Some Ad Hoc mathematical problems involve definitions of existing (or made-up) Num- given one into a simpler one.
ber Systems or Sequences, and our task is to produce either the number (sequence) See an example for Kattis - beehouseperimeter. You are given a honeycomb structure
within some range or just the n-th number, verify if the given number (sequence) is described by R, the number of cells of the side of honeycomb. The cells are numbered
valid according to the definition, etc. Usually, following the problem description care- from 1 to R3 (R 1)3 in row major order. For example for R = 3, the honeycomb
fully is the key to solving the problem. But some harder problems require us to simplify looks like Figure 5.1.
the formula first. Some well-known examples are:

1. Fibonacci numbers (Section 5.4.1): 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, . . .


2. Factorial (Section 5.3.7): 1, 1, 2, 6, 24, 120, 720, 5 040, 40 320, 362 880, . . .
3. Derangement (Section 5.5 and 9.15): 1, 0, 1, 2, 9, 44, 265, 1 854, 14 833, . . .
4. Catalan numbers (Section 5.4.3): 1, 1, 2, 5, 14, 42, 132, 429, 1 430, 4 862, . . .
5. Bell numbers (Section 9.15): 1, 1, 2, 5, 15, 52, 203, 877, 4 140, . . .
Figure 5.1: A Honeycomb Grid
6. Arithmetic progression sequence: $a, (a+d), (a+2 \times d), (a+3 \times d), \ldots$, e.g., 1, 2, 3,
4, 5, 6, 7, 8, 9, 10, . . . that starts with a = 1 and with difference of d = 1 between Working on this honeycomb structure directly is hard, but we will get a familiar 2D
consecutive terms. The sum of the first n terms of this arithmetic progression array after we do this transformation: let $N = 2 * R - 1$. We fill the transformed $N \times N$
series is $S_n = \frac{n}{2} \times (2 \times a + (n-1) \times d)$. 2D array row by row, initially R cells, grows to $2 * R - 1$ cells, and then shrinks again
7. Geometric progression sequence: $a, a \times r, a \times r^2, a \times r^3, \ldots$, e.g., 1, 2, 4, 8, to R (with prefix offset). For R = 3 in Figure 5.1 above, N = 5 and here is the
16, 32, 64, 128, 256, 512, . . . that starts with a = 1 and with common ratio transformed $5 \times 5$ 2D array (-1 to indicate unused cell).
r = 2 between consecutive terms. The sum of the first n terms of this geometric
progression series is $S_n = a \times \frac{1-r^n}{1-r}$. Note that r > 1. 0 1 2 3 4
----------------
• Logarithm, Exponentiation, or Power
0 | 1 2 3 -1 -1
These problems involve the (clever) usage of log(), exp(), and/or pow() functions.
1 | 4 5 6 7 -1
Some of the important techniques are shown below:
2 | 8 9 10 11 12
These are library solutions to compute logarithm of a decimal a in any base $b \ge 2$: 3 | -1 13 14 15 16
4 | -1 -1 17 18 19
– <cmath> library in C/C++ has functions: log(a) (base e), log2(a) (base 2),
and log10(a) (base 10); Now, we can easily navigate from any cell in this transformed 2D array to its 6 direc-
– Java.lang.Math has log(a) (base e) and log10(a). tions: E/SE/S/W/NW/N (no SW nor NE directions).
– Python has log(a, Base) (any base, default is e), log2(a), and log10(a). • Polynomial
– OCaml has log(a) (base e) and log10(a). These problems involve polynomial evaluation, multiplication, division, differentiation,

275 276
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.2. AD HOC MATHEMATICAL PROBLEMS c Steven, Felix, Suhendry

etc. We can represent a polynomial by storing the coefficients of the polynomial’s terms
sorted by (descending order of) their powers. The (basic) operations on polynomials Exercise 5.2.1*: All these sequence of numbers below have at least one formula(s)/pattern(s).
usually require some careful usage of loops. Some polynomials are special: Please give your best guess of what are the next three numbers in each sequence!
Degree-2, e.g., $g(x) = ax^2 + bx + c$ (with classic roots $r = (-b \pm \sqrt{b^2 - 4ac})/2a$), and 1. 1, 2, 4, 8, 16, . . .
Degree-3, e.g., $h(x) = ax^3 + bx^2 + cx + d$ that on some applications can be derived 2*. 1, 2, 4, 8, 16, 31, . . .
back into a Degree-2 polynomial of $h'(x) = 3ax^2 + 2bx + c$.
3. 2, 3, 5, 7, 11, 13, . . .
Later in Section 9.11, we discuss $O(n^2)$ straightforward polynomial multiplication and
4*. 2, 3, 5, 7, 11, 13, 19, . . .
the faster O(n log n) one using Fast Fourier Transform.
Exercise 5.2.2*: Study (Ruffini-)Horner’s method for finding the roots of a polynomial
• Fraction equation f (x) = 0.
These problems involve representing number as fraction: $\frac{\text{numerator}}{\text{denominator}}$. Most frequent
operation is to simplify the given fraction to its simplest form. We can do this by Exercise 5.2.3*: Given $1 < a < 10$, $1 \le n \le 10^9$, show how to compute the value of
dividing both numerator n and denominator d with their greatest common divisor $(1 \times a + 2 \times a^2 + 3 \times a^3 + \ldots + n \times a^n)$ modulo $10^9 + 7$ efficiently, i.e., in O(log n). Both a
(gcd(n, d), also see Section 5.3.6). Other frequent operations are to add, subtract, and n are integers. Note that the naïve O(n) solution is not acceptable. You may need to
multiply two (or more) fractions. Python has a built-in Fraction class that are well read Section 5.3.9 (modular arithmetic) and Section 5.8 (fast (modular) exponentiation).
equipped to deal with all these basic fraction operations.
See an example below for UVa 10814 - Simplifying Fractions where we are asked to
Programming Exercises related to Ad Hoc Mathematical problems:
reduce a large fraction to its simplest form.
a. Finding (Simple) Formula (or Pattern), Easier
class Main { 1. Entry Level: Kattis - twostones * (just check odd or even)
public static void main(String[] args) {
2. UVa 10751 - Chessboard * (trivial for N = 1 and N = 2; derive the
Scanner sc = new Scanner(System.in); formula first for N > 2; hint: use diagonal as much as possible)
int N = sc.nextInt();
3. UVa 12004 - Bubble Sort * (try small n; get the pattern; use long long)
while (N-- > 0) { // we have to use > 0
4. UVa 12918 - Lucky Thief * (sum of arithmetic progression; long long)
BigInteger p = sc.nextBigInteger();
String ch = sc.next(); // ignore this char 5. Kattis - averageshard * (find O(n) formula; also see Kattis - averageseasy)
BigInteger q = sc.nextBigInteger(); 6. Kattis - bishops * (chess pattern involving bishops; from IPSC 2004)
BigInteger gcd_pq = p.gcd(q); // wow :) 7. Kattis - crne * (simulate cutting process on small numbers; get formula)
System.out.println(p.divide(gcd_pq) + " / " + q.divide(gcd_pq)); Extra UVa: 01315, 10014, 10110, 10170, 10499, 10696, 10773, 10940, 11202,
} 11393, 12027, 12502. 12725, 12992, 13049, 13071, 13216.
} Extra Kattis: alloys, averageseasy, chanukah, limbo1, pauleigon, sequential-
} manufacturing, soylent, sumkindofproblem.
b. Finding (Simple) Formula (or Pattern), Harder
from fractions import Fraction # Python’s built in 1. Entry Level: UVa 10161 - Ant on a Chessboard * (sqrt and ceil)
N = int(input())
2. UVa 11038 - How Many O’s * (define a function f that counts the
for _ in range(N): number of 0s from 1 to n; also available at Kattis - howmanyzeros *)
frac = Fraction("".join(input().split(" "))) # simplified form
3. UVa 11231 - Black and White Painting * (there is an O(1) formula)
print(str(frac.numerator) + " / " + str(frac.denominator))
4. UVa 11718 - Fantasy of a Summation * (convert loops to a closed form
formula; use modPow to compute the results)
Source code: ch5/UVa10814.java|py 5. Kattis - mortgage * (geometric progression; divergent but finite; special case
when r = 1.0 (no interest))
6. Kattis - neighborhoodwatch * (sum of AP; inclusion-exclusion)
• Really Ad Hoc 7. Kattis - nine * (find the required formula)
These are other mathematics-related problems outside the sub-categories above.
Extra UVa: 00651, 00913, 10493, 10509, 10666, 10693, 10710, 10882, 10970,
10994, 11170, 11246, 11296, 11298, 11387, 12909, 13096, 13140.
We suggest that the readers—especially those who are new to mathematical problems—kick-
start their training programme on mathematical problems by solving at least 2 or 3 problems Extra Kattis: appallingarchitecture, beautifulprimes, dickandjane, doorman,
from each sub-category, especially the ones that we highlighted as must try *. eatingout, limbo2, loorolls, otherside, rectangularspiral, sequence.

277 278
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.2. AD HOC MATHEMATICAL PROBLEMS c Steven, Felix, Suhendry

c. Base Number Conversion f. Logarithm, Exponentiation, Power


1. Entry Level: Kattis - basicremains * (also involving BigInteger mod; also 1. Entry Level: UVa 12416 - Excessive Space Remover * (the answer is
available at UVa 10551 - Basic Remains) log2 of the max consecutive spaces in a line)
2. UVa 00343 - What Base Is This? * (try all possible pair of bases) 2. UVa 00701 - Archaelogist’s Dilemma * (use log to count # of digits)
3. UVa 00389 - Basically Speaking * (use Java Integer1 class) 3. UVa 11384 - Help is needed for Dexter * (find the smallest power of
4. UVa 11952 - Arithmetic * (check base 2 to 18; special case for base 1) two greater than n; can be solved easily using ceil(eps + log2 (n)))
5. Kattis - arithmetic * (conversion of octal (per 4 bits) to hexa (per 3 bits); 4. UVa 11847 - Cut the Silver Bar * (O(1) math formula exists: $\lfloor \log_2(n) \rfloor$)
be careful with leading zeroes) 5. Kattis - cokolada * (the answers involve powers of two and a simulation)
6. Kattis - allaboutthatbase * (check base 1 to 36; base 1 is special; BigInteger) 6. Kattis - factstone * (use logarithm; power; also available at UVa 10916 -
7. Kattis - oktalni * (convert each 3-bits of binary strings to octal; BigInteger) Factstone Benchmark)
7. Kattis - thebackslashproblem * (actually power of two)
Extra UVa: 00290, 00355, 00446, 10473, 11185.
Extra UVa: 00107, 00113, 00474, 00545, 11636, 11666, 11714, 11986.
Extra Kattis: whichbase.
Extra Kattis: 3dprinter, bestcompression, bus, differentdistances, lemonade-
d. Base Number Variants
trade, pot, schoolspirit, slatkisi, stirlingsapproximation, tetration, triangle.
1. Entry Level: UVa 00575 - Skew Binary * (base modification)
g. Grid
2. UVa 00377 - Cowculations * (base 4 operations)
3. UVa 10931 - Parity * (convert decimal to binary; count number of 1s) 1. Entry Level: UVa 00264 - Count on Cantor * (grid; pattern)
4. UVa 11121 - Base -2 * (search for the term ‘negabinary’) 2. UVa 10022 - Delta-wave * (this is not an SSSP problem; find the pattern
in this grid (triangle)-like system)
5. Kattis - aliennumbers * (source base to decimal; decimal to target base)
3. UVa 10182 - Bee Maja * (grid)
6. Kattis - ignore * (actually a base 7 conversion problem as only 7 digits are
meaningful when rotated) 4. UVa 10233 - Dermuba Triangle * (the number of items in row forms
arithmetic progression series; use hypot)
7. Kattis - mixedbasearithmetic * (mix of base 10 and two versions of base 26)
5. Kattis - beehouseperimeter * (transform the hexagonal grid like Kattis - hon-
Extra UVa: 00636, 10093, 10677, 11005, 11398, 12602. eyheist; flood fill from outside Alice’s house; count #walls touched)
Extra Kattis: babylonian, basic, crypto, parsinghex, sumsquareddigits. 6. Kattis - honeyheist * (transform the hexagonal grid input into 2D grid first;
then run SSSP on unweighted graph; BFS)
Others: IOI 2011 - Alphabets (practice task; use space-efficient base 26).
7. Kattis - maptiles2 * (simple conversion between two grid indexing systems)
e. Number Systems or Sequences
Extra UVa: 00121, 00808, 00880, 10642, 10964, 12705.
1. Entry Level: Kattis - collatz *2 (similar to UVa 00694; just do as asked)
Extra Kattis: fleaonachessboard, settlers2.
2. UVa 00443 - Humble Numbers * (try all $2^i \times 3^j \times 5^k \times 7^l$; sort)
3. UVa 10408 - Farey Sequences * (first, generate (i, j) pairs such that h. Polynomial
gcd(i, j) = 1; then sort) 1. Entry Level: UVa 10302 - Summation of ... * (use long double)
4. UVa 11970 - Lucky Numbers * (square numbers; divisibility; brute force) 2. UVa 00930 - Polynomial Roots * (Ruffini’s rule; roots of quadratic eq)
5. Kattis - candlebox * (sum of arithmetic series [1..N ]; -6 for Rita or -3 for 3. UVa 10268 - 498’ * (polynomial derivation; Horner’s rule)
Theo; brute force Rita’s age; also available at UVa 13161 - Candle Box) 4. UVa 10586 - Polynomial Remains * (division; manipulate coefficients)
6. Kattis - permutedarithmeticsequence * (sort differences of adjacent items) 5. Kattis - ada * (polynomial problem; apply the given procedure recursively)
7. Kattis - rationalsequence * (pattern finding; tree traversal on a special tree) 6. Kattis - curvyblocks * (differentiate degree 3 to degree 2 polynomial; get
Extra UVa: 00136, 00138, 00413, 00640, 00694, 00927, 00962, 00974, 10006, roots of quadratic equation; the two blocks will touch at either roots)
10042, 10049, 10101, 10930, 11028, 11063, 11461, 11660, 12149, 12751. 7. Kattis - plot * (analyze the given pseudocode; the required pattern involves
Extra Kattis: hailstone, sheldon. Binomial Coefficients)
Extra UVa: 00126, 00392, 00498, 10215, 10326, 10719.
Extra Kattis: polymul1.

1 Also see Section 9.11 about Fast Fourier Transform algorithm.


Using Java BigInteger class gets TLE verdict for this problem. For base number conversion of
32-bit (i.e., not big) integers, we can just use parseInt(String s, int radix) and toString(int i,
int radix) in the faster Java Integer class. Additionally, you can also use BufferedReader and
BufferedWriter for faster I/O.
2
The (Lothar) Collatz’s Conjecture is an open problem in Mathematics.

279 280
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

i. Fraction 5.3 Number Theory


1. Entry Level: Kattis - mixedfractions * (convert fraction to mixed fraction)
Number Theory is the study of the integers and integer-valued functions. Mastering as many
2. UVa 00332 - Rational Numbers ... * (use GCD) topics as possible in the field of number theory is important as some mathematical problems
3. UVa 00834 - Continued Fractions * (do as asked) become easy (or easier) if you know the theory behind the problems. Otherwise, either a
4. UVa 12068 - Harmonic Mean * (involving fraction; use LCM and GCD) plain brute force attack leads to a TLE response, or you simply cannot work with the given
5. Kattis - deadfraction * (try every single possible repeating decimals; also input as it is too large without some pre-processing.
available at UVa 10555 - Dead Fraction)
6. Kattis - fraction * (continued fraction to normal fraction and vice versa)
5.3.1 Prime Numbers
7. Kattis - thermostat * (convert one temperature to another; use fraction; use
Java BigInteger; gcd) A natural number starting from 2: {2, 3, 4, 5, 6, 7, . . .} is considered a prime if it is only
divisible by 1 and itself. The first and only even prime is 2. The next prime numbers are:
Extra UVa: 10814, 10976, 12848, 12970.
3, 5, 7, 11, 13, 17, 19, 23, 29, . . . , and infinitely many more primes (proof in [33]). There
Extra Kattis: fractionallotion, jointattack, rationalarithmetic, rationalratio, are 25 primes in range [0..100], 168 primes in [0..1000], 1000 primes in [0..7919], 1229
temperatureconfusion. primes in [0..10 000], etc. Some large prime numbers are$^3$ 104 729, 1 299 709, 1e9 + 7 (easy
j. Really Ad Hoc to remember$^4$), 2 147 483 647 (8th Mersenne$^5$ prime, or $2^{31}-1$), 112 272 535 095 293, etc.
Prime number is an important topic in number theory and the source for many program-
1. Entry Level: UVa 00496 - Simply Subsets * (set manipulation)
ming problems. In this section, we will discuss algorithms involving prime numbers.
2. UVa 11241 - Humidex * (the hardest case is computing Dew point given
temperature and Humidex; derive it with Algebra)
Optimized Prime Testing Function
3. UVa 11526 - H(n) * (brute force up to $\sqrt{n}$; find the pattern; avoid TLE)
4. UVa 12036 - Stable Grid * (use pigeon hole principle) The first algorithm presented in this section is for testing whether a given natural number N
5. Kattis - matrix * (use simple linear algebra; one special case when c = 0) is prime, i.e., bool isPrime(N). The most naïve version is to test by definition, i.e., test if
6. Kattis - trip * (be careful with precision error; also available at UVa 10137 N is divisible by divisor $\in [2..N-1]$. This works, but runs in $O(N)$—in terms of number
- The Trip) of divisions. This is not the best way and there are several possible improvements.
7. Kattis - yoda * (ad hoc; 9 digits comparison)
The first improvement is to test if N is divisible by a divisor $\in [2..\lfloor\sqrt{N}\rfloor]$, i.e., we
stop when the divisor is greater than $\sqrt{N}$. We claim that if $a \times b = N$, then $a \le \sqrt{N}$ or
Extra UVa: 00276, 00613, 10023, 10190, 11042, 11055, 11715, 11816.
$b \le \sqrt{N}$. Quick proof by contradiction: Let’s suppose that it is not the case, i.e., $a > \sqrt{N}$
and $b > \sqrt{N}$. This implies that $a \times b > \sqrt{N} \times \sqrt{N}$ or $a \times b > N$. Contradiction. Thus $a = d$
and $b = \frac{N}{d}$ cannot both be greater than $\sqrt{N}$. This improvement is $O(\sqrt{N})$ which is already
much faster than the previous version, but can still be improved to be twice as fast.
Profile of Algorithm Inventors
The second improvement is to test if N is divisible by divisor $\in [3, 5, \ldots, \sqrt{N}]$, i.e.,
we only test odd numbers up to $\sqrt{N}$. This is because there is only one even prime number,
Eratosthenes of Cyrene ($\approx$ 300-200 years BC) was a Greek mathematician. He invented
i.e., number 2, which can be tested separately. This is $O(\sqrt{N}/2)$, which is also $O(\sqrt{N})$.
geography, did measurements of the circumference of Earth, and invented a simple algorithm
The third improvement$^6$ which is already good enough for contest problems is to test
to generate prime numbers which we discussed in this book.
if N is divisible by prime divisors $\le \sqrt{N}$ (but see below for probabilistic prime testing).
Marin Mersenne (1588-1648) was a French mathematician best known for Mersenne This is because if a prime number X cannot divide N, then there is no point testing whether
primes, prime numbers that can be written as $2^n-1$ for some integer n. multiples of X divide N or not. This is faster than $O(\sqrt{N})$ and is about $O(\#primes \le \sqrt{N})$.
For example, there are 500 odd numbers in $[1..\sqrt{10^6}]$, but there are only 168 primes in the
Gary Lee Miller is a professor of Computer Science at Carnegie Mellon University. He is
the initial inventor of Miller-Rabin primality test algorithm. same range. Prime number theorem [33] says that the number of primes less than or equal
to M—denoted by $\pi(M)$—is bounded by $O(M/(\ln(M)-1))$. Therefore, the complexity of
Michael Oser Rabin (born 1931) is an Israeli computer scientist. He improved Miller’s this prime testing function is about $O(\sqrt{N}/\ln(\sqrt{N}))$. The code is shown below.
idea and invented the Miller-Rabin primality test algorithm. Together with Richard Manning
Karp, he also invented Rabin-Karp’s string matching algorithm.
Leonhard Euler (1707-1783) was a Swiss mathematician and one of the greatest mathe-
maticians from the 18th century. Some of his inventions mentioned in this book include the 3
Having a list of large prime numbers is good for testing as these are the numbers that are hard for
frequently used $f(x)$/$\Sigma$/$e$/$\pi$ mathematical notations, the Euler totient (Phi) function, the algorithms like the prime testing/factoring algorithms. At least, remember 1e9 + 7 and $2^{31}-1$ are primes.
Euler tour/path (Graph), and Handshaking lemma. 4
But 1e6+7 is not a prime.
5
A Mersenne prime is a prime number that is one less than a power of two.
6
This is a bit recursive—testing whether a number is a prime by using another (smaller) prime number.
But the reason should be obvious after reading the next section.

281 282
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

Sieve of Eratosthenes: Generating List of Prime Numbers 5.3.2 Probabilistic Prime Testing (Java Only)
If we want to generate a list of prime numbers within the range [0..N ], there is a better We have just discussed the Sieve of Eratosthenes algorithm and a deterministic prime testing
algorithm than testing each number in the range for primality. The algorithm is called ‘Sieve algorithm that is good enough for many contest problems. However, you have to type in a
of Eratosthenes’ invented by Eratosthenes of Cyrene. few lines of C++/Java/Python code to do that. If you just need to check whether a single
First, this Sieve algorithm sets all integers in the range to be ‘probably prime’ but sets 0 (or at most, a few9 ) and usually (very) large integer (beyond the limit of 64-bit integer) is a
and 1 to be not prime. Then, it takes 2 as prime and crosses out all multiples7 of 2 starting prime, e.g., UVa 10235 below to decide if the given N is not a prime, an ‘emirp’ (the reverse
from $2 \times 2 = 4, 6, 8, 10, \ldots$ until the multiple is greater than N. Then it takes the next of its digits is also a prime), or just a normal prime, then there is an alternative and shorter
non-crossed 3 as a prime and crosses out all multiples of 3 starting from $3 \times 3 = 9, 12, 15, \ldots$. approach with the function isProbablePrime in Java$^{10}$ BigInteger$^{11}$—a probabilistic prime
Then it takes 5 and crosses out all multiples of 5 starting from $5 \times 5 = 25, 30, 35, \ldots$. And testing function based on Miller-Rabin algorithm [26, 32]. There is an important parameter
so on . . .. After that, integers that remain uncrossed within the range [0..N ] are primes. of this function: certainty. If this function returns true, then the probability that the tested
This algorithm does approximately (N $\times$ (1/2 + 1/3 + 1/5 + 1/7 + ... + 1/last prime in BigInteger is a prime exceeds $1 - (\frac{1}{2})^{certainty}$. Usually, certainty = 10 should be enough$^{12}$
range $\le$ N)) operations. Using ‘sum of reciprocals$^8$ of primes up to N’, we end up with the as $1 - (\frac{1}{2})^{10} = 0.9990234375$ is $\approx 1.0$. Note that using larger value of certainty obviously
time complexity of roughly O(N log log N ). decreases the probability of WA but doing so slows down your program and thus increases
Since generating a list of primes $\le$ 10K using the sieve is fast (our code below can go up the risk of TLE$^{13}$.
to $10^7$ in $\approx$ 1s), we opt to use the sieve for smaller primes and reserve the optimized prime
testing function for larger primes—see previous discussion. class Main {
public static void main(String[] args) {
typedef long long ll; Scanner sc = new Scanner(System.in);
while (sc.hasNext()) {
ll _sieve_size; int N = sc.nextInt(); System.out.printf("%d is ", N);
bitset<10000010> bs; // 10^7 is the rough limit BigInteger BN = BigInteger.valueOf(N);
vll p; // compact list of primes String R = new StringBuffer(BN.toString()).reverse().toString();
int RN = Integer.parseInt(R);
void sieve(ll upperbound) { // range = [0..upperbound] BigInteger BRN = BigInteger.valueOf(RN);
_sieve_size = upperbound+1; // to include upperbound if (!BN.isProbablePrime(10)) // certainty 10 is enough
bs.set(); // all 1s System.out.println("not prime.");
bs[0] = bs[1] = 0; // except index 0+1 else if ((N != RN) && BRN.isProbablePrime(10))
for (ll i = 2; i < _sieve_size; ++i) if (bs[i]) { System.out.println("emirp.");
// cross out multiples of i starting from i*i else
for (ll j = i*i; j < _sieve_size; j += i) bs[j] = 0; System.out.println("prime.");
p.push_back(i); // add prime i to the list }
} }
} }

bool isPrime(ll N) { // good enough prime test Source code: ch5/UVa10235.java


if (N < _sieve_size) return bs[N]; // O(1) for small primes
for (int i = 0; i < (int)p.size() && p[i]*p[i] <= N; ++i)
if (N%p[i] == 0) 5.3.3 Finding Prime Factors with Optimized Trial Divisions
return false; In number theory, we know that a prime number N only has 1 and itself as factors but
return true; // slow if N = large prime a composite number N , i.e., the non-primes, can be written uniquely as a product of its
} // note: only guaranteed to work for N <= (last prime in vll p)^2 prime factors. That is, prime numbers are multiplicative building blocks of integers (the
fundamental theorem of arithmetic). For example, $N = 1200 = 2 \times 2 \times 2 \times 2 \times 3 \times 5 \times 5 =$
// inside int main() $2^4 \times 3 \times 5^2$ (the latter form is called the prime-power factorization).
sieve(10000000); // up to 10^7 (<1s)
9
printf("%d\n", isPrime((1LL<<31)-1)); // 8th Mersenne prime Note that if your aim is to generate a list of the first few million prime numbers, the Sieve of Eratosthenes
algorithm should run faster than a few million calls of this isProbablePrime function.
printf("%d\n", isPrime(136117223861LL)); // 104729*1299709 10
A note for pure C/C++/Python/OCaml programmers: It is good to be a multi -lingual programmer by
switching to Java whenever it is more beneficial to do so, like in this instance.
11
As of year 2020, there is no equivalent C++/Python/OCaml library to do this, yet.
7 12
Slower implementation is to start from $2 \times i$ instead of $i \times i$, but the difference is not that much. This rule of thumb setting is a result of our empirical testings over the years.
8 13
Reciprocal is also known as multiplicative inverse. A number multiplied by its reciprocal yield 1. This randomized algorithm is a ‘Monte Carlo Algorithm’ that can give a WA with a (small) probability.

283 284
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

A naı̈ve algorithm generates a list of primes (e.g., with sieve) and checks which prime(s) 5.3.4 Functions Involving Prime Factors
can actually divide the integer N —without changing N . This can be improved!
There are other well-known number theoretic functions involving prime factors shown below.
A better algorithm utilizes a kind of Divide and Conquer spirit. An integer N can be
All variants have similar $O(\sqrt{N}/\ln\sqrt{N})$ time complexity with the basic prime factoring via
expressed as: $N = p \times N'$, where p is a prime factor and $N'$ is another number which is
trial division. Interested readers can read Chapter 7: “Multiplicative Functions” of [33].
N/p—i.e., we can reduce the size of N by taking out its prime factor p. We can keep doing
this until eventually $N' = 1$. To speed up the process even further, we utilize the divisibility 1. numPF(N): Count the number of prime factors of integer N.
property that there is no more than one prime divisor greater than $\sqrt{N}$, so we only repeat
the process of finding prime factors until $p > \sqrt{N}$. Stopping at $\sqrt{N}$ entails a special case: if For example: N = 60 has 4 prime factors: {2, 2, 3, 5}. The solution is a simple tweak
$(\text{current } p)^2 > N$ and N is still not 1, then N is the last prime factor. The code below takes of the trial division algorithm to find prime factors shown earlier.
in an integer N and returns the list of prime factors.
In the worst case, when N is prime, this prime factoring algorithm with trial division int numPF(ll N) {
requires testing all smaller primes up to $\sqrt{N}$, mathematically denoted as $O(\pi(\sqrt{N})) =$ int ans = 0;
$O(\sqrt{N}/\ln\sqrt{N})$ can be very slow$^{14}$—see the example of factoring a large composite number for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i)
136 117 223 861 into two large prime factors: 104 729 $\times$ 1 299 709 in the code below. However, while (N%p[i] == 0) { N /= p[i]; ++ans; }
if given composite numbers with lots of small prime factors, this algorithm is reasonably return ans + (N != 1);
fast$^{15}$—see 142 391 208 960 which is $2^{10} \times 3^4 \times 5 \times 7^4 \times 11 \times 13$. }
vll primeFactors(ll N) { // pre-condition, N >= 1
vll factors; 2. numDiv(N): Count the number of divisors of integer N.
for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i) A divisor of N is defined as an integer that divides N without leaving a remainder. If
while (N%p[i] == 0) { // found a prime for N a number $N = a^i \times b^j \times \ldots \times c^k$, then N has $(i+1) \times (j+1) \times \ldots \times (k+1)$ divisors.
N /= p[i]; // remove it from N This is because there are i + 1 ways to choose prime factor a (0, 1, ..., i-1, i times),
factors.push_back(p[i]); j + 1 ways to choose prime factor b, . . ., and k + 1 ways to choose prime factor c. The
} total number of ways is the multiplication of these numbers.
if (N != 1) factors.push_back(N); // remaining N is a prime
return factors; Example: $N = 60 = 2^2 \times 3^1 \times 5^1$ has $(2+1) \times (1+1) \times (1+1) = 3 \times 2 \times 2 = 12$
} divisors. The 12 divisors are: {1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60}. The prime factors
of 60 are highlighted. See that N has more divisors than prime factors.
// inside int main()
sieve(10000000); int numDiv(ll N) {
vll r; int ans = 1; // start from ans = 1
for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i) {
r = primeFactors((1LL<<31)-1); // Mersenne prime int power = 0; // count the power
for (auto &pf : r) printf("> %lld\n", pf); while (N%p[i] == 0) { N /= p[i]; ++power; }
ans *= power+1; // follow the formula
r = primeFactors(136117223861LL); // large prime factors }
for (auto &pf : r) printf("> %lld\n", pf); // 104729*1299709 return (N != 1) ? 2*ans : ans; // last factor = N^1
}
r = primeFactors(5000000035LL); // large prime factors
for (auto &pf : r) printf("> %lld\n", pf); // 5*1000000007 3. sumDiv(N): Sum the divisors of integer N.

r = primeFactors(142391208960LL); // large composite In the previous example, N = 60 has 12 divisors. The sum of these divisors is 168.
for (auto &pf : r) printf("> %lld\n", pf); // 2^10*3^4*5*7^4*11*13 This can be computed via prime factors too. If a number $N = a^i \times b^j \times \ldots \times c^k$,
then the sum of divisors of N is $\frac{a^{i+1}-1}{a-1} \times \frac{b^{j+1}-1}{b-1} \times \ldots \times \frac{c^{k+1}-1}{c-1}$. This closed form is

r = primeFactors(100000380000361LL); // 10000019^2 derived from summation of geometric progression series. $\frac{a^{i+1}-1}{a-1}$ is the summation of
for (auto &pf : r) printf("> %lld\n", pf); // fail to factor! (why?) $a^0, a^1, \ldots, a^{i-1}, a^i$. The total sum of divisors is the multiplication of these summation
of geometric progression series of each prime factor.
14
In real life applications, very large primes are commonly used in cryptography and encryption (e.g., RSA Example: $N = 60 = 2^2 \times 3^1 \times 5^1$, sumDiv(60) $= \frac{2^{2+1}-1}{2-1} \times \frac{3^{1+1}-1}{3-1} \times \frac{5^{1+1}-1}{5-1} = \frac{7 \times 8 \times 24}{1 \times 2 \times 4} = 168$.
algorithm) because it is computationally challenging to factor a very large number into its prime factors,
i.e., $x = p_1 p_2$ where both $p_1$ and $p_2$ are very large primes. We can avoid raising a prime factor $p_i$ to a certain power k using O(log k) exponenti-
15
Also see Section 9.12 for a faster (but rare) integer factoring algorithm. ation (see Section 5.8) by writing this sumDiv(N) function iteratively:

285 286
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

5.3.5 Modified Sieve


ll sumDiv(ll N) {
ll ans = 1; // start from ans = 1 If the number of different prime factors has to be determined for many (or a range of)
for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i) { integers, then there is a better solution than calling numDiffPF(N) as shown in Section 5.3.4
ll multiplier = p[i], total = 1; many times. The better solution is the modified sieve algorithm. Instead of finding the
while (N%p[i] == 0) { prime factors and then calculating the required values, we start from the prime numbers and
N /= p[i]; modify the values of their multiples. The short modified sieve code is shown below:
total += multiplier;
multiplier *= p[i]; int numDiffPFarr[MAX_N+10] = {0}; // e.g., MAX_N = 10^7
} // total for for (int i = 2; i <= MAX_N; ++i)
ans *= total; // this prime factor if (numDiffPFarr[i] == 0) // i is a prime number
} for (int j = i; j <= MAX_N; j += i)
if (N != 1) ans *= (N+1); // N^2-1/N-1 = N+1 ++numDiffPFarr[j]; // j is a multiple of i
return ans;
} Similarly, this is the modified sieve code to compute the Euler Totient function:

int EulerPhi[MAX_N+10];
4. EulerPhi(N): Count the number of positive integers < N that are relatively prime for (int i = 1; i <= MAX_N; ++i) EulerPhi[i] = i;
to N . Recall: Two integers a and b are said to be relatively prime (or coprime) if for (int i = 2; i <= MAX_N; ++i)
gcd(a, b) = 1, e.g., 25 and 42. A naı̈ve algorithm to count the number of positive if (EulerPhi[i] == i) // i is a prime number
integers < N that are relatively prime to N starts with counter = 0, iterates through for (int j = i; j <= MAX_N; j += i)
$i \in [1..N-1]$, and increases the counter if gcd(i, N) = 1. This is slow for large N. EulerPhi[j] = (EulerPhi[j]/i) * (i-1);
A better algorithm is the Euler’s Phi (Totient) function $\varphi(N) = N \times \prod_{p_i} (1 - \frac{1}{p_i})$,
where $p_i$ is prime factor of N. These $O(N \log \log N)$ modified sieve algorithms should be preferred over (up to) N individual
Example: $N = 36 = 2^2 \times 3^2$. $\varphi(36) = 36 \times (1 - \frac{1}{2}) \times (1 - \frac{1}{3}) = 12$. Those 12 positive calls to $O(\sqrt{N}/\ln\sqrt{N})$ numDiffPF(N) or EulerPhi(N) if there are many queries over a large
integers that are relatively prime to 36 are {1, 5, 7, 11, 13, 17, 19, 23, 25, 29, 31, 35}. range, e.g., [1..n], but MAX_N is at most $10^7$ (note that we need to prepare a rather big array
in a sieve method). However, if we just need to compute the number of different prime
factors or Euler Phi for a single (or a few) but (very) large integer N , it may be faster to
ll EulerPhi(ll N) {
just use individual calls of numDiffPF(N) or EulerPhi(N).
ll ans = N; // start from ans = N
for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i) {
if (N%p[i] == 0) ans -= ans/p[i]; // count unique
while (N%p[i] == 0) N /= p[i]; // prime factor Exercise 5.3.5.1*: Can we write the modified sieve code for the other functions listed in
} Section 5.3.4 (i.e., other than numDiffPF(N) and EulerPhi(N)) without increasing the time
if (N != 1) ans -= ans/N; // last factor complexity of sieve? If we can, write the required code! If we cannot, explain why!
return ans;
}

Source code: ch5/primes.cpp|java|py|ml 5.3.6 Greatest Common Divisor & Least Common Multiple
The Greatest Common Divisor (GCD) of two integers: a, b denoted by gcd(a, b), is the
largest positive integer d such that d | a and d | b where x | y means that x divides y.
Exercise 5.3.4.1: Implement numDiffPF(N) and sumPF(N) that are similar to numPF(N)! Example of GCD: gcd(4, 8) = 4, gcd(6, 9) = 3, gcd(20, 12) = 4. One practical usage of GCD
numDiffPF(N): Count the number of different prime factors of N. is to simplify fractions (see UVa 10814 in Section 5.2), e.g., $\frac{6}{9} = \frac{6/\gcd(6,9)}{9/\gcd(6,9)} = \frac{6/3}{9/3} = \frac{2}{3}$.
sumPF(N): Sum the prime factors of N. Finding the GCD of two integers is an easy task with an effective Divide and Conquer
Exercise 5.3.4.2: What are the answers for numPF(N), numDiffPF(N), sumPF(N), numDiv(N), Euclid algorithm [33, 7] which can be implemented as a one liner code (see below). Thus
sumDiv(N), and EulerPhi(N) when N is a prime? finding the GCD of two integers is usually not the main issue in a Mathematics-related
contest problem, but just part of a bigger solution.

287 288
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

The GCD is closely related to Least (or Lowest) Common Multiple (LCM). The LCM Kattis - factovisors/UVa 10139 - Factovisors
of two integers (a, b) denoted by lcm(a, b), is defined as the smallest positive integer l such
Abridged problem description: “Does m divide n! ($0 \le n, m \le 2^{31}-1$)?”. Recall that in
that a | l and b | l. Example of LCM: lcm(4, 8) = 8, lcm(6, 9) = 18, lcm(20, 12) = 60.
Section 5.3.7, we note that n!, i.e., f ac(n), grows very fast. We mention that with built-in
It has been shown (see [33]) that: lcm(a, b) = a ⇥ b/gcd(a, b) = a/gcd(a, b) ⇥ b. This can
data types, the largest factorial that we can still compute precisely is only 20!. In Book 1,
also be implemented as a one liner code (see below). Both GCD and LCM algorithms run
we show that we can compute large integers with Big Integer technique. However, it is very
in $O(\log_{10} n) = O(\log n)$, where n = min(a, b).
slow to precisely compute the exact value of n! for large n.
The solution for this problem is to work with the prime factors of m and check if each
int gcd(int a, int b) { return b == 0 ? a : gcd(b, a%b); }
of those prime factors has ‘support’ in n!. This check is called the Legendre’s formula. Let
int lcm(int a, int b) { return a / gcd(a, b) * b; }
$v_p(n!)$ be the highest power of p that divides n!. We can compute $v_p(n!)$ via $\sum_{i=1}^{\infty} \lfloor \frac{n}{p^i} \rfloor$.
For example, when n = 6, we have $6! = 2 \times 3 \times 4 \times 5 \times 6 = 2 \times 3 \times (2^2) \times 5 \times (2 \times 3) = 2^4 \times 3^2 \times 5$
Note$^{16}$ that since C++17, both gcd and lcm functions are already built-in <numeric> when expressed as its prime power factorization (we do not actually need to do this). Now
library. In Java, we can use method gcd(a, b) in BigInteger class. In Python, we can use if $m_1 = 9 = 3^2$, then this prime factor $3^2$ has support in 6! because $v_3(6!) = 2$ and $3^2 \le 3^2$.
gcd(a, b) in math module. Thus, $m_1 = 9$ divides 6!. However, $m_2 = 54 = 2^1 \times 3^3$ has no support because although
The GCD of more than 2 numbers can be found via multiple calls of gcd of 2 numbers, $v_2(6!) = 4$ and $2^1 \le 2^4$, we have $v_3(6!) = 2$ and $3^3 > 3^2$. Thus $m_2 = 54$ does not divide 6!.
e.g., gcd(a, b, c) = gcd(a, gcd(b, c)). The strategy to find the LCM of more than 2 numbers
is similar. Source code: ch5/factovisors UVa10139.cpp|java|py

Exercise 5.3.6.1: The LCM formula is lcm(a, b) = $a \times b$ / gcd(a, b) but why do we
use a / gcd(a, b) $\times$ b instead? Try $a = 2 \times 10^9$ and b = 8 using 32-bit signed integers. Exercise 5.3.8.1: Determine what is the GCD and LCM of $(2^6 \times 3^3 \times 97^1, 2^5 \times 5^2 \times 11^2)$?

Exercise 5.3.6.2: Please write the gcd(a, b) routine in iterative fashion! Exercise 5.3.8.2: Count the number of trailing zeroes of n! (assume $1 \le n \le 200\,000$).

Exercise 5.3.6.3*: Study alternative ‘binary gcd’ computation that replaces division (inside
modulo operation) with bit shift operations, subtractions, and comparisons. This version is
known as Stein’s algorithm.
5.3.9 Modular Arithmetic
Some (mathematical) computations in programming problems can end up having very large
positive (or very small negative) intermediate/final integer results that are beyond the range
5.3.7 Factorial of the largest built-in integer data type (currently the 64-bit long long in C++ or long
in Java). In Book 1, we have shown a way to compute Big Integers precisely. In Section
Factorial$^{17}$ of n, i.e., n! or fac(n) is defined as 1 if n = 0 and $n \times fac(n-1)$ if n > 0. However, it 5.3.8, we have shown another way to work with Big Integers via its prime factors. For some
is usually more convenient to work with the iterative version, i.e., $fac(n) = 2 \times 3 \times 4 \times \ldots \times$ other problems$^{18}$, we are only interested in the result modulo a number (usually a prime,
$(n-1) \times n$ (loop from 2 to n, skipping 1). The value of fac(n) grows very fast. We are to minimize collision) so that the intermediate/final results always fit inside built-in integer
only able to use C/C++ long long/Java long/OCaml Int64 for up to f ac(20). Beyond data type. In this subsection, we discuss these types of problems.
that, we may need to work with the prime factors of a factorial (see Section 5.3.8), get the In UVa 10176 - Ocean Deep! Make it shallow!!, we are asked to convert a long binary
intermediate and final results modulo a smaller (usually a prime) number (see Section 5.3.9), number (up to 100 digits) to decimal. A quick calculation shows that the largest possible
or to use either Python or Java BigInteger for precise but slow computation (see Book 1). number is $2^{100}-1$ which is beyond the range of a 64-bit integer. But the problem only asks
if the result is divisible by 131 071 (a prime number). So what we need to do is to convert
5.3.8 Working with Prime Factors binary to decimal digit by digit, while performing % 131 071 operation to the intermediate
result (note that ‘%’ is a symbol of modulo operation). If the final result is 0, then the actual
Other than using the Big Integer technique (see Book 1) which is ‘slow’, we can work with the number in binary (which we never compute in its entirety), is divisible by 131 071.
intermediate computations of large integers accurately by working with the prime factors of Important: The modulo of a negative integer can be surprising to some who are not
the integers instead of the actual integers themselves. Therefore, for some non-trivial number aware of their programming language specific behavior, e.g., -10 % 7 = 4 (in Python) but
theoretic problems, we have to work with the prime factors of the input integers even if the C++/Java % operator and OCaml mod operator produces -3 instead. To be safer if we
main problem is not really about prime numbers. After all, prime factors are the building need to find a non-negative integer a (mod m), we use ((a % m) + m) % m. For the given
blocks of integers. Let’s see the next case study. example, we have ((-10 % 7) + 7) % 7 = (-3 + 7) % 7 = 4 % 7 = 4.
16
There is no built-in gcd function in OCaml.
17 18
We can also have multifactorial. The most common form of multifactorial is the double factorial, denoted As of year 2020, we observe that the number of problems that require Big Integer technique is decreasing
as n!!, e.g., $14!! = 14 \times 12 \times 10 \times \ldots \times 2 = 645\,120$. This is used in Section 8.2.1. whereas the number of problems that require modular arithmetic technique is increasing.

289 290
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

The following are true involving modular arithmetic: 5.3.10 Extended Euclidean Algorithm
1. (a + b) % m = ((a % m) + (b % m)) % m In Section 5.3.6, we have seen that gcd(a, 0) = a and gcd(a, b) = gcd(b, a%b) but this
Example: (15 + 29) % 8 Euclid’s algorithm can be extended. On top of computing the gcd(a, b) = d, the Extended
= ((15 % 8) + (29 % 8)) % 8 = (7 + 5) % 8 = 4 Euclidean algorithm can also compute the coefficients of Bézout identity (lemma), i.e.,
integers x and y such that ax + by = gcd(a, b). The implementation is as follows:
2. (a - b) % m = ((a % m) - (b % m)) % m
Example: (37 - 15) % 6
int extEuclid(int a, int b, int &x, int &y) { // pass x and y by ref
= ((37 % 6) - (15 % 6)) % 6 = (1 - 3) % 6 = -2 or 4
int xx = y = 0;
3. (a ⇥ b) % m = ((a % m) ⇥ (b % m)) % m int yy = x = 1;
Example: (23 ⇥ 12) % 5 while (b) { // repeats until b == 0
= ((23 % 5) ⇥ (12 % 5)) % 5 = (3 ⇥ 2) % 5 = 1 int q = a/b;
int t = b; b = a%b; a = t;
t = xx; xx = x-q*xx; x = t;
Modular Multiplicative Inverse t = yy; yy = y-q*yy; y = t;
Now, (a / b) % m is harder to compute assuming a is very large, otherwise, simply divide a }
by b and modulo the result by m. Note that a might appear in the form of a = a1 × a2 × · · · × an return a; // returns gcd(a, b)
where each ai is small enough to fit in a built-in integer data type. Thus, it might be tempting }
to modulo a and b to m independently, perform the division, and modulo the result again.
However, this approach is wrong! (((a1 ⇥ a2 ⇥ · · · ⇥ an ) % m) / (b % m)) % m does not
For example: a = 25, b = 18
necessarily equal to (a / b) % m, i.e., the previous modular arithmetic does not work for
extendedEuclid(25, 18, x, y) updates x = -5, y = 7, and returns d = 1.
division. For example, (30 / 5) % 10 = 6 is not equal to ((30 % 10) / (5 % 10)) % 10 = 0.
This means 25 × (-5) + 18 × 7 = gcd(25, 18) = 1.
Another example, (27 / 3) % 13 = 9 is not equal to ((27 % 13) / (3 % 13)) % 13 = 1/3.
Fortunately, we can rewrite (a / b) % m as (a × b^{-1}) % m where b^{-1} is the modular
multiplicative inverse of b with respect to modulus m. In other words, b^{-1} is an integer such Solving Linear Diophantine Equation
that (b × b^{-1}) % m = 1. Then, all we have to do is solving (a × b^{-1}) % m using the previous
Problem: Suppose a housewife buys apples and oranges with cost of 8.39 dollars.
modular arithmetic (for multiplication). So, how do we find b^{-1} % m?
An apple costs 25 cents. An orange costs 18 cents. How many of each fruit does she buy?
If m is a prime number, then we can use Fermat’s little theorem for b and m where
This problem can be modeled as a linear equation with two variables: 25x + 18y = 839.
gcd(b, m) = 1, i.e., b^{m-1} ≡ 1 (mod m). If we multiply both sides with b^{-1}, then we will
Since we know that both x and y must be integers, this linear equation is called the Linear
obtain b^{m-1} · b^{-1} ≡ 1 · b^{-1} (mod m) or simply b^{m-2} ≡ b^{-1} (mod m). Then, to find the
Diophantine Equation. We can solve Linear Diophantine Equation with two variables even
modular multiplicative inverse of b (i.e., b^{-1} % m), simply compute b^{m-2} % m, e.g., us-
if we only have one equation! The solution is as follows:
ing efficient modular exponentiation discussed in Section 5.8.2 combined with the previous
modular arithmetic for multiplication. Therefore, (a × b^{-1}) % m when m is a prime number Let a and b be integers with d = gcd(a, b). The equation ax + by = c has no integral
equals to ((a % m) × (b^{m-2} % m)) % m. solutions if d | c is not true. But if d | c, then there are infinitely many integral solutions.
If m is not necessarily a prime number but gcd(b, m) = 1, then we can use Euler’s The first solution (x0 , y0 ) can be found using the Extended Euclidean algorithm and the rest
Theorem, i.e., b^{φ(m)} ≡ 1 (mod m) where φ(m) is the Euler’s Phi (Totient) of m, the number can be derived from x = x0 + (b/d)n, y = y0 - (a/d)n, where n is an integer. Programming
of positive integers < m which are relative prime to m. Observe that when m is a prime contest problems may have additional constraints to make the output finite (and unique).
number, Euler’s Theorem reduces to Fermat’s little theorem, i.e., φ(m) = m - 1. Similar Using extendedEuclid, we can solve the motivating problem shown earlier above:
to the previous, we simply need to compute b^{φ(m)-1} % m to get the modular multiplicative The Linear Diophantine Equation with two variables 25x + 18y = 839.
inverse of b. Therefore, (a × b^{-1}) % m equals to ((a % m) × (b^{φ(m)-1} % m)) % m. Recall that extendedEuclid(25, 18) helps us get:
Example 1: a = 27, b = 3, m = 13. (27 / 3) % 13 = ((27 % 13) × (3^{-1} % 13)) % 13 25 × (-5) + 18 × 7 = gcd(25, 18) = 1.
= ((27 % 13) × (3^{11} % 13)) % 13 = (1 × 9) % 13 = 9.
We multiply the left and right hand side of the equation above by 839/gcd(25, 18) = 839:
Example 2: a = 27, b = 3, m = 10. (27 / 3) % 10 = ((27 % 10) × (3^{-1} % 10)) % 10
25 × (-4195) + 18 × 5873 = 839.
= ((27 % 10) × (3^{3} % 10)) % 10 = (7 × 7) % 10 = 49 % 10 = 9.
Thus x = -4195 + (18/1)n and y = 5873 - (25/1)n.
Alternatively, we can also use the Extended Euclid algorithm to compute the modular
multiplicative inverse of b (while still assuming gcd(b, m) = 1). We discuss this version in the Since we need to have non-negative x and y (non-negative number of apples and oranges),
next Section 5.3.10. Note that if gcd(b, m) ≠ 1, then b does not have a modular multiplicative we have two more additional constraints:
inverse with respect to modulus m. -4195 + 18n ≥ 0 and 5873 - 25n ≥ 0, or
4195/18 ≤ n ≤ 5873/25, or
233.05 ≤ n ≤ 234.92.

291 292
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

The only possible integer n is 234. Thus the unique solution is x = -4195 + 18 × 234 = 17
and y = 5873 - 25 × 234 = 23, i.e., 17 apples (of 25 cents each) and 23 oranges (of 18 cents
each) for a total of 8.39 dollars. Programming Exercises related to Number Theory:

a. Prime Numbers
Modular Multiplicative Inverse with Extended Euclidean Algorithm
1. Entry Level: UVa 00543 - Goldbach’s Conjecture * (sieve; complete
Now let’s compute x such that b × x = 1 (mod m). This b × x = 1 (mod m) is equivalent to search; Goldbach’s conjecture19 ; similar to UVa 00686, 10311, and 10948)
b × x = 1 + m × y where y can be any integer. We rearrange the formula into b × x - m × y = 1 2. UVa 01644 - Prime Gap * (LA 3883 - Tokyo07; sieve; prime check, upper
or b × x + m × y = 1 as y is a variable that can absorb the negative sign. This is a Linear bound - lower bound)
Diophantine Equation that can be solved with the Extended Euclidean algorithm to obtain 3. UVa 10650 - Determinate Prime * (3 uni-distance consecutive primes)
the value of x (and y—ignored). This x = b^{-1} (mod m). 4. UVa 11752 - The Super ... * (try base 2 to 2^{16}; composite power; sort)
Note that the result b^{-1} (mod m) can only be found if b and m are relatively prime, i.e., 5. Kattis - enlarginghashtables * (use sieve up to 40 000; prime test numbers
gcd(b, m) = 1. It can be implemented as follows (notice our safeguard mod sub-routine to greater than 2n; check primality of n itself)
deal with the case when a % m is negative): 6. Kattis - primesieve * (use sieve up to 10^8; it is fast enough)
7. Kattis - reseto * (sieve of Eratosthenes until the k-th crossing)
int mod(int a, int m) { // returns a (mod m)
return ((a%m) + m) % m; // ensure positive answer Extra UVa: 00406, 00686, 00897, 00914, 10140, 10168, 10311, 10394, 10490,
} 10852, 10948.
b. (Probabilistic) Prime Testing
int modInverse(int b, int m) { // returns b^(-1) (mod m) 1. Entry Level: Kattis - pseudoprime * (yes if !isPrime(p) && a.modPow(p,
int x, y; p) = a; Big Integer; also available at UVa 11287 - Pseudoprime Numbers)
int d = extEuclid(b, m, x, y); // to get b*x + m*y == d 2. UVa 01180 - Perfect Numbers * (LA 2350 - Dhaka01; small prime check)
if (d != 1) return -1; // to indicate failure 3. UVa 01210 - Sum of Consecutive ... * (LA 3399 - Tokyo05; simple)
// b*x + m*y == 1, now apply (mod m) to get b*x == 1 (mod m)
4. UVa 10235 - Simply Emirp * (case analysis: prime/emirp/not prime;
return mod(x, m); emirp is prime number that if reversed is still a prime number)
}
5. Kattis - flowergarden * (Euclidean dist; small prime check; use isProba-
blePrime; simulation; faster solutions exist)
Now we can compute (a × b^{-1}) % m even if m is not a prime but gcd(b, m) == 1 via ((a
6. Kattis - goldbach2 * (simple brute force problem; use isProbablePrime; faster
% m) × modInverse(b, m)) % m. solutions exist)
Example 1: (27 * 3^{-1}) % 7 7. Kattis - primes2 * (convert input to either base 2/8/10/16; skip those that
= ((27 % 7) × modInverse(3, 7)) % 7 = (6 × 5) % 7 = 30 % 7 = 2. cause NumberFormatException error; use isProbablePrime test and gcd)
Example 2: (27 * 4^{-1}) % 7 Extra UVa: 00960, 10924, 12542.
= ((27 % 7) × modInverse(4, 7)) % 7 = (6 × 2) % 7 = 12 % 7 = 5. c. Finding Prime Factors
Example 3 (m is not a prime but gcd(b, m) == 1): (520 * 25^{-1}) % 18 1. Entry Level: UVa 00583 - Prime Factors * (basic factorization problem)
= ((520 % 18) × modInverse(25, 18)) % 18 = (16 × 13) % 18 = 208 % 18 = 10. 2. UVa 11466 - Largest Prime Divisor * (use efficient sieve implementation
This is because extendedEuclid(25, 18, x, y) updates x = -5, y = 7, and returns d = 1, to get the largest prime factors)
so we have x = ((-5%18) + 18) % 18 = (-5 + 18) % 18 = 13 % 18 = 13. 3. UVa 12703 - Little Rakin * (uses small Fibonacci numbers up to 40 and
simple prime factorization as a and b can be non primes)
Source code: ch5/modInverse.cpp|java|py 4. UVa 12805 - Raiders of the Lost Sign * (prime check; primes of format
4m - 1 and 4m + 1; simple prime factorization)
5. Kattis - pascal * (find lowest prime factor of N ; special case: N = 1)
5.3.11 Number Theory in Programming Contests
6. Kattis - primalrepresentation * (factorization problem; use sieve to avoid
We will discuss Pollard’s rho (a faster integer factoring algorithm than the one shown in TLE; use long long; 2^{31} - 1 is a prime)
Section 5.3.3) in Section 9.12. We will also discuss Chinese Remainder Theorem (CRT) 7. Kattis - primereduction * (factorization problem)
(that uses the Extended Euclidean algorithm in Section 5.3.10) in Section 9.13. Extra UVa: 00516, 10392.
However, there are many other number theoretic problems that cannot be discussed one
by one in this book (e.g., the various divisibility properties). Based on our experience, Also see Section 9.12 for a faster (but rare) integer factoring algorithm.
number theory problems frequently appear in ICPCs especially in Asia. It is a good idea for 19
Christian Goldbach’s conjecture (updated by Leonhard Euler) is as follows: Every even number ≥ 4 can
one team member to specifically study number theory listed in this book and beyond. be expressed as the sum of two prime numbers

293 294
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry

d. Functions Involving Prime Factors g. Factorial21


1. Entry Level: UVa 00294 - Divisors * (numDiv(N)) 1. Entry Level: Kattis - tutorial * (factorial is just part of the problem; pruning)
2. UVa 10179 - Irreducible Basic ... * (EulerPhi(N)) 2. UVa 11076 - Add Again * (do not use next permutation for 12!, TLE;
3. UVa 11353 - A Different kind of ... * (numPF(N); sort variant) observe the digits in all permutations; hint: the solution involves factorial)
4. UVa 11728 - Alternate Task * (sumDiv(N)) 3. UVa 12335 - Lexicographic Order * (given the k-th permutation, recover
5. Kattis - almostperfect * (sumDiv(N)-N; minor variation) the 1st permutation; use factorial; use Java BigInteger)
6. Kattis - divisors * (return numDiv(nCk); but do not compute nCk directly; 4. UVa 12869 - Zeroes * (LA 6847 - Bangkok 2014; every zero in factorial(n)
work with its prime factors) is due to product of factor 2 and 5; factor 2 grows faster than factor 5)
7. Kattis - relatives * (EulerPhi(N); also available at UVa 10299 - Relatives) 5. Kattis - inversefactorial * (good problem; number of digits in factorial)
6. Kattis - loworderzeros * (last non zero digit of factorial; classic)
Extra UVa: 00884, 01246, 10290, 10820, 10958, 11064, 11086, 11226, 12005,
7. Kattis - namethatpermutation * (permutation number; involving factorial)
13185, 13194.
Extra UVa: 00324, 00568, 00623, 10220, 10323, 10338, 12934.
Extra Kattis: listgame.
Extra Kattis eulersnumber, howmanydigits.
e. Modified Sieve
h. Working with Prime Factors
1. Entry Level: UVa 10699 - Count the ... * (numDiffPF(N) for a range)
2. UVa 10990 - Another New Function * (compute a range of Euler Phi 1. Entry Level: Kattis - factovisors * (factorize m; see if it has support in n!;
values; DP to compute depth Phi values; finally Max 1D Range Sum DP) Legendre’s formula; also available at UVa 10139 - Factovisors)
3. UVa 11426 - GCD - Extreme (II) * (pre-calculate EulerPhi(N), the 2. UVa 10680 - LCM * (use primefactors([1..N]) to get LCM(1, 2, . . . , N))
answer involves EulerPhi) 3. UVa 11347 - Multifactorials * (prime-power factorization; numDiv(N))
4. UVa 12043 - Divisors * (sumDiv(N) and numDiv(N); brute force) 4. UVa 11395 - Sigma Function * (key hint: a square number multiplied
5. Kattis - data * (numDiffPF(V) for V up to N × 1 000; Brute force combina- by powers of two, i.e., 2^k × i^2 for k ≥ 0, i ≥ 1 has odd sum of divisors)
tion/all subsets; DP Subset) 5. Kattis - consecutivesums * (work with factor; sum of AP series)
6. Kattis - farey * (pre-calculate EulerPhi(N); do prefix sum (1D RSQ) of 6. Kattis - fundamentalneighbors * (reverse prime power notation)
EulerPhi(N) from 1 to each N ; the answer is related to this value) 7. Kattis - iks * (sieve of Eratosthenes; prime factorize each number; spread
7. Kattis - nonprimefactors * (numDiv(i) - numDiffPF(i) ∀i in the range; the the factors around to maximize final GCD/minimize total operations)
I/O files are large so Buffered I/O speed is needed)
Extra UVa: 00160, 00993, 10061, 10484, 10780, 10791, 11889, 13067.
Extra UVa: 10738, 11327.
Extra Kattis: olderbrother, parket, perfectpowers, persistent.
f. GCD and/or LCM20
i. Modular Arithmetic
1. Entry Level: UVa 11417 - GCD * (just use brute force as input is small)
1. Entry Level: UVa 10176 - Ocean Deep; Make it ... * (convert binary
2. UVa 10407 - Simple Division * (subtract the set s with s[0]; find gcd) to decimal digit by digit; do modulo 131071 to the intermediate result)
3. UVa 10892 - LCM Cardinality * (number of divisor pairs of N : (m, n) 2. UVa 10174 - Couple-Bachelor- ... * (no Spinster number)
such that lcm(m, n) = N )
3. UVa 10212 - The Last Non-zero ... * (multiply numbers from N down
4. UVa 11388 - GCD LCM * (use GCD-LCM relationship) to N -M +1; use /10 to discard the trailing zero(es); use %1 Billion)
5. Kattis - prsteni * (GCD of first circle radius with subsequent circle radiuses) 4. UVa 10489 - Boxes of Chocolates * (keep values small with modulo)
6. Kattis - jackpot * (similar to Kattis - smallestmultiple; use Java BigInteger 5. Kattis - anothercandies * (simple modular arithmetic)
or other faster solutions)
6. Kattis - ones * (no factor of 2 and 5 implies that there is no trailing zero;
7. Kattis - smallestmultiple * (simple LCMs of all numbers; use Java BigInteger also available at UVa 10127 - Ones)
to be safe)
7. Kattis - threedigits * (simulate factorial computation; remove trailing zeroes;
Extra UVa: 00106, 00412, 10193, 11774, 11827, 12708, 12852. keep many last few non-zero digits using modulo)
Extra Kattis: doodling, dasblinkenlights. Extra UVa: 00128.
Extra Kattis: modulo, vauvau.

20 21
GCD and/or LCM problems that requires factorization are in ‘Working with Prime Factors’ category. Factorial problems that requires factorization are categorized in ‘Working with Prime Factors’ category.

295 296
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry

j. Extended Euclidean 5.4 Combinatorics


1. Entry Level: UVa 10104 - Euclid Problem * (pure Ext Euclid problem)
Combinatorics is a branch of discrete mathematics 22 concerning the study of countable
2. UVa 10090 - Marbles * (use solution for Linear Diophantine Equation) discrete structures. In programming contests, problems involving combinatorics are usually
3. UVa 10633 - Rare Easy Problem * (let C = N -M , N = 10a+b, and titled ‘How Many [Object]’, ‘Count [Object]’, etc, though some problem authors choose to
M = a; Linear Diophantine Equation: 9a+b = C) hide this fact from their problem titles. Enumerating the objects one by one in order to count
4. UVa 10673 - Play with Floor and Ceil * (uses Extended Euclidean) them usually leads to TLE. The solution code is usually short, but finding the (potentially
5. Kattis - candydistribution * (the problem boils down to finding C^{-1} (mod recursive) formula takes some mathematical brilliance and also patience.
K); be careful when the answer is “IMPOSSIBLE” or ≤ K) It is also a good idea to study/memorize the common ones like the Fibonacci-related
6. Kattis - modulararithmetic * (the division operation requires modular in- formulas (see Section 5.4.1), Binomial Coefficients (see Section 5.4.2), and Catalan Numbers
verse; use Extended Euclidean algorithm) (see Section 5.4.3) to quickly recognize them. In a team-based competition like ICPC, if such
7. Kattis - soyoulikeyourfoodhot * (Linear Diophantine Equation; still solvable a problem exists in the given problem set, ask one team member who is strong in mathematics
with brute force) to derive the formula (a quick revision on more general combinatorics techniques is in Section
Extra Kattis: jughard, wipeyourwhiteboards. 5.4.4) whereas the other two concentrate on other problems. Quickly code the usually short
formula once it is obtained—interrupting whoever is currently using the computer.
k. Divisibility Test Some of these combinatorics formulas may yield overlapping subproblems that entail the
1. Entry Level: UVa 10929 - You can say 11 * (test divisibility by 11) need to use DP (review Book 1). Some computation values can also be large and entail the
2. UVa 10922 - 2 the 9s * (test divisibility by 9) need to use Big Integer (see Book 1) or modular arithmetic (see Section 5.3.9).
3. UVa 11344 - The Huge One * (use divisibility theory of [1..12])
4. UVa 11371 - Number Theory for ... * (the solving strategy is given) 5.4.1 Fibonacci Numbers
5. Kattis - divisible * (divisibility; linear pass algorithm) Leonardo Fibonacci’s numbers are defined as fib(0) = 0, fib(1) = 1, and for n ≥ 2, fib(n) =
6. Kattis - meowfactor * (divisibility test of 9^{ans}; small range of ans) fib(n-1) + fib(n-2). This generates the following familiar pattern: 0, 1, 1, 2, 3, 5, 8, 13,
7. Kattis - thinkingofanumber * (simple range; use min/max properly; then 21, 34, 55, 89, 144, 233, 377, 610, and so on. This pattern sometimes appears in contest
small divisibility tests) problems which do not mention the term ‘Fibonacci’ at all, like in some problems in the list
Extra Kattis: cocoacoalition, magical3.
of programming exercises in this section (e.g., UVa 10334, Kattis - anti11, etc).
We usually derive the Fibonacci numbers with a ‘trivial’ O(n) (usually bottom-up) DP
technique and not implement the given recurrence directly (as it is very slow). However, the
O(n) DP solution is not the fastest for all cases. Later in Section 5.8, we will show how to
Profile of Algorithm Inventors compute the n-th Fibonacci number (where n is large) in O(log n) time using the efficient
matrix power. As a note, there is an O(log n) closed-form formula to get the n-th Fibonacci
Christian Goldbach (1690-1764) was a German mathematician. He is remembered today number: We compute the value of (φ^n - (-φ)^{-n})/√5 (Binet’s formula) where φ (golden ratio)
for Goldbach’s conjecture that he discussed extensively with Leonhard Euler. is ((1+√5)/2) ≈ 1.618. This value is theoretically exact, however this is not so accurate for
large Fibonacci numbers due to imprecision in floating point computations.
Diophantus of Alexandria (⇡ 200-300 AD) was an Alexandrian Greek mathematician. Fibonacci numbers have many interesting properties. One of them is Zeckendorf’s theo-
He did a lot of study in algebra. One of his works is the Linear Diophantine Equations. rem: every positive integer can be written in a unique way as a sum of one or more distinct
Leonardo Fibonacci (or Leonardo Pisano) (1170-1250) was an Italian mathematician. Fibonacci numbers such that the sum does not include any two consecutive Fibonacci num-
He published a book titled ‘Liber Abaci’ (Book of Abacus/Calculation) in which he discussed bers. For any given positive integer, a representation that satisfies Zeckendorf’s theorem can
a problem involving the growth of a population of rabbits based on idealized assumptions. be found by using a Greedy algorithm: choose the largest possible Fibonacci number at each
The solution was a sequence of numbers now known as the Fibonacci numbers. step. For example: 100 = 89 + 8 + 3; 77 = 55 + 21 + 1, 18 = 13 + 5, etc.
Another property is the Pisano Period where the last one/last two/last three/last four
Edouard Zeckendorf (1901-1983) was a Belgian mathematician. He is best known for his
digit(s) of a Fibonacci number repeats with a period of 60/300/1 500/15 000, respectively.
work on Fibonacci numbers and in particular for proving Zeckendorf’s theorem.
Jacques Philippe Marie Binet (1786-1856) was a French mathematician. He made sig-
nificant contributions to number theory. Binet’s formula expressing Fibonacci numbers in Exercise 5.4.1.1: Try fib(n) = (φ^n - (-φ)^{-n})/√5 on small n and see if this Binet’s formula
closed form is named in his honor, although the same result was known earlier. really produces fib(7) = 13, fib(9) = 34, fib(11) = 89. Now, write a simple program to find
out the first value of n such that the actual value of fib(n) differs from this formula?
Blaise Pascal (1623-1662) was a French mathematician. One of his famous inventions
discussed in this book is the Pascal’s triangle of binomial coefficients.
Eugène Charles Catalan (1814-1894) was a French and Belgian mathematician. He is 22
Discrete mathematics is a study of structures that are discrete (e.g., integers {0, 1, 2, . . . }, graphs/trees
the one who introduced the Catalan numbers to solve a combinatorial problem. (vertices and edges), logic (true/false)) rather than continuous (e.g., real numbers).

297 298
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry

5.4.2 Binomial Coefficients ll C(int n, int k) { // O(log p)


if (n < k) return 0; // clearly
Another classical combinatorics problem is in finding the coefficients of the algebraic ex-
return (((fact[n] * inv(fact[k])) % p) * inv(fact[n-k])) % p;
pansion of powers of a binomial23 . These coefficients are also the numbers of ways that n
}
items can be taken k at a time, usually written as C(n, k) or _nC_k. For example, (x+y)^3 =
1x^3 + 3x^2y + 3xy^2 + 1y^3. The {1, 3, 3, 1} are the binomial coefficients of n = 3 with
// inside int main()
k = {0, 1, 2, 3} respectively. Or in other words, the numbers of ways that n = 3 items can
fact[0] = 1;
be taken k = {0, 1, 2, 3} item(s) at a time are {1, 3, 3, 1}, respectively.
for (int i = 1; i < MAX_N; ++i) // O(MAX_N) pre-processing
We can compute a single (exact) value of C(n, k) with this formula: C(n, k) = n!/((n-k)! × k!)
fact[i] = (fact[i-1]*i) % p; // fact[i] in [0..p-1]
implemented iteratively. However, computing C(n, k) can be a challenge when n and/or cout << C(100000, 50000) << "\n"; // the answer is 149033233
k are large. There are several techniques like: making k smaller (if k > n-k, then we set
k = n-k) because _nC_k = _nC_{n-k}; during intermediate computations, we divide the numbers
first before multiplying it with the next number; or use Big Integer technique discussed in
Book 1 (this should be used only as the last resort as Big Integer operations are slow). Exercise 5.4.2.1: A frequently used k for C(n, k) is k = 2. Show that C(n, 2) = O(n^2).
We can also compute the value of C(n, k) using top-down DP recurrences as shown below
and then use a 2D memo table to avoid re-computations. Exercise 5.4.2.2: Why the code above only works when p > MAX N? Try p = 997 (also a
prime) and compute C(100000, 50000)%p again! What should we use to address this issue?
C(n, 0) = C(n, n) = 1 // base cases. Is it helpful if we use Extended Euclidean algorithm instead of Fermat’s little theorem?
C(n, k) = C(n-1, k-1) + C(n-1, k) // take or ignore an item, n > k > 0.
Exercise 5.4.2.3: In the given code above, we pre-calculate the values of n!%p ∀n ∈ [0..n]
Alternatively, we can also compute the values of C(n, k) from n = 0 up to a certain value in O(n). Actually, we can also pre-calculate the values of inv[n!%p] ∀n ∈ [0..n] in O(n).
of n by constructing the Pascal’s Triangle, a triangular array of binomial coefficients. The Then, each computation of C(n, k) can be O(1). Show how to do it!
leftmost and rightmost entries at each row are always 1. The inner values are the sum of two
values diagonally above it, as shown for row n = 4 below. This is essentially the bottom-up
version of the DP solution above. Notice that the sum of each row is always 2^n.
5.4.3 Catalan Numbers
n = 0 1 row sum = 1 = 2^0 First, let’s define the n-th Catalan number — written using binomial coefficients notation
n = 1 1 1 row sum = 2 = 2^1
_nC_k above — as: Cat(n) = (_{2×n}C_n)/(n + 1); Cat(0) = 1. We will see its purposes below.
n = 2 1 2 1 row sum = 4 = 2^2 If we are asked to compute the values of Cat(n) for several values of n, it may be better
n = 3 1 3 3 1 <- as shown above, row sum = 8 = 2^3 to compute the values using (bottom-up) DP. If we know Cat(n), we can compute Cat(n+1)
\ / \ / \ / by manipulating the formula like shown below.
n = 4 1 4 6 4 1 row sum = 16 = 2^4, and so on
Cat(n) = (2n)! / (n! × n! × (n+1))
As the values of C(n, k) grows very fast, modern programming problems often ask for the
Cat(n+1) = (2×(n+1))! / ((n+1)! × (n+1)! × ((n+1)+1))
= ((2n+2) × (2n+1) × (2n)!) / ((n+1) × n! × (n+1) × n! × (n+2))
= ((2×(n+1)) × (2n+1) × [(2n)!]) / ((n+2) × (n+1) × [n! × n! × (n+1)]).
value of C(n, k)%p instead where p is a prime number. If time limit is not strict, we can
modify the DP formula above to compute the correct values of C(n, k)%p. For a faster
Therefore, Cat(n+1) = ((4n+2)/(n+2)) × Cat(n).
solution, we can apply Fermat’s little theorem on the standard C(n, k) formula (if p is a
sufficiently large prime number greater than MAX N) – see the implementation below with The values of Cat(n) also grows very fast so sometimes the value of Cat(n)%p is the one
O(n) pre-calculation of the values of n!%p – or Lucas’ Theorem (if p is just a prime number asked. If p is prime (and p is a sufficiently large prime number greater than MAX N), we can
but without the greater than MAX N guarantee) – see Section 9.14. use the following Fermat’s little theorem implementation.

ll Cat[MAX_N];
typedef long long ll;
const int MAX_N = 100010;
// inside int main()
const int p = 1e9+7; // p is a prime > MAX_N
Cat[0] = 1;
for (int n = 0; n < MAX_N-1; ++n) // O(MAX_N log p)
ll inv(ll a) { // Fermat’s little theorem
Cat[n+1] = ((4*n+2)%p * Cat[n]%p * inv(n+2)) % p;
return modPow(a, p-2, p); // modPow in Section 5.8
cout << Cat[100000] << "\n"; // the answer is 945729344
} // that runs in O(log p)

ll fact[MAX_N]; We provide our modular arithmetic-style implementations in the source code below:

23
Binomial is a special case of polynomial that only has two terms. Source code: ch5/combinatorics.cpp|java|py

299 300
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry

Catalan numbers are (surprisingly) found in various combinatorial problems. Here, we list In online programming contests where contestant can access the Internet, there is one
down some of the more interesting ones (there are several others). All examples below use more technique that may be useful. First, generate the output for small instances and
n = 3 and Cat(3) = (_{2×3}C_3)/(3 + 1) = (_6C_3)/4 = 20/4 = 5. then search for that sequence at OEIS (The On-Line Encyclopedia of Integer Sequences)
hosted at https://oeis.org/. If you are lucky, OEIS can tell you the name of the sequence
1. Cat(n) counts the number of distinct binary trees with n vertices, e.g., for n = 3: and/or the required general formula for the larger instances. Moreover, you can also use
https://wolframalpha.com/ to help you process/simplify mathematical formulas.
* * * * * There are still many other counting principles and formulas, too many to be discussed
/ / / \ \ \ in this book. As this is not a pure (discrete) mathematics book, we close this section by
* * * * * * giving a quick revision on some combinatorics techniques and give a few written exercises to
/ \ / \ test/further improve your combinatorics skills.
* * * *
• Fundamental counting principle (rule of sum): If there are n ways to do one action,
2. Cat(n) counts the number of expressions containing n pairs of parentheses which are m ways to do another action, and these two actions cannot be done at the same time,
correctly matched, e.g., for n = 3, we have: ()()(), ()(()), (())(), ((())), and (()()). For then there are n + m ways to choose one of these combined actions. We can classify
more details about this problem, see Book 1. Counting Paths on DAG (review Book 1 and also see Section 8.3) as this.
3. Cat(n) counts the number of different ways n + 1 factors can be completely parenthe- • Fundamental counting principle (rule of product): If there are n ways to do one action
sized, e.g., for n = 3 and 3 + 1 = 4 factors: {a, b, c, d}, we have: (ab)(cd), a(b(cd)), and m ways to do another action afterwards, then there are n ⇥ m ways to do both.
((ab)c)d, (a(bc))d, and a((bc)d).
• A permutation is an arrangement of objects without repetition and the order is impor-
4. Cat(n) counts the number of ways a convex polygon (see Section 7.3) of n + 2 sides tant. There are n! permutations of a set of size n distinct elements.
can be triangulated. See Figure 5.2—left.
• If the set is actually a multiset (with duplicates), then there are fewer than n! per-
5. Cat(n) counts the number of monotonic paths along the edges of an n ⇥ n grid, which mutations. Suppose that there are k distinct elements, then the actual number of
do not pass above the diagonal. A monotonic path is one which starts in the lower permutations is: n!/((n1)! × (n2)! × ... × (nk)!) where ni is the frequency of each distinct element i
left corner, finishes in the upper right corner, and consists entirely of edges pointing and n1 + n2 + . . . + nk = n. This formula is also called as the multinomial coefficients,
rightwards or upwards. See Figure 5.2—right. the generalization of the binomial coefficients discussed in Section 5.4.2.

• A k-permutation is an arrangement of a fixed length k of distinct elements taken from


a given set of size n distinct elements. The formula is _nP_k = n!/(n-k)! and can be derived
from the fundamental counting principle above.
Figure 5.2: Left: Triangulation of a Convex Polygon, Right: Monotonic Paths • Principle of inclusion-exclusion: $|A \cup B| = |A| + |B| - |A \cap B|$

• There are $2^n$ subsets (or combinations) of n distinct elements.

• There are C(n, k) number of ways to take k items out of a set of n distinct elements.
Exercise 5.4.3.1*: Which one is the hardest to factorize (see Section 5.3.3) assuming that
n is an arbitrary large integer: f ib(n), C(n, k) (assume that k = n/2), or Cat(n)? Why?
Exercise 5.4.3.2*: Catalan numbers Cat(n) appear in some other interesting problems Exercise 5.4.4.1: Count the number of different possible outcomes if you roll two 6-sided
other than the ones shown in this section. Investigate! dice and flip three 2-sided coins? Will the answer be different if we do this (rolling and
flipping) one by one in some order versus if we do this in one go?
Exercise 5.4.4.2: How many ways to form a three digits number from {0, 1, 2, . . . , 9},
5.4.4 Combinatorics in Programming Contests each digit can only be used once, 0 cannot be used as the leading digit, and one of the digit
must be 7?
The classic combinatorics-related problems involving (pure) Fibonacci and Catalan numbers
are getting rare as of year 2020. However, there are still many other combinatorics problems Exercise 5.4.4.3: How many possible passwords are there if the length of the password
involving permutations (Section 5.3.7) and combinations (that is, Binomial Coefficients, is between 1 to 10 characters and each character can either be alphabet letters [‘a’..‘z’] or
Section 5.4.2). Some of the basic ones are listed in the programming exercises below and [‘A’..‘Z’] or digits [0..9]? Please output the answer modulo 1e9+7.
the more interesting ones (but (very) rare) are listed in Section 9.15. Note that a pure Exercise 5.4.4.4: Suppose you have a 6-letter word ‘FACTOR’. If we take 3 letters from
and/or classic combinatorics problem is rarely used in modern IOI/ICPC but combinatorics this word ‘FACTOR’, we may have another word, like ‘ACT’, ‘CAT’, ‘ROT’, etc. What is
is usually a subproblem of a bigger problem (Section 8.7). the number of di↵erent 3-letter words that can be formed with the letters from ‘FACTOR’ ?

301 302
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry

Exercise 5.4.4.5: Given the 5-letter word ‘BOBBY’, rearrange the letters to get another c. Catalan Numbers
word, e.g., ‘BBBOY’, ‘YOBBB’, etc. How many different permutations are possible? 1. Entry Level: UVa 10223 - How Many Nodes? * (you can precalculate
Exercise 5.4.4.6: Using the principle of inclusion-exclusion, count this: how many integers the answers as there are only 19 Catalan Numbers < $2^{32}-1$)
in [1..1M] that are multiples of 5 and 7? 2. UVa 00991 - Safe Salutations * (Catalan Numbers)
3. UVa 10007 - Count the Trees * (answer is Cat(n) ⇥ n!; Big Integer)
Exercise 5.4.4.7: Solve UVa 11401 - Triangle Counting! “Given n rods of length 1, 2,
4. UVa 10312 - Expression Bracketing * (number of binary bracketing =
. . . , n, pick any 3 of them and build a triangle. How many distinct triangles can you make
Cat(n); number of bracketing = Super-Catalan numbers)
(consider triangle inequality, see Section 7.2)? ($3 \le n \le 1M$) ”.
5. Kattis - catalan * (basic Catalan Numbers)
Exercise 5.4.4.8*: There are A boys and B girls. Count the number of ways to select a 6. Kattis - catalansquare * (Catalan Numbers++; follow the description)
group of people such that the number of boys is equal to the number of girls in the chosen 7. Kattis - fiat * (N -th Catalan Number; use Fermat’s little theorem)
group, e.g., A = 3 and B = 2, then there are 1/6/3 way(s) to select a group with 0/2/4
people, respectively, with a total of 1+6+3 = 10 ways. Extra UVa: 10303, 10643.
c. Others, Easier
1. Entry Level: UVa 11401 - Triangle Counting * (spot the pattern)
2. UVa 11310 - Delivery Debacle * (requires DP: let dp[i] be the number
Programming Exercises related to Combinatorics: of ways the cakes can be packed for a box 2⇥ i)
3. UVa 11597 - Spanning Subtree * (graph theory; trivial)
a. Fibonacci Numbers 4. UVa 12463 - Little Nephew * (double the socks and the shoes first)
1. Entry Level: UVa 00495 - Fibonacci Freeze * (O(n) DP; Big Integer) 5. Kattis - character * (OEIS A000295)
2. UVa 00763 - Fibinary Numbers * (Zeckendorf representation; greedy; 6. Kattis - honey * (OEIS A002898)
Big Integer) 7. Kattis - integerdivision * (count frequencies of each remainder of [0..d-1]; add
3. UVa 10334 - Ray Through Glasses * (combinatorics; Big Integer) C(freq, 2) per such remainder)
4. UVa 10689 - Yet Another Number ... * (easy; Pisano period) Extra UVa: 10079, 11115, 11480, 11609.
5. Kattis - anti11 * (this problem is a modified Fibonacci numbers)
c. Others, Harder
6. Kattis - batmanacci * (Fibonacci; observation on N ; Divide and Conquer)
1. Entry Level: UVa 10784 - Diagonal * (the number of diagonals in n-gon
7. Kattis - rijeci * (simple simulation with a single loop; Fibonacci)
= $n \times (n-3)/2$; use it to derive the solution)
Extra UVa: 00580, 00900, 00948, 01258, 10183, 10450, 10497, 10579, 10862, 2. UVa 01224 - Tile Code * (LA 3904 - Seoul07; derive formula from ob-
11000, 11089, 11161, 11780, 12281, 12620. serving the small instances first)
Extra Kattis: interestingintegers. 3. UVa 11069 - A Graph Problem * (use Dynamic Programming)
b. Binomial Coefficients: 4. UVa 11538 - Chess Queen * (count along rows/columns/diagonals)
5. Kattis - anagramcounting * (use Java BigInteger)
1. Entry Level: UVa 00369 - Combinations * (be careful with overflow issue)
6. Kattis - incognito * (count frequencies; combinatorics; minus one)
2. UVa 10541 - Stripe * (a good combinatorics problem)
7. Kattis - tritiling * (there are two related recurrences here; also available at
3. UVa 11955 - Binomial Theorem * (pure application; DP) UVa 10918 - Tri Tiling)
4. UVa 12712 - Pattern Locker * (the answer is $\sum_{i=M}^{N} C(L \times L, i) \times i!$, but
Extra UVa: 00153, 00941, 10359, 10733, 10790, 11204, 11270, 11554, 12001,
simplify the computation of this formula instead of running it directly)
12022.
5. Kattis - election * (compute the answers with help of binomial coefficients)
6. Kattis - lockedtreasure * (the answer is $_nC_{m-1}$) Extra Kattis: kitchencombinatorics.
7. Kattis - oddbinom * (OEIS A006046) c. Also see Section 9.15 for a few rare (combinatorics) formulas and theorems.

Extra UVa: 00326, 00485, 00530, 00911, 10105, 10375, 10532.


Extra Kattis: insert, perica.
Profile of Algorithm Inventor
Profile of Algorithm Inventor François Édouard Anatole Lucas (1842-1891) was a French mathematician. Lucas is
known for his study of the Fibonacci and Lucas sequence. In this book, we discuss Lucas’
Pierre de Fermat (1607-1665) was a French Lawyer and a mathematician. In context Theorem to compute the remainder of division of the binomial coefficient C(n, k) by a prime
of Competitive Programming, he is best known for his Fermat’s little theorem as used number p in terms of the base p expansions of the integers n and k. This solution that is
in Section 5.3.9, 5.4.2, and 5.4.3. discussed in Section 9.14 is stronger than the one presented in Section 5.4.2.

303 304
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.5. PROBABILITY THEORY c Steven, Felix, Suhendry

5.5 Probability Theory – Abridged problem description of UVa 10759 - Dice Throwing: n common cubic
dice are thrown. What is the probability that the sum of all thrown dices is at
Probability Theory is a branch of mathematics dealing with the analysis of random phe- least x? (constraints: $1 \le n \le 24$, $0 \le x < 150$).
nomena. Although an event like an individual (fair) coin toss is random, the sequence of
random events will exhibit certain statistical patterns if the event is repeated many times. The sample space (the denominator of the probability value) is very simple to
This can be studied and predicted. For example, the probability of a head appearing is 1/2 compute. It is $6^n$.
(similarly with a tail). Therefore, if we flip a (fair) coin n times, we expect that we see heads The number of events is slightly harder to compute. We need a (simple) DP
n/2 times. because there are lots of overlapping subproblems. The state is (dice_left, score)
In programming contests, problems involving probability are either solvable with: where dice_left keeps track of the remaining dice that we can still throw (starting
from n) and score counts the accumulated score so far (starting from 0). DP can
• Closed-form formula. For these problems, one has to derive the required (usually O(1)) be used as there are only $n \times (n \times 6) = 6n^2$ distinct states for this problem.
formula. For example, let’s discuss how to derive the solution for UVa 10491 - Cows
and Cars24 , which is a generalized version of a TV show: ‘The Monty Hall problem’25 . When dice_left = 0, we return 1 (event) if score $\ge$ x, or return 0 otherwise;
When dice_left > 0, we try throwing one more dice. The outcome v for this dice
You are given $N_{COWS}$ number of doors with cows, $N_{CARS}$ number of doors with cars,
can be one of six values and we move to state (dice_left-1, score+v). We sum all
and $N_{SHOW}$ number of doors (with cows) that are opened for you by the presenter.
the events. The time complexity is $O(6n^2 \times 6) = O(36n^2)$ which is very small as
Now, you need to count the probability of winning a car (by opening a door that has
$n \le 24$ in this problem.
a car behind it) assuming that you will always switch to another unopened door.
The first step is to realize that there are two ways to get a car. Either you pick a cow One final requirement is that we have to use gcd (see Section 5.3.6) to simplify the
first and then switch to a car, or you pick a car first, and then switch to another car. probability fraction (see Section 5.2). In some other problems, we may be asked to
The probability of each case can be computed as shown below. output the probability value correct to a certain digit after decimal point (either
between [0.0..1.0] or as percentages [0.0..100.0]).
In the first case, the chance of picking a cow first is $N_{COWS}/(N_{COWS} + N_{CARS})$.
Then, the chance of switching to a car is $N_{CARS}/(N_{CARS} + N_{COWS} - N_{SHOW} - 1)$. – Abridged problem description of Kattis - bobby: Betty has an S-sided fair dice
Multiply these two values together to get the probability of the first case. The -1 is to (having values 1 through S). Betty challenges Bobby to obtain a total value R
account for the door that you have already chosen, as you cannot switch to it. on at least X out of Y rolls. If Bobby is successful, Betty will give Bobby W
The probability of the second case can be computed in a similar manner. The chance times of his initial bet. Should Bobby take the bet? Or in another word, is his
of picking a car first is $N_{CARS}/(N_{CARS} + N_{COWS})$. Then, the chance of switching to expected return greater than his original bet?
a car is $(N_{CARS} - 1)/(N_{CARS} + N_{COWS} - N_{SHOW} - 1)$. Both -1 account for the car To simplify, let’s assume that Bobby bets 1 unit of currency, is his expected return
that you have already chosen. strictly greater than 1 unit?
Sum the probability values of these two cases together to get the final answer.
For a single roll of an S-sided fair dice, Bobby’s chance to hit R or higher (a
• Exploration of the search (sample) space to count number of events (usually harder to success) is $p_{success} = \frac{S-R+1}{S}$ and consequently Bobby’s chance to hit R-1 or lower
count; may deal with combinatorics—see Section 5.4, Complete Search—see Book 1, (a failure) is $\frac{R-1}{S}$ (or $1 - p_{success}$).
or Dynamic Programming–see Book 1) over the countable sample space (usually much
We can then write a recursive function exp_val(num_roll, num_success). We
simpler to count). Examples:
simulate the roll one by one. The base case is when num_roll == Y where
– ‘UVa 12024 - Hats’ is a problem of n people who store their n hats in a cloakroom we return W if num_success $\ge$ X or 0 otherwise. In general case, we do one
for an event. When the event is over, these n people take their hats back. Some more throw that can be either a success with probability $p_{success}$ or a failure
take a wrong hat. Compute how likely is that everyone takes a wrong hat. with probability $(1 - p_{success})$ and add both expected values due to linearity of
This problem can be solved via brute-force and pre-calculation by trying all n! expectation. The time complexity is $O(Y^2)$ which is very small as $Y \le 10$.
permutations and see how many times the required events appear over n! because
n  12 in this problem and such O(n! ⇥ n) naı̈ve solution will only take about a
minute to run. However, a more math-savvy contestant can use this Derangement Exercise 5.5.1: Instead of memorizing the formula, show how to derive the Derangement
(DP) formula instead: An = (n-1) ⇥ (An 1 + An 2 ) that will be fast enough for DP formula An = (n-1) ⇥ (An 1 + An 2 ).
much higher n, possibly combined with modular arithmetic. Exercise 5.5.2: There are 15 students in a class. 8 of them are boys and the other 7 are
24
girls. The teacher wants to form a group of 5 students in random fashion. What is the
You may be interested to attempt an interactive problem : Kattis - askmarilyn too.
25 probability that the formed group consists of all girls?
This is an interesting probability puzzle. Readers who have not heard this problem before are encouraged
to do some Internet search and read the history of this problem. In the original problem, $N_{COWS} = 2$,
$N_{CARS} = 1$, and $N_{SHOW} = 1$. The probability of winning by staying with your original choice is $\frac{1}{3}$ and the probability
of winning by switching to another unopened door is $\frac{2}{3}$ and therefore it is always beneficial to switch.

305 306
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.6. CYCLE-FINDING c Steven, Felix, Suhendry

5.6 Cycle-Finding
Programming Exercises about Probability Theory: 5.6.1 Problem Description
a. Probability Theory, Easier Given a function $f : S \to S$ (that maps a natural number from a finite set S to another
natural number in the same finite set S) and an initial value $x_0 \in N$, the sequence of iterated
1. Entry Level: UVa 10491 - Cows and Cars * (2 ways: either pick a cow function values: $\{x_0, x_1 = f(x_0), x_2 = f(x_1), \ldots, x_i = f(x_{i-1}), \ldots\}$ must eventually use
first, then switch to a car; or pick a car first, and then switch to another car)
the same value twice, i.e., $\exists\, i < j$ such that $x_i = x_j$. Once this happens, the sequence must
2. UVa 01636 - Headshot * (LA 4596 - NorthEasternEurope09; ad hoc then repeat the cycle of values from $x_i$ to $x_{j-1}$. Let $\mu$ (the start of cycle) be the smallest
probability question, one tricky special case involving all zeroes)
index i and $\lambda$ (the cycle length) be the smallest positive integer such that $x_\mu = x_{\mu+\lambda}$. The
3. UVa 10238 - Throw the Dice * (DP; s: (dice left, score); try F values; cycle-finding problem26 is defined as the problem of finding $\mu$ and $\lambda$ given $f(x)$ and $x_0$.
Big Integer; no need to simplify the fraction; see UVa 10759) For example, in UVa 00350 - Pseudo-Random Numbers, we are given a pseudo-random
4. UVa 11181 - Probability (bar) Given * (iterative brute force; try all number generator $f(x) = (Z \times x + I) \% M$ with $x_0 = L$ and we want to find out the sequence
possibilities) length before any number is repeated (i.e., the $\lambda$). A good pseudo-random number generator
5. Kattis - bobby * (computation of expected value) should have a large $\lambda$. Otherwise, the numbers generated will not look ‘random’.
6. Kattis - dicebetting * (s: (dice left, distinct numbers so far); each throw can Let’s try this process with the sample test case Z = 7, I = 5, M = 12, L = 4, so we
increase distinct numbers so far or not) have $f(x) = (7 \times x + 5) \% 12$ and $x_0 = 4$. The sequence of iterated function values is
7. Kattis - odds * (complete search; simple probability) {4, 9, 8, 1, 0, 5, 4, . . .}. We have $\mu = 0$ and $\lambda = 6$ as $x_0 = x_{\mu+\lambda} = x_{0+6} = x_6 = 4$. The
Extra UVa: 10328, 10759, 12024, 12114, 12230, 12457, 12461. sequence of iterated function values cycles from index 6 onwards.
On another test case Z = 26, I = 11, M = 80, L = 7, we have f (x) = (26 ⇥ x + 11)%80
Extra Kattis: dicegame, orchard, password, secretsanta. and x0 = 7. The sequence of iterated function values is {7, 33, 69, 45, 61, 77, 13, 29, 45, . . .}.
b. Probability Theory, Harder This time, we have $\mu = 3$ and $\lambda = 5$.
1. Entry Level: UVa 11628 - Another lottery * (p[i] = ticket bought by i
at the last round/total tickets bought at the last round by all n; gcd) 5.6.2 Solutions using Efficient Data Structures
2. UVa 10056 - What is the Probability? * (get the closed form formula) A simple algorithm that will work for many cases and/or variants of this cycle-finding
3. UVa 10648 - Chocolate Box * (DP; s: (rem boxes, num empty)) problem uses an efficient data structure to store key to value information: a number xi
4. UVa 11176 - Winning Streak * (DP, s: (rem games, streak); t: lose this (the key) has been first encountered at iteration i (the value) in the sequence of iterated
game, or win the next W = [1..n] games and lose the (W+1)-th game) function values. Then for xj that is encountered later (j > i), we test if xj is already stored
5. Kattis - anthony * (DP probability; need to drop one parameter (N or M ) in the data structure. If it is, it implies that $x_j = x_i$, $\mu = i$, $\lambda = j - i$. This algorithm
and recover it from the other one) runs in $O((\mu + \lambda) \times DS\_cost)$ where DS_cost is the cost per one data structure operation
6. Kattis - goodcoalition * (DP probability; like Knapsack) (insert/search). This algorithm requires at least $O(\mu + \lambda)$ space to store past values.
7. Kattis - lostinthewoods * (simulate random walks of various lengths and dis- For many cycle-finding problems with rather large S (and likely large $\mu + \lambda$), we can use
tribute the probabilities per iteration; the answer will converge eventually) $O(\mu + \lambda + buffer)$ space C++ STL unordered_map/Java HashMap/Python dict/OCaml
Hashtbl to store/check the iteration indices of past values in O(1) time. But if we just need
Extra UVa: 00542, 00557, 10218, 10777, 11021, 11346, 11500, 11762.
to stop the algorithm upon encountering the first repeated number, we can use C++ STL
Extra Kattis: 2naire, anotherdice, bond, bribe, explosion, genius, gnollhypoth- unordered set/Java HashSet/Python set (curly braces {}) instead.
esis, pollygone, raffle, redsocks. For other cycle-finding problems with relatively small S (and likely small $\mu + \lambda$), we
may even use the O(|S|) space Direct Addressing Table (DAT) to store/check the iteration
indices of past values also in O(1) time.
Note that by trading-off (large, up to $O(\mu + \lambda)$) memory space, we can actually solve
Profile of Algorithm Inventors this cycle-finding problem in efficient $O(\mu + \lambda)$ runtime.

John M. Pollard (born 1941) is a British mathematician who has invented algorithms for
the factorization of large numbers (the Pollard’s rho algorithm, see Section 9.12) and for the Exercise 5.6.2.1: Notice that on many random test cases of UVa 00350, the values of µ
calculation of discrete logarithms (not discussed in this book). and $\lambda$ are close to 0. However, generate a simple test case (choose Z, I, M , and L) for UVa
00350 so that even an $O(\mu + \lambda)$ algorithm really runs in O(M ), i.e., almost, if not all possible
Richard Peirce Brent (born 1946) is an Australian mathematician and computer scientist. integers $\in$ [0..M -1] are used before a cycle is detected.
His research interests include number theory (in particular factorization), random number
generators, computer architecture, and analysis of algorithms. He has invented or co-invented
various mathematics algorithms. 26
We can also view this problem as a graph problem, i.e., finding the start and length of a cycle in a
functional graph/pseudo tree.

307 308
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.6. CYCLE-FINDING c Steven, Felix, Suhendry

5.6.3 Floyd’s Cycle-Finding Algorithm 2. Finding µ


However, there is an even better algorithm called Floyd’s cycle-finding algorithm that also Next, we reset hare back to x0 and keep tortoise at its current position. Now, we advance
runs in $O(\mu + \lambda)$ time complexity but only uses O(1) memory27 space—much smaller than both pointers to the right one step at a time, thus maintaining the $k\lambda$ gap between the two
the solutions using efficient data structures above. This algorithm is also called ‘the tortoise pointers. When tortoise and hare point to the same value, we have just found the first
and hare (rabbit)’ algorithm. It has three components that we describe below using the repetition of length $k\lambda$. Since $k\lambda$ is a multiple of $\lambda$, it must be true that $x_\mu = x_{\mu+k\lambda}$. The
function $f(x) = (Z \times x + I) \% M$ and Z = 26, I = 11, M = 80, L = 7. first time we encounter the first repetition of length $k\lambda$ is the value of the $\mu$. In Table 5.3
and Figure 5.4—left, we find that µ = 3.
1. Efficient Way to Detect a Cycle: Finding $k\lambda$
µ x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13
Observe that for any $i \ge \mu$, $x_i = x_{i+k\lambda}$, where k > 0, e.g., in Table 5.2, $x_3 = x_{3+1 \times 5} = x_8 =$ 7 33 69 45 61 77 13 29 45 61 77 13 29 45
$x_{3+2 \times 5} = x_{13} = 45$, and so on. If we set $k\lambda = i$, we get $x_i = x_{i+i} = x_{2i}$. Floyd’s cycle-finding 0 H T
algorithm exploits this technique. 1 H T
2 H T
i x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 3 H T
7 33 69 45 61 77 13 29 45 61 77 13 29 45
0 TH Table 5.3: Part 2: Finding µ
1 T H
2 T H
3 T H
4 T H
5 T H

Table 5.2: Part 1: Finding $k\lambda$, $f(x) = (26 \times x + 11) \% 80$, $x_0 = 7$

Figure 5.4: Left: Finding $\mu = 3$; Right: Finding $\lambda = 5$

3. Finding $\lambda$

x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13


7 33 69 45 61 77 13 29 45 61 77 13 29 45
Figure 5.3: An Example of Finding $k\lambda = 5$ (one step before t and h point at $x_5 = x_{10} = 77$) 1 T H
2 T H
The Floyd’s cycle-finding algorithm maintains two pointers called the ‘tortoise’ (the slower 3 T H
one) at xi and the ‘hare’ (the faster one) at x2i . Initially, both are at x0 . At each step of 4 T H
the algorithm, tortoise is moved one step to the right and the hare is moved two steps to 5 T H
the right28 in the sequence. Then, the algorithm compares the sequence values at these two Table 5.4: Part 3: Finding $\lambda$
pointers. The smallest value of i > 0 for which both tortoise and hare point to equal values
is the value of $k\lambda$ (multiple of $\lambda$). We will determine the actual $\lambda$ from $k\lambda$ using the next Once we get $\mu$, we let the tortoise stay in its current position and set hare next to it. Now,
two steps. In Table 5.2 and Figure 5.3, when i = 5, we have $x_5 = x_{10} = x_{5+5} = x_{5+k\lambda} = 77$. we move the hare iteratively to the right one by one. The hare will point to a value that is
So, $k\lambda = 5$. In this example, we will see below that k is eventually 1, so $\lambda = 5$ too. the same as the tortoise for the first time after $\lambda$ steps. In Table 5.4 and Figure 5.4—right,
27
But this advantage is hard to test in an online judge setup though, thus the efficient data structure we see that after the hare moves five times, $x_8 = x_{8+5} = x_{13} = 45$. So, $\lambda = 5$. Therefore, we
solutions shown earlier are probably enough to solve most cycle-finding problems. report $\mu = 3$ and $\lambda = 5$ for $f(x) = (26 \times x + 11) \% 80$ and $x_0 = 7$. Overall, this algorithm
28
To move right one step from $x_i$, we use $x_i = f(x_i)$. To move right two steps from $x_i$, we use $x_i = f(f(x_i))$. runs in $O(\mu + \lambda)$ with only O(1) memory space.

309 310
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.7. GAME THEORY (BASIC) c Steven, Felix, Suhendry

4. The Implementation of Floyd’s Cycle-Finding Algorithm 5.7 Game Theory (Basic)


The working C/C++ implementation of this algorithm (with comments) is shown below: Problem Description
ii floydCycleFinding(int x0) { // f(x) is defined above Game Theory is a mathematical model of strategic situations (not necessarily games as
// 1st part: finding k*mu, hare h’s speed is 2x tortoise t’s in the common meaning of ‘games’) in which a player’s success in making choices depends
int t = f(x0), h = f(f(x0)); // f(x0) is after x0 on the choices of others. Many programming problems involving game theory are classified
while (t != h) { t = f(t); h = f(f(h)); } as Zero-Sum Games—a mathematical way of saying that if one player wins, then the
// 2nd part: finding mu, hare h and tortoise t move at the same speed other player loses. For example, a game of Tic-Tac-Toe (e.g., UVa 10111), Chess, various
int mu = 0; h = x0; number/integer games (e.g., UVa 10368, 10578, 10891, 11489, Kattis - amultiplicationgame),
while (t != h) { t = f(t); h = f(h); ++mu; } and others (Kattis - bachetsgame) are games with two players playing alternately (usually
// 3rd part: finding lambda, hare h moves, tortoise t stays perfectly) and (usually) there can only be one winner.
int lambda = 1; h = f(t); The common question asked in programming contest problems related to game theory is
while (t != h) { h = f(h); ++lambda; } whether the starting player of a two player competitive game has a winning move assuming
return {mu, lambda}; that both players are doing Perfect Play. That is, each player always chooses the most
} optimal choice available to him.

For more examples, visit the VisuAlgo, cycle-finding visualization and define your own29 Decision Tree
$f(x) = (a \times x^2 + b \times x + c) \% M$ and your own $x_0$ to see this algorithm in action. One way is to write a recursive code to explore the Decision Tree of the game (a.k.a. the
Game Tree). If there is no overlapping subproblem, pure recursive backtracking is suitable.
Visualization: https://visualgo.net/en/cyclefinding
Otherwise, Dynamic Programming is needed. Each vertex describes the current player and
Source code: ch5/UVa00350.cpp|java|py|ml the current state of the game. Each vertex is connected to all other vertices legally reachable
from that vertex according to the game rules. The root vertex describes the starting player
and the initial game state. If the game state at a leaf vertex is a winning state, it is a win
Exercise 5.6.3.1*: Richard Peirce Brent invented an improved version of Floyd’s cycle- for the current player (and a lose for the other player). At an internal vertex, the current
finding algorithm shown above. Study and implement Brent’s algorithm [4]. player chooses a vertex that guarantees a win with the largest margin (or if a win is not
possible, chooses a vertex with the least loss). This is called the Minimax strategy.
For example, in UVa 10368 - Euclid’s Game, there are two players: Stan (player 0)
and Ollie (player 1). The state of the game is a triple of integers (id, a, b). The current
player id can subtract any positive multiple of the lesser of the two numbers, integer b,
Programming Exercises related to Cycle-Finding:
from the greater of the two numbers, integer a, provided that the resulting number must be
nonnegative. We always maintain that $a \ge b$. Stan and Ollie play alternately, until one
1. Entry Level: UVa 00350 - Pseudo-Random Numbers * (very basic cycle-
finding problem; simply run Floyd’s cycle-finding algorithm) player is able to subtract a multiple of the lesser number from the greater to reach 0, and
thereby wins. The first player is Stan. The decision tree for a game with initial state id = 0,
2. UVa 11036 - Eventually periodic ... * (cycle-finding; evaluate Reverse Polish
a = 34, and b = 12 is shown in Figure 5.5.
f with a stack)
Let’s trace what happens in Figure 5.5. At the root (initial state), we have triple
3. UVa 11053 - Flavius Josephus ... * (cycle-finding; the answer is N - $\lambda$) (0, 34, 12). At this point, player 0 (Stan) has two choices: either to subtract a - b = 34 - 12 =
4. UVa 11511 - Frieze Patterns * (cycle-finding on vectors; notice that the 22 and move to vertex (1, 22, 12) (the left branch) or to subtract a - 2 × b = 34 - 2 × 12 = 10
pattern will cycle fast) and move to vertex (1, 12, 10) (the right branch). We try both choices recursively.
5. Kattis - dragondropped * (interactive cycle finding problem; tight constraints) Let’s start with the left branch. At vertex (1, 22, 12)—(Figure 5.5—B), the current player
1 (Ollie) has no choice but to subtract a - b = 22 - 12 = 10. We are now at vertex (0, 12, 10)—
6. Kattis - fibonaccicycles * (detect cycle of fib(n)%k using fast data structure)
(Figure 5.5—C). Again, Stan only has one choice which is to subtract a - b = 12 - 10 = 2.
7. Kattis - rats * (string processing plus cycle-finding; unordered set) We are now at leaf vertex (1, 10, 2)—(Figure 5.5—D). Ollie has several choices but Ollie can
Extra UVa: 00202, 00275, 00408, 00547, 00942, 00944, 10162, 10515, 10591, definitely win as a - 5 × b = 10 - 5 × 2 = 0 and it implies that vertex (0, 12, 10) is a losing
11549, 11634, 12464, 13217. state for Stan and vertex (1, 22, 12) is a winning state for Ollie.
Now we explore the right branch. At vertex (1, 12, 10)—(Figure 5.5—E), the current
Extra Kattis: cool1, happyprime, partygame.
player 1 (Ollie) has no choice but to subtract a - b = 12 - 10 = 2. We are now at leaf
vertex (0, 10, 2)—(Figure 5.5—F). Stan has several choices but Stan can definitely win as
29
a - 5 × b = 10 - 5 × 2 = 0 and it implies that vertex (1, 12, 10) is a losing state for Ollie.
This is slightly more generic than the f (x) = (Z ⇥ x + I)%M shown in this section.

311 312
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.7. GAME THEORY (BASIC) c Steven, Felix, Suhendry

Figure 5.6: Partial Decision Tree for an instance of ‘A multiplication game’

Game Theory in Programming Contests


Figure 5.5: Decision Tree for an instance of ‘Euclid’s Game’
Game Theory problems that are discussed in this section are the basic ones that can still
Therefore, for player 0 (Stan) to win this game, Stan should choose a - 2 × b = 34 - 2 × 12 be solved with basic problem solving paradigms/algorithms discussed earlier. However,
first, as this is a winning move for Stan—(Figure 5.5—A). there are more challenging forms of Game Theory-related problems that is discussed later
Implementation wise, the first integer id in the triple can be dropped as we know that in Section 9.16.
depth 0 (root), 2, 4, . . . are always Stan’s turns and depth 1, 3, 5, . . . are always Ollie’s turns.
This integer id is used in Figure 5.5 to simplify the explanation.
Programming Exercises related to Game Theory (Basic):

Mathematical Insights to Speed-up the Solution 1. Entry Level: Kattis - euclidsgame * (minimax; backtracking; also available at
UVa 10368 - Euclid’s Game)
Not all game theory problems can be solved by exploring the entire decision tree of the game,
2. UVa 10111 - Find the Winning ... * (Tic-Tac-Toe; minimax; backtracking)
especially if the size of the tree is large. If the problem involves numbers, we may need to
come up with some mathematical insights to speed up the computation. 3. UVa 10536 - Game of Euler * (model the 4 ⇥ 4 board and 48 possible pins
For example, in UVa 00847 - A multiplication game, there are two players: Stan (player as bitmask; then this is a simple two player game)
0) and Ollie (player 1) again. The state of the game30 is an integer p. The current player 4. UVa 11489 - Integer Game * (game theory; reducible to simple math)
can multiply p with any number between 2 to 9. Stan and Ollie again play alternately, until 5. Kattis - bachetsgame * (2 players game; Dynamic Programming; also available at
one player is able to multiply p with a number between 2 to 9 such that $p \ge n$ (n is the UVa 10404 - Bachet’s Game)
target number), and thereby win. The first player is Stan with p = 1.
6. Kattis - blockgame2 * (observe the pattern; 2 winnable cases if N == M and
Figure 5.6 shows an instance of this multiplication game with n = 17. Initially, player 0 N %M == 0; only 1 move if M < N < 2M ; we can always win if N > 2M )
(Stan) has up to 8 choices (to multiply p = 1 by [2..9]). However, all of these 8 states are
7. Kattis - linije * (game theory; check conditions on how Mirko can win and when
winning states of player 1 as player 1 can always multiply the current p by [2..9] to make
Slavko can win; involves MCBM)
$p \ge 17$—(Figure 5.6—B). Therefore player 0 (Stan) will surely lose—(Figure 5.6—A).
As 1 < n < 4 294 967 295, the resulting decision tree on the largest test case can be Extra UVa: 10578, 12293, 12469.
extremely huge. This is because each vertex in this decision tree has a huge branching factor Extra Kattis: amultiplicationgame, cuttingbrownies, irrationaldivision, ivana, joy-
of 8 (as there are 8 possible numbers to choose from between 2 to 9). It is not feasible to lessgame, peggamefortwo.
actually explore the decision tree.
It turns out that the optimal strategy for Stan to win is to always multiply p with
9 (the largest possible) while Ollie will always multiply p with 2 (the smallest possible).
Such optimization insights can be obtained by observing the pattern found in the output of
smaller instances of this problem. Note that math-savvy contestant may want to prove this
observation first before coding the solution.
30
This time we omit the player id. However, this parameter id is still shown in Figure 5.6 for clarity.

313 314
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.8. MATRIX POWER c Steven, Felix, Suhendry

5.8 Matrix Power 5.8.2 Efficient Modular Power (Exponentiation)


For this subsection, let’s assume that we are using C++/OCaml that does not have built-
5.8.1 Some Definitions and Sample Usages in library function yet for raising an integer36 b to a certain integer power p (mod m)
In this section, we discuss a special case of matrix31 : the square matrix, a matrix with the efficiently. This modular exponentiation function modPow(b, p, m) gets more important
same number of rows and columns, i.e., it has size $n \times n$. To be precise, we discuss a special in modern programming contests because the value of $b^p$ can easily go beyond the limit of
operation 64-bit integer data type and using Big Integer technique is slow (review Book 1).
of square matrix: the powers of a square matrix. Mathematically, $M^0 = I$ and
$M^p = \prod_{i=1}^{p} M$. I is the Identity matrix32 and p is the given power of square matrix M . If For the discussion below, let’s use UVa 01230 (LA 4104) - MODEX that simply asks
we can do this operation in $O(n^3 \log p)$—which is the main topic of this subsection, we can us to compute $x^y \pmod{n}$. Now, if we do modular exponentiation ‘by definition’ as shown
solve some more interesting problems in programming contests, e.g.,: below, we will have an inefficient O(p) solution, especially if p is large.

int mod(int a, int m) { return ((a%m)+m) % m; } // ensure positive answer


• Compute a single 33 Fibonacci number f ib(p) in O(log p) time instead of O(p).
If $p = 2^{30}$, O(p) solution will get TLE34 but $O(\log_2 p)$ solution just needs 30 steps.
int slow_modPow(int b, int p, int m) { // assume 0 <= b < m
This is achievable by using the following equality35 :
int ans = 1;
  for (int i = 0; i < p; ++i) // this is O(p)
$\begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix}^{p} = \begin{bmatrix} fib(p+1) & fib(p) \\ fib(p) & fib(p-1) \end{bmatrix}$
    ans = mod(ans*b, m); // ans always in [0..m-1]
  return ans;
}
For example, to compute f ib(11), we simply multiply the Fibonacci matrix 11 times,
i.e., raise it to the power of 11. The answer is in the secondary diagonal of the matrix. There is a better solution that uses Divide & Conquer principle. We can express bp %m as:
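$b^0 = 1$ (base case).
$b^p = (b^{p/2} \times b^{p/2}) \% m$ if p is even.
$b^p = (b^{p-1} \times b) \% m$ if p is odd.
$\begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix}^{11} = \begin{bmatrix} 144 & 89 \\ 89 & 55 \end{bmatrix} = \begin{bmatrix} fib(12) & fib(11) \\ fib(11) & fib(10) \end{bmatrix}$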
As this approach keeps halving the value of p by two, it runs in O(log p).
Let’s assume that m is (very) large and $0 \le b < m$.
• Compute the number of paths of length L of a graph stored in an Adjacency Matrix— If we compute by definition: $2^9 = 2 \times 2 \times 2 \times 2 \times 2 \times 2 \times 2 \times 2 \times 2 \approx O(p)$ multiplications.
which is a square matrix—in $O(n^3 \log L)$. Example: See the small graph of size n = 4 But with Divide & Conquer: $2^9 = 2^8 \times 2 = (2^4)^2 \times 2 = ((2^2)^2)^2 \times 2 \approx O(\log p)$ multiplications.
stored in an Adjacency Matrix M below. The various paths from vertex 0 to vertex 1
with different lengths are shown in entry M[0][1] after M is raised to power L. A typical recursive implementation of this efficient Divide & Conquer modular exponentia-
tion that solves UVa 01230 (LA 4104) is shown below (runtime: 0.000s):

The graph: 0->1 with length 1: 0->1 (only 1 path) int modPow(int b, int p, int m) { // assume 0 <= b < m
0->1 with length 2: impossible if (p == 0) return 1;
0--1 0->1 with length 3: 0->1->2->1 (and 0->1->0->1) int ans = modPow(b, p/2, m); // this is O(log p)
| 0->1 with length 4: impossible ans = mod(ans*ans, m); // double it first
2--3 0->1 with length 5: 0->1->2->3->2->1 (and 4 others) if (p&1) ans = mod(ans*b, m); // *b if p is odd
return ans; // ans always in [0..m-1]
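}
$M = \begin{bmatrix} 0 & 1 & 0 & 0 \\ 1 & 0 & 1 & 0 \\ 0 & 1 & 0 & 1 \\ 0 & 0 & 1 & 0 \end{bmatrix}$ $M^2 = \begin{bmatrix} 1 & 0 & 1 & 0 \\ 0 & 2 & 0 & 1 \\ 1 & 0 & 2 & 0 \\ 0 & 1 & 0 & 1 \end{bmatrix}$ $M^3 = \begin{bmatrix} 0 & 2 & 0 & 1 \\ 2 & 0 & 3 & 0 \\ 0 & 3 & 0 & 2 \\ 1 & 0 & 2 & 0 \end{bmatrix}$ $M^5 = \begin{bmatrix} 0 & 5 & 0 & 3 \\ 5 & 0 & 8 & 0 \\ 0 & 8 & 0 & 5 \\ 3 & 0 & 5 & 0 \end{bmatrix}$
int main() {
ios::sync_with_stdio(false); cin.tie(NULL);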
int c; cin >> c;
• Speed-up some DP problems as shown later in this section. while (c--) {
int x, y, n; cin >> x >> y >> n;
31
A matrix is a rectangular (2D) array of numbers. Matrix of size m ⇥ n has m rows and n columns. The cout << modPow(x, y, n) << "\n";
elements of the matrix is usually denoted by the matrix name with two subscripts.
32 }
Identity matrix is a square matrix with all zeroes except that cells along the main diagonal are all ones.
33
If we need f ib(n) for all n 2 [0..n], use O(n) DP solution mentioned in Section 5.4.1 instead. return 0;
34
If you encounter input size of ‘gigantic’ value in programming contest problems, like 1B, the problem }
author is usually looking for a logarithmic solution. Notice that log2 (1B) ⇡ log2 (230 ) is still just 30!
35 36
The derivation of this Fibonacci matrix is shown in Section 5.8.4. Technically, an integer is a 1 ⇥ 1 square matrix.

315 316
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.8. MATRIX POWER c Steven, Felix, Suhendry

Java and Python Versions


Matrix matMul(Matrix a, Matrix b) { // normally O(n^3)
Fortunately, Java and Python have built-in library functions to compute modular expo-
Matrix ans; // but O(1) as n = 2
nentiation efficiently in O(log p) time. The Java code uses function modPow(BigInteger
for (int i = 0; i < MAX_N; ++i)
exponent, BigInteger m) of Java BigInteger class to compute (thisexponent mod m) (how-
for (int j = 0; j < MAX_N; ++j)
ever, the runtime: 0.080s is slower than the manual C++/Python versions).
ans.mat[i][j] = 0;
for (int i = 0; i < MAX_N; ++i)
class Main { // UVa 01230 (LA 4104)
for (int k = 0; k < MAX_N; ++k) {
public static void main(String[] args) {
if (a.mat[i][k] == 0) continue; // optimization
Scanner sc = new Scanner(System.in);
for (int j = 0; j < MAX_N; ++j) {
int c = sc.nextInt();
ans.mat[i][j] += mod(a.mat[i][k], MOD) * mod(b.mat[k][j], MOD);
while (c-- > 0) {
ans.mat[i][j] = mod(ans.mat[i][j], MOD); // modular arithmetic
BigInteger x, y, n;
}
x = BigInteger.valueOf(sc.nextInt()); // valueOf converts
}
y = BigInteger.valueOf(sc.nextInt()); // simple integer
return ans;
n = BigInteger.valueOf(sc.nextInt()); // into BigInteger
}
System.out.println(x.modPow(y, n)); // it’s in the library!
}
Matrix matPow(Matrix base, int p) { // normally O(n^3 log p)
}
Matrix ans; // but O(log p) as n = 2
}
for (int i = 0; i < MAX_N; ++i)
for (int j = 0; j < MAX_N; ++j)
Next, the Python code uses function pow(x, y[, z]) to compute (xy mod z). The resulting ans.mat[i][j] = (i == j); // prepare identity matrix
code is even shorter and fast (runtime: 0.000s). while (p) { // iterative D&C version
if (p&1) // check if p is odd
c = int(input()) ans = matMul(ans, base); // update ans
while c > 0: base = matMul(base, base); // square the base
c -= 1 p >>= 1; // divide p by 2
[x, y, n] = map(int, input().split()) # Big Integer by default }
print(pow(x, y, n)) # it’s in the library! return ans;
}
Source code: ch5/UVa01230.cpp|java|py

5.8.4 DP Speed-up with Matrix Power


5.8.3 Efficient Matrix Modular Power (Exponentiation)
In this section, we discuss how to derive the required square matrices for three DP problems
We can use the same O(log p) efficient exponentiation technique shown above to perform and show that raising these three square matrices to the required powers can speed-up the
square matrix exponentiation (matrix power) in O(n3 log p), because each matrix multipli- computation of the original DP problems.
cation37 is O(n3 ). The iterative implementation (for comparison with the recursive imple-
mentation shown earlier) is shown below:
The Derivation of the 2 ⇥ 2 Fibonacci Matrix
ll MOD; We know that fib(0) = 0, fib(1) = 1, and for $n \ge 2$, we have $fib(n) = fib(n-1) + fib(n-2)$.
In Section 5.4.1, we have shown that we can compute f ib(n) in O(n) by using Dynamic
const int MAX_N = 2; // 2x2 for Fib matrix Programming by computing f ib(n) one by one progressively from [2..n]. However, these DP
transitions can be made faster by re-writing the Fibonacci recurrence into matrix form as
struct Matrix { ll mat[MAX_N][MAX_N]; }; // we return a 2D array shown below:
First, we write two versions of Fibonacci recurrence as there are two terms in the recurrence:
ll mod(ll a, ll m) { return ((a%m)+m) % m; } // ensure positive answer
37
There exists a faster but more complex algorithm for matrix multiplication: The $O(n^{2.8074})$ Strassen’s $fib(n+1) + fib(n) = fib(n+2)$
algorithm. Usually we do not use this algorithm for programming contests. Multiplying two Fibonacci $fib(n) + fib(n-1) = fib(n+1)$
matrices shown in this section only requires $2^3 = 8$ multiplications as n = 2. This can be treated as O(1).
Thus, we can compute f ib(p) in O(log p).

317 318
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.8. MATRIX POWER c Steven, Felix, Suhendry

Then, we re-write the recurrence into matrix form: Kattis - linearrecurrence


  
$\begin{bmatrix} a & b \\ c & d \end{bmatrix} \times \begin{bmatrix} fib(n+1) \\ fib(n) \end{bmatrix} = \begin{bmatrix} fib(n+2) \\ fib(n+1) \end{bmatrix}$
We close this section by discussing yet another example on how to derive the required square
matrix for another DP problem: Kattis - linearrecurrence. This is the more general form
Now we have $a \times fib(n+1) + b \times fib(n) = fib(n+2)$ and $c \times fib(n+1) + d \times fib(n) = fib(n+1)$. compared to the previous two examples. Abridged problem description: Given a linear
Notice that by writing the DP recurrence as shown above, we now have a $2 \times 2$ square matrix. recurrence with
degree N as N + 1 integers $a_0, a_1, \ldots, a_N$ that describes linear recurrence
The appropriate values for a, b, c, and d must be 1, 1, 1, 0 and this is the $2 \times 2$ Fibonacci $x_t = a_0 + \sum_{i=1}^{N} a_i \times x_{t-i}$ as well as N integers $x_0, x_1, \ldots, x_{N-1}$ giving the initial values,
matrix shown earlier in Section 5.8.1. One matrix multiplication advances DP computation compute the value of $x_T \% M$. Constraints: $0 \le T \le 10^{18}$; $1 \le M \le 10^9$.
of Fibonacci number one step forward. If we multiply this 2 ⇥ 2 Fibonacci matrix p times, Notice that T is very big and thus we are expecting a O(log T ) solution. A general degree
we advance DP computation of Fibonacci number p steps forward. We now have: N linear recurrence has N + 1 terms, so M will be an (N + 1) ⇥ (N + 1) square matrix. We
     can write N + 1 versions of consecutive xt (s) and rewrite it into matrix form.
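$\underbrace{\begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix} \times \begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix} \times \ldots \times \begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix}}_{p} \times \begin{bmatrix} fib(n+1) \\ fib(n) \end{bmatrix} = \begin{bmatrix} fib(n+1+p) \\ fib(n+p) \end{bmatrix}$
Example 1 (Fibonacci, 1st sample test case): N = 2, a = {0, 1, 1}, and x = {0, 1},
we have $x_t = 0 + 1 \times x_{t-1} + 1 \times x_{t-2}$ that can be written in matrix form as:
2 3 2 3 2 3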
1 0 0 1 a0 = 1
For example, if we set n = 0 and p = 11, and then use O(log p) matrix power instead of 4 0 1 1 5 ⇥ 4 Xi 5 = 4 Xi+1 (what we want) 5
actually multiplying the matrix p times, we have the following calculations:
      0 1 0 Xi 1 Xi
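$\begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix}^{11} \times \begin{bmatrix} fib(1) \\ fib(0) \end{bmatrix} = \begin{bmatrix} 144 & 89 \\ 89 & 55 \end{bmatrix} \times \begin{bmatrix} 1 \\ 0 \end{bmatrix} = \begin{bmatrix} 144 \\ 89 \end{bmatrix} = \begin{bmatrix} fib(12) \\ fib(11) \end{bmatrix}$
Example 2 (2nd sample test case): N = 2, a = {5, 7, 9}, and x = {36 713, 5 637 282},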
we have xt = 5 + 7 ⇥ xt 1 + 9 ⇥ xt 2 that can be written in matrix form as:
This Fibonacci matrix can also be written as shown earlier in Section 5.8.1, i.e., 2 3 2 3 2 3
 p  1 0 0 1 a0 = 1
1 1 f ib(p + 1) f ib(p) 4 5 7 9 5 ⇥ 4 X1 = 5 637 282 5 = 4 X2 (what we want) 5
=
1 0 f ib(p) f ib(p 1) 0 1 0 X0 = 36 713 X1 = 5 637 282
The given sample source code implements this O(log p) algorithm to solve UVa 10229 - Note: the first row and column in M are needed as there is a0 in the given linear recurrence.
Modular Fibonacci that simply asks for F ib(n)%2m .
Source code: ch5/UVa10229.cpp|java|py|ml Exercise 5.8.4.1: Derive Tribonacci matrix using the format of Kattis - linearrecurrence:
N = 3, a = {0, 1, 1, 1}, and x = {0, 0, 1}. The first 9 terms are {0, 0, 1, 1, 2, 4, 7, 13, 24, . . .}.
UVa 10655 - Contemplation, Algebra Exercise 5.8.4.2*: Show how to compute C(n, k) for a very large n but small k (e.g.,
Next, we discuss another example on how to derive the required square matrix for another $0 \le n \le 10^{18}$; $1 \le k \le 1000$) in $O(k^2 \log n)$ time using Matrix Power instead of $O(n \times k)$ or
DP problem: UVa 10655 - Contemplation, Algebra. Abridged problem description: Given in O(1) after O(n) pre-processing as shown in Section 5.4.
the value of $p = a + b$, $q = a \times b$, and n, find the value of $a^n + b^n$.
First, we tinker with the formula so that we can use $p = a + b$ and $q = a \times b$:
Programming Exercises related to Matrix Power:
$a^n + b^n = (a + b) \times (a^{n-1} + b^{n-1}) - (a \times b) \times (a^{n-2} + b^{n-2})$
Next, we set $X_n = a^n + b^n$ to have $X_n = p \times X_{n-1} - q \times X_{n-2}$. 1. Entry Level: UVa 10229 - Modular Fibonacci * (Fibonacci; modPow)
Then, we write this recurrence twice in the following form: 2. UVa 10655 - Contemplation, Algebra * (derive the square matrix)
3. UVa 11582 - Colossal Fibonacci ... * (Pisano period: The sequence f (i)%n
$p \times X_{n+1} - q \times X_n = X_{n+2}$
is periodic; use modPow)
$p \times X_n - q \times X_{n-1} = X_{n+1}$
4. UVa 12796 - Teletransport * (count the number of paths of length L in an
Then, we re-write the recurrence into matrix form: undirected graph where L can be up to $2^{30}$)
$\begin{bmatrix} p & -q \\ 1 & 0 \end{bmatrix} \times \begin{bmatrix} X_{n+1} \\ X_n \end{bmatrix} = \begin{bmatrix} X_{n+2} \\ X_{n+1} \end{bmatrix}$ 5. Kattis - checkingforcorrectness * (Java Big Integer; one subtask uses modPow)
6. Kattis - porpoises * (Fibonacci; matrix power; modulo)
If we raise the $2 \times 2$ square matrix to the power of n (in O(log n) time) and then multiply 7. Kattis - squawk * (count the number of paths of length L in an undirected graph
the resulting square matrix with $X_1 = a^1 + b^1 = a + b = p$ and $X_0 = a^0 + b^0 = 1 + 1 = 2$, after t steps that are reachable from source s)
we have $X_{n+1}$ and $X_n$. The required answer is $X_n$. This is faster than O(n) standard DP Extra UVa: 00374, 01230, 10518, 10870, 11029, 11486, 12470.
computation for the same recurrence.
Extra Kattis: linearrecurrence, powers.
$\begin{bmatrix} p & -q \\ 1 & 0 \end{bmatrix}^{n} \times \begin{bmatrix} X_1 \\ X_0 \end{bmatrix} = \begin{bmatrix} X_{n+1} \\ X_n \end{bmatrix}$

319 320
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.9. SOLUTION TO NON-STARRED EXERCISES c Steven, Felix, Suhendry

5.9 Solution to Non-Starred Exercises


int gcd(int a, int b) {
Exercise 5.2.1*: Ability to spot patterns in data can be very crucial in Competitive Pro- while (b){
gramming. These are many possible interpretations for sequence no 1 and 3 (we show the a %= b;
most probable ones). Sequence no 2 and 4 are more interesting. There are a few plausible swap(a, b);
interpretations and we challenge you to suggest at least one. }
return a;
1. 1, 2, 4, 8, 16, . . . }
This is probably a sequence of powers of two.
So the next three terms are 32, 64, 128.
Exercise 5.3.8.1: GCD(A, B) can be obtained by taking the lower power of the common
2*. 1, 2, 4, 8, 16, 31, . . . prime factors of A and B. LCM(A, B) can be obtained by taking the greater power of
Hint: the last shown term is not 32; maybe not a sequence of powers of two. all the prime factors of A and B. So, $GCD(2^6 \times 3^3 \times 97^1, 2^5 \times 5^2 \times 11^2) = 2^5 = 32$ and
$LCM(2^6 \times 3^3 \times 97^1, 2^5 \times 5^2 \times 11^2) = 2^6 \times 3^3 \times 5^2 \times 11^2 \times 97^1$ = 507 038 400.
3. 2, 3, 5, 7, 11, 13, . . .
Exercise 5.3.8.2: We obviously cannot compute 200 000! using Big Integer technique in 1s
This is probably a sequence of the first few primes.
and see how many trailing zeroes that it has. Instead, we have to notice that a trailing zero
So the next three terms are 17, 19, 23.
is produced every time a prime factor 2 is multiplied with a prime factor 5 of n! and the
4*. 2, 3, 5, 7, 11, 13, 19, . . . number of prime factor 2 is always greater than or equal to the number of prime factor 5.
Hint: the last shown term is not 17, maybe not a sequence of the first few primes. Hence, it is sufficient to just compute Legendre’s formula v5 (n!) as the answer.
Exercise 5.4.1.1: Binet’s closed-form formula for Fibonacci: $fib(n) = (\phi^n - (-\phi)^{-n})/\sqrt{5}$
should be correct for larger n. But since double precision data type is limited, we have
Exercise 5.3.4.1:
discrepancies for larger n. This closed form formula is correct up to f ib(75) if implemented
using typical double data type in a computer program. This is unfortunately too small to
int numDiffPF(ll N) { be useful in typical programming contest problems involving Fibonacci numbers.
int ans = 0;
for (int i = 0; i < p.size() && p[i]*p[i] <= N; ++i) { Exercise 5.4.2.1: $C(n, 2) = \frac{n!}{(n-2)! \times 2!} = \frac{n \times (n-1) \times (n-2)!}{(n-2)! \times 2} = \frac{n \times (n-1)}{2} = 0.5n^2 - 0.5n = O(n^2)$.
if (N%p[i] == 0) ++ans; // count this prime factor Exercise 5.4.2.2: The value of n!%p = 0 when $n \ge p$ as p|n! in that case. Then, the output
while (N%p[i] == 0) N /= p[i]; // only once of C(n, k)%p when $n \ge p$ will always be 0, i.e., C(100000, 50000)%997 = 0. To address
} this ‘always 0’ issue (which is not about whether we use Extended Euclidean algorithm
if (N != 1) ++ans; or Fermat’s little theorem to compute the modular multiplicative inverse), we need to use
return ans; Lucas’ theorem that is discussed in Section 9.14.
}
Exercise 5.4.2.3: This alternative solution is commented inside ch5/combinatorics.cpp.
Exercise 5.4.4.1: $6 \times 6 \times 2 \times 2 \times 2 = 6^2 \times 2^3 = 36 \times 8 = 288$ different possible outcomes.
ll sumPF(ll N) { Each (of the two) dice has 6 possible outcomes and each (of the three) coin has 2 possible
ll ans = 0; outcomes. There is no di↵erence whether we do this process one by one or in one go.
for (int i = 0; i < p.size() && p[i]*p[i] <= N; ++i)
while (N%p[i] == 0) { N /= p[i]; ans += p[i]; } Exercise 5.4.4.2: 9 ⇥ 8 (if 7 is the first digit) + 2 ⇥ 8 ⇥ 8 (if 7 is the second or third digit,
if (N != 1) ans += N; recall that the first digit cannot be 0) = 200 di↵erent possible ways.
return ans; Exercise 5.4.4.3: (62 + 622 + . . . + 6210 )%1e9 + 7 = 894 773 311 possible passwords with
} the given criteria.
Exercise 5.4.4.4: $\frac{6!}{(6-3)!} = 6 \times 5 \times 4 = 120$ 3-letters words.
Exercise 5.3.4.2: When N is a prime, then numPF(N) = 1, numDiffPF(N) = 1, sumPF(N)
Exercise 5.4.4.5: $\frac{5!}{3! \times 1! \times 1!} = \frac{120}{6} = 20$ because there are 3 ‘B’s, 1 ‘O’, and 1 ‘Y’.
= N, numDiv(N) = 2, sumDiv(N) = N+1, and EulerPhi(N) = N-1.
Exercise 5.4.4.6: Let A be the set of integers in [1..1M] that are multiples of 5, then
Exercise 5.3.6.1: Multiplying a ⇥ b first before dividing the result by gcd(a, b) has a higher
|A| = 1M/5 = 200 000.
chance of overflow in programming contest than a/gcd(a, b) ⇥ b. In the example given, we
Let B be the set of integers in [1..1M] that are multiples of 7, then |B| = 1M/7 = 142 857.
have a = 2 000 000 000 and b = 8. The LCM is 2 000 000 000—which should fit in 32-bit
Let $A \cap B$ be the set of integers in [1..1M] that are multiples of both 5 and 7 (multiples of
signed integers—can only be properly computed with a/gcd(a, b) × b.
$5 \times 7 = 35$), then $|A \cap B|$ = 1M/35 = 28 571.
Exercise 5.3.6.2: An implementation of iterative gcd: So, $|A \cup B|$ = 200 000 + 142 857 − 28 571 = 314 286.

321 322
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.10. CHAPTER NOTES c Steven, Felix, Suhendry

Exercise 5.4.4.7: The answers for few smallest n = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, . . .} are 5.10 Chapter Notes
{1, 3, 7, 13, 22, 34, 50, 70, 95, 125}. You can generate these numbers using brute force solution
first. Then find the pattern and use it. Notice that the 9 differences between these 10 This chapter has grown significantly since the first edition of this book. However, even
numbers are {+2, +4, +6, +9, +12, +16, +20, +25, +30, . . .}. The 8 differences of these 9 after we reach the fourth edition, we are aware that there are still many more mathematical
differences are {+2, +2, +3, +3, +4, +4, +5, +5, . . .}, which can be exploited. problems and algorithms that have not been discussed in this chapter, e.g.,
Exercise 5.5.1: Let’s label the people with p1 , p2 , . . . , pn and the hats with h1 , h2 , . . . , hn . • There are many more rare combinatorics problems and formulas,
Now consider the first person p1 . This person has n-1 choices of taking someone else’s hat • There are other theorems, hypotheses, and conjectures,
(hi not h1 ). Now consider the follow up action of the original owner of hi , which is pi . There
• (Computational) Geometry is also part of Mathematics, but since we have a special
are two possibilities for pi :
chapter for that, we reserve the discussions about geometry problems in Chapter 7.
• pi does not take h1 , then this problem reduces to derangement problem with n-1 people • Later in Chapter 9, we discuss more rare mathematics algorithms/problems, e.g.,
and n-1 hats because each of the other n-1 people has 1 forbidden choice from among
– Fast Fourier Transform for fast polynomial multiplication (Section 9.11),
the remaining n-1 hats (pi is forbidden to take h1 ).
– Pollard’s rho algorithm for fast integer factorization (Section 9.12),
• pi somehow takes h1 , then this problem reduces to derangement problem with n-2 – Chinese Remainder Theorem to solve system of congruences (Section 9.13),
people and n-2 hats. – Lucas’ Theorem to compute C(n, k)%p (Section 9.14),
– Rare Formulas or Theorems (Section 9.15),
Hence, $A_n = (n-1) \times (A_{n-1} + A_{n-2})$.
7⇥6 42
– Sprague-Grundy Theorem in Combinatorial Game Theory (Section 9.16),
Exercise 5.5.2: We need to use Combinatorics. C(7, 5)/C(15, 5) = = = 0.2.
15⇥14 210 – Gaussian Elimination for solving systems of linear equations (Section 9.17).
Exercise 5.6.2.1: Simply set Z = 1, I = 1, M as large as possible, e.g., $M = 10^8$, and
There are really many topics about mathematics. This is not surprising since various math-
L = 0. Then the sequence of iterated function values is {0, 1, 2, . . . , M -2, M -1, 0, . . .}.
ematical problems have been investigated by people since hundreds of years ago. Some of
Exercise 5.8.4.1: For Tribonacci with N = 3, a = {0, 1, 1, 1} and x = {0, 0, 1}, them are discussed in this chapter and in Chapter 7-9, many others are not, and yet only 1 or
we have xt = 0 + 1 ⇥ xt 1 + 1 ⇥ xt 2 + 1 ⇥ xt 3 that can be written in matrix form as: 2 will actually appear in a problem set. To do well in ICPC, it is a good idea to have at least
2 3 2 3 2 3 one strong mathematician in your ICPC team in order to have those 1 or 2 mathematical
1 0 0 0 1 a0 = 1 problems solved. Mathematical prowess is also important for IOI contestants. Although the
6 0 1 1 1 7 6 Xi 7 6 Xi+1 7
6 7 6 7 6 7 amount of problem-specific topics to be mastered is smaller, many IOI tasks require some
4 0 1 0 0 5 ⇥ 4 Xi 1 5 = 4 Xi 5
form of ‘mathematical insights’.
0 0 1 0 Xi 2 Xi 1 We end this chapter by listing some pointers that may be of interest: read number theory
books, e.g., [33], investigate mathematical topics in https://www.wolframalpha.com or
Wikipedia, and attempt programming exercises related to mathematical problems like the
ones in https://projecteuler.net [14] and https://brilliant.org [5].

Statistics 1st 2nd 3rd 4th


Number of Pages 17 29 41 52 (+27%)
Written Exercises - 19 30 21+10*=31 (+3%)
Programming Exercises 175 296 369 533 (+44%)

The breakdown of the number of programming exercises from each section is shown below:

Section Title Appearance % in Chapter % in Book


5.2 Ad Hoc Mathematics ... 212 ⇡ 40% ⇡ 6.1%
5.3 Number Theory 147 ⇡ 28% ⇡ 4.3%
5.4 Combinatorics 77 ⇡ 14% ⇡ 2.2%
5.5 Probability Theory 43 ⇡ 8% ⇡ 1.2%
5.6 Cycle-Finding 22 ⇡ 4% ⇡ 0.6%
5.7 Game Theory (Basic) 16 ⇡ 3% ⇡ 0.5%
5.8 Matrix Power 16 ⇡ 3% ⇡ 0.5%
Total 533 ⇡ 15.4%

323 324
6.2. AD HOC STRING (HARDER) c Steven, Felix, Suhendry

6.2 Ad Hoc String (Harder)


Earlier in Book 1, we discussed Ad Hoc string processing problems. In this section, we list
the harder forms that are left here instead of placed in Chapter 1.

• Cipher/Encode/Encrypt/Decode/Decrypt (Harder)
Chapter 6 This is the harder form of this big category.

• Input Parsing (Recursive)


String Processing This is the harder form involving grammars that require recursive (descent) parsers.

• Regular Expression (C++ 11 onwards/Java/Python/OCaml)


Some (but rare) string processing problems are solvable with one liner code that
The Human Genome has approximately 3.2 Giga base pairs uses regex match in <regex>; replaceAll(String regex, String replacement),
— Human Genome Project matches(String regex), useful functions of Java String/Pattern class, Python re,
or OCaml Str module. To be able to do this, one has to master the concept of Reg-
ular Expression (Regex). We will not discuss Regex in detail but we will show two
6.1 Overview and Motivation usage examples:

In this chapter, we present one more topic that appears in ICPC—although not as frequently1 1. In UVa 00325 - Identifying Legal Pascal Real Constants, we are asked to decide if
as graph and mathematics problems—string processing. String processing is common in the the given line of input is a legal Pascal Real constant. Suppose the line is stored
research field of bioinformatics. As the strings (e.g., DNA strings) that the researchers in String s, then the following one-liner Java code is the required solution:
deal with are usually (very) long, efficient string-specific data structures and algorithms are
s.matches("[-+]?\\d+(\\.\\d+([eE][-+]?\\d+)?|[eE][-+]?\\d+)")
necessary. Some of these problems are presented as contest problems in ICPCs. By mastering
the content of this chapter, ICPC contestants will have a better chance at tackling those 2. In UVa 00494 - Kindergarten Counting Game, we are asked to count how many
string processing problems. words are there in a given line. Here, a word is defined as a consecutive sequence
String processing tasks also appear in IOI, but usually they do not require advanced of letters (upper and/or lower case). Suppose the line is stored in String s, then
string data structures or algorithms due to syllabus [15] restrictions. Additionally, the input the following one-liner Java code is the required solution:
and output format of IOI tasks are usually simple2 . This eliminates the need to code tedious
s.replaceAll("[^a-zA-Z]+", " ").trim().split(" ").length
input parsing or output formatting commonly found in the ICPC problems. IOI tasks that
require string processing are usually still solvable using basic problem solving paradigms • Output Formatting
(Complete Search, D&C, Greedy, or DP). It is sufficient for IOI contestants to skim through This is the harder form of this big category.
all sections in this chapter except Section 6.3 which is about string processing with DP.
However, we believe that it may be advantageous for some IOI contestants to learn some of • String Comparison
the more advanced materials outside of their syllabus ahead of time. In this group of problems, the contestants are asked to compare strings with various
This chapter is structured as follows: it starts with a list of medium to hard/tedious Ad criteria. This sub-category is similar to the string matching problems in Section 6.4,
Hoc string problems solvable with just basic string processing skills (but harder than the but these problems mostly use strcmp-related functions.
ones discussed in Book 1). Solving many of them will definitely improve your programming
skills, but we have to make a remark that recent contest problems in ICPC (and also IOI) • Really Ad Hoc
usually do not ask for basic string processing solutions except for the ‘giveaway’ problem These are other Ad Hoc string related problems that cannot be classified into one of
that most teams (contestants) should be able to solve. The more important sections are the the other sub categories above.
string processing problems solvable with Dynamic Programming (DP) (Section 6.3), string
matching problems (Section 6.4), an extensive discussion on string processing problems where
we have to deal with reasonably long strings using Trie/Suffix Trie/Tree/Array (Section Profile of Algorithm Inventor
6.5), an alternative string matching algorithm using hashing (Section 6.6), and finally a
discussion of medium Ad Hoc string problems that uses various string techniques: Anagram Donald Ervin Knuth (born 1938) is a computer scientist and Professor Emeritus at Stan-
and Palindrome (Section 6.7). ford University. He is the author of the popular Computer Science book: “The Art of
Computer Programming”. Knuth has been called the ‘father’ of the analysis of algorithms.
1
One potential reason: String input is harder to parse correctly (due to issues like whitespaces, newlines, Knuth is also the creator of the TEX, the computer typesetting system used in this book.
etc) and string output is harder to format correctly, making such string-based I/O less preferred over the
more precise integer-based I/O.
2
IOI 2010-2019 require contestants to implement functions instead of coding I/O routines.

325 326
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.2. AD HOC STRING (HARDER) c Steven, Felix, Suhendry

d. Output Formatting, Harder


1. Entry Level: Kattis - imagedecoding * (simple Run-Length Encoding)
Programming Exercises related to Ad Hoc String Processing (Harder):
2. UVa 00918 - ASCII Mandelbrot * (tedious; follow the steps)
a. Cipher/Encode/Encrypt/Decode/Decrypt, Harder 3. UVa 11403 - Binary Multiplication * (similar with UVa 00338; tedious)
4. UVa 12155 - ASCII Diamondi * (LA 4403 - KualaLumpur08; use proper
1. Entry Level: Kattis - itsasecret * (playfair cipher; 2D array; quite tedious)
index manipulation)
2. UVa 00213 - Message ... * (LA 5152 - WorldFinals SanAntonio91)
5. Kattis - asciifigurerotation * (rotate the input 90 degrees clockwise; remove
3. UVa 00554 - Caesar Cypher * (try all shifts; output formatting) trailing whitespaces; tedious)
4. UVa 11385 - Da Vinci Code * (string manipulation and Fibonacci) 6. Kattis - juryjeopardy * (tedious problem)
5. Kattis - crackingthecode * (one corner case involving the 25th to 26th char- 7. Kattis - nizovi * (formatting with indentation; not that trivial but sample
acter determination) input/output helps)
6. Kattis - playfair * (follow the description; a bit tedious; also available at UVa
Extra UVa: 00159, 00330, 00338, 00373, 00426, 00570, 00645, 00848, 00890,
11697 - Playfair Cipher)
01219, 10333, 10562, 10761, 10800, 10875.
7. Kattis - textencryption * (convert input alphabets to UPPERCASEs; loop)
Extra Kattis: mathworksheet, pathtracing, rot, wordsfornumbers.
Extra UVa: 00179, 00306, 00385, 00468, 00726, 00741, 00850, 00856.
e. String Comparison
Extra Kattis: goodmessages, grille, monumentmaker, kleptography, permuta-
tionencryption, progressivescramble, ummcode. 1. Entry Level: UVa 11734 - Big Number of ... * (custom comparison)
2. UVa 00644 - Immediate Decodability * (use brute force)
b. Input Parsing (Recursive)
3. UVa 11048 - Automatic Correction ... * (flexible string comparison
1. Entry Level: Kattis - polish * (recursive parser) with respect to a dictionary)
2. UVa 10854 - Number of Paths * (recursive parsing plus counting) 4. UVa 11056 - Formula 1 * (sorting; case-insensitive string comparison)
3. UVa 11070 - The Good Old Times * (recursive grammar evaluation) 5. Kattis - phonelist * (sort the numbers; see if num i is a prefix of num i + 1)
4. UVa 11291 - Smeech * (recursive grammar check) 6. Kattis - rhyming * (compare suffix of a common word with the list of other
5. Kattis - calculator * (recursive parser and evaluator) given words)
6. Kattis - otpor * (parallel vs series evaluation; write a recursive parser; or use 7. Kattis - smartphone * (compare prefix so far with the target string and the
linear pass with stack) 3 suggestions; output 1 of 4 options with shortest number of keypresses)
7. Kattis - subexpression * (recursive parsing; use DP; similar to https:// Extra UVa: 00409, 00671, 00912, 11233, 11713.
visualgo.net/en/recursion tree versus DAG)
Extra Kattis: aaah, detaileddifferences, softpasswords.
Extra UVa: 00134, 00171, 00172, 00384, 00464, 00533, 00586, 00620, 00622,
00743. f. Really Ad Hoc

Extra Kattis: selectgroup. 1. Entry Level: Kattis - raggedright * (just simulate the requirement)
2. UVa 10393 - The One-Handed Typist * (follow problem description)
c. Regular Expression3
3. UVa 11483 - Code Creator * (straightforward; use ‘escape character’)
1. Entry Level: UVa 00494 - Kindergarten ... * (trivial with regex)
4. UVa 12916 - Perfect Cyclic String * (factorize n; string period; also see
2. UVa 00325 - Identifying Legal ... * (trivial with regex) UVa 11452)
3. UVa 00576 - Haiku Review * (solvable with regex) 5. Kattis - irepeatmyself * (string period; complete search)
4. UVa 10058 - Jimmi’s Riddles * (solvable with regex) 6. Kattis - periodicstrings * (brute force; skip non divisor)
5. Kattis - apaxiaaans * (solvable with regex) 7. Kattis - zipfslaw * (sort the words to simplify this problem; also available at
6. Kattis - hidden * (just 1D array manipulation; we can also use regex) UVa 10126 - Zipf’s Law)
7. Kattis - lindenmayorsystem * (DAT; map char to string; simulation; max Extra UVa: 00263, 00892, 00943, 01215, 10045, 10115, 10197, 10361, 10391,
answer  30 ⇥ 55 ; we can also use regex) 10508, 10679, 11452, 11839, 11962, 12243, 12414.
Extra Kattis: apaxianparent, help2, kolone, nimionese, orderlyclass, quickes-
timate, rotatecut, textureanalysis, thore, tolower.

3
There are a few other string processing problems that are solvable with regex too. However, since almost
every string processing problems that can be solved with regex can also be solved with standard ways, it is
not crucial to use regex in competitive programming.

327 328
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.3. STRING PROCESSING WITH DP c Steven, Felix, Suhendry

6.3 String Processing with DP A = ‘xxx...xx’ A = ‘xxx...xx’ A = ‘xxx...x_’


| | |
In this section, we discuss several string processing problems that are solvable with DP B = ‘yyy...yy’ B = ‘yyy...y_’ B = ‘yyy...yy’
technique discussed in Book 1. We discuss two classical problems: String Alignment and match/mismatch delete insert
Longest Common Subsequence that should be known by all competitive programmers (quite
rare nowadays) and one non classical technique: Digit DP (more popular nowadays). Addi-
tionally, we have added a collection of some known twists of these problems.
Note that for DP problems on string, we usually manipulate the integer indices of the
strings and not the actual strings (or substrings) themselves. Passing substrings as param-
eters of recursive functions is strongly discouraged as it is very slow and hard to memoize.

6.3.1 String Alignment (Edit Distance)


The String Alignment (or Edit Distance4 ) problem is defined as follows: Align5 two strings
A with B with the maximum alignment score (or minimum number of edit operations): Figure 6.1: Example: A = “ACAATCC” and B = “AGCATGC” (alignment score = 7)
After aligning A with B, there are a few possibilities between character A[i] and B[i]: With a simple scoring function where a match gets +2 points and mismatch, insert, and
1. Character A[i] and B[i] match and we do nothing (assume this worth ‘+2’ score), delete all get -1 point, the details of the string alignment score of A = “ACAATCC” and B =
2. Character A[i] and B[i] mismatch and we replace A[i] with B[i] (assume ‘-1’ score), “AGCATGC” are shown in Figure 6.1. Initially, only the base cases are known. Then, we can
3. We insert a space in A[i] (also ‘-1’ score), fill the values row by row, left to right. To fill in V (i, j) for i, j > 0, we need three other
4. We delete a letter from A[i] (also ‘-1’ score). values: $V(i-1, j-1)$, $V(i-1, j)$, and $V(i, j-1)$—see the highlighted cell at Figure 6.1,
For example: (note that we use a special symbol ‘_’ to denote a space) middle, row 2, column 3. The best alignment score is stored at the bottom right cell (7).
To reconstruct the solution, we follow the back arrows (see the darker cells) from the
A = ‘ACAATCC’ -> ‘A_CAATCC’
bottom right cell. The solution for the given strings A and B is shown below. Diagonal arrow
B = ‘AGCATGC’ -> ‘AGCATGC_’ // A non optimal alignment
means a match or a mismatch (e.g., the last character ..C). Vertical arrow means a deletion
2-22--2- // Score = 4*2 + 4*-1 = 4
(e.g., ..CAA.. to ..C A..). Horizontal arrow means an insertion (e.g., A C.. to AGC..).
A brute force solution that tries all possible alignments will get TLE even for medium-length
A = ‘A_CAAT[C]C’ // Optimal alignment
strings A and/or B. The solution for this problem is the Needleman-Wunsch (bottom-up) DP
B = ‘AGC_AT[G]C’ // Score = 5*2 + 3*-1 = 7
algorithm [34]. Consider two strings A[1..n] and B[1..m]. We define V (i, j) to be the
score of the optimal alignment between prefix A[1..i] and B[1..j], and score(C1, C2) is The space complexity of this (bottom-up) DP algorithm is O(nm)—the size of the DP table.
a function that returns the score if character C1 is aligned with character C2. We need to fill in all cells in the table in O(1) per cell. Thus, the time complexity is O(nm).
Base cases: Source code: ch6/string alignment.cpp|java|py|ml
$V(0, 0) = 0$ // no score for matching two empty strings
$V(i, 0) = i \times \text{score}(A[i], \_)$ // delete substring A[1..i] to make the alignment, i > 0 Exercise 6.3.1.1: Why is the cost of a match +2 and the costs of replace, insert, delete are
$V(0, j) = j \times \text{score}(\_, B[j])$ // insert substring B[1..j] to make the alignment, j > 0 all -1? Are they magic numbers? Will +1 for match work? Can the costs for replace, insert,
Recurrences: For i > 0 and j > 0: delete be different? Restudy the algorithm and discover the answer.
$V(i, j) = \max(\text{option1}, \text{option2}, \text{option3})$, where Exercise 6.3.1.2: The example source code given in this section only shows the optimal
$\text{option1} = V(i-1, j-1) + \text{score}(A[i], B[j])$ // score of match or mismatch alignment score. Modify the given code to actually show the actual alignment!
$\text{option2} = V(i-1, j) + \text{score}(A[i], \_)$ // delete $A_i$
$\text{option3} = V(i, j-1) + \text{score}(\_, B[j])$ // insert $B_j$ Exercise 6.3.1.3: Show how to use the ‘space saving technique’ shown in Book 1 to improve
this Needleman-Wunsch (bottom-up) DP algorithm! What will be the new space and time
In short, this DP algorithm concentrates on the three possibilities for the last pair of char- complexity of your solution? What is the drawback of using such a formulation?
acters, which must be either a match/mismatch, a deletion, or an insertion. Although we do
not know which one is the best, we can try all possibilities while avoiding the re-computation Exercise 6.3.1.4: The String Alignment problem in this section is called the global align-
of overlapping subproblems (i.e., basically a DP technique). ment problem and runs in O(nm). If the given contest problem is limited to d insertions
or deletions only, we can have a faster algorithm. Find a simple tweak to the Needleman-
4
Another name for ‘edit distance’ is ‘Levenshtein Distance’. One notable application of this algorithm Wunsch algorithm so that it performs at most d insertions or deletions and runs faster!
is the spelling checker feature commonly found in popular text editors. If a user misspells a word, like
‘probelm’, then a clever text editor that realizes that this word has a very close edit distance to the correct Exercise 6.3.1.5: Investigate the improvement of Needleman-Wunsch algorithm (Smith-
word ‘problem’ can do the correction automatically. Waterman algorithm [34]) to solve the local alignment problem!
5
Aligning is a process of inserting spaces to strings A or B such that they have the same number of
characters. You can view ‘inserting spaces to B’ as ‘deleting the corresponding aligned characters of A’.

329 330
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.3. STRING PROCESSING WITH DP c Steven, Felix, Suhendry

6.3.2 Longest Common Subsequence


The Longest Common Subsequence (LCS) problem is defined as follows: Given two strings A
Programming Exercises related to String Processing with DP:
and B, what is the longest common subsequence between them? For example, A = “ACAATCC”
and B = “AGCATGC” have LCS of length 5, i.e., “ACATC”. a. Classic
This LCS problem can be reduced to the String Alignment problem presented earlier, so
1. Entry Level: UVa 10405 - Longest Common ... * (classic LCS problem)
we can use the same DP algorithm. We set the score for mismatch as negative infinity (e.g.,
-1 Billion), score for insertion and deletion as 0, and the score for match as 1. This makes 2. UVa 01192 - Searching Sequence ... * (LA2460 - Singapore01; classic
the Needleman-Wunsch algorithm for String Alignment never consider mismatches. String Alignment DP problem with a bit of (unclear) output formatting)
3. UVa 12747 - Back to Edit ... * (similar to UVa 10635)
4. UVa 13146 - Edid Tistance * (classic Edit Distance problem)
Exercise 6.3.2.1: What is the LCS of A = “apple” and B = “people”? 5. Kattis - inflagrantedelicto * (kp is always 2 (read the problem description);
Exercise 6.3.2.2: The Hamming distance problem, i.e., finding the number of different kr is the LCS of the two permutations plus one; O(n log k) solution)
characters between two equal-length strings can be easily done in O(n). But it can also be 6. Kattis - pandachess * (LCS of 2 permutations → LIS; O(n log k) solution;
reduced to a String Alignment problem. For theoretical interest, assign appropriate scores also see UVa 10635)
to match, mismatch, insert, and delete so that we can compute the answer using Needleman- 7. Kattis - princeandprincess * (find LCS of two permutations; also available
Wunsch algorithm instead! at UVa 10635 - Prince and Princess)

Exercise 6.3.2.3: The LCS problem can be solved in O(n log k) when all characters are Extra UVa: 00164, 00526, 00531, 01207, 01244, 10066, 10100, 10192.
distinct, e.g., if you are given two permutations of length n as in UVa 10635. k is the length Extra Kattis: declaration, ls, signals.
of the answer. Solve this variant! b. Non Classic
1. Entry Level: Kattis - stringfactoring * (s: the min weight of substring [i..j];
also available at UVa 11022 - String Factoring)
6.3.3 Non Classical String Processing with DP 2. UVa 11258 - String Partition * (dp(i) = int from substring [i..k] + dp(k))
In this section, we discuss Kattis - hillnumbers. A hill number is a positive integer, the 3. UVa 11361 - Investigating Div-Sum ... * (counting paths in DAG; need
digits of which possibly rise and then possibly fall, but never fall and then rise, like 12321, insights for efficient implementation; K > 90 is useless; digit DP)
12223, and 33322111. However, 1232321 is not a hill number. Verifying if a given number is 4. UVa 11552 - Fewest Flops * (dp(i, c) = minimum number of chunks after
a hill number or not is trivial. The hard part of the problem is this: Given a single integer considering the first i segments ending with character c)
n (assume it is already vetted as a hill number), count the number of positive hill numbers 5. Kattis - exam * (s: (pos, correct left); t: either your friend is wrong or your
less than or equal to n. The main issue is $1 \le n \le 10^{18}$. friend is right, process accordingly; easier solution exists)
Initially, it may seem impossible to try all numbers $\le n$ (TLE) or create a DP table 6. Kattis - heritage * (s: (cur pos); t: try all N words in dictionary; output
up to $10^{18}$ cells (MLE). However, if we realize that there are only up to 19 digits in $10^{18}$, final answer modulo a prime)
then we can actually treat the numbers as strings of at most 20 digits and process the digits 7. Kattis - hillnumbers * (digit DP; s: (pos, prev digit, is rising, is lower); try
one by one. This is called ‘Digit DP’ in the competitive programming community and not digit by digit; see the discussion in this section)
considered as a classic solution yet. Basically, there are some big numbers and the problem Extra UVa: 11081, 11084, 12855,
is asking for some property of the number that is decomposable to its individual digits.
Extra Kattis: chemistsvows, cudak, digitsum, haiku, zapis.
Realizing this, we can then quickly come up with the initial state s: (pos) and the initial
transition of trying all possible next digit [0..9] one by one. However, we will quickly realize Also see Section 6.7.2 for a classic string problem: Palindrome that has a
that we need to remember what was the previous used digit so we update our state to s: few interesting variants that require DP solutions.
(pos, prev_digit). Now we can check if prev_digit and next_digit is rising, plateau, or
falling as per requirement. However, we will quickly realize that we also need to remember if
we have reached the peak before and are now concentrating on the falling part, so we update
our state to s: (pos, prev_digit, is_rising). We start with is_rising = true and Profile of Algorithm Inventors
can only set is_rising to false at most once in a valid hill number.
Up to here, this state is almost complete but after some initial testing, we will then James Hiram Morris (born 1941) is a Professor of Computer Science. He is a co-discoverer
realize that we count the answer wrongly. It turns out that we still need one more parameter of the Knuth-Morris-Pratt algorithm for string search.
is_lower to have this complete state s: (pos, prev_digit, is_rising, is_lower) where Vaughan Ronald Pratt (born 1944) is a Professor Emeritus at Stanford University. He was
is_lower = false initially and we set is_lower = true once we use next_digit that is one of the earliest pioneers in the field of computer science. He has made several contributions
strictly lower than the actual digit of n at that pos. With this state, we can correctly to foundational areas such as search algorithms, sorting algorithms, and primality testing.
compute the required answer and the details are left behind for the readers. He is also a co-discoverer of the Knuth-Morris-Pratt algorithm for string-search.

331 332
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.4. STRING MATCHING c Steven, Felix, Suhendry

6.4 String Matching 1 2 3 4 5


012345678901234567890123456789012345678901234567890
6
String Matching (a.k.a String Searching ) is a problem of finding the starting index (or T = I DO NOT LIKE SEVENTY SEV BUT SEVENTY SEVENTY SEVEN
indices) of a (sub)string (called pattern P) in a longer string (called text T). Example: Let’s P = SEVENTY SEVEN
assume that we have T = “STEVEN EVENT”. If P = “EVE”, then the answers are index 2 and 0123456789012
7 (0-based indexing). If P = “EVENT”, then the answer is index 7 only. If P = “EVENING”, 1
then there is no answer (no matching found and usually we return either -1 or NULL). ^ the first character of P mismatches with T[i] from index i = 0 to 13
KMP has to shift the starting index i by +1, as with naive matching.
6.4.1 Library Solutions ... at i = 14 and j = 0 ...
1 2 3 4 5
For most pure String Matching problems on reasonably short strings, we can just use the 012345678901234567890123456789012345678901234567890
string library in our programming language. It is strstr in C <string.h>, find in C++ T = I DO NOT LIKE SEVENTY SEV BUT SEVENTY SEVENTY SEVEN
<string>, indexOf in Java String class, find in Python string, and search forward in P = SEVENTY SEVEN
OCaml Str module. Please revisit Chapter 1 for a mini task that discusses these string 0123456789012
library solutions. 1
^ then mismatches at index i = 25 and j = 11
6.4.2 Knuth-Morris-Pratt (KMP) Algorithm
There are 11 matches from index i = 14 to 24, but one mismatch at i = 25 (j = 11). The
In Book 1, we have an exercise of finding all the occurrences of a substring P (of length m) in naïve matching algorithm will inefficiently restart from index i = 15 but KMP can resume
a (long) string T (of length n), if any. The code snippet, reproduced below with comments, from i = 25. This is because the matched characters before the mismatch are “SEVENTY
is actually the naïve implementation of a String Matching algorithm. SEV”. “SEV” (of length 3) appears as BOTH proper suffix and prefix of “SEVENTY SEV”.
This “SEV” is also called the border of “SEVENTY SEV”. We can safely skip index i = 14 to
void naiveMatching() { 21: “SEVENTY ” in “SEVENTY SEV” as it will not match again, but we cannot rule out the
for (int i = 0; i <= n-m; ++i) { // try all starting index possibility that the next match starts from the second “SEV”. So, KMP resets j back to 3,
bool found = true; skipping 11-3 = 8 characters of “SEVENTY ” (notice the trailing space), while i remains at
for (int j = 0; (j < m) && found; ++j) index 25. This is the major difference between KMP and the naïve matching algorithm.
if ((i+j >= n) || (P[j] != T[i+j])) // if mismatch found
found = false; // abort this, try i+1 ... at i = 25 and j = 3 (This makes KMP efficient) ...
if (found) // T[i..i+m-1] = P[0..m-1] 1 2 3 4 5
printf("P is found at index %d in T\n", i); 012345678901234567890123456789012345678901234567890
} T = I DO NOT LIKE SEVENTY SEV BUT SEVENTY SEVENTY SEVEN
} P = SEVENTY SEVEN
0123456789012
1
This naïve algorithm can run in O(n) on average if applied to natural text like the paragraphs
^ immediate mismatches at index i = 25, j = 3
of this book, but it can run in O(nm) with the worst case programming contest input like this:
T = “AAAAAAAAAAB” (‘A’ ten times and then one ‘B’) and P = “AAAAB”. The naïve algorithm This time the prefix of P before mismatch is “SEV”, but it does not have a border, so KMP
will keep failing at the last character of pattern P and then try the next starting index which resets j back to 0 (or in other words, restart matching pattern P from the front again).
is just one further than the previous attempt. This is not efficient. Unfortunately, a good
problem author will include such test cases in their secret test data. ... mismatches from i = 25 to i = 29... then matches from i = 30 to i = 42
In 1977, Knuth, Morris, and Pratt—thus the name of KMP—invented a better String 1 2 3 4 5
Matching algorithm that makes use of the information gained by previous character com- 012345678901234567890123456789012345678901234567890
parisons, especially those that match. KMP algorithm never re-compares a character in T T = I DO NOT LIKE SEVENTY SEV BUT SEVENTY SEVENTY SEVEN
that has matched a character in P. However, it works similarly to the naïve algorithm if the P = SEVENTY SEVEN
first character of pattern P and the current character in T is a mismatch. In the following 0123456789012
example7 , comparing P[j] and T[i] from i = 0 to 13 with j = 0 (the first character 1
of P) is no different from the naïve algorithm.
6
This is a match, so P = ‘SEVENTY SEVEN’ is found at index i = 30. After this, KMP
We deal with this String Matching problem almost every time we read/edit text using a computer. How
many times have you pressed the well-known ‘CTRL + F’ shortcut (standard Windows shortcut for the ‘find knows that “SEVENTY SEVEN” has “SEVEN” (of length 5) as border, so KMP resets j back
feature’) in typical word processing software, web browsers, etc? to 5, effectively skipping 13-5 = 8 characters of “SEVENTY ” (notice the trailing space),
7
The sentence in string T below is just for illustration. It is not grammatically correct. immediately resumes the search from i = 43, and gets another match. This is efficient.

333 334
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.4. STRING MATCHING c Steven, Felix, Suhendry

... at i = 43 and j = 5, we have matches from i = 43 to i = 50 ... We provide our source code that compares the library solution, naı̈ve matching, and one
So P = ‘SEVENTY SEVEN’ is found again at index i = 38. other string matching algorithm: Rabin-Karp that will be discussed in Section 6.6 with the
1 2 3 4 5 KMP algorithm discussed in this section.
012345678901234567890123456789012345678901234567890
T = I DO NOT LIKE SEVENTY SEV BUT SEVENTY SEVENTY SEVEN Source code: ch6/string matching.cpp|java|py|ml
P = SEVENTY SEVEN
0123456789012
Exercise 6.4.1*: Run kmpPreprocess() on P = “ABABA” and show the reset table b!
1
Exercise 6.4.2*: Run kmpSearch() with P = “ABABA” and T = “ACABAABABDABABA”.
To get such speed up, KMP has to preprocess the pattern string and get the ‘reset table’ b
Explain what the KMP search looks like.
(back). If given pattern string P = “SEVENTY SEVEN”, then table b will look like this:
1
0 1 2 3 4 5 6 7 8 9 0 1 2 3
P = S E V E N T Y S E V E N 6.4.3 String Matching in a 2D Grid
b = -1 0 0 0 0 0 0 0 0 1 2 3 4 5
The string matching problem can also be posed in 2D. Given a 2D grid/array of characters
This means, if mismatch happens in j = 11 (see the example above), i.e., after finding a (instead of the well-known 1D array of characters), find the occurrence(s) of pattern P in
match for “SEVENTY SEV”, then we know that we have to retry matching P from index j = the grid. Depending on the problem requirement, the search direction can be up to 4 or 8
b[11] = 3, i.e., KMP now assumes that it has matched only the first three characters of cardinal directions, and either the pattern must be in a straight line or it can bend.
“SEVENTY SEV”, which is “SEV”, because the next match can start with that prefix “SEV”.
The relatively short implementation of the KMP algorithm with comments is shown below. For the example from Kattis - boggle below, the pattern can bend. The solution for such
This implementation has a time complexity of O(n + m), or usually just O(n) as n > m. ‘bendable’ string matching in a 2D grid is usually recursive backtracking (see Book 1). This
is because unlike the 1D counterpart where we always go to the right, at every coordinate
const int MAX_N = 200010; (row, col) of the 2D grid, we have more than one choice to explore. The time complexity is
exponential thus this can only work for a small grid.
char T[MAX_N], P[MAX_N]; // T = text, P = pattern To speed up the backtracking process, usually we employ this simple pruning strategy:
int n, m; // n = |T|, m = |P| once the recursion depth exceeds the length of pattern P, we can immediately prune that
int b[MAX_N]; // b = back table recursive branch. This is also called depth-limited search (see Section 9.20).
ACMA // From Kattis - boggle
void kmpPreprocess() { // call this first
APcA // We can go to 8 directions and the pattern can bend
int i = 0, j = -1; b[0] = -1; // starting values
toGI // ‘contest’ is highlighted as lowercase in the grid
while (i < m) { // pre-process P
nest // can you find ‘CONTEST’, ‘ICPC’, ‘ACM’, and ‘GCPC’?
while ((j >= 0) && (P[i] != P[j])) j = b[j]; // different, reset j
++i; ++j; // same, advance both For the example from UVa 10010, the pattern must be in a straight line. If the grid is
b[i] = j; small we can still use the easier to code recursive backtracking mentioned earlier. However
} if the grid is large, we probably need to do multiple O(n + m) string matchings, one for each
} row/column/diagonal and their reverse directions.

void kmpSearch() { // similar as above abcdefghigg // From UVa 10010 - Where’s Waldorf?
int i = 0, j = 0; // starting values hebkWaldork // We can go to 8 directions, but must be straight
while (i < n) { // search through T ftyawAldorm // ‘WALDORF’ is highlighted as UPPERCASE in the grid
while ((j >= 0) && (T[i] != P[j])) j = b[j]; // if different, reset j ftsimrLqsrc
++i; ++j; // if same, advance both byoarbeDeyv // Can you find ‘BAMBI’ and ‘BETTY’?
if (j == m) { // a match is found klcbqwikOmk
printf("P is found at index %d in T\n", i-j); strebgadhRb // Can you find ‘DAGBERT’ in this row?
j = b[j]; // prepare j for the next yuiqlxcnbjF
}
} Note that the topic of String Matching will be revisited two more times. In Section 6.5, we
} will discuss how to solve this problem using string-specific data structures. In Section 6.6,
we will discuss how to solve this problem using a probabilistic algorithm.

335 336
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

6.5 Suffix Trie/Tree/Array


Programming Exercises related to String Matching: Suffix Trie, Suffix Tree, and Suffix Array are efficient and related data structures for strings.
We did not discuss this topic in Book 1 as these data structures are unique to strings.
a. Standard
1. Entry Level: Kattis - quiteaproblem * (trivial string matching per line)
2. UVa 00455 - Periodic String * (find s in s+s; similar with UVa 10298)
6.5.1 Suffix Trie and Applications
3. UVa 01449 - Dominating Patterns * (LA 4670 - Hefei09; just use strstr, The suffix i (or the i-th suffix) of a string is a ‘special case’ of substring that goes from
Suffix Array will get TLE as there are too many long strings to be processed) the i-th character of the string up to the last character of the string. For example, the 2-nd
4. UVa 11837 - Musical Plagiarism * (transform the input of X notes into suffix of ‘STEVEN’ is ‘EVEN’, the 4-th suffix of ‘STEVEN’ is ‘EN’ (0-based indexing).
X-1 distances; then apply KMP) A Suffix Trie9 of a set of strings S is a tree of all pos-
5. Kattis - geneticsearch * (multiple string matchings) sible suffixes of strings in S. Each edge label represents
6. Kattis - powerstrings * (find s in s+s8 ; similar with UVa 00455; also available a character. Each vertex represents a suffix indicated
at UVa 10298 - Power Strings) by its path label: a sequence of edge labels from root
7. Kattis - scrollingsign * (modified string matching; complete search; also avail- to that vertex. Each vertex is connected to (some of)
able at UVa 11576 - Scrolling Sign) the other 26 vertices (assuming that we only use upper-
Extra UVa: 00886, 11362. case Latin letters) according to the suffixes of strings in
S. The common prefix of two suffixes is shared. Each
Extra Kattis: avion, cargame, deathknight, fiftyshades, hangman, ostgotska,
vertex has two boolean flags. The first/second one is to
redrover, simon, simonsays.
indicate that there exists a suffix/word in S terminating
b. In 2D Grid in that vertex, respectively. Example: If we have S =
1. Entry Level: UVa 10010 - Where’s Waldorf ? * (2D grid; backtracking) {‘CAR’, ‘CAT’, ‘RAT’}, we have the following suffixes
2. UVa 00422 - Word Search Wonder * (2D grid; backtracking) {‘CAR’, ‘AR’, ‘R’, ‘CAT’, ‘AT’, ‘T’, ‘RAT’, ‘AT’, ‘T’}.
After sorting and removing duplicates, we have: {‘AR’,
3. UVa 00736 - Lost in Space * (2D grid; a bit modified)
‘AT’, ‘CAR’, ‘CAT’, ‘R’, ‘RAT’, ‘T’}. Figure 6.2 shows
4. UVa 11283 - Playing Boggle * (2D grid; backtracking) Figure 6.2: Suffix Trie
the Suffix Trie with 7 suffix terminating vertices (filled
5. Kattis - boggle * (2D grid; backtracking) circles) and 3 word terminating vertices (filled circles in-
6. Kattis - kinarow * (brute the top left point of each possible x or o row, then dicated with label ‘In Dictionary’).
straight-line (horizontal, vertical) or two diagonals 2D string matching) Suffix Trie is typically used as an efficient data structure for a dictionary. Assuming that
7. Kattis - knightsearch * (2D grid; backtracking or DP) the Suffix Trie of a set of strings in the dictionary has been built, we can determine if a
Extra UVa: 00604. query/pattern string P exists in this dictionary (Suffix Trie) in O(m) where m is the length
Extra Kattis: hiddenwords.
of string P —this is efficient10 . We do this by traversing the Suffix Trie from the root. For
example, if we want to find whether the word P = ‘CAT’ exists in the Suffix Trie shown in
Figure 6.2, we can start from the root node, follow the edge with label ‘C’, then ‘A’, then
‘T’. Since the vertex at this point has the word-terminating flag set to true, then we know
Profile of Algorithm Inventors that there is a word ‘CAT’ in the dictionary. Whereas, if we search for P = ‘CAD’, we go
through this path: root → ‘C’ → ‘A’ but then we do not have an edge with edge label ‘D’,
Saul B. Needleman and Christian D. Wunsch jointly published the string alignment so we conclude that ‘CAD’ is not in the dictionary.
Dynamic Programming algorithm in 1970. Their DP algorithm is discussed in this book. Below, we provide a basic implementation of a Trie (not the full Suffix Trie). Assuming
Temple F. Smith is a Professor in biomedical engineering who helped to develop the Smith- that we deal with only UPPERCASE alphabets [‘A’..‘Z’], we set each vertex to have up to
Waterman algorithm with Michael Waterman in 1981. The Smith-Waterman 26 ordered edges that represent ‘A’ to ‘Z’ and word terminating flags. We insert each
algorithm serves as the basis for multi sequence comparisons, identifying the segment with the (full) word/string (not the suffixes) of length up to m in S into the Trie one by one. This
maximum local sequence similarity for identifying similar DNA, RNA, and protein segments. runs in O(m) per insertion and there are up to n words to be inserted so the construction
can go up to O(nm). Then, given any pattern string P , we can start from the root and
Michael S. Waterman is a Professor at the University of Southern California. Waterman follow the corresponding edge labels to decide if P is inside S or not in O(m).
is one of the founders and current leaders in the area of computational biology. His work
has contributed to some of the most widely-used tools in the field. In particular, the Smith- 9
This is not a typo. The word ‘TRIE’ comes from the word ‘information reTRIEval’.
10
Waterman algorithm is the basis for many sequence comparison programs. Another data structure for dictionary is balanced BST. It has $O(\log n \times m)$ performance for each dictio-
nary query where n is the number of words in the dictionary. This is because one string comparison already
8
Transforming s into s+s is a classic technique in string processing to simplify ‘wrap around’ cases. costs O(m). Hash Table may not be suitable as we need to order the words in the dictionary.

337 338
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

6.5.2 Suffix Tree


// One vertex of the Trie below.
struct vertex {
  char alphabet;          // character this vertex was created for
  bool exist;             // becomes true when an inserted word ends here
  vector<vertex*> child;  // 26 slots, all null until a branch is added

  // Builds a vertex for character a: no word ends here yet, no children.
  vertex(char a) : alphabet(a), exist(false), child(26, nullptr) {}
};

class Trie { // this is TRIE


private: // NOT Suffix Trie
vertex* root;
public:
Trie() { root = new vertex(’!’); }

void insert(string word) { // insert a word into trie


vertex* cur = root;
for (int i = 0; i < (int)word.size(); ++i) { // O(n)
int alphaNum = word[i]-’A’;
if (cur->child[alphaNum] == NULL) // add new branch if NULL Figure 6.3: Suffixes, Suffix Trie, and Suffix Tree of T = “GATAGACA$”
cur->child[alphaNum] = new vertex(word[i]);
cur = cur->child[alphaNum]; Now, instead of working with several short strings, we work with one long(er) string. Con-
} sider a string T = “GATAGACA$”. The last character ‘$’ is a special terminating character
cur->exist = true; appended to the original string “GATAGACA”. It has an ASCII value smaller11 than the char-
} acters in T. This terminating character ensures that all suffixes terminate in leaf vertices.
The Suffix Trie of T is shown in Figure 6.3—middle. This time, the terminating vertex
bool search(string word) { // true if word in trie stores the index of the suffix that terminates in that vertex. Observe that the longer the
vertex* cur = root; string T is, there will be more duplicated vertices in the Suffix Trie. This can be inefficient.
for (int i = 0; i < (int)word.size(); ++i) { // O(m) Suffix Tree of T is a Suffix Trie where we merge vertices with only one child (essentially
int alphaNum = word[i]-’A’; a path compression). Compare Figure 6.3—middle and right to see this path compression
if (cur->child[alphaNum] == NULL) // not found process. Notice the edge label and path label in the figure. This time, the edge label can
return false; have more than one character. Suffix Tree is much more compact than Suffix Trie with at
cur = cur->child[alphaNum]; most O(n) vertices only12 (and thus at most O(n) edges). Thus, rather than using Suffix
} Trie for a long string T, we will use Suffix Tree in the subsequent sections.
return cur->exist; // check exist flag Suffix Tree can be a new data structure for most readers of this book. Therefore we have
} built a Suffix Tree visualization in VisuAlgo to show the structure of the Suffix Tree of any
(but relatively short) input string T specified by the readers themselves. Several Suffix Tree
bool startsWith(string prefix) { // true if match prefix applications shown in the next Section 6.5.3 are also included in the visualization.
vertex* cur = root;
Visualization: https://visualgo.net/en/suffixtree
for (int i = 0; i < (int)prefix.size(); ++i) {
int alphaNum = prefix[i]-’A’;
if (cur->child[alphaNum] == NULL) // not found Exercise 6.5.2.1: Given two vertices that represent two di↵erent suffixes, e.g., suffix 1
return false; and suffix 5 in Figure 6.3—right, determine what is their Longest Common Prefix (LCP)!
cur = cur->child[alphaNum]; Consequently, what does this LCP between two suffixes mean?
}
Exercise 6.5.2.2*: Draw the Suffix Trie and the Suffix Tree of T = “BANANA$”!
return true; // reach here, return true
Hint: Use the Suffix Tree visualization tool in VisuAlgo.
}
};
11
Hence, we cannot use ‘ ’ (a space, ASCII value 32) in T as ‘$’ has ASCII value 36.
Source code: ch6/Trie.cpp|py 12
There are up to n leaves for n suffixes. All internal vertices are always branching thus there can be up to
n-1 such vertices (e.g., a complete binary tree). Total: n (leaves) + (n-1) (internal vertices) = 2n-1 vertices.

339 340
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

6.5.3 Applications of Suffix Tree be > 1 terminating vertices in the subtree rooted at x) and these suffixes share a common
prefix (which implies a repeated substring). The fact that x is the deepest internal vertex
Assuming that the Suffix Tree of a string T is already built, we can use it for these applications
(from root) implies that its path label is the longest repeated substring.
(this list is not exhaustive):
Example: In the Suffix Tree of T = “GATAGACA$” in Figure 6.5, the LRS is “GA” as it is
String Matching in O(m + occ) the path label of the deepest internal vertex x—“GA” is repeated twice in “GATAGACA$”. The
answer can be found with O(n) pass through the Suffix Tree. To deepen your understanding
With Suffix Tree, we can find all (exact) occurrences of a pattern string P in T in O(m + occ) of this application, visit VisuAlgo, Suffix Tree visualization, to create your own Suffix Tree
where m is the length of the pattern string P itself and occ is the total number of occurrences (on small string T with unique longest repeat substring or several equally-longest repeat
of P in T—no matter how long the string T (of length n) is13 . When the Suffix Tree is already substrings) and test this Longest Repeated Substring application.
built, this approach is much faster than the string matching algorithms discussed earlier in
Section 6.4.
Given the Suffix Tree of T, our task is to search for the vertex x in the Suffix Tree whose
path label represents the pattern string P. Note that a matching is simply a common prefix
between the pattern string P and some suffixes of string T. This is done by just one root to
(at worst) leaf traversal of the Suffix Tree of T following the edge labels. The vertex closest
to the root with path label that starts with P is the desired vertex x. Then, the suffix indices
stored in the terminating vertices (leaves) of the subtree rooted at x are the occurrences of
P in T.
Example: In the Suffix Tree of T = “GATAGACA$” shown in Figure 6.4 and P = “A”, we
can simply traverse from root, go along the edge with edge label ‘A’ to find vertex x with Figure 6.5: Longest Repeated Substring of T = “GATAGACA$”
the path label ‘A’. There are 4 occurrences14 of ‘A’ in the subtree rooted at x. They are
suffix 7: “A$”, suffix 5: “ACA$”, suffix 3: “AGACA$”, and suffix 1: “ATAGACA$”. If P = “Z”,
then the Suffix Tree traversal will not be able to find a suitable vertex x and reports that Finding the Longest Common Substring in O(n)
“P is not found”. To deepen your understanding of this application, visit VisuAlgo, Suffix
Tree visualization, to create your own Suffix Tree (on a small string T) and test this String
Matching application using a pattern string P of your choice.

Figure 6.6: Generalized ST of T1 = “GATAGACA$” and T2 = “CATA#” and their LCS


Figure 6.4: String Matching of T = “GATAGACA$” with Pattern String P = “A”
The problem of finding the Longest Common Substring (LCS16 ) of two or more strings
can be solved in linear time17 with Suffix Tree. Without loss of generality, let’s consider
Finding the Longest Repeated Substring in O(n)
the case with two strings only: T1 and T2 . We can build a generalized Suffix Tree that
Given the Suffix Tree of T, we can also find the Longest Repeated Substring15 (LRS) in T combines the Suffix Tree of T1 and T2 . To differentiate the source of each suffix, we use two
efficiently. The LRS problem is the problem of finding the longest substring of a string that different terminating vertex symbols, one for each string. Then, we mark internal vertices
occurs at least twice. The path label of the deepest internal vertex x in the Suffix Tree of which have vertices in their subtrees with different terminating symbols in O(n). The suffixes
T is the answer. Vertex x can be found with an O(n) tree traversal (DFS/BFS). The fact represented by these marked internal vertices share a common prefix and come from both T1
that x is an internal vertex implies that it represents more than one suffix of T (there will and T2 . That is, these marked internal vertices represent the common substrings between
13
T1 and T2 . As we are interested in the longest common substring, we report the path label
Usually, m is much smaller than n.
14
To be precise, occ is the size of subtree rooted at x, which can be larger—but not more than double—than
of the deepest marked vertex as the answer also in O(n).
the actual number (occ) of terminating vertices (leaves) in the subtree rooted at x.
15 16
This problem has several interesting applications: finding the chorus section of a song (that is repeated Note that ‘Substring’ is different from ‘Subsequence’. For example, “BCE” is a subsequence but not a
several times); finding the (longest) repeated sentences in a (long) political speech, etc. Note that there is substring of “ABCDEF” whereas “BCD” (contiguous) is both a subsequence and a substring of “ABCDEF”.
17
another version of this problem, see Exercise 6.5.3.4*. Only if we use the linear time Suffix Tree construction algorithm (not discussed in this book, see [35]).

341 342
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

For example, with T1 = “GATAGACA$” and T2 = “CATA#”, The Longest Common Substring
is “ATA” of length 3. In Figure 6.6, we see the vertices with path labels “A”, “ATA”, “CA”,
and “TA” have two different terminating symbols (notice that vertex with path label “GA”
is not considered as both suffix “GACA$” and “GATAGACA$” come from T1 ). These are the
common substrings between T1 and T2 . The deepest marked vertex is “ATA” and this is
the longest common substring between T1 and T2 . To deepen your understanding of this
application, visit VisuAlgo, Suffix Tree visualization, to create your own Suffix Tree (on two
small strings: T1 and T2 ) and test this Longest Common Substring application.

Exercise 6.5.3.1: Use the Suffix Tree in Figure 6.4; Find P1 = “C” and P2 = “CAT”!
Exercise 6.5.3.2: Find the LRS in T = “CGACATTACATTA$”! Build the Suffix Tree first. Figure 6.7: Sorting the Suffixes of T = “GATAGACA$”
Exercise 6.5.3.3: Find the LCS of T1 = “STEVEN$” and T2 = “SEVEN#”!
Exercise 6.5.3.4*: Instead of finding the LRS, we now want to find the repeated substring Basically, Suffix Array is an integer array that stores a permutation of n indices of sorted
that occurs the most. Among several possible candidates, pick the longest one. For example, suffixes. For example, consider the same19 T = “GATAGACA$” with n = 9. The Suffix Array
if T = “DEFG1ABC2DEFG3ABC4ABC$”, the answer is “ABC” of length 3 that occurs three times of T is a permutation of integers [0..n-1] = {8, 7, 5, 3, 1, 6, 4, 0, 2} as shown in
(not “BC” of length 2 or “C” of length 1 which also occur three times) instead of “DEFG” of Figure 6.7. That is, the suffixes in sorted order are suffix SA[0] = suffix 8 = “$”, suffix
length 4 that occurs only two times. Outline the strategy to find the solution! SA[1] = suffix 7 = “A$”, suffix SA[2] = suffix 5 = “ACA$”, . . . , and finally suffix SA[8] =
Exercise 6.5.3.5*: The Longest Repeated Substring (LRS) problem presented in this sec- suffix 2 = “TAGACA$”.
tion allows overlap. For example, the LRS of T = “AAAAAAAA$” is “AAAAAAA” of length 7.
What should we do if we do not allow the LRS to overlap? For example, the LRS without Suffix Tree versus Suffix Array
overlap of T = “AAAAAAAA$” should be “AAAA” of length 4.
Exercise 6.5.3.6*: Think of how to generalize this approach to find the LCS of more than
two strings. For example, given three strings T1 = “STEVEN$”, T2 = “SEVEN#”, and T3 =
“EVE@”, how to determine that their LCS is “EVE”?
Exercise 6.5.3.7*: Customize the solution further so that we find the LCS of k out of n
strings, where k ≤ n. For example, given the same three strings T1 , T2 , and T3 as above,
how to determine that the LCS of 2 out of 3 strings is “EVEN”?
Exercise 6.5.3.8*: The Longest Common Extension (LCE) problem is as follows: Given
a string T and two indices i and j, compute the longest substring of T that starts at both
i and j. Examples assuming T = “CGACATTACATTA$”. If i = 4, and j = 9, the answer is
“ATTA”. If i = 7, and j = 9, the answer is “A”. How to solve this with Suffix Tree?

Figure 6.8: Suffix Tree (Left) and Suffix Array (Right) of T = “GATAGACA$”

6.5.4 Suffix Array Suffix Tree and Suffix Array are closely related20 . As we can see in Figure 6.8, the DFS tree
In the previous subsection, we have shown several string processing problems that can be traversal (neighbors are ordered based on sorted edge labels) of the Suffix Tree visits the
solved if the Suffix Tree is already built. However, the efficient implementation of linear time terminating vertices (the leaves) in Suffix Array order. An internal vertex in the Suffix
Suffix Tree construction (see [35]) is complex and thus risky under a programming contest Tree corresponds to a range in the Suffix Array (a collection of sorted suffixes that share a
setting. Fortunately, the next data structure that we are going to describe—the Suffix Longest Common Prefix (LCP)—to be computed below). A terminating vertex (always
Array invented by Udi Manber and Gene Myers [25]—has similar functionalities as the at leaf due to the usage of a terminating character) in the Suffix Tree corresponds to an
Suffix Tree but is (much) simpler to construct and use, especially in a programming contest individual index in the Suffix Array (a single suffix). Keep these similarities in mind.
setting. Thus, we will skip the discussion on O(n) Suffix Tree construction (see [35]) and They will be useful in the next subsection when we discuss applications of Suffix Array.
instead focus on the O(n log n) Suffix Array construction (see [37]) which is easier to use18 .
19
Then, in the next subsection, we will show that we can apply Suffix Array to solve problems Notice that we also use the terminating symbol ‘$’ to simplify Suffix Array discussion.
20
Memory usage: Suffix Tree has n|Σ| pointers where |Σ| is the number of different characters in T thus
that have been shown to be solvable with Suffix Tree. it requires O(n|Σ| log n) bits to store its data. On the other hand, Suffix Array is just an array of n indices
18
The difference between O(n) and O(n log n) algorithms in programming contest setup is not much. thus it only needs O(n log n) bits to store its data, slightly more memory efficient.

343 344
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

Naı̈ve Suffix Array Construction Efficient Suffix Array Construction


It is very easy to construct a Suffix Array given a string T[0..n-1] if we are not given a A better way to construct Suffix Array is to sort the ranking pairs (small integers) of suf-
very long string T, as shown below: fixes in O(log_2 n) iterations from k = 1, 2, 4, . . . , the last power of 2 that is less than n.
At each iteration, this construction algorithm sorts the suffixes based on the ranking pair
// in int main() (RA[SA[i]], RA[SA[i]+k]) of suffix SA[i]. This algorithm is called the Prefix Doubling
scanf("%s", &T); // read T (Karp-Miller-Rosenberg) algorithm [21, 37]. An example execution is shown below for T =
int n = (int)strlen(T); // count n “GATAGACA$” and n = 9.
T[n++] = ’$’; // add terminating symbol
vi SA(n); • First, SA[i] = i and RA[i] = ASCII value of T[i] ∀i ∈ [0..n-1] (Table 6.1—left).
iota(SA.begin(), SA.end(), 0); // the initial SA At iteration k = 1, the ranking pair of suffix SA[i] is (RA[SA[i]], RA[SA[i]+1]).
// analysis of this sort below: O(n log n) * cmp: O(n) = O(n^2 log n)
sort(SA.begin(), SA.end(), [](int a, int b) { // O(n^2 log n)
return strcmp(T+a, T+b) < 0;
}); // continued below

When applied to string T = “GATAGACA$”, the naı̈ve SA construction code above that sorts
all suffixes with built-in sorting and string comparison library really produces the correct
Suffix Array = {8, 7, 5, 3, 1, 6, 4, 0, 2}. However, this is barely useful except for
contest problems with n ≤ 2500. The overall runtime of this algorithm is O(n^2 log n) because
the strcmp operation that is used to determine the order of two (possibly long) suffixes is Table 6.1: L/R: Before/After Sorting; k = 1; the initial sorted order appears
too costly, up to O(n) per pair of suffix comparison.
Example 1: The rank of suffix 5 “ACA$” is (‘A’, ‘C’) = (65, 67).
Computing Longest Common Prefix Between Consecutive Sorted Suffixes
Example 2: The rank of suffix 3 “AGACA$” is (‘A’, ‘G’) = (65, 71).
Given the Suffix Array of T, we can compute the Longest Common Prefix (LCP) between After we sort these ranking pairs, the order of suffixes is now like Table 6.1—right,
consecutive sorted suffixes in Suffix Array order. By definition, LCP[0] = 0 as suffix SA[0] where suffix 5 “ACA$” comes before suffix 3 “AGACA$”, etc.
is the first suffix in Suffix Array order without any other suffix preceding it. For i > 0,
LCP[i] = the length of LCP between suffix SA[i] and suffix SA[i-1]. For example, in • At iteration k = 2, the ranking pair of suffix SA[i] is (RA[SA[i]], RA[SA[i]+2]).
Figure 6.8—right, we see that suffix SA[7] = suffix 0 = “GATAGACA$” has an LCP “GA” of This ranking pair is now obtained by looking at the first pair and the second pair of
length 2 with its previous sorted suffix SA[6] = suffix 4 = “GACA$”. We can compute LCP characters only. To get the new ranking pairs, we do not have to recompute many
directly by definition by using the code below. However, this approach is slow as it can things. We set the first one, i.e., Suffix 8 “$” to have new rank r = 0. Then, we iterate
increase the value of L up to O(n^2) times, e.g., try T = “AAAAAAA$”. from i = [1..n-1]. If the ranking pair of suffix SA[i] is different from the ranking
pair of the previous suffix SA[i-1] in sorted order, we increase the rank r = r + 1.
// continuation from above Otherwise, the rank stays at r (see Table 6.2—left).
// LCP[i] = length of the common prefix of suffix SA[i] and suffix SA[i-1],
// computed directly from the definition (O(n^2) character comparisons).
vi LCP(n);
LCP[0] = 0;                                      // no suffix precedes SA[0]
for (int i = 1; i < n; ++i) {
  const int cur = SA[i], prev = SA[i-1];         // adjacent suffixes in SA order
  int L = 0;                                     // matching restarts from 0
  while (cur+L < n && prev+L < n && T[cur+L] == T[prev+L])
    ++L;                                         // L-th chars agree, extend
  LCP[i] = L;
}
printf("T = ’%s’\n", T);
printf(" i SA[i] LCP[i] Suffix SA[i]\n");
Table 6.2: L/R: Before/After Sorting; k = 2; “GATAGACA” and “GACA” are swapped
for (int i = 0; i < n; ++i)
printf("%2d %2d %2d %s\n", i, SA[i], LCP[i], T+SA[i]); Example 1: In Table 6.1—right, the ranking pair of suffix 7 “A$” is (65, 36) which is
different from the ranking pair of previous suffix 8 “$-” which is (36, 0). Therefore in
The source code of this slow algorithm is given below using the fastest language (C++), but Table 6.2—left, suffix 7 has a new rank 1.
it is probably not that useful to be used in a modern programming contest.
Example 2: In Table 6.1—right, the ranking pair of suffix 4 “GACA$” is (71, 65) which
Source code: ch6/sa lcp slow.cpp is similar with the ranking pair of previous suffix 0 “GATAGACA$” which is also (71, 65).

345 346
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

Therefore in Table 6.2—left, since suffix 0 is given a new rank 6, then suffix 4 is also theorem [20]. The idea is simple: it is easier to compute the LCP in the original position
given the same new rank 6. order of the suffixes instead of the lexicographic order of the suffixes. In Table 6.4—right, we
Once we have updated RA[SA[i]] ∀i ∈ [0..n-1], the value of RA[SA[i]+k] can be have the original position order of the suffixes of T = ‘GATAGACA$’. Observe that column
easily determined too. In our explanation, if SA[i]+k ≥ n, we give a default rank 0. PLCP[i] forms a pattern: decrease-by-1 block (2 → 1 → 0); increase to 1; decrease-by-1
See Exercise 6.5.4.1 for more details on the implementation aspect of this step. block again (1 → 0); increase to 1 again; decrease-by-1 block again (1 → 0), etc.

At this stage, the ranking pair of suffix 0 “GATAGACA$” is (6, 7) and suffix 4 “GACA$”
is (6, 5). These two suffixes are still not in sorted order whereas all the other suffixes
are already in their correct order. After another round of sorting, the order of suffixes
is now like Table 6.2—right.

• At iteration k = 4—notice that we double k = 2 to k = 4, skipping k = 3—, the


ranking pair of suffix SA[i] is (RA[SA[i]], RA[SA[i]+4]). This ranking pair is now
obtained by looking at the first quadruple and the second quadruple of characters only.
At this point, notice that the previous ranking pairs of Suffix 4 (6, 5) and Suffix 0 (6, 7)
in Table 6.2—right are now different. Therefore, after re-ranking, all n suffixes in Table
6.3 now have different rankings. This can be easily verified by checking if RA[SA[n-1]]
== n-1. When this happens, we have successfully obtained the Suffix Array. Notice
that the major sorting work is done in the first few iterations only and we usually do Table 6.4: Computing the LCP given the SA of T = “GATAGACA$”
not need many iterations when T is a random string (also see Exercise 6.5.4.3).
The PLCP theorem says that the total number of increase (and decrease) operations is at
most O(n). This pattern and this O(n) guarantee are exploited in the code below.
First, we compute Phi[SA[i]], i.e., we store the suffix index of the previous suffix of
suffix SA[i] in Suffix Array order. By definition, Phi[SA[0]] = -1, i.e., there is no previous
suffix that precedes suffix SA[0]. Take some time to verify the correctness of column Phi[i]
in Table 6.4—right. For example, Phi[SA[3]] = SA[3-1], so Phi[3] = SA[2] = 5.
Now, with Phi[i], we can compute the permuted LCP. The first few steps of this al-
gorithm are elaborated below. When i = 0, we have Phi[0] = 4. This means suffix 0
“GATAGACA$” has suffix 4 “GACA$” before it in Suffix Array order. The first two characters
Table 6.3: Before/After sorting; k = 4; no change (L = 2) of these two suffixes match, so PLCP[0] = 2.
When i = 1, we know that at least L-1 = 1 characters can match as the next suffix in
Suffix Array construction algorithm can be new for most readers of this book. Thus, we position order will have one less starting character than the current suffix. We have Phi[1]
have built a Suffix Array visualization tool in VisuAlgo to show the steps of this construction = 3. This means suffix 1 “ATAGACA$” has suffix 3 “AGACA$” before it in Suffix Array order.
algorithm for any (but short) input string T specified by the reader themselves. Several Suffix Observe that these two suffixes indeed have at least 1 character match (that is, we do not
Array applications shown in the next Section 6.5.5 are also included in the visualization. start from L = 0 as in computeLCP slow() function shown earlier and therefore this is more
efficient). As we cannot extend this further, we have PLCP[1] = 1.
Visualization: https://visualgo.net/en/suffixarray We continue this process until i = n-1, bypassing the case when Phi[i] = -1. As the
PLCP theorem says that L will be increased/decreased at most n times, this part runs in
We can implement the sorting of ranking pairs above using (built-in) O(n log n) sorting amortized O(n). Finally, once we have the PLCP array, we can put the permuted LCP back
library. As we repeat the sorting process up to log n times, the overall time complexity is to the correct position. The code is relatively short, as shown below.
O(log n × n log n) = O(n log^2 n). With this time complexity, we can now work with strings
of length up to ≈ 30K. However, since the sorting process only sorts pairs of small integers, The Efficient Implementation
we can use a linear time two-pass Radix Sort (that internally calls Counting Sort—see the
We provide our efficient O(n log n) SA construction code combined with efficient O(n) com-
details in Book 1) to reduce the sorting time to O(n). As we repeat the sorting process up
putation of LCP between consecutive21 sorted suffixes below. Now this SA construction and
to log n times, the overall time complexity is O(log n × n) = O(n log n). Now, we can work
LCP computation code is good enough for many challenging string problems involving long
with strings of length up to ≈ 450K—typical programming contest range.
strings in programming contests. Please scrutinize the code to understand how it works.
For ICPC contestants: as you can bring hard copy materials to the contest, it is a good
Efficient Computation of LCP Between Two Consecutive Sorted Suffixes idea to put this code in your team’s library.
A better way to compute Longest Common Prefix (LCP) between two consecutive sorted
suffixes in Suffix Array order is by using the Permuted Longest-Common-Prefix (PLCP) 21
Also see Exercise 6.5.4.5* that asks for the LCP between a range of sorted suffixes.

347 348
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

for (int i = 0, L = 0; i < n; ++i) { // compute PLCP in O(n)


typedef pair<int, int> ii; if (Phi[i] == -1) { PLCP[i] = 0; continue; } // special case
typedef vector<int> vi; while ((i+L < n) && (Phi[i]+L < n) && (T[i+L] == T[Phi[i]+L]))
++L; // L incr max n times
class SuffixArray { PLCP[i] = L;
private: L = max(L-1, 0); // L dec max n times
vi RA; // rank array }
LCP.resize(n);
void countingSort(int k) { // O(n) for (int i = 0; i < n; ++i) // compute LCP in O(n)
int maxi = max(300, n); // up to 255 ASCII chars LCP[i] = PLCP[SA[i]]; // restore PLCP
vi c(maxi, 0); // clear frequency table }
for (int i = 0; i < n; ++i) // count the frequency
++c[i+k < n ? RA[i+k] : 0]; // of each integer rank public:
for (int i = 0, sum = 0; i < maxi; ++i) { const char* T; // the input string
int t = c[i]; c[i] = sum; sum += t; const int n; // the length of T
} vi SA; // Suffix Array
vi tempSA(n); vi LCP; // of adj sorted suffixes
for (int i = 0; i < n; ++i) // sort SA
tempSA[c[SA[i]+k < n ? RA[SA[i]+k] : 0]++] = SA[i]; SuffixArray(const char* initialT, const int _n) : T(initialT), n(_n) {
swap(SA, tempSA); // update SA constructSA(); // O(n log n)
} computeLCP(); // O(n)
}
void constructSA() { // can go up to 400K chars };
SA.resize(n);
iota(SA.begin(), SA.end(), 0); // the initial SA int main() {
RA.resize(n); scanf("%s", &T); // read T
for (int i = 0; i < n; ++i) RA[i] = T[i]; // initial rankings int n = (int)strlen(T); // count n
for (int k = 1; k < n; k <<= 1) { // repeat log_2 n times T[n++] = ’$’; // add terminating symbol
// this is actually radix sort SuffixArray S(T, n); // construct SA+LCP
countingSort(k); // sort by 2nd item printf("T = ’%s’\n", T);
countingSort(0); // stable-sort by 1st item printf(" i SA[i] LCP[i] Suffix SA[i]\n");
vi tempRA(n); for (int i = 0; i < n; ++i)
int r = 0; printf("%2d %2d %2d %s\n", i, S.SA[i], S.LCP[i], T+S.SA[i]);
tempRA[SA[0]] = r; // re-ranking process } // return 0;
for (int i = 1; i < n; ++i) // compare adj suffixes
tempRA[SA[i]] = // same pair => same rank r; otherwise, increase r
((RA[SA[i]] == RA[SA[i-1]]) && (RA[SA[i]+k] == RA[SA[i-1]+k])) ? Exercise 6.5.4.1: In the SA construction code shown above, will the following line:
r : ++r; ((RA[SA[i]] == RA[SA[i-1]]) && (RA[SA[i]+k] == RA[SA[i-1]+k])) ?
swap(RA, tempRA); // update RA
if (RA[SA[n-1]] == n-1) break; // nice optimization causes index out of bound in some cases?
} That is, will SA[i]+k or SA[i-1]+k ever be n and crash the program? Explain!
}
Exercise 6.5.4.2: Will the SA construction code shown above work if the input string T
contains a space (ASCII value = 32) inside? If it doesn’t work, what is the required solution?
void computeLCP() {
Hint: The default terminating character used—i.e., ‘$’—has ASCII value = 36.
vi Phi(n);
vi PLCP(n); Exercise 6.5.4.3: Give an input string T of length 16 so that the given O(n log n) SA
PLCP.resize(n); construction code use up all log2 16 = 4 iterations!
Phi[SA[0]] = -1; // default value Exercise 6.5.4.4*: Show the steps to compute the Suffix Array of T = “BANANA$” with
for (int i = 1; i < n; ++i) // compute Phi in O(n) n = 7. How many sorting iterations do you need to get the Suffix Array?
Phi[SA[i]] = SA[i-1]; // remember prev suffix Hint: Use the Suffix Array visualization tool in VisuAlgo.

349 350
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

Exercise 6.5.4.5*: Show how to extend the computation of LCP between two consecutive We start by finding the lower bound. The current range is i = [0..8] and thus the middle
sorted suffixes into computation of LCP between a range of sorted suffixes, i.e., answer one is i = 4. We compare the first two characters of suffix SA[4], which is “ATAGACA$”, with
LCP(i, j). For example in Figure 6.8, LCP(1, 4) = 1 (“A”), LCP(6, 7) = 2 (“GA”), and P = ‘GA’. As P = ‘GA’ is larger, we continue exploring i = [5..8]. Next, we compare the
LCP(0, 8) = 0 (nothing in common). first two characters of suffix SA[6], which is “GACA$”, with P = ‘GA’. It is a match. As we
Exercise 6.5.4.6*: Show how to use LCP information to compute the number of distinct are currently looking for the lower bound, we do not stop here but continue exploring i =
substrings in T in O(n log n) time. [5..6]. P = ‘GA’ is larger than suffix SA[5], which is “CA$”. We stop after checking that
SA[8] doesn’t start with prefix P = ‘GA’. Index i = 6 is the lower bound, i.e., suffix SA[6],
which is “GACA$”, is the first time pattern P = ‘GA’ appears as a prefix of a suffix in the
list of sorted suffixes.
6.5.5 Applications of Suffix Array
We have mentioned earlier that Suffix Array is closely related to Suffix Tree. In this subsec-
tion, we show that with Suffix Array (which is easier to construct), we can solve the string
processing problems shown in Section 6.5.3 that are solvable using Suffix Tree.

String Matching in O(m log n)


After we obtain the Suffix Array of T, we can search for a pattern string P (of length m)
in T (of length n) in O(m log n). This is a factor of log n times slower than the Suffix Tree
version but in practice it is quite acceptable. The O(m log n) complexity comes from the
fact that we can do two O(log n) binary searches on sorted suffixes and do up to O(m) suffix
comparisons22 . The first/second binary search is to find the lower/upper bound respectively.
This lower/upper bound is the smallest/largest i such that the prefix of suffix SA[i] matches
the pattern string P, respectively. All the suffixes between the lower and upper bound are
Table 6.5: String Matching using Suffix Array
the occurrences of pattern string P in T. Our implementation is shown below:

// extension of class Suffix Array above Next, we search for the upper bound. The first step is the same as above. But at the second
ii stringMatching(const char *P) { // in O(m log n) step, we have a match between suffix SA[6], which is “GACA$”, with P = ‘GA’. Since now
int m = (int)strlen(P); // usually, m < n we are looking for the upper bound, we continue exploring i = [7..8]. We find another
int lo = 0, hi = n-1; // range = [0..n-1] match when comparing suffix SA[7], which is “GATAGACA$”, with P = ‘GA’. We stop here.
while (lo < hi) { // find lower bound This i = 7 is the upper bound in this example, i.e., suffix SA[7], which is “GATAGACA$”, is
int mid = (lo+hi) / 2; // this is round down the last time pattern P = ‘GA’ appears as a prefix of a suffix in the list of sorted suffixes.
int res = strncmp(T+SA[mid], P, m); // P in suffix SA[mid]?
(res >= 0) ? hi = mid : lo = mid+1; // notice the >= sign
} Finding the Longest Repeated Substring in O(n)
if (strncmp(T+SA[lo], P, m) != 0) return {-1, -1}; // if not found If we have computed the Suffix Array in O(n log n) and the LCP between consecutive suffixes
ii ans; ans.first = lo; in Suffix Array order in O(n), then we can determine the length of the Longest Repeated
hi = n-1; // range = [lo..n-1] Substring (LRS) of T in O(n).
while (lo < hi) { // now find upper bound The length of the LRS is just the highest number in the LCP array. In Table 6.4—left
int mid = (lo+hi) / 2; that corresponds to the Suffix Array and the LCP of T = “GATAGACA$”, the highest number
int res = strncmp(T+SA[mid], P, m); is 2 at index i = 7. The first 2 characters of the corresponding suffix SA[7] (suffix 0) is
(res > 0) ? hi = mid : lo = mid+1; // notice the > sign “GA”. This is the LRS in T.
}
if (strncmp(T+SA[hi], P, m) != 0) --hi; // special case
ans.second = hi; Finding the Longest Common Substring in O(n)
return ans; // returns (lb, ub) Without loss of generality, let’s consider the case with only two strings. We use the same
} // where P is found example as in the Suffix Tree section earlier: T1 = “GATAGACA$” and T2 = “CATA#”. To solve
the Longest Common Substring (LCS) problem using Suffix Array, first we have to concate-
A sample execution of this string matching algorithm on the Suffix Array of T = “GATAGACA$” nate both strings (note that the terminating characters of both strings must be different)
with P = “GA” is shown in Table 6.5. to produce T = “GATAGACA$CATA#”. Then, we compute the Suffix and LCP array of T as
22
This is achievable by using the strncmp function to compare only the first m characters of both suffixes. shown in Table 6.6.

351 352
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry

Programming Exercises related to Suffix Array24 :

1. Entry Level: Kattis - suffixsorting * (basic Suffix Array construction problem; be


careful with terminating symbol)
2. UVa 01254 - Top 10 * (LA 4657 - Jakarta09; Suffix Array with Segment Tree
or Sparse Table; LCP range)
3. UVa 01584 - Circular Sequence * (LA 3225 - Seoul04; min lexicographic
rotation25 ; similar with UVa 00719; other solutions exist)
4. UVa 11512 - GATTACA * (Longest Repeated Substring)
5. Kattis - automatictrading * (Suffix Array; LCP of a range; use Sparse Table)
6. Kattis - buzzwords * (Longest Repeated Substring that appears X times (2 ≤
X < N); also available at UVa 11855 - Buzzwords)
7. Kattis - suffixarrayreconstruction * (clever creative problem involving Suffix Ar-
ray concept; be careful that ‘*’ can be more than one character)
Table 6.6: The Suffix Array, LCP, and owner of T = “GATAGACA$CATA#”
Extra UVa: 00719, 00760, 01223, 12506.

Then, we go through consecutive suffixes in O(n). If two consecutive suffixes belong to Extra Kattis: aliens, burrowswheeler, dvaput, lifeforms, repeatedsubstrings, string-
different owners (can be easily checked23 , for example we can test if suffix SA[i] belongs to multimatching, substrings.
T1 by testing if SA[i] < the length of T1 ), we look at the LCP array and see if the maximum Others: SPOJ SARRAY - Suffix Array (problem author: Felix Halim), IOI 2008
LCP found so far can be increased. After one O(n) pass, we will be able to determine the - Type Printer (DFS traversal of Suffix Trie).
LCS. In Figure 6.6, this happens when i = 7, as suffix SA[7] = suffix 1 = “ATAGACA$CATA#” Also see Section 8.7 for some harder problems that uses (Suffix) Trie data struc-
(owned by T1 ) and its previous suffix SA[6] = suffix 10 = “ATA#” (owned by T2 ) have a ture as sub-routine.
common prefix of length 3 which is “ATA”. This is the LCS.
Finally, we close this section and this chapter by highlighting the availability of our source
code. Please spend some time understanding the source code which may not be trivial for
those who are new with Suffix Array. Profile of Data Structure Inventors
Source code: ch6/sa lcp.cpp|java|py|ml Udi Manber is an Israeli computer scientist. He works in Google as one of their vice
presidents of engineering. Along with Gene Myers, Manber invented Suffix Array data
Exercise 6.5.5.1*: Suggest some possible improvements to the stringMatching() function structure in 1991.
shown in this section so that the time complexity improves to O(m + log n)! Eugene “Gene” Wimberly Myers, Jr. is an American computer scientist and bioin-
Exercise 6.5.5.2*: Compare the KMP algorithm shown in Section 6.4 and Rabin-Karp formatician, who is best known for his development of the BLAST (Basic Local Alignment
algorithm in Section 6.6 with the string matching feature of Suffix Array, then decide a rule Search Tool) tool for sequence analysis. His 1990 paper that describes BLAST has received
of thumb on when it is better to use Suffix Array to deal with string matching and when it over 24 000 citations making it among the most highly cited paper ever. He also invented
is better to use KMP, Rabin-Karp, or just standard string libraries. Suffix Array with Udi Manber.

Exercise 6.5.5.3*: Solve the exercises on Suffix Tree applications using Suffix Array instead:
• Exercise 6.5.3.4* (repeated substrings that occurs the most, and if ties, the longest),
• Exercise 6.5.3.5* (LRS with no overlap),
• Exercise 6.5.3.6* (LCS of n ≥ 2 strings),
• Exercise 6.5.3.7* (LCS of k out of n strings where k ≤ n), and
24
• Exercise 6.5.3.8* (LCE of T given i and j). You can try solving these problems with Suffix Tree, but you have to learn how to code the Suffix Tree
construction algorithm by yourself. The programming problems listed here are solvable with Suffix Array.
25
Min Lexicographic Rotation is a problem of finding the rotation of a string with the lowest lexicographical
order of all possible rotations. For example, the lexicographically minimal rotation of “CGAGTC][AGCT”
23
With three or more strings, this check will have more ‘if statements’. (emphasis of ‘][’ added) is “AGCTCGAGTC”.

353 354
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.6. STRING MATCHING WITH HASHING c Steven, Felix, Suhendry

6.6 String Matching with Hashing


Given two strings A and B, compare a substring of A with a substring of B, e.g., determine $h(T_{0,0}) = (T_0 \cdot p^0) \bmod M$
whether A[i..j] = B[k..l]. The brute force way to solve this problem is by comparing $h(T_{0,1}) = (T_0 \cdot p^0 + T_1 \cdot p^1) \bmod M$
the characters in both substrings one by one, which leads to an O(m) solution where m
$h(T_{0,2}) = (T_0 \cdot p^0 + T_1 \cdot p^1 + T_2 \cdot p^2) \bmod M$
is the (sub)string length. If this comparison is repeated many times (with different sub-
$\vdots$
strings), then such a solution might get Time Limit Exceeded (TLE) unless n is small enough
or repeated only a few times. For example, consider the following String Matching problem: $h(T_{0,R}) = (h(T_{0,R-1}) + T_R \cdot p^R) \bmod M$
Given two strings: text T of length n and pattern P of length m (m ≤ n), count how many
tuples ⟨i, j⟩ are there such that T[i..j] = P. As there are O(n-m) substrings of a fixed
length m from a string T of length n, then the brute force solution has an O(nm) complexity. typedef vector<int> vi;
In Section 6.4, we have learned about the Knuth-Morris-Pratt’s (KMP) algorithm that can typedef long long ll;
solve this String Matching problem in O(n + m) complexity. In Section 6.5, we have learned const int p = 131; // p and M are
about Suffix Array data structure that can solve this String Matching problem in O(m log n) const int M = 1e9+7; // relatively prime
complexity (after the Suffix Array is built in O(n log n) time). In this Section, we will learn
another technique to solve this problem with hashing. vi P; // to store p^i % M
The idea of string hashing is to convert its substrings into integers so that we can do
string comparison in O(1) by comparing their (integers) hash values. We can find the hash vi prepareP(int n) { // compute p^i % M
value of each substring in O(1) and one time preparation of O(n) with rolling hash. P.assign(n, 0);
P[0] = 1;
for (int i = 1; i < n; ++i) // O(n)
6.6.1 Hashing a String
P[i] = ((ll)P[i-1]*p) % M;
A hash of a string T of length n (0-based indexing) is usually defined as follows: return P;
}
$$h(T_{0,n-1}) = \sum_{i=0}^{n-1} T_i \cdot p^i \bmod M$$
vi computeRollingHash(string T) { // Overall: O(n)
vi P = prepareP((int)T.length()); // O(n)
Where the base p and the modulo M are integers and chosen with these recommendations: vi h(T.size(), 0);
• p is at least the size of alphabets (number of distinct characters, denoted as |⌃|), for (int i = 0; i < (int)T.length(); ++i) { // O(n)
if (i != 0) h[i] = h[i-1]; // rolling hash
26
• M is large (otherwise, our hash function will su↵er from Birthday Paradox ), h[i] = (h[i] + ((ll)T[i]*P[i]) % M) % M;
• p and M are relatively prime (otherwise, there will be too many collisions; we also need }
this requirement for the multiplicative inverse component later). return h;
}
For example, consider p = 131 and M = $10^9 + 7$ where p and M are relatively prime. Then,
h(‘ABCBC’) = (‘A’·$131^0$ + ‘B’·$131^1$ + ‘C’·$131^2$ + ‘B’·$131^3$ + ‘C’·$131^4$) mod 1 000 000 007. If we
replace (‘A’, ‘B’, ‘C’) with (0, 1, 2), then we will get h(‘ABCBC’) = 591 282 386. Most of the time,
we do not need to map the alphabets into (0, 1, .., |Σ|-1) like what we just did. Using the
ASCII value of each alphabet is already sufficient. In this case, h(‘ABCBC’) = 881 027 078.
Now, if we want to compute the hash value of a substring $T_{L,R}$ (notice that L > 0 now), then,
the rolling hash equation becomes (note: we can treat substring $T_{L,R}$ as a new string T’):
$$h(T_{L,R}) = \sum_{i=L}^{R} T_i \cdot p^{i-L} \bmod M$$
6.6.2 Rolling Hash Similar to computing the sum of a subarray in O(1) using its prefix sum (see Book 1), the
value of h(TL,R ) can be computed in O(1) with the hash value of its prefix (see Figure 6.9).
The beauty of rolling hash lies in its ability to compute the hash value of a substring in Note that we have to take out $p^L$ from the result (mod M). The derivation is as follows:
O(1), given we already have the hash value of all its prefix substrings. Let $T_{i,j}$ where i ≤ j
be the substring of T from index i to j, inclusive.
First, observe that the hash value of all prefixes of a string (where i = 0) can be computed
altogether in O(n), or O(1) per prefix. See the derivation and the rolling hash code that
computes the hash values of all prefixes of T in O(n).
26
What is the probability that 2 out of 23 random people are having the same birthday? Hint: It is more
than 50% chance, which is far higher than what most untrained people thought, hence the ‘paradox’. Figure 6.9: Rolling Hash

355 356
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.6. STRING MATCHING WITH HASHING c Steven, Felix, Suhendry

6.6.4 Collisions Probability


$$
\begin{aligned}
h(T_{L,R}) &= \frac{h(T_{0,R}) - h(T_{0,L-1})}{p^L} \bmod M \\
&= \frac{\sum_{i=0}^{R} T_i \cdot p^i - \sum_{i=0}^{L-1} T_i \cdot p^i}{p^L} \bmod M \\
&= \frac{\sum_{i=L}^{R} T_i \cdot p^i}{p^L} \bmod M \\
&= \sum_{i=L}^{R} T_i \cdot p^{i-L} \bmod M
\end{aligned}
$$

You might notice that there may be a case where two different strings have the same hash
value; in other words, a collision happens. Such a collision is inevitable as the number of
possible strings is “infinite” (often much larger28 than M). What we want with hashing are:
h(T) = h(P) if T = P, and h(T) ≠ h(P) if T ≠ P. The first one is obvious from the hash
function, but the second one is not guaranteed. So, we want h(T) ≠ h(P) to be very likely
when T ≠ P. Now let us analyze the collisions probability on these scenarios:
• Comparing 2 random strings.
The collision probability is $1/M$ and with M = $10^9 + 7$ shown in this section, the collisions
probability is quite small.
• Comparing 1 string with k other strings,
i.e., whether there exists a particular string in a set of k strings.
In this case, the collisions probability is $k/M$.
• Comparing k strings to each other, e.g., determine whether these k strings are unique.
In this case, it is easier for us to first compute the non-collision probability, which is
$\frac{M}{M} \cdot \frac{M-1}{M} \cdots \frac{M-k+1}{M} = \frac{P(M,k)}{M^k}$, where $P(M, k)$ is k-permutation of M.
Then, the collisions probability is $1 - \frac{P(M,k)}{M^k}$.
Let M = $10^9 + 7$, with k = $10^4$, the collisions probability is ≈ 5%.
Now, to compute the division part ($1/p^L$), we need to convert it into its multiplicative inverse
($p^{-L}$) such that the equation becomes:
$$h(T_{L,R}) = (h(T_{0,R}) - h(T_{0,L-1})) \cdot p^{-L} \bmod M$$
That can be implemented27 as shown below:
5
int hash_fast(int L, int R) { // O(1) hash of any substr With k = 10 , the collisions probability becomes ⇡ 99%.
if (L == 0) return h[R]; // h is the prefix hashes With k = 106 , it is pretty much guaranteed there is a collision29 .
int ans = 0; The collisions probability on the third scenario is extremely bad with a large number of
ans = ((h[R] - h[L-1]) % M + M) % M; // compute differences strings, so how can we handle this? One option is to use a larger M, e.g., 1018 + 9 (need to
ans = ((ll)ans * modInverse(P[L], M)) % M; // remove P[L]^-1 (mod M) use 64-bit30 integer data type). However, using M larger than 32-bit integer may cause an
return ans; overflow when computing the hash value31 . Another better alternative is using multiple
} hashes. Thus, a string T has multiple hash values (usually 2 suffices) with di↵erent p and
M, i.e., $\langle h_1(T_{0,n-1}), h_2(T_{0,n-1}), \ldots \rangle$, and so on. Then, two strings are considered the same only
when all their hash values are the same.
6.6.3 Rabin-Karp String Matching Algorithm
Let us consider the String Matching problem described earlier. The well-known KMP algo- Programming exercises related to String Hashing (most have alternative solutions):
rithm can solve this problem in O(n+m) where n is the length of string T and m is the length
of string P. Alternatively, we can also solve this problem with rolling hash computation. 1. Entry Level: Kattis - stringmatching * (try Rabin-Karp or KMP)
In the brute force approach, we compare each substring of length m in T. However, instead 2. UVa 11475 - Extend to Palindromes * (similar with UVa 12467)
of comparing the (sub)strings directly in O(m), we can compare their hash values in O(1). 3. UVa 12467 - Secret word * (hashing/‘border’ of KMP; see UVa 11475)
First, do a rolling hash computation on string T and also compute the hash value of string P
4. UVa 12604 - Caesar Cipher * (try Rabin-Karp/KMP up to 62 times)
(one time). Next, for each substring of T of length m, get its hash value and compare it with
$h(P_{0,m-1})$. Therefore, the overall algorithm has an O(n + m) complexity. This algorithm is 5. Kattis - animal * (Singapore15 preliminary; hash the subtrees and compare them)
known as Rabin-Karp algorithm. We have implemented this algorithm as a working code 6. Kattis - hashing * (the problem description is very clear; good hashing practice;
below (this code is an extension from the code shown in Section 6.4.2). or use Suffix Array+Sparse Table)
7. Kattis - typo * (rolling hash; update hash value when character s[i] is deleted
Source code: ch6/string matching.cpp|java|py|ml from string s; use 2 large prime modulo to be safe)
Also see String Matching programming exercises at Section 6.4.
One advantage of learning string hashing is that we can solve various variants of String
Matching problems in which it may not be easy to use or modify the KMP algorithm. For
example, counting the number of palindromic substring or counting the number of tuples
28
⟨i, j, k, l⟩ such that $T_{i,j} = P_{k,l}$. Consider the Pigeonhole Principle.
29
Try k = 23 and M = 365 to understand the Birthday Paradox mentioned earlier.
30
Or 128-bit prime if the contest supports 128-bit integer, which may not always be the case.
27 31
Please review the inclusion-exclusion principle like the one shown in Book 1 and Section 5.3.10 on Observe that in prepareP(), computeRollingHash(T), and hash fast(L, R), we cast int to ll when
extended Euclidean algorithm/modular multiplicative inverse. multiplying to avoid overflow.

357 358
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry

6.7 Anagram and Palindrome


In this section, we will discuss two not-so-rare string processing problems that may require
more advanced (string) data structure(s) and/or algorithm(s) compared to the ones discussed
in Section 6.2. They are anagram and palindrome.

6.7.1 Anagram
An anagram is a word (or phrase/string) whose letters (characters) can be rearranged
to obtain another word, e.g., ‘elevenplustwo’ is an anagram of ‘twelveplusone’. Two
words/strings that have different lengths are obviously not anagrams.

Sorting Solution
The common strategy to check if two equal-length words/strings of n characters are anagram
is to sort the letters of the words/strings and compare the results. For example, take wordA
= ‘cab’, wordB = ‘bca’. After sorting, wordA = ‘abc’ and wordB = ‘abc’ too, so they
are anagram. Review Book 1 for various sorting techniques. This runs in O(n log n).

Direct Addressing Table Solution


Another potential strategy to check if two words are anagram is to check if the character
frequencies of both words are the same. We do not have to use a full fledged Hash Table but
we can use a simpler Direct Addressing Table (DAT) (review Hash Table section in Book
1) to map characters of the first word to their frequencies in O(n). We do the same with
the characters of the second word. Then we compare those frequencies in O(k) where k is
the size of the alphabet, e.g., 255 for ASCII characters, 26 for lowercase alphabet
characters, 52 for both lowercase and uppercase alphabet characters, etc.

6.7.2 Palindrome
A palindrome is a word (or a sequence/string) that can be read the same way in either
direction. For example, ‘ABCDCBA’ is a palindrome.

Simple O(n) Palindrome Check


Given a string s with length n characters, we can check whether s is a palindrome via
definition, i.e. by reversing32 the string s and then comparing s with its reverse. However,
we can be slightly more clever by just comparing the characters in string s up to its middle
character. It does not matter if the palindrome is of even length or odd length. This one is
O(n/2) = O(n).

// we assume that s is a global variable


// Checks whether the inclusive range s[l..r] of the global string s is a
// palindrome, i.e. reads the same from both ends.
// Returns true when every mirrored pair of characters matches (an empty or
// single-character range has no pairs to compare and is therefore true).
bool isPal(int l, int r) {
  // move one cursor in from each end; stop once they meet or cross
  for (int left = l, right = r; left < right; ++left, --right) {
    if (s[left] != s[right]) return false;       // mismatched mirror pair
  }
  return true;
}
32
In C++, we can use reverse(s.begin(), s.end()) to reverse a C++ string s.

359

You might also like