Latex Programming
Latex Programming
cpp
-----------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------- -
- -------------------------------Segment Tree----------------------------------------------
---------------------------------------DSU----------------------------------------------- -
- -----------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------- -
- class segtree {
class dsu { public:
public: struct node {
vector<int> p; // don't forget to set default value (used for leaves)
int n; // not necessarily neutral element!
... a = ...;
dsu(int _n) : n(_n) {
p.resize(n); void apply(int l, int r, ... v) {
iota(p.begin(), p.end(), 0); ...
} }
};
inline int get(int x) {
return (x == p[x] ? x : (p[x] = get(p[x]))); node unite(const node &a, const node &b) const {
} node res;
...
inline bool unite(int x, int y) { return res;
x = get(x); }
y = get(y);
if (x != y) { inline void push(int x, int l, int r) {
p[x] = y; int y = (l + r) >> 1;
return true; int z = x + ((y - l + 1) << 1);
} // push from x into (x + 1) and z
return false; ...
} /*
}; if (tree[x].add != 0) {
tree[x + 1].apply(l, y, tree[x].add);
tree[z].apply(y + 1, r, tree[x].add);
----------------------------------------------------------------------------------------- tree[x].add = 0;
- }
-----------------------------------Hashmap----------------------------------------------- */
- }
-----------------------------------------------------------------------------------------
- inline void pull(int x, int z) {
// #include<bits/extc++.h> tree[x] = unite(tree[x + 1], tree[z]);
#include <ext/pb_ds/assoc_container.hpp> }
int find_last(int ll, int rr, const function<bool(const node&)> &f) { virtual int add(int from, int to, T cost) = 0;
assert(0 <= ll && ll <= rr && rr <= n - 1); };
return find_last(0, 0, n - 1, ll, rr, f);
}
}; -----------------------------------------------------------------------------------------
-
-----------------------------------Suffix Array------------------------------------------
----------------------------------------------------------------------------------------- -
- -----------------------------------------------------------------------------------------
-----------------------------------Dijkstra---------------------------------------------- -
-
----------------------------------------------------------------------------------------- template <typename T>
- vector<int> suffix_array(int n, const T &s, int char_bound) {
vector<int> a(n);
template <typename T> if (n == 0) {
vector<T> dijkstra(const graph<T> &g, int start) { return a;
assert(0 <= start && start < g.n); }
vector<T> dist(g.n, numeric_limits<T>::max()); if (char_bound != -1) {
priority_queue<pair<T, int>, vector<pair<T, int>>, greater<>> s; vector<int> aux(char_bound, 0);
dist[start] = 0; for (int i = 0; i < n; i++) {
s.emplace(dist[start], start); aux[s[i]]++;
while (!s.empty()) { }
auto [expected, i] = s.top(); int sum = 0;
s.pop(); for (int i = 0; i < char_bound; i++) {
if (dist[i] != expected) { int add = aux[i];
continue; aux[i] = sum;
} sum += add;
for (int id : g.g[i]) { }
auto &e = g.edges[id]; for (int i = 0; i < n; i++) {
int to = e.from ^ e.to ^ i; a[aux[s[i]]++] = i;
if (dist[i] + e.cost < dist[to]) { }
dist[to] = dist[i] + e.cost; } else {
file:///C:/Users/moazz/Desktop/template.cpp 5/10 file:///C:/Users/moazz/Desktop/template.cpp 6/10
6/15/25, 2:00 AM template.cpp 6/15/25, 2:00 AM template.cpp
iota(a.begin(), a.end(), 0); k = 0;
sort(a.begin(), a.end(), [&s](int i, int j) { return s[i] < s[j]; }); } else {
} int j = sa[pos[i] + 1];
vector<int> sorted_by_second(n); while (i + k < n && j + k < n && s[i + k] == s[j + k]) {
vector<int> ptr_group(n); k++;
vector<int> new_group(n); }
vector<int> group(n); lcp[pos[i]] = k;
group[a[0]] = 0; }
for (int i = 1; i < n; i++) { }
group[a[i]] = group[a[i - 1]] + (!(s[a[i]] == s[a[i - 1]])); return lcp;
} }
int cnt = group[a[n - 1]] + 1;
int step = 1; template <typename T>
while (cnt < n) { vector<int> build_lcp(const T &s, const vector<int> &sa) {
int at = 0; return build_lcp((int) s.size(), s, sa);
for (int i = n - step; i < n; i++) { }
sorted_by_second[at++] = i;
}
for (int i = 0; i < n; i++) {
if (a[i] - step >= 0) {
sorted_by_second[at++] = a[i] - step; -----------------------------------------------------------------------------------------
} -
} -----------------------------------DFS---------------------------------------------------
for (int i = n - 1; i >= 0; i--) { -
ptr_group[group[a[i]]] = i; -----------------------------------------------------------------------------------------
} -
for (int i = 0; i < n; i++) {
int x = sorted_by_second[i]; #include "../c++_template.cpp"
a[ptr_group[group[x]]++] = x;
} // =========================
new_group[a[0]] = 0; // Depth First Search (DFS)
for (int i = 1; i < n; i++) { // =========================
if (group[a[i]] != group[a[i - 1]]) { const int MAXN = 1000;
new_group[a[i]] = new_group[a[i - 1]] + 1; vector<int> g[MAXN];
} else { bool visited[MAXN];
int pre = (a[i - 1] + step >= n ? -1 : group[a[i - 1] + step]); int n;
int cur = (a[i] + step >= n ? -1 : group[a[i] + step]);
new_group[a[i]] = new_group[a[i - 1]] + (pre != cur); //recursive
} void dfs(int u) {
} visited[u] = true;
swap(group, new_group); for(int v : g[u]) {
cnt = group[a[n - 1]] + 1; if(!visited[v]) {
step <<= 1; dfs(v);
} }
return a; }
} }
//implicit graph }
int dirs[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}; }
const char EMPTY = '*'; int best_split(int n) {
int floodfill(int r, int c, char color) { int tot_cap = 0;
if (r < 0 || r >= R || c < 0 || c >= C) return 0; // outside grid rep(i,0,N-1) tot_cap += capacities[i];
if (grid[r][c] != EMPTY) return 0; // cannot be colored return search_splits(0,n,tot_cap);
grid[r][c] = color; }
int ans = 1;
rep(i,0,4) ans += floodfill(r + dirs[i][0], c + dirs[i][1], color);
return ans;
}
-----------------------------------------------------------------------------------------
-
-----------------------------------Brute Force-------------------------------------------
-
-----------------------------------------------------------------------------------------
-
/* =============================== */
/* Try all 2^n subsets of n items */
/* =============================== */
file:///C:/Users/moazz/Desktop/template.cpp 9/10 file:///C:/Users/moazz/Desktop/template.cpp 10/10
This information is used to place each element into the correct slot immediately, so
there is no need to rearrange lists.
1 2 2 3 5 6 8 9
3.2.5 Computational Complexity
3.2 Properties of Sorting Algorithms For typical serial sorting algorithms good behaviour is O (n log n), with parallel sort
in O (log2 n), and bad behaviour is O (n2 ).
3.2.1 Types of Sorting Algorithms
There are two broad types of sorting algorithms: integer sorts and comparison sorts.
3.3 Common O (n2 ) Algorithms
Comparison Sorts
These algorithms are arguably easy to implement but generally not used due to high
Comparison sorts compare elements at each step of the algorithm to determine if one
time complexity.
element should be to the left or right of another element.
Comparison sorts are usually more straightforward to implement than integer sorts,
but comparison sorts are limited by a lower bound of O (n log n), meaning that, on
average, comparison sorts cannot be faster than O (n log n). 3.3.1 Bubble Sort
Bubble sort is a simple sorting algorithm. The algorithm starts at the beginning of
Integer Sorts
the data set. It compares the first two elements, and if the first is greater than the
Integer sorts are sometimes called counting sorts (though there is a specific integer second, it swaps them. It continues doing this for each pair of adjacent elements to
sort algorithm called counting sort). Integer sorts do not make comparisons. Integer the end of the data set. It then starts again with the first two elements, repeating
sorts determine for each element x - how many elements are less than x. For example, until no swaps have occurred on the last pass. It is rarely used to sort large, unordered
if there are 14 elements that are less than x, then x will be placed in the 15th slot. data sets.
11 12
5. Merge the sorted subarrays array[ a . . . k ] and array[k + 1 . . . b] into a sorted
2 2 2 2 4 4 4 4 4 4 subarray array[ a . . . b].
4 4 4 4 2 2 3 3 3 3 Merge sort is an efficient algorithm, because it halves the size of the subarray at
each step. The recursion consists of O (log n) levels, and processing each level takes
1 1 3 3 3 3 2 2 2 2 O(n) time. Merging the subarrays array[ a . . . k] and array[k + 1 . . . b] is possible in
linear time, because they are already sorted.
3 3 1 1 1 1 1 1 1 1 For example, consider sorting the following array:
Example of Bubble Sort
1 3 6 2 8 2 5 9
Iteration A 4 3 2 1 2 3 1 4 1 3 2 6 2 8 5 9
1 2 3 6 2 5 8 9
Iteration B 3 4 2 1 2 1 3 4
Finished 1 2 3 4 1 2 2 3 5 6 8 9
3 2 4 1
Insertion Sort Example 3.4.2 Quick Sort
Quicksort uses divide and conquer to sort an array. Divide and conquer is a technique
used for breaking algorithms down into subproblems, solving the subproblems, and
3.4 Common O (n log n) Algorithms then combining the results back together to solve the original problem. It can be
helpful to think of this method as divide, conquer, and combine.
Here are the divide, conquer, and combine steps that quicksort uses:
3.4.1 Merge Sort
Divide:
Merge sort sorts a subarray array[ a . . . b] as follows:
1. Pick a pivot element, A[q]. Picking a good pivot is the key for a fast imple-
1. If a = b, do not do anything, because the subarray is already sorted. mentation of quicksort; however, it is difficult to determine what a good pivot
might be.
2. Calculate the position of the middle element: k = b( a + b)/2c.
2. Partition, or rearrange, the array into two subarrays: A[ p, ..., q − 1] such that
3. Recursively sort the subarray array[ a . . . k ]. all elements are less than A[q], and A[q + 1, ..., r ] such that all elements are
greater than or equal to A[q].
4. Recursively sort the subarray array[k + 1 . . . b].
13 14
Conquer: 3.4.3 Heap Sort
1. Sort the subarrays A[ p, ..., q − 1] and A[q + 1, ..., r ] recursively with quicksort. Heapsort is a much more efficient version of selection sort. It also works by determining
the largest (or smallest) element of the list, placing that at the end (or beginning) of
Combine: the list, then continuing with the rest of the list, but accomplishes this task efficiently
by using a data structure called a heap, a special type of binary tree.
1. No work is needed to combine the arrays because they are already sorted.
3.4.4 Counting Sort
3 7 8 5 2 1 9 5 4 Counting sort assumes that each of the n input elements in a list has a key value
ranging from 0 to k, for some integer k. For each element in the list, counting sort
3 7 8 5 2 1 9 5 4 determines the number of elements that are less than it. Counting sort can use this
information to place the element directly into the correct slot of the output array.
Counting sort uses three lists: the input list, A[0, 1, ..., n], the output list,
3 5 8 5 2 1 9 4 7
B[0, 1, ..., n], and a list that serves as temporary memory, C [0, 1, ..., k ]. Note that A
and B have n slots (a slot for each element), while C contains k slots (a slot for each
3 9 8 5 2 1 4 5 7 key value).
3 1 8 5 2 4 9 5 7
3.5 Comparison of Sorting Algorithms
3 1 2 5 4 8 9 5 7
Algorithm Best Case Worst Case Average Space Usage Stable?
3 1 2 4 5 8 9 5 7 Case
Bubble Sort O (n) O ( n2 ) O ( n2 ) O (1) Yes
Insertion O (n) O ( n2 ) O ( n2 ) O (1) Yes
Sort
3 1 2 5 8 9 5 7
Merge Sort O (n log n) O (n log n) O (n log n) O (n) Yes
Quicksort O (n log n) O ( n2 ) O (n log n) O (n) Usually Not
1 2 3 5 5 9 7 8 Heapsort O (n log n) O (n log n) O (n log n) O (1) No
Counting O (k + n) O (k + n) O (k + n) O (k + n) Yes
5 5 7 9 8 Sort
vector<int> v = {4,2,5,3,5,8,3};
sort(v.begin(),v.end());
15 16
After the sorting, the contents of the vector will be [2, 3, 3, 4, 5, 5, 8]. The default
sorting order is increasing, but a reverse order is possible as follows:
sort(v.rbegin(),v.rend());
if (num & 1)
cout << "ODD";
else
cout << "EVEN";
a ^= b;
b ^= a;
a ^= b;
17 19
4.1.4 Compute XOR from 1 to n (direct method) 4.1.5 Check if a number is a power of 2
bool poweroftwo(int x)
Algorithm: Compute XOR of numbers from 1 to n
{
Input: n return x & (x-1) == 0;
Output: XOR of all numbers from 1 to n }
1 Find the remainder of n by moduling it with 4.
2 Check,
(I) If rem = 0, then xor will be same as n.
(II) If rem = 1, then xor will be 1. 4.1.6 Change case of English alphabet
(II) If rem = 1, then xor will be 1.
(II) If rem = 3 ,then xor will be 0. ch |= ’ ’; //Upper to Lower
ch &= ’_’ ; //Lower to Upper
int computeXOR(int n)
{
4.1.7 Find log2 x of integer
if (n % 4 == 0)
return n;
if (n % 4 == 1) int logarithm(int x)
return 1; {
if (n % 4 == 2) int res = 0;
return n + 1; while (x >>= 1)
else res++;
return 0; return res;
} }
20 21
Set the kth bit 4.3 C++ Special Functions
x |= (1 < < k) //sets the kth bit of x to one The g++ compiler provides the following functions for counting bits:
x &= ~(1 << k) //unsets the kth bit of x to zero • __builtin_popcount( x ): the number of ones in the number
T = (S & (-S)) While the above functions only support int numbers, there are also long long
//T is a power of two with only one bit set which is the LSB. versions of the functions available with the suffix ll like __builtin_popcountll( x ).
22 23
Then, the following code prints all elements that belong to the set: Solution
Solution
Naive Solution of time complexity O (n3 ):
Go through all O(n2 ) pairs of rows and for each pair ( a, b) calculate the
number of columns that contain a black square in both rows in O(n) time.
The following code assumes that color[y][ x ] denotes the color in row y and
column x:
int count = 0;
for (int i = 0; i < n; i++) {
if (color[a][i] == 1 && color[b][i] == 1) count++;
}
Then, those columns account for count(count − 1)/2 subgrids with black
corners, because we can choose any two of them to form a subgrid.
24 25
5.2 Generating Permutations
Problem: Print all the permutations of a set of size n.
Example. For example, the permutations of {0, 1, 2} are (0, 1, 2), (0, 2, 1), (1, 0, 2),
Solution
We can use the built-in C++ function next_permutation which rearranges the
array into the next lexicographically greater permutation. It returns 1 if this is
Brute-Force Algorithms possible and 0 otherwise.
Solution
We can use the bit representation of numbers to generate subsets.
Let’s say, set s has n elements. We will use the bits of numbers to show the
presence of element in set. If x th bit in number is SET, then x th element in s is
present in current subset. We will loop from 0 to 2n − 1, and for each number,
we will check among the first n bits, the SET bits in it and take corresponding
elements. In each iteration, we will have one subset.
27 28
For example, the following code calculates n!, the factorial of n, modulo m:
where,
( a + b) (mod m) = ( a (mod m) + b (mod m)) (mod m) a is the first number,
( a − b) (mod m) = ( a (mod m) − b (mod m)) (mod m) b is the last number and
( a · b) (mod m) = ( a (mod m) · b (mod m)) (mod m) n is the amount of numbers.
97 98
16.5.3 Sum of Geometric Progression 17.2 Sieve of Eratosthenes
bk − a
a + ak + ak2 + · · · + b =
k−1 The sieve of Eratosthenes is one of the most efficient ways to find all primes smaller
where, than n when n is smaller than 10 million
a is the first number,
Problem: Given a number n, print all primes smaller than or equal to n. It is also
b is the last number and
given that n is a small number.
the ratio between consecutive numbers is k.
A special case of a sum of a geometric progression is the formula
Solution
1 + 2 + 4 + 8 + . . . + 2n−1 = 2n − 1. Following is the algorithm to find all the prime numbers less than or equal to a
given integer n by Eratosthenes’s method:
2. The algorithm iterates through the numbers 2...n one by one. Always
when a new prime x is found, the algorithm records that the multiples of
x (2x, 3x, 4x, ...) which are ≥ x2 are not primes, because the number x
divides them.
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
0 0 0 0 2 0 2 0 2 3 2 0 3 0 2 3 2 0 3 0 2
Time Complexity is O (n log (log n)) which is very close to linear O (n)
99 102
17.3 Prime Factorization for very large numbers. In competitive programming, some problems are based on
these common conjectures.
Problem: Given a number n, print all prime factors of n. Conjecture 1 (Goldbach’s conjecture): Each even integer n >2 can be represented
as a sum n = a + b so that both a and b are primes.
Solution
Conjecture 2 (Goldbach’s weak conjecture): Every odd number greater than 5 can
Following are the steps to find all prime factors. be expressed as the sum of three primes. (A prime may be used more than once in
1. While n is divisible by 2, print 2 and divide n by 2. the same sum.) This is trivial to prove if the above conjecture is proved to be true.
√ Conjecture 3 (Twin prime conjecture): There is an infinite number of pairs of the
2. After step 1, n must be odd. Now start a loop from i = 3 to n. While form p, p + 2, where both p and p + 2 are primes.
i divides n, print i and divide n by i, increment i by 2 and continue. Conjecture 4 (Legendre’s conjecture): There is always a prime between numbers
3. If n is a prime number and is greater than 2, then n will not become 1 n2 and (n + 1)2 , where n is any positive integer.
by above two steps. So print n if it is greater than 2. Conjecture 5 (Collatz Conjecture): Collatz conjecture states that a number n
converges to 1 on repeatedly performing the following operations:
vector<int> factors(int n)
n → n/2 if n is even
{
vector<int> f; n → 3n + 1 if n is odd
while (n%2 == 0)
This has been verified for numbers up to 5.6 × 1013 . For example is x = 3, then:
{
f.push_back(2);
n = n/2; 3 → 10 → 5 → 16 → 8 → 4 → 2 → 1
} Conjecture 6 (Mersenne Prime Conjecture): There are infinitely positive integers n
for (int i = 3; i <= sqrt(n); i = i+2) for which 2n − 1 is a prime number. (There are currently 47 Mersenne primes known)
{
while (n%i == 0)
{
f.push_back(i);
n = n/i;
}
}
// This condition is to handle the case when n
// is a prime number greater than 2
if (n > 2)
f.push_back(n);
return f;
}
√
Time Complexity is O ( n)
There is an even efficient solution which uses Sieve of Eratosthenes to pre
compute prime numbers. It has time complexity O (log n). You can read about
it on GeeksforGeeks or CP3 book.
103 104
17.5 Modulo of Big Number 17.7 Euler’s Totient Function
Problem: Given a big number num represented as string and an integer x, find value Euler’s totient function ϕ(n) gives the number of coprime numbers to n between
of num mod x. Output is expected as an integer. 1 and n. For example, ϕ(14) = 6, because 1, 3, 5, 9, 11 and 13 are coprime to 14.
The value of ϕ(n) can be calculated from the prime factorization of n using the
Solution formula
The idea is to process all digits one by one and use the property that ( xy)
1
mod a ≡ ( x mod a × y mod a) mod a. Below is the implementation. ϕ(n) = n ∏ 1 −
p|n
p
int modulo(string num, int x) Where the product is over the distinct prime numbers dividing n.Note that
{ ϕ(n) = n − 1 if n is prime. The implementation of totient function is shown below.
// Initialize result
int res = 0; int totient(int n)
{
// One by one process all digits of ’num’ int result = n;
for (int i = 0; i < num.length(); i++) for (int p = 2; p * p <= n; ++p)
res = (res*10 + (int)num[i] - ’0’) % x; {
return res; if (n % p == 0)
} {
while (n % p == 0) n /= p;
result -= result / p;
}
}
Solution
The naive solution would run in O (n) time. Using modular exponentiation we 17.8 Some Common Theorems
can bring down the complexity to O (log n) by using the following algorithm:
Theorem is a mathematical result that has been proved for every input value which
1 n=0 lies in it’s domain.
x n = { x n/2 · x n/2 n is even
x n −1 · x n is odd 17.8.1 Lagrange’s Four-Square Theorem
Lagrange’s theorem states that every positive integer can be represented as a sum of
int power(int x, int n, int m) four squares. i.e.,
{ n = a2 + b2 + c2 + d2
if (n == 0) return 1%m;
long long u = power(x,n/2,m); where, n, a, b, c, d ∈ N
u = (u*u)%m;
if (n%2 == 1) u = (u*x)%m; The number of representations of a natural number n as the sum of four squares
return u; is denoted by r4 (n). Jacobi’s four-square theorem states that this is eight times
} the sum of the divisors of n if n is odd and 24 times the sum of the odd divisors
of n if n is even. In particular, for a prime number p we have the explicit formula
r4 ( p ) = 8( p + 1)
105 106
17.8.2 Wilson’s Theorem 17.8.4 Pythagorean Triples
Wilson’s theorem states that a number n is prime exactly when A Pythagorean triple is a triple ( a, b, c) that satisfies the Pythagorean theorem
a2 + b2 = c2 , which means that there is a right triangle with side lengths a, b and c.
(n − 1)! ≡ −1 (mod n) For example, (3, 4, 5) is a Pythagorean triple.
If ( a, b, c) is a Pythagorean triple, all triples of the form (ka, kb, kc) are also
OR Pythagorean triples where k>1. A Pythagorean triple is primitive if a, b and c are
coprime, and all Pythagorean triples can be constructed from primitive triples using a
( n − 1) ! (mod n) = n − 1 multiplier k.
However the theorem cannot be applied to large values of n, because it is difficult to Euclid’s formula can be used to produce all primitive Pythagorean triples. Each
calculate values of (n − 1)! when n is even as large as 50. such triple is of the form
(n2 − m2 , 2nm, n2 + m2 ),
17.8.3 Fibonacci Numbers
where 0<m<n, n and m are coprime and at least one of n and m is even. For
The Fibonacci sequence is defined as follows: example, when m = 1 and n = 2, the formula produces the smallest Pythagorean
triple
F0 = 0, F1 = 1, Fn = Fn−1 + Fn−2 (22 − 12 , 2 · 2 · 1, 22 + 12 ) = (3, 4, 5).
The first elements of the sequence are:
17.9 Linear Diophantine Equations
0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, ...
A Diophantine equation is a polynomial equation, usually in two or more unknowns,
Cassini’s identity such that only the integral solutions are required. An Integral solution is a solution
such that all the unknown variables take only integer values.
Fn−1 Fn+1 − Fn2 = (−1)n Problem: Given three integers a, b, c representing a linear equation of the form :
ax + by = c. Determine if the equation has a solution such that x and y are both
Addition Rule integral values.
Solution
Fk Fn+1 + Fk−1 Fn
Fn+k = {
F2n = Fn ( Fn+1 + Fn−1 ), if k = n For linear Diophantine equation equations, integral solutions exist if and only if,
the GCD of coefficients of the two variables divides the constant term perfectly.
GCD identity In other words the integral solution exists if, gcd( a, b)|c.
Thus the algorithm to determine if an equation has integral solution is pretty
GCD ( Fm , Fn ) = FGCD(m,n) straightforward.
k
N = ∑ Fci
i =0
107 108
17.10 Euclid’s Algorithm for GCD 17.11 Modular Inverse
Problem: Given two non-negative integers a and b, we have to find their gcd(greatest The inverse of x (mod m) is a number x −1 such that
common divisor), i.e. the largest number which is a divisor of both a and b. It’s
commonly denoted by gcd( a, b). Mathematically it is defined as: xx −1 (mod m) ≡ 1
Using modular inverses, we can divide numbers modulo m, because division by x
gcd( a, b) = max k.
k=1...∞ : k| a ∧k |b corresponds to multiplication by x −1 . For example, to evaluate the value of 36/6
(mod 17), we can use the formula 2 × 3 (mod 17), because 36 (mod 17) = 2 and
(here the symbol "|" denotes divisibility, i.e. "k | a" means "k divides a")
6−1 (mod 17) = 3.
Solution However, a modular inverse does not always exist. For example, if x = 2 and
m = 4, the equation
The algorithm is extremely simple:
xx −1 (mod m) = 1
a, if b = 0 cannot be solved, because all multiples of 2 are even and the remainder can never be
gcd( a, b) = {
gcd(b, a (mod b)), otherwise. 1 when m = 4. It turns out that the value of x −1 mod m can be calculated exactly
when x and m are coprime.
int gcd (int a, int b) A short one-liner to compute modular inverse when x and m are coprime is shown
{ below
if (b == 0)
return a; long long int inv(long long int x, long long int m)
else {
return gcd (b, a % b); return 1<x ? m - inv(m%x,x)*m/x : 1;
} }
But C++11 has a built-in fuction to calculate gcd If m is prime modular inverse can be calculate by: x −1 = x m−2 . This implementation
is shown below(using modular exponentiation):
__gcd(a,b) //returns gcd(a,b)
int power(int x, int n, int m)
Time Complexity is O (log{min{ a, b}}) {
if (n == 0) return 1%m;
long long u = power(x,n/2,m);
u = (u*u)%m;
if (n%2 == 1) u = (u*x)%m;
return u;
}
int inv(long long int x, long long int m)
{
return power(x, m-2, m)
//return x^{m-2} mod m
}
109 110
17.12 Chinese Remainder Theorem
Problem: Find x that satisfies the following equations:
x ≡ a1 (mod m1 )
x ≡ a2 (mod m2 )
x ≡ a3 (mod m3 )
···
x ≡ an (mod mn )
where all pairs of m1 , m2 , . . . , mn are coprime.
Solution
Let M = m1 × m2 × m3 × · · · × mn
Let M1 , M2 , M3 , · · · , Mn be such that
M1 = M/m1
M2 = M/m2
M3 = M/m3
···
Mn = M/mn
Let y1 , y2 , y3 , · · · , yn be such that yi is the modular inverse of Mi i.e.,
M1 × y1 ≡ 1 (mod m1 )
M2 × y2 ≡ 1 (mod m2 )
M3 × y3 ≡ 1 (mod m3 )
···
Mn × yn ≡ 1 (mod mn )
Then x ≡ a1 M1 y1 + a2 M2 y2 + a2 M2 y2 + · · · + an Mn yn (mod M )
x + kM
Example.
x ≡ 3 (mod 8)
x ≡ 1 (mod 9)
x ≡ 4 (mod 11)
∴ M = 8 × 9 × 11 = 792
M1 = 792/8 = 99
M2 = 792/9 = 88
M3 = 792/11 = 72
111
Pablo Messina’s ICPC Notebook CONTENTS - CONTENTS Página 1 de 40
Contents 7.6 Lowest Commen Ancestor (LCA) . . . . . . . . . . . . . . . . . . . . . . 23
7.7 Diameter of a Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25
1 C++ Template 2 7.8 Articulation Points, Cut Edges, Biconnected Components . . . . . . . . 25
7.9 Strongly Connected Components . . . . . . . . . . . . . . . . . . . . . . 26
2 C++ Cheat Sheet 2 7.10 Max Flow : Dinic . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 26
3 Data Structures 4 8 Mathematics 27
3.1 C++ STL . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 8.1 Euclidean Algorithm . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
3.1.1 Pairs & Tuples . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 8.2 Primality Test . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28
3.1.2 Array . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 8.3 Prime Factorization . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 29
3.1.3 Vector . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5 8.4 Binary modular exponentiation . . . . . . . . . . . . . . . . . . . . . . . 30
3.1.4 Queue & Stack . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 8.5 Modular Binomial Coefficient . . . . . . . . . . . . . . . . . . . . . . . . 30
3.1.5 Priority Queue . . . . . . . . . . . . . . . . . . . . . . . . . . . . 6 8.6 Modular Multinomial Coefficient . . . . . . . . . . . . . . . . . . . . . . 30
3.1.6 Set & Multiset . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7 8.7 Chinese Remainder Theorem (CRT) . . . . . . . . . . . . . . . . . . . . 31
3.1.7 Map & Multimap . . . . . . . . . . . . . . . . . . . . . . . . . . . 8 8.8 Theorems . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32
3.1.8 Unordered Set & Multiset . . . . . . . . . . . . . . . . . . . . . . 9 8.8.1 Pick’s Theorem . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32
3.1.9 Unordered Map & Multimap . . . . . . . . . . . . . . . . . . . . 10
3.1.10 Deque . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 9 Geometry 32
3.1.11 List . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11 9.1 Geometry 2D Utils . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32
3.1.12 Policy based Data Structures: Ordered Set . . . . . . . . . . . . 11 9.2 Geometry 3D Utils . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 33
3.1.13 Bitset . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 9.3 Polygon Algorithms . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
3.2 Sparse Tables . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 9.4 Trigonometry . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 36
3.3 Fenwick Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 9.5 Convex Hull . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 36
3.4 Fenwick Tree 2D . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13 9.6 Green’s Theorem . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37
3.5 Segment Tree . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 13
3.6 Segment Tree Lazy . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 14 10 Strings 37
3.7 Union-Find . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15 10.1 Suffix Array . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37
10.2 Trie . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 38
4 Binary Search 16 10.3 Rolling Hashing . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 38
10.4 KMP (Knuth Morris Pratt) . . . . . . . . . . . . . . . . . . . . . . . . . 39
5 Ternary Search 17 10.5 Shortest Repeating Cycle . . . . . . . . . . . . . . . . . . . . . . . . . . 40
6 Dynamic Programming 17
6.1 Longest Increasing Subsequence . . . . . . . . . . . . . . . . . . . . . . . 17
6.2 Travelling Salesman Problem . . . . . . . . . . . . . . . . . . . . . . . . 17
6.3 Knapsack . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 18
6.4 Divide & Conquer Optimization . . . . . . . . . . . . . . . . . . . . . . 19
7 Graphs 20
7.1 BFS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
7.2 DFS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20
7.3 TopoSort . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 21
7.4 Dijkstra . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 21
7.5 Minimum Spanning Tree (Kruskal & Prim) . . . . . . . . . . . . . . . . 22
Pablo Messina’s ICPC Notebook 2 C++ CHEAT SHEET - Página 2 de 40
1 C++ Template 29 ss >> y; // y = 12345678910
30
31 // 2) stoi, stoll
1 #pragma GCC optimize("Ofast") 32 string str_dec = "2001, A Space Odyssey";
2 #include <bits/stdc++.h> 33 string str_hex = "40c3";
3 using namespace std; 34 string str_bin = "-10010110001";
4 // defines 35 string str_auto = "0x7f";
5 #define rep(i,a,b) for(int i = a; i <= b; ++i) 36 int sz;
6 #define invrep(i,b,a) for(int i = b; i >= a; --i) 37 int i_dec = stoi(str_dec,&sz);
7 #define umap unordered_map 38 int i_hex = stoi(str_hex,0,16);
8 #define uset unordered_set 39 int i_bin = stoi(str_bin,0,2);
9 // typedefs; 40 int i_auto = stoi(str_auto,0,0);
10 typedef unsigned long long int ull; 41 cout << str_dec << ": " << i_dec << " and [" << str_dec.substr(sz) << "]\n";
11 typedef long long int ll; 42 cout << str_hex << ": " << i_hex << ’\n’;
12 typedef vector<int> vi; 43 cout << str_bin << ": " << i_bin << ’\n’;
13 typedef pair<int,int> ii; 44 cout << str_auto << ": " << i_auto << ’\n’;
14 // ------------------------------- 45 // 2001, A Space Odyssey: 2001 and [, A Space Odyssey]
15 int main() { 46 // 40c3: 16579
16 ios::sync_with_stdio(false); 47 // -10010110001: -1201
17 cin.tie(0); cout.tie(0); 48 // 0x7f: 127
18 return 0; 49 string str = "8246821 0xffff 020";
19 } 50 int sz = 0;
51 while (!str.empty()) {
2 C++ Cheat Sheet 52
53
long long ll = stoll(str,&sz,0);
cout << str.substr(0,sz) << " interpreted as " << ll << ’\n’;
54 str = str.substr(sz);
1 /* ================================= */ 55 }
2 /* Input/Output with C++: cin & cout */ 56 // 8246821 interpreted as 8246821
3 /* ================================= */ 57 // 0xffff interpreted as 65535
4 58 // 020 interpreted as 16
5 // reading many lines of unknown length 59
13 67 /* ============================ */
14 // printing floating with fixed precision 68 /* C++ STRING UTILITY FUNCTIONS */
15 cout << setprecision(6) << fixed; 69 /* ============================ */
16 cout << 12312.12312355; 70
5 #define rep(i,a,b) for(int i=a; i<=b; i++) 59 int arr[5] = {1, 2, 3, 4, 5};
6 using namespace std; 60 v2.assign(arr, arr + 5); // v2 = {1, 2, 3, 4, 5}
7 61 v2.assign(arr, arr + 3); // v2 = {1, 2, 3}
8 //============================== 62
25 //===================================================== 79
14 // or 68 //==========================
15 unordered_map<string,float> m; 69 // unordered_map::operator=
16 m.emplace("a", 1.50); 70 //==========================
17 m.emplace("b", 2.10); 71 typedef unordered_map<string,string> stringmap;
Pablo Messina’s ICPC Notebook 3 DATA STRUCTURES - 3.1 C++ STL Página 11 de 40
72 stringmap merge (stringmap a,stringmap b) { 22 // 1 10 20 30 30 20 2 3 4 5
73 stringmap temp(a); temp.insert(b.begin(),b.end()); return temp; 23 // ^
74 } 24 cout << "mylist contains:";
75 int main() { 25 for (int x : mylist) cout << ’ ’ << x;
76 stringmap first, second, third; 26 cout << ’\n’;
77 first = {{"AAPL","Apple"},{"MSFT","Microsoft"}}; // init list 27 // mylist contains: 1 10 20 30 30 20 2 3 4 5
78 second = {{"GOOG","Google"},{"ORCL","Oracle"}}; // init list 28
79 third = merge(first,second); // move 29 //=======
80 first = third; // copy 30 // ERASE
81 cout << "first contains:"; 31 //=======
82 for (auto& x: first) cout << " " << x.first << ":" << x.second; 32 // http://www.cplusplus.com/reference/list/list/erase/
83 cout << ’\n’; 33
84 return 0; 34 list<int> mylist;
85 } 35 list<int>::iterator it1,it2;
86 // first contains: MSFT:Microsoft AAPL:Apple GOOG:Google ORCL:Oracle 36 // set some values:
37 rep(i,1,9) mylist.push_back(i*10);
3.1.10 Deque 38 // 10 20 30 40 50 60 70 80 90
39 it1 = it2 = mylist.begin(); // ^^
1 // references: 40 advance (it2,6); // ^ ^
2 // http://www.cplusplus.com/reference/deque/deque/ 41 ++it1; // ^ ^
3 // https://www.geeksforgeeks.org/deque-cpp-stl/ 42
1 // Find the index of the first item that satisfies a predicate 55 int main () {
2 // over a range [i,j), i.e., from i to j-1 56 vector<int> v{10,20,30,30,20,10,10,20}; // 10 20 30 30 20 10 10 20
3 // If no such index exists, j is returned 57 sort (v.begin(), v.end()); // 10 10 10 20 20 20 30 30
4 function binsearch(array, i, j) { 58 auto low = lower_bound (v.begin(), v.end(), 20); // ^
5 assert(i < j) // since the range is [i,j), then j must be > i 59 auto up = upper_bound (v.begin(), v.end(), 20); // ^
6 while (i < j) { 60 cout << "lower_bound at position " << (low- v.begin()) << ’\n’;
7 m = (i+j) >> 1; // m = (i+j) / 2; 61 cout << "upper_bound at position " << (up - v.begin()) << ’\n’;
8 if (predicate(array[m])) 62 return 0;
9 j = m 63 }
10 else 64
11 i = m + 1 65 // ------------------------------------------------
12 } 66 // Query: how many items are LESS THAN (<) value x
13 return i; // notice that i == j if the predicate is false for the whole range 67
16 // ----------------------------- 70 // ------------------------------------------------
17 // EXAMPLE 1: Integer Lowerbound 71 // Query: how many items are GREATER THAN (>) value x
18 // predicate(a, i, key) = (a[i] >= key) 72
3 /* ===================== */ 57 // ---------------------------------
4 58 // TOP-DOWN RECURSION (pseudo-code)
5 // -------------------------------------- 59
7 #define invrep(i,b,a) for (int i=b; i>=a; --i) 61 // move node u "k" levels up towards the root
8 62 // i.e. find the k-th ancestor of u
9 // General comments: 63 int raise(int u, int k) {
10 // * Both of these methods assume that we are working with a connected 64 for (int e = 0; k; e++, k>>=1) if (k&1) u = anc(u,e);
11 // graph ’g’ of ’n’ nodes, and that nodes are compactly indexed from 0 to n-1. 65 return u;
12 // In case you have a forest of trees, a simple trick is to create a fake 66 }
13 // root and connect all the trees to it (make sure to re-index all your nodes) 67
14 // * ’g’ need not be a ’tree’, DFS fill implictly find a tree for you 68 int lca(int u, int v) {
15 // in case you don’t care of the specific tree (e.g. if cycles are not important) 69 if (D[u] < D[v]) swap(u, v);
16 70 u = raise(u, D[u] - D[v]); // raise lowest to same level
17 // ------------------------------------------------------------ 71 if (u == v) return u; // same node, we are done
18 // METHOD 1: SPARSE TABLE - BINARY LIFTING (aka JUMP POINTERS) 72 // raise u and v to their highest ancestors below the LCA
19 // ------------------------------------------------------------ 73 invrep (e, maxe, 0) {
20 // construction: O(|V| log |V|) 74 // greedily take the biggest 2^e jump possible as long as
21 // query: O(log|V|) 75 // u and v still remain BELOW the LCA
22 // ** advantages: 76 if (anc(u,e) != anc(v,e)) {
23 // - the lca query can be modified to compute querys over the path between 2 nodes 77 u = anc(u,e), v = anc(v,e);
24 // - it’s possible to append new leaf nodes to the tree 78 }
25 79 }
26 struct LCA { 80 // the direct parent of u (or v) is lca(u,v)
27 vector<int> A, D; // ancestors, depths 81 return anc(u,0);
28 vector<vector<int>> *g; // pointer to graph 82 }
29 int n, maxe; // num nodes, max exponent 83
30 int& anc(int u, int e) { return A[e * n + u]; } 84 // distance between ’u’ and ’v’
31 int inline log2(int x) { return 31 - __builtin_clz(x); } 85 int dist(int u, int v) {
32 86 return D[u] + D[v] - 2 * D[lca(u,v)];
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.6 Lowest Commen Ancestor (LCA) Página 24 de 40
87 } 140 D[idx++] = depth;
88 // optimized version (in case you already computed lca(u,v)) 141 }
89 int dist(int u, int v, int lca_uv) { 142 }
90 return D[u] + D[v] - 2 * D[lca_uv]; 143 }
91 } 144
92 // get the node located k steps from ’u’ walking towards ’v’ 145 LCA(vector<vector<int>>& _g, int root) {
93 int kth_node_in_path(int u, int v, int k) { 146 g = &_g;
94 int lca_uv = lca(u,v); 147 n = _g.size();
95 if (D[u] - D[lca_uv] >= k) return raise(u, k); 148 H.assign(n, -1);
96 return raise(v, dist(u,v,lca_uv) - k); 149 E.resize(2*n);
97 } 150 D.resize(2*n);
98 151 idx = 0;
99 int add_child(int p, int u) { // optional 152 dfs(root, 0); // euler tour
100 // add to graph 153 int nn = idx; // <-- make sure you use the correct number
101 (*g)[p].push_back(u); 154 int maxe = log2(nn);
102 // update depth 155 DP.resize(nn * (maxe+1));
103 D[u] = D[p] + 1; 156 // build sparse table with bottom-up DP
104 // update ancestors 157 rep(i,0,nn-1) rmq(i,0) = i; // base case
105 anc(u,0) = p; 158 rep(e,1,maxe) { // general cases
106 rep (e, 1, maxe){ 159 rep(i, 0, nn - (1 << e)) {
107 p = anc(p,e-1); 160 // i ... i + 2 ^ (e-1) - 1
108 if (p == -1) break; 161 int i1 = rmq(i,e-1);
109 anc(u,e) = p; 162 // i + 2 ^ (e-1) ... i + 2 ^ e - 1
110 } 163 int i2 = rmq(i + (1 << (e-1)), e-1);
111 } 164 // choose index with minimum depth
112 }; 165 rmq(i,e) = D[i1] < D[i2] ? i1 : i2;
113 166 }
114 // ------------------------------------------ 167 }
115 // METHOD 2: SPARSE TABLE - EULER TOUR + RMQ 168 }
116 // ------------------------------------------ 169
117 // construction: O(2|V| log 2|V|) = O(|V| log |V|) 170 int lca(int u, int v) {
118 // query: O(1) (** assuming that __builtin_clz is mapped to an 171 // get ocurrence indexes in increasing order
119 // efficient processor instruction) 172 int l = H[u], r = H[v];
120 173 if (l > r) swap(l, r);
121 174 // get node with minimum depth in range [l .. r] in O(1)
122 struct LCA { 175 int len = r - l + 1;
123 vector<int> E, D, H; // E = euler tour, D = depth, H = first index of node in euler 176 int e = log2(len);
tour 177 int i1 = rmq(l,e);
124 vector<int> DP // memo for range minimun query 178 int i2 = rmq(r - ((1 << e) - 1), e);
125 vector<vector<int>> *g; // pointer to graph 179 return D[i1] < D[i2] ? E[i1] : E[i2];
126 int idx; // tracks node ocurrences 180 }
127 int n; // number of nodes 181
128 182 int dist(int u, int v) {
129 int& rmq(int i, int e) { return DP[e * idx + i]; } 183 // make sure you use H to retrieve the indexes of u and v
130 inline int log2(int x) { return 31 - __builtin_clz(x); } 184 // within the Euler Tour sequence before using D
131 185 return D[H[u]] + D[H[v]] - 2 * D[H[lca(u,v)]];
132 void dfs(int u, int depth) { 186 }
133 H[u] = idx; // index of first u’s ocurrence 187 }
134 E[idx] = u; // record node ocurrence 188
135 D[idx++] = depth; // record depth 189 // -----------------
136 for (int v : (*g)[u]) { 190 // EXAMPLE OF USAGE
137 if (H[v] == -1) { 191 // -----------------
138 dfs(v, depth + 1); // explore v’s subtree and come back to u 192 int main() {
139 E[idx] = u; // new ocurrence of u 193 // build graph
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.7 Diameter of a Tree Página 25 de 40
194 int n, m; 2 // Tarjan’s Algorithm
195 scanf("%d%d", &n, &m); 3 // -------------------
196 vector<vector<int>> g(n); 4 //references:
197 while (m--) { 5 //https://www.youtube.com/watch?v=jFZsDDB0-vo
198 int u, v; scanf("%d%d", &u, &v); 6 //https://www.hackerearth.com/practice/algorithms/graphs/articulation-points-and-bridges/
199 g[u].push_back(v); tutorial/
200 g[v].push_back(u); 7 //https://www.hackerearth.com/practice/algorithms/graphs/biconnected-components/tutorial/
201 } 8 //http://web.iitd.ac.in/~bspanda/biconnectedMTL776.pdf
202 // init LCA 9 typedef pair<int,int> ii;
203 LCA lca(g,0); 10 const int MAXN = 1000;
204 // answer queries 11 int depth[MAXN];
205 int q; scanf("%d", &q); 12 int low[MAXN];
206 while (q--) { 13 vector<int> g[MAXN];
207 int u, v; scanf("%d%d", &u, &v); 14 stack<ii> edge_stack;
208 printf("LCA(%d,%d) = %d\n", u, v, lca.lca(u,v)); 15
209 printf("dist(%d,%d) = %d\n", u, v, lca.dist(u,v)); 16 void print_and_remove_bicomp(int u, int v) {
210 } 17 puts("biconnected component found:");
211 }; 18 ii uv(u,v);
19 while (true) {
7.7 Diameter of a Tree 20 ii top = edge_stack.top();
21 edge_stack.pop();
1 // ========================== 22 printf("(%d, %d)\n", top.first, top.second);
2 // Find Tree’s Diameter Ends 23 if (top == uv) break;
3 // ========================== 24 }
4 const int MAXN = 10000; 25 }
5 26
6 int farthest_from(vector<vi>& g, int s) { // find farthest node from ’s’ with BFS 27 void dfs(int u, int p, int d) { // (node, parent, depth)
7 static int dist[MAXN]; 28 static num_root_children = 0;
8 memset(dist, -1, sizeof(int) * g.size()); 29 depth[u] = d;
9 int farthest = s; 30 low[u] = d; // u at least can reach itself (ignoring u-p edge)
10 queue<int> q; 31 for(int v : g[u]) {
11 q.push(s); 32 if (v == p) continue; // direct edge to parent -> ignore
12 dist[s] = 0; 33 if (depth[v] == -1) { // exploring a new, unvisited child node
13 while (!q.empty()) { 34 edge_stack.emplace(u,v); // add edge to stack
14 int u = q.front(); q.pop(); 35 dfs(v, u, d + 1); // explore recursively v’s subtree
15 for (int v : g[u]) { 36 // 1) detect articulation points and biconnected components
16 if (dist[v] == -1) { 37 if (p == -1) { // 1.1) special case: if u is root
17 dist[v] = dist[u] + 1; 38 if (++num_root_children == 2) {
18 q.push(v); 39 // we detected that root has AT LEAST 2 children
19 if (dist[v] > dist[farthest]) farthest = v; 40 // therefore root is an articulation point
20 } 41 printf("root = %d is articulation point\n", root);
21 } 42 }
22 } 43 // whenever we come back to the root, we just finished
23 return farthest; 44 // exploring a whole biconnected component
24 } 45 print_and_remove_bicomp(u,v);
25 46 } else if (low[v] >= d) { // 1.2) general case: non-root
26 void find_diameter(vector<vi>& g, int& e1, int& e2) { 47 printf("u = %d is articulation point\n", u);
27 e1 = farthest_from(g, 0); 48 // we entered through and came back to an AP,
28 e2 = farthest_from(g, e1); 49 // so we just finished exploring a whole biconnected component
29 } 50 print_and_remove_bicomp(u,v);
51 }
7.8 Articulation Points, Cut Edges, Biconnected Components 52 // 2) detect cut edges (a.k.a. bridges)
53 if (low[v] > depth[u]) {
1 // ------------------- 54 printf("(u,v) = (%d, %d) is cut edge\n", u, v);
Pablo Messina’s ICPC Notebook 7 GRAPHS - 7.9 Strongly Connected Components Página 26 de 40
55 } 40 ids.assign(n, -1);
56 // propagate low 41 low.resize(n);
57 low[u] = min(low[u], low[v]); 42 instack.assign(n, 0);
58 } else if (depth[v] < d) { // back-edge to proper ancestor 43 ID = 0;
59 edge_stack.emplace(u,v); // add edge to stack 44 rep(u, 0, n-1) if (ids[u] == -1) dfs(u);
60 low[u] = min(low[u], depth[v]); // propagate low 45 }
61 } else { // forward-edge to an already visited descendant 46 };
62 // => do nothing, because this edge was already considered as a 47
63 // back-edge from v -> u 48 // example of usage
64 } 49 int main() {
65 } 50 // read and build graph from standard input
66 } 51 int n, m; cin >> n >> m;
52 vector<vector<int>> g(n);
7.9 Strongly Connected Components 53 while(m--) {
54 int u, v; cin >> u >> v; u--, v--;
1 #include <bits/stdc++.h> 55 g[u].push_back(v);
2 #define rep(i,a,b) for(int i=a; i<=b; ++i) 56 }
3 using namespace std; 57 // find SCCs
4 // ----------------------------------------- 58 tarjanSCC tscc(g);
5 // implementation of Tarjan’s SCC algorithm 59 return 0;
6 struct tarjanSCC { 60 }
7 vector<int> _stack, ids, low;
8 vector<bool> instack; 7.10 Max Flow : Dinic
9 vector<vector<int>>* g;
10 int n, ID; 1 // Time Complexity:
11 void dfs(int u) { 2 // - general worst case: O (|E| * |V|^2)
12 ids[u] = low[u] = ID++; 3 // - unit capacities: O( min(V^(2/3), sqrt(E)) * E)
13 instack[u] = true; 4 // - Bipartite graph (unit capacities) + source & sink (any capacities): O(E sqrt V)
14 _stack.push_back(u); 5
15 for (int v : (*g)[u]) { 6 #include <bits/stdc++.h>
16 if (ids[v] == -1) { 7 using namespace std;
17 dfs(v); 8 typedef long long int ll;
18 low[u] = min(low[v], low[u]); 9
19 } else if (instack[v]) { 10 struct Dinic {
20 low[u] = min(low[v], low[u]); 11 struct edge {
21 } 12 int to, rev;
22 } 13 ll f, cap;
23 if (low[u] == ids[u]) { 14 };
24 // u is the root of a SCC 15
25 // ** here you can do whatever you want 16 vector<vector<edge>> g;
26 // with the SCC just found 17 vector<ll> dist;
27 cout << "SCC found!\n"; 18 vector<int> q, work;
28 // remove SCC from top of the stack 19 int n, sink;
29 while (1) { 20
30 int x = _stack.back(); _stack.pop_back(); 21 bool bfs(int start, int finish) {
31 instack[x] = false; 22 dist.assign(n, -1);
32 if (x == u) break; 23 dist[start] = 0;
33 } 24 int head = 0, tail = 0;
34 } 25 q[tail++] = start;
35 } 26 while (head < tail) {
36 tarjanSCC(vector<vector<int>>& _g) { 27 int u = q[head++];
37 g = &_g; 28 for (const edge &e : g[u]) {
38 n = _g.size(); 29 int v = e.to;
39 _stack.reserve(n); 30 if (dist[v] == -1 and e.f < e.cap) {
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - Página 27 de 40
31 dist[v] = dist[u] + 1; 84 int main() {
32 q[tail++] = v; 85 Dinic din(2);
33 } 86 din.add_edge(0,1,10);
34 } 87 ll mf = din.max_flow(0,1);
35 } 88 }
36 return dist[finish] != -1;
37
38
}
8 Mathematics
39 ll dfs(int u, ll f) {
40 if (u == sink) 8.1 Euclidean Algorithm
41 return f;
42 for (int &i = work[u]; i < (int)g[u].size(); ++i) { 1 typedef long long int ll;
43 edge &e = g[u][i]; 2
44 int v = e.to; 3 inline ll mod(ll x, ll m) { return ((x %= m) < 0) ? x+m : x; }
45 if (e.cap <= e.f or dist[v] != dist[u] + 1) 4
46 continue; 5 /* ============================= */
47 ll df = dfs(v, min(f, e.cap - e.f)); 6 /* GCD (greatest common divisor) */
48 if (df > 0) { 7 /* ============================= */
49 e.f += df; 8 // OPTION 1: using C++ builtin function __gcd
50 g[v][e.rev].f -= df; 9 __gcd(a,b)
51 return df; 10 // OPTION 2: manually usings euclid’s algorithm
52 } 11 int gcd (ll a, ll b) {
53 } 12 while (b) { a %= b; swap(a,b); }
54 return 0; 13 return a;
55 } 14 }
56 15
57 Dinic(int n) { 16 /* ============ */
58 this->n = n; 17 /* extended GCD */
59 g.resize(n); 18 /* ============ */
60 dist.resize(n); 19 // extended euclid’s algorithm: find g, x, y such that
61 q.resize(n); 20 // a * x + b * y = g = gcd(a, b)
62 } 21 // The algorithm finds a solution (x0,y0) but there are infinite more:
63 22 // x = x0 + n * (b/g)
64 void add_edge(int u, int v, ll cap) { 23 // y = y0 - n * (a/g)
65 edge a = {v, (int)g[v].size(), 0, cap}; 24 // where n is integer, are the set of all solutions
66 edge b = {u, (int)g[u].size(), 0, 0}; //Poner cap en vez de 0 si la arista es 25
bidireccional 26 // --- version 1: iterative
67 g[u].push_back(a); 27 ll gcdext(ll a, ll b, ll& x, ll& y) {
68 g[v].push_back(b); 28 ll r2, x2, y2, r1, x1, y1, r0, x0, y0, q;
69 } 29 r2 = a, x2 = 1, y2 = 0;
70 30 r1 = b, x1 = 0, y1 = 1;
71 ll max_flow(int source, int dest) { 31 while (r1) {
72 sink = dest; 32 q = r2 / r1;
73 ll ans = 0; 33 r0 = r2 % r1;
74 while (bfs(source, dest)) { 34 x0 = x2 - q * x1;
75 work.assign(n, 0); 35 y0 = y2 - q * y1;
76 while (ll delta = dfs(source, LLONG_MAX)) 36 r2 = r1, x2 = x1, y2 = y1;
77 ans += delta; 37 r1 = r0, x1 = x0, y1 = y0;
78 } 38 }
79 return ans; 39 ll g = r2; x = x2, y = y2;
80 } 40 if (g < 0) g = -g, x = -x, y = -y; // make sure g > 0
81 }; 41 // for debugging (in case you think you might have bugs)
82 42 // assert (g == a * x + b * y);
83 // usage 43 // assert (g == __gcd(abs(a),abs(b)));
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.2 Primality Test Página 28 de 40
44 return g; 97 /* Linear Congruence Equation */
45 } 98 /* ========================== */
46 99 // recommended reading:
47 // --- version 2: recursive 100 // http://gauss.math.luc.edu/greicius/Math201/Fall2012/Lectures/linear-congruences.
48 ll gcdext(ll a, ll b, ll& x, ll& y) { article.pdf
49 if (a == 0) { 101
50 x = 0, y = 1; 102 // find smallest integer x (mod m) that solves the equation
51 return b; 103 // a * x = b (mod m)
52 } 104 bool lincongeq(ll a, ll b, ll m, ll& x) {
53 ll x1, y1; 105 assert (m > 0);
54 ll g = gcdext(b % a, a, x1, y1); 106 a = mod(a,m);
55 x = y1 - (b / a) * x1; 107 b = mod(b,m);
56 y = x1; 108 ll s, t;
57 return g; 109 ll g = gcdext(a,m,s,t);
58 } 110 if (b % g == 0) {
59 111 ll bb = b/g;
60 /* ====================== */ 112 ll mm = m/g;
61 /* multiplicative inverse */ 113 ll n = -s*bb/mm;
62 /* ====================== */ 114 x = s*bb + n*mm;
63 // find x such that a * x = 1 (mod m) 115 if (x < 0) x += mm;
64 // this is the same as finding x, y such that 116 // for debugging
65 // a * x + m * y = 1, which can be done with gcdext 117 // assert (0 <= x and x < m);
66 // and then returning x (mod m) 118 // assert (mod(a*x,m) == b);
67 ll mulinv(ll a, ll m) { 119 return true;
68 ll x, y; 120 }
69 if (gcdext(a, m, x, y) == 1) return mod(x, m); // make sure 0 <= x < m 121 return false;
70 return -1; // no inverse exists 122 }
71 }
72 8.2 Primality Test
73 /* =========================== */
74 /* Linear Diophantine Equation */ 1 // ===============
75 /* =========================== */ 2 // trial division
76 // recommended readings: 3 //=================
77 // http://gauss.math.luc.edu/greicius/Math201/Fall2012/Lectures/linear-diophantine. 4 // complexity: ~O( sqrt(x) )
article.pdf 5 bool isPrime(int x) {
78 // http://mathonline.wikidot.com/solutions-to-linear-diophantine-equations 6 for (int d = 2; d * d <= x; d++) {
79 7 if (x % d == 0)
80 // find intengers x and y such that a * x + b * y = c 8 return false;
81 bool lindiopeq(ll a, ll b, ll c, ll& x, ll& y) { 9 }
82 if (a == 0 and b == 0) { // special case 10 return true;
83 if (c == 0) { x = y = 0; return true; } 11 }
84 return false; 12
85 } 13 // =======================================
86 // general case 14 // trial division with precomputed primes
87 ll s, t; 15 // =======================================
88 ll g = gcdext(a,b,s,t); 16 // complexity: ~O( sqrt(x)/log(sqrt(x)) )
89 if (c % g == 0) { 17 // + time of precomputing primes
90 x = s*(c/g), y = t*(c/g); 18 bool isPrime(int x, vector<int>& primes) {
91 return true; 19 for (int p : primes) {
92 } 20 if (p*p > x) break;
93 return false; 21 if (p % x == 0)
94 } 22 return false;
95 23 }
96 /* ========================== */ 24 return true;
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.3 Prime Factorization Página 29 de 40
25 } 79 }
26 80
27 81 bool MillerRabin(u64 n) { // returns true if n is prime, else returns false.
28 // ============= 82 if (n < 2)
29 // Miller-Rabin 83 return false;
30 // ============= 84
31 // complexity: O (k * log^3(n)) 85 int r = 0;
32 // references: 86 u64 d = n - 1;
33 // https://cp-algorithms.com/algebra/primality_tests.html 87 while ((d & 1) == 0) {
34 // https://en.wikipedia.org/wiki/Miller%E2%80%93Rabin_primality_test#Complexity 88 d >>= 1;
35 using u64 = uint64_t; 89 r++;
36 using u128 = __uint128_t; 90 }
37 91
38 u64 binpower(u64 base, u64 e, u64 mod) { 92 for (int a : {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}) {
39 u64 result = 1; 93 if (n == a)
40 base %= mod; 94 return true;
41 while (e) { 95 if (check_composite(n, a, d, r))
42 if (e & 1) 96 return false;
43 result = (u128)result * base % mod; 97 }
44 base = (u128)base * base % mod; 98 return true;
45 e >>= 1; 99 }
46 }
47 return result; 8.3 Prime Factorization
48 }
49 1 //=====================
50 bool check_composite(u64 n, u64 a, u64 d, int s) { 2 // Prime Factorization
51 u64 x = binpower(a, d, n); 3 //=====================
52 if (x == 1 || x == n - 1) 4 // reference: https://cp-algorithms.com/algebra/factorization.html
53 return false; 5
54 for (int r = 1; r < s; r++) { 6 // method 1: trial division
55 x = (u128)x * x % n; 7 // complexity: ~ O( sqrt(n) + log_2(n) )
56 if (x == n - 1) 8 vector<int> trial_division(int n) {
57 return false; 9 vector<int> factors;
58 } 10 for (int d = 2; d*d <= n; d++) {
59 return true; 11 while (n % d == 0) {
60 }; 12 factors.push_back(d);
61 13 if ((n /= d) == 1) return factors;
62 bool MillerRabin(u64 n) { // returns true if n is probably prime, else returns false. 14 }
63 if (n < 4) 15 }
64 return n == 2 || n == 3; 16 if (n > 1) factors.push_back(n);
65 17 return factors;
66 int s = 0; 18 }
67 u64 d = n - 1; 19
68 while ((d & 1) == 0) { 20 // method 2: precomputed primes
69 d >>= 1; 21 // complexity: ~ O( sqrt(n) / log(sqrt(n)) + log_2(n) )
70 s++; 22 // + time of precomputing primes
71 } 23 vector<int> trial_division_precomp(int n, vector<int>& primes) {
72 24 vector<int> factors;
73 for (int i = 0; i < iter; i++) { 25 for (int d : primes) {
74 int a = 2 + rand() % (n - 3); 26 if (d*d > n) break;
75 if (check_composite(n, a, d, s)) 27 while (n % d == 0) {
76 return false; 28 factors.push_back(d);
77 } 29 if ((n /= d) == 1) return factors;
78 return true; 30 }
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.4 Binary modular exponentiation Página 30 de 40
31 } 14 // choose(n,0) = choose(n,n) = 1
32 if (n > 1) factors.push_back(n); 15
33 return factors; 16 // 1.1) DP top-down
34 } 17 ll memo[MAXN+1][MAXN+1];
35 18 ll choose(int n, int k) {
36 //==================================== 19 ll& ans = memo[n][k];
37 // Prime Factorization of Factorials 20 if (ans != -1) return ans;
38 //==================================== 21 if (k == 0) return ans = 1;
39 // references: 22 if (n == k) return ans = 1;
40 // http://mathforum.org/library/drmath/view/67291.html 23 if (n < k) return ans = 0;
41 // https://janmr.com/blog/2010/10/prime-factors-of-factorial-numbers/ 24 return ans = (choose(n-1,k) + choose(n-1,k-1)) % MOD;
42 #define umap unordered_map 25 }
43 umap<int,int> factorial_prime_factorization(int n, vector<int>& primes) { 26
44 umap<int,int> prime2exp; 27 // 1.2) DP bottom-up
45 for (int p : primes) { 28 ll choose[MAXN+1][MAXN+1];
46 if (p > n) break; 29 rep(m,1,MAXN) {
47 int e = 0; 30 choose[m][0] = choose[m][m] = 1;
48 int tmp = n; 31 rep(k,1,m-1) choose[m][k] = (choose[m-1][k] + choose[m-1][k-1]) % MOD;
49 while ((tmp /= p) > 0) e += tmp; 32 }
50 if (e > 0) prime2exp[p] = e; 33
51 } 34 // -------------------------------------------------
52 return prime2exp; 35 // method 3: factorials and multiplicative inverse
53 } 36 // n! / (k! * (n-k)!) = n! * (k! * (n-k)!)^-1 (MOD N)
37 // we need to find the multiplicative inverse of (k! * (n-k)!) MOD N
8.4 Binary modular exponentiation 38
39 ll fac[MAXN+1];
40 ll choose_memo[MAXN+1][MAXN+1];
1 // compute a^b (mod m) 41 void init() {
2 int binary_exp(int a, int b, int m) { 42 fac[0] = 1;
3 a %= m; 43 rep(i,1,MAXN) fac[i] = (i * fac[i-1]) % MOD;
4 int res = 1; 44 memset(choose_memo, -1, sizeof choose_memo);
5 while (b > 0) { 45 }
6 if (b&1) res = (res * a) % m; 46 ll choose_mod(int n, int k) {
7 a = (a * a) % m; 47 if (choose_memo[n][k] != -1) return choose_memo[n][k];
8 b >>= 1; 48 return choose_memo[n][k] = mul(fac[n], mulinv(mul(fac[k], fac[n-k])));
9 } 49 }
10 return res;
11 } 8.6 Modular Multinomial Coefficient
8.5 Modular Binomial Coefficient 1 typedef long long int ll;
2 const ll MOD = 1000000007ll; // a prime number
1 #define rep(i,a,b) for(int i = a; i <= b; ++i) 3 const int MAXN = 1000;
2 typedef long long int ll; 4
3 const ll MOD = 1000000007ll; // a prime number 5 /* ===================== */
4 const int MAXN = 1000; 6 /* MODULAR MULTINOMIAL */
5 7 /* ===================== */
6 /* ================== */ 8
7 /* MODULAR BINOMIAL */ 9 ll memo[MAXN+1][MAXN+1];
8 /* ================== */ 10 ll choose(int n, int k) {
9 // choose_mod(n,k) = n! / (k! * (n-k)!) % MOD 11 ll& ans = memo[n][k];
10 12 if (ans != -1) return ans;
11 // --------------------- 13 if (k == 0) return ans = 1;
12 // method 1: DP 14 if (n == k) return ans = 1;
13 // choose(n,k) = (choose(n-1,k-1) + choose(n-1,k)) % MOD 15 if (n < k) return ans = 0;
Pablo Messina’s ICPC Notebook 8 MATHEMATICS - 8.7 Chinese Remainder Theorem (CRT) Página 31 de 40
16 return ans = (choose(n-1,k) + choose(n-1,k-1)) % MOD; 39 // sol = r1 + m1 * (r2-r1)/g * x’ (mod LCM(m1,m2))
17 } 40 // where x’ comes from
18 41 // m1 * x’ + m2 * y’ = g = GCD(m1,m2)
19 // reference: https://math.stackexchange.com/a/204209/503889 42 // where x’ and y’ are the values found by extended euclidean algorithm (gcdext)
20 ll multinomial(vector<int> ks) { 43 // Useful references:
21 int n = 0; 44 // https://codeforces.com/blog/entry/61290
22 ll ans = 1; 45 // https://forthright48.com/chinese-remainder-theorem-part-1-coprime-moduli
23 for (int k : ks) { 46 // https://forthright48.com/chinese-remainder-theorem-part-2-non-coprime-moduli
24 n += k; 47 // ** Note: this solution works if lcm(m1,m2) fits in a long long (64 bits)
25 ans = (ans * choose(n,k)) % MOD; 48 pair<ll,ll> CRT(ll r1, ll m1, ll r2, ll m2) {
26 } 49 ll g, x, y; g = gcdext(m1, m2, x, y);
27 return ans; 50 if ((r1 - r2) % g != 0) return {-1, -1}; // no solution
28 } 51 ll z = m2/g;
52 ll lcm = m1 * z;
8.7 Chinese Remainder Theorem (CRT) 53 ll sol = add(mod(r1, lcm), m1*mul(mod(x,z),mod((r2-r1)/g,z),z), lcm);
54 // for debugging (in case you think you might have bugs)
1 #include <bits/stdc++.h> 55 // assert (0 <= sol and sol < lcm);
2 typedef long long int ll; 56 // assert (sol % m1 == r1 % m1);
3 using namespace std; 57 // assert (sol % m2 == r2 % m2);
4 58 return {sol, lcm}; // solution + lcm(m1,m2)
5 inline ll mod(ll x, ll m) { return ((x %= m) < 0) ? x+m : x; } 59 }
6 inline ll mul(ll x, ll y, ll m) { return (x * y) % m; } 60
33 86 /* ========================= */
34 /* ========================================================= */ 87 /* Line Segment Intersection */
35 /* Cross Product -> orientation of point with respect to ray */ 88 /* ========================= */
36 /* ========================================================= */ 89 // returns whether segments p1q1 and p2q2 intersect, inspired by:
37 // cross product (b - a) x (c - a) 90 // https://www.geeksforgeeks.org/check-if-two-given-line-segments-intersect/
38 ll cross(Point& a, Point& b, Point& c) { 91 bool do_segments_intersect(Point& p1, Point& q1, Point& p2, Point& q2) {
39 ll dx0 = b.x - a.x, dy0 = b.y - a.y; 92 int o11 = orientation(p1, q1, p2);
40 ll dx1 = c.x - a.x, dy1 = c.y - a.y; 93 int o12 = orientation(p1, q1, q2);
41 return dx0 * dy1 - dx1 * dy0; 94 int o21 = orientation(p2, q2, p1);
42 // return (b - a).cross(c - a); // alternatively, using struct function 95 int o22 = orientation(p2, q2, q1);
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.2 Geometry 3D Utils Página 33 de 40
96 if (o11 != o12 and o21 != o22) // general case -> non-collinear intersection 150 }
97 return true; 151
98 if (o11 == o12 and o11 == 0) { // particular case -> segments are collinear 152 /* ==================================== */
99 Point dl1 = {min(p1.x, q1.x), min(p1.y, q1.y)}; 153 /* Point - Line / Line Segment distance */
100 Point ur1 = {max(p1.x, q1.x), max(p1.y, q1.y)}; 154 /* ==================================== */
101 Point dl2 = {min(p2.x, q2.x), min(p2.y, q2.y)}; 155 // reference: https://stackoverflow.com/questions/849211/shortest-distance-between-a-
102 Point ur2 = {max(p2.x, q2.x), max(p2.y, q2.y)}; point-and-a-line-segment
103 return do_rectangles_intersect(dl1, ur1, dl2, ur2); 156
104 } 157 // get distance between p and projection of p on line <- a - b ->
105 return false; 158 double point_line_dist(Point& p, Point& a, Point& b) {
106 } 159 Point d = b-a;
107 160 double t = d.dot(p-a) / d.norm2();
108 /* ======================== */ 161 return (a + d * t - p).norm();
109 /* Line - Line Intersection */ 162 }
110 /* ======================== */ 163
111 ll det(Point& a, Point& b) { return a.x * b.y - a.y * b.x; } 164 // get distance between p and truncated projection of p on segment a -> b
112 // return whether straight lines <-a1-b1-> and <-a2-b2-> intersect each other 165 double point_segment_dist(Point& p, Point& a, Point& b) {
113 // if they intersect, we assign values to t1 and t2 such that 166 if (a==b) return (p-a).norm(); // segment is a single point
114 // a1 + (b1 - a1) * t1 == a2 + (b2 - a2) * t2 167 Point d = b-a; // direction
115 bool find_line_line_intersection(Point& a1, Point& b1, Point& a2, Point& b2, 168 double t = d.dot(p-a) / d.norm2();
116 double& t1, double& t2) { 169 if (t <= 0) return (p-a).norm(); // truncate left
117 Point d1 = b1 - a1; 170 if (t >= 1) return (p-b).norm(); // truncate right
118 Point d2 = b2 - a2; 171 return (a + d * t - p).norm();
119 Point _d2 = d2 * -1; 172 }
120 ll detA = det(d1, _d2); 173
121 if (detA == 0) return false; // parallel lines 174 /* ====================================== */
122 Point b = a2 - a1; 175 /* Straight Line Hashing (integer coords) */
123 t1 = (double)det(b, _d2)/(double)detA; 176 /* ====================================== */
124 t2 = (double)det(d1, b)/(double)detA; 177 // task: given 2 points p1, p2 with integer coordinates, output a unique
125 return true; 178 // representation {a,b,c} such that a*x + b*y + c = 0 is the equation
126 } 179 // of the straight line defined by p1, p2. This representation must be
127 180 // unique for each straight line, no matter which p1 and p2 are sampled.
128 181 struct Point { ll x, y; };
129 /* =================== */ 182 tuple<ll,ll,ll> hash_line(const Point& p1, const Point& p2) {
130 /* Circle Intersection */ 183 ll a = p1.y - p2.y;
131 /* =================== */ 184 ll b = p2.x - p1.x;
132 struct Circle { double x, y, r; } 185 ll c = p1.x * (p2.y - p1.y) - p1.y * (p2.x - p1.x);
133 bool is_fully_outside(double r1, double r2, double d_sqr) { 186 ll sgn = (a < 0 or (a == 0 and b < 0)) ? -1 : 1;
134 double tmp = r1 + r2; 187 ll g = __gcd(abs(a), __gcd(abs(b), abs(c))) * sgn;
135 return d_sqr > tmp * tmp; 188 return make_tuple(a/g, b/g, c/g);
136 } 189 }
137 bool is_fully_inside(double r1, double r2, double d_sqr) { 190 // task: given 2 points p1 and p2 with integer coords, return a pair {a, b}
138 if (r1 > r2) return false; 191 // which is unique for all straight lines having the same slope as
139 double tmp = r2 - r1; 192 // the straight line that goes through p1 and p2
140 return d_sqr < tmp * tmp; 193 pair<ll,ll> hash_slope(const Point& p1, const Point& p2) {
141 } 194 ll dx = p2.x - p1.x;
142 bool do_circles_intersect(Circle& c1, Circle& c2) { 195 ll dy = p2.y - p1.y;
143 double dx = c1.x - c2.x; 196 ll sgn = (dx < 0 or (dx == 0 and dy < 0)) ? -1 : 1;
144 double dy = c1.y - c2.y; 197 ll g = __gcd(abs(dx), abs(dy)) * sgn;
145 double d_sqr = dx * dx + dy * dy; 198 return {dx/g, dy/g};
146 if (is_fully_inside(c1.r, c2.r, d_sqr)) return false; 199 }
147 if (is_fully_inside(c2.r, c1.r, d_sqr)) return false;
148 if (is_fully_outside(c1.r, c2.r, d_sqr)) return false; 9.2 Geometry 3D Utils
149 return true;
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.3 Polygon Algorithms Página 34 de 40
1 /* =========================== */ 9.3 Polygon Algorithms
2 /* Example of Point Definition */
3 /* =========================== */ 1 #include <bits/stdc++.h>
4 struct Point { // 3D 2 #define rep(i,a,b) for(int i = a; i <= b; ++i)
5 double x, y, z; 3
6 bool operator==(const Point& p) const { return x==p.x and y==p.y and z==p.z; } 4 // ----- Utils ------
7 Point operator+(const Point& p) const { return {x+p.x, y+p.y, z+p.z}; } 5 const double EPS = 1e-8;
8 Point operator-(const Point& p) const { return {x-p.x, y-p.y, z-p.z}; } 6 struct Point {
9 Point operator*(double d) const { return {x*d, y*d, z*d}; } 7 ll x, y;
10 double norm2() { return x*x + y*y + z*z; } 8 Point operator-(const Point& p) const { return {x - p.x, y - p.y}; }
11 double norm() { return sqrt(norm2()); } 9 Point operator+(const Point& p) const { return {x + p.x, y + p.y}; }
12 double dot(const Point& p) { return x*p.x + y*p.y + z*p.z; } 10 ll cross(const Point& p) const { return x*p.y - y*p.x; }
13 Point cross(Point& p) { 11 ll dot(const Point& p) const { return x*p.x + y*p.y; }
14 return { 12 };
15 y*p.z - z*p.y, 13 ll cross(Point& a, Point& b, Point& c) {
16 z*p.x - x*p.z, 14 ll dx0 = b.x - a.x, dy0 = b.y - a.y;
17 x*p.y - y*p.x 15 ll dx1 = c.x - a.x, dy1 = c.y - a.y;
18 }; 16 return dx0 * dy1 - dx1 * dy0;
19 } 17 }
20 Point unit() { 18 int orientation(Point& a, Point& b, Point& c) {
21 double d = norm(); 19 ll tmp = cross(a,b,c);
22 return {x/d, y/d, z/d}; 20 return tmp < 0 ? -1 : tmp == 0 ? 0 : 1; // sign
23 } 21 }
24 static Point from_sphere_coords(double r, double u, double v) { 22
25 return { 23 /* ======================================== */
26 r*cos(u)*cos(v), 24 /* Area of 2D non self intersecting Polygon */
27 r*cos(u)*sin(v), 25 /* ======================================== */
28 r*sin(u) 26 //based on Green’s Theorem:
29 }; 27 //http://math.blogoverflow.com/2014/06/04/greens-theorem-and-area-of-polygons/
30 } 28 // ** points must be sorted ccw or cw
31 }; 29 double polygon_area(vector<Point>& pol) {
32 // compute angle (0 <= angle <= PI) between vectors a and b 30 int n = pol.size()
33 // ** for better performance, the norms can be precomputed 31 double area = 0;
34 // or norms can be ommited altogether if a and b are unit vectors 32 for (int i = n-1, j = 0; j < n; i = j++) {
35 double angle_between(Point& a, Point& b) { 33 area += (pol[i].x + pol[j].x) * (pol[j].y - pol[i].y);
36 return acos(a.dot(b)/(a.norm() * b.norm())); 34 }
37 } 35 return area * 0.5; // use abs(area * 0.5) if points are cw
38 // check if point p belongs to the sphere arc from a to b. 36 }
39 // ** this assumes that a and b are points on a sphere centered at (0,0,0), 37
40 // and the sphere arc from a to b is the shortest path on the sphere connecting them 38 /* ================ */
41 const double EPS = 1e-8; 39 /* Point in Polygon */
42 bool point_in_arc(Point& a, Point& b, Point& p) { 40 /* ================ */
43 double angle_ab = angle_between(a, b); 41
44 double angle_ap = angle_between(a, p); 42 // -----------------------------
45 if (angle_ap > angle_ab) return false; 43 // 1) Convex Polygons
46 Point n = a.cross(b); 44
47 Point c_hat = n.cross(a).unit(); 45 // 1.1) O(N) method
48 double R = a.norm(); 46 bool point_in_convexhull(Point& p, vector<Point>& ch) {
49 Point a_hat = a * (1./R); 47 int n = ch.size();
50 Point a_rotated = (a_hat * cos(angle_ap) + c_hat * sin(angle_ap)) * R; 48 for (int i=n-1, j=0; j<n; i=j++) {
51 return (p - a_rotated).norm() < EPS; 49 if (cross(ch[i], ch[j], p) < 0) return false;
52 } 50 }
51 return true;
52 }
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.3 Polygon Algorithms Página 35 de 40
53 107 }
54 // 1.2) O(log N) method 108
55 bool point_in_triangle(Point& a, Point& b, Point& c, Point& x) { 109 /* ================================= */
56 return cross(a, b, x) >= 0 and cross(b, c, x) >= 0 and cross(c, a, x) >= 0; 110 /* Find extreme point in Convex Hull */
57 } 111 /* ================================= */
58 bool point_in_convexhull(Point& p, vector<Point>& ch) { 112 // given two points a and b defining a vector a -> b, and given a convex hull with points
59 if (cross(ch[0], ch[1], p) < 0) return false; 113 // sorted ccw, find the index in the convex hull of the extreme point.
60 if (cross(ch[0], ch.back(), p) > 0) return false; 114 // ** the extreme point is the "leftmost" point in the convex hull with respect to the
61 int l = 2, r = ch.size() - 1; 115 // vector a -> b (if there are 2 leftmost points, pick anyone)
62 while (l < r) { 116 int extreme_point_index(Point &a, Point &b, vector<Point> &ch) {
63 int m = (l+r) >> 1; 117 int n = ch.size();
64 if (cross(ch[0], ch[m], p) <= 0) r = m; 118 Point v = b - a;
65 else l = m+1; 119 v = Point(-v.y, v.x); // to find the leftmost point
66 } 120 if (v.dot(ch[0]) >= v.dot(ch[1]) && v.dot(ch[0]) >= v.dot(ch[n - 1])) {
67 return point_in_triangle(ch[0], ch[l-1], ch[l], p); 121 return 0;
68 } 122 }
69 123 int l = 0, r = n;
70 // ---------------------------------------------- 124 while (true) {
71 // 2) General methods: for complex / simple polygons 125 int m = (l + r) / 2;
72 126 if (v.dot(ch[m]) >= v.dot(ch[m + 1]) && v.dot(ch[m]) >= v.dot(ch[m - 1])) {
73 /* Nonzero Rule (winding number) */ 127 return m;
74 bool inPolygon_nonzero(Point p, vector<Point>& pts) { 128 }
75 int wn = 0; // winding number 129 int d1 = v.dot(ch[l + 1] - ch[l]) > 0;
76 Point prev = pts.back(); 130 int d2 = v.dot(ch[m + 1] - ch[m]) > 0;
77 rep (i, 0, (int)pts.size() - 1) { 131 int a = v.dot(ch[m]) > v.dot(ch[l]);
78 Point curr = pts[i]; 132 if (d1) { if (d2 && a) l = m; else r = m; }
79 if (prev.y <= p.y) { 133 else { if (!d2 && a) r = m; else l = m; }
80 if (p.y < curr.y && cross(prev, curr, p) > 0) 134 }
81 ++ wn; // upward & left 135 }
82 } else { 136
83 if (p.y >= curr.y && cross(prev, curr, p) < 0) 137 /* ========================================= */
84 -- wn; // downward & right 138 /* Line Segment and Convex Hull Intersection */
85 } 139 /* ========================================= */
86 prev = curr; 140 pair<int,int> find_crossing_edge(Point& a, Point& b, vector<Point>& ch, int start, int
87 } end) {
88 return wn != 0; // non-zero :) 141 int o_ref = orientation(a, b, ch[start]);
89 } 142 int n = ch.size();
90 143 int l = start, r = start + ((end - start + n) % n);
91 /* EvenOdd Rule (ray casting - crossing number) */ 144 while (l < r) {
92 bool inPolygon_evenodd(Point p, vector<Point>& pts) { 145 int m = (l+r) >> 1;
93 int cn = 0; // crossing number 146 if (orientation(a, b, ch[m % n]) != o_ref) r = m;
94 Point prev = pts.back(); 147 else l = m+1;
95 rep (i, 0, (int)pts.size() - 1) { 148 }
96 Point curr = pts[i]; 149 return {(l-1+n) % n, l%n};
97 if (((prev.y <= p.y) && (p.y < curr.y)) // upward crossing 150 }
98 || ((prev.y > p.y) && (p.y >= curr.y))) { // downward crossing 151 void find_segment_convexhull_intersection(Point& a, Point& b, vector<Point>& ch) {
99 // check intersect’s x-coordinate to the right of p 152 // find rightmost and leftmost points in convex hull wrt vector a -> b
100 double t = (p.y - prev.y) / (curr.y - prev.y); 153 int i1 = extreme_point_index(a, b, ch);
101 if (p.x < prev.x + t * (curr.x - prev.x)) 154 int i2 = extreme_point_index(b, a, ch);
102 ++cn; 155 // make sure the extremes are not to the same side
103 } 156 int o1 = orientation(a, b, ch[i1]);
104 prev = curr; 157 int o2 = orientation(a, b, ch[i2]);
105 } 158 if (o1 == o2) return; // all points are to the right (left) of a -> b (no
106 return (cn & 1); // odd -> in, even -> out intersection)
Pablo Messina’s ICPC Notebook 9 GEOMETRY - 9.4 Trigonometry Página 36 de 40
159 // find 2 edges in the convex hull intersected by the straight line <- a - b -> 2 using namespace std;
160 pair<int,int> e1 = find_crossing_edge(a, b, ch, i1, i2); // binsearch from i1 to i2 3 #define rep(i,a,b) for(int i = a; i <= b; ++i)
ccw 4 #define invrep(i,b,a) for(int i = b; i >= a; --i)
161 pair<int,int> e2 = find_crossing_edge(a, b, ch, i2, i1); // binsearch from i2 to i1 5 typedef long long int ll;
ccw 6 // ----------------------------------------------
162 // find exact intersection points 7 // Convex Hull: Andrew’s Montone Chain Algorithm
163 double r1, s1, r2, s2; 8 // ----------------------------------------------
164 assert (find_line_line_intersection(a, b, ch[e1.first], ch[e1.second], r1, s1)); 9 struct Point {
165 assert (find_line_line_intersection(a, b, ch[e2.first], ch[e2.second], r2, s2)); 10 ll x, y;
166 // make sure intersections are significant and within line segment range 11 bool operator<(const Point& p) const {
167 if (r1 > 1.0 - EPS and r2 > 1.0 - EPS) return; // intersections above line segment 12 return x < p.x || (x == p.x && y < p.y);
168 if (r1 < EPS and r2 < EPS) return; // intersections below line segment 13 }
169 if (abs(r1 - r2) < EPS) return; // insignificant intersection in a single point 14 };
170 if (r1 > r2) swap(r1, r2), swap(e1, e2), swap(s1, s2); // make sure r1 < r2 15
171 // ** HERE DO WHATEVER YOU WANT WITH INTERSECTIONS FOUND 16 ll cross(Point& a, Point& b, Point& c) {
172 // 1) a + (b-a) * max(r1, 0) <--- first point of segment a -> b inside convex hull 17 ll dx0 = b.x - a.x, dy0 = b.y - a.y;
173 // if r1 < 0, point a is strictly inside the convex hull 18 ll dx1 = c.x - a.x, dy1 = c.y - a.y;
174 // 2) a + (b-a) * min(r2, 1) <--- last point of segment a -> b inside convex hull 19 return dx0 * dy1 - dx1 * dy0;
175 // if r2 > 1, point b is strictly inside the convex hull 20 }
176 cout << "(significant) intersection detected!\n"; 21
177 } 22 vector<Point> upper_hull(vector<Point>& P) {
23 // sort points lexicographically
9.4 Trigonometry 24 int n = P.size(), k = 0;
25 sort(P.begin(), P.end());
1 /* ================= */ 26 // build upper hull
2 /* Angle of a vector */ 27 vector<Point> uh(n);
3 /* ================= */ 28 invrep (i, n-1, 0) {
4 const double PI = acos(-1); 29 while (k >= 2 && cross(uh[k-2], uh[k-1], P[i]) <= 0) k--;
5 const double _2PI = 2 * PI; 30 uh[k++] = P[i];
6 31 }
7 double correct_angle(double angle) { // to ensure 0 <= angle <= 2PI 32 uh.resize(k);
8 while (angle < 0) angle += _2PI; 33 return uh;
9 while (angle > _2PI) angle -= _2PI; 34 }
10 return angle; 35
11 } 36 vector<Point> lower_hull(vector<Point>& P) {
12 double angle(double x, double y) { 37 // sort points lexicographically
13 // atan2 by itself returns an angle in range [-PI, PI] 38 int n = P.size(), k = 0;
14 // no need to "correct it" if that range is ok for you 39 sort(P.begin(), P.end());
15 return correct_angle(atan2(y, x)); 40 // collect lower hull
16 } 41 vector<Point> lh(n);
17 42 rep (i, 0, n-1) {
18 /* ============== */ 43 while (k >= 2 && cross(lh[k-2], lh[k-1], P[i]) <= 0) k--;
19 /* Cosine Theorem */ 44 lh[k++] = P[i];
20 /* ============== */ 45 }
21 // Given triangle with sides a, b and c, returns the angle opposed to side a. 46 lh.resize(k);
22 // a^2 = b^2 + c^2 - 2*b*c*cos(alpha) 47 return lh;
23 // => alpha = acos((b^2 + c^2 - a^2) /(2*b*c)) 48 }
24 double get_angle(double a, double b, double c) { 49
273 274
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.2. AD HOC MATHEMATICAL PROBLEMS c Steven, Felix, Suhendry
Note that if a certain programming language only has log function in a specific base,
class Main { we can get logb (a) (base b) by using the fact that logb (a) = log(a)/log(b).
public static void main(String[] args) {
A nice feature of the logarithmic function is that it can be used to count the number
Scanner sc = new Scanner(System.in); // a few test cases
of digits of a given decimal a. This formula (int)floor(1 + log10((double)a)) re-
while (true) {
turns the number of digits in decimal number a. To count the number of digits in other
int b = sc.nextInt(); if (b == 0) break;
base b, we can use: (int)floor(1 + log10((double)a) / log10((double)b)).
BigInteger p = new BigInteger(sc.next(), b); // 2nd parameter
BigInteger m = new BigInteger(sc.next(), b); // is the base We are probably aware of thepsquare root function, e.g., sqrt(a), but
p some of us stum-
System.out.println((p.mod(m)).toString(b)); // print in base b ble when asked to compute n a (the n-th root of a). Fortunately, n a can be rewritten
} as a1/n . We can then use built in formula like pow((double)a, 1.0 / (double)n) or
} exp(log((double)a) * 1.0 / (double)n).
}
• Grid
These problems involve grid manipulation. The grid can be complex, but the grid
Source code: ch5/basicremains UVa10551.java follows some primitive rules. The ‘trivial’ 1D/2D grid are not classified here (review
1D/2D array section in Book 1). The solution usually depends on the problem solver’s
• Number Systems or Sequences creativity in finding the patterns to manipulate/navigate the grid or in converting the
Some Ad Hoc mathematical problems involve definitions of existing (or made-up) Num- given one into a simpler one.
ber Systems or Sequences, and our task is to produce either the number (sequence) See an example for Kattis - beehouseperimeter. You are given a honeycomb structure
within some range or just the n-th number, verify if the given number (sequence) is described by R, the number of cells of the side of honeycomb. The cells are numbered
valid according to the definition, etc. Usually, following the problem description care- from 1 to R3 (R 1)3 in row major order. For example for R = 3, the honeycomb
fully is the key to solving the problem. But some harder problems require us to simplify looks like Figure 5.1.
the formula first. Some well-known examples are:
275 276
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.2. AD HOC MATHEMATICAL PROBLEMS c Steven, Felix, Suhendry
etc. We can represent a polynomial by storing the coefficients of the polynomial’s terms
sorted by (descending order of) their powers. The (basic) operations on polynomials Exercise 5.2.1*: All these sequence of numbers below have at least one formula(s)/pattern(s).
usually require some careful usage of loops. Some polynomials are special: Please give your best guess of what are the next three numbers in each sequence!
p
Degree-2, e.g., g(x) = ax2 + bx + c (with classic roots r = ( b ± b2 4ac)/2a), and 1. 1, 2, 4, 8, 16, . . .
Degree-3, e.g., h(x) = ax3 + bx2 + cx + d that on some applications can be derived 2*. 1, 2, 4, 8, 16, 31, . . .
back into a Degree-2 polynomial of h0 (x) = 3ax2 + 2bx + c.
3. 2, 3, 5, 7, 11, 13, . . .
Later in Section 9.11, we discuss O(n2 ) straightforward polynomial multiplication and
4*. 2, 3, 5, 7, 11, 13, 19, . . .
the faster O(n log n) one using Fast Fourier Transform.
Exercise 5.2.2*: Study (Ruffini-)Horner’s method for finding the roots of a polynomial
• Fraction equation f (x) = 0.
numerator
These problems involve representing number as fraction: denominator . Most frequent
operation is to simplify the given fraction to its simplest form. We can do this by Exercise 5.2.3*: Given 1 < a < 10, 1 n 109 , show how to compute the value of
dividing both numerator n and denominator d with their greatest common divisor (1 ⇥ a + 2 ⇥ a2 + 3 ⇥ a3 + . . . + n ⇥ an ) modulo 109 + 7 efficiently, i.e., in O(log n). Both a
(gcd(n, d), also see Section 5.3.6). Another frequent operations are to add, subtract, and n are integers. Note that the naı̈ve O(n) solution is not acceptable. You may need to
multiply two (or more) fractions. Python has a built-in Fraction class that are well read Section 5.3.9 (modular arithmetic) and Section 5.8 (fast (modular) exponentiation).
equipped to deal with all these basic fraction operations.
See an example below for UVa 10814 - Simplifying Fractions where we are asked to
Programming Exercises related to Ad Hoc Mathematical problems:
reduce a large fraction to its simplest form.
a. Finding (Simple) Formula (or Pattern), Easier
class Main { 1. Entry Level: Kattis - twostones * (just check odd or even)
public static void main(String[] args) {
2. UVa 10751 - Chessboard * (trivial for N = 1 and N = 2; derive the
Scanner sc = new Scanner(System.in); formula first for N > 2; hint: use diagonal as much as possible)
int N = sc.nextInt();
3. UVa 12004 - Bubble Sort * (try small n; get the pattern; use long long)
while (N-- > 0) { // we have to use > 0
4. UVa 12918 - Lucky Thief * (sum of arithmetic progression; long long)
BigInteger p = sc.nextBigInteger();
String ch = sc.next(); // ignore this char 5. Kattis - averageshard * (find O(n) formula; also see Kattis - averageseasy)
BigInteger q = sc.nextBigInteger(); 6. Kattis - bishops * (chess pattern involving bishops; from IPSC 2004)
BigInteger gcd_pq = p.gcd(q); // wow :) 7. Kattis - crne * (simulate cutting process on small numbers; get formula)
System.out.println(p.divide(gcd_pq) + " / " + q.divide(gcd_pq)); Extra UVa: 01315, 10014, 10110, 10170, 10499, 10696, 10773, 10940, 11202,
} 11393, 12027, 12502. 12725, 12992, 13049, 13071, 13216.
} Extra Kattis: alloys, averageseasy, chanukah, limbo1, pauleigon, sequential-
} manufacturing, soylent, sumkindofproblem.
b. Finding (Simple) Formula (or Pattern), Harder
from fractions import Fraction # Python’s built in 1. Entry Level: UVa 10161 - Ant on a Chessboard * (sqrt and ceil)
N = int(input())
2. UVa 11038 - How Many O’s * (define a function f that counts the
for _ in range(N): number of 0s from 1 to n; also available at Kattis - howmanyzeros *)
frac = Fraction("".join(input().split(" "))) # simplified form
3. UVa 11231 - Black and White Painting * (there is an O(1) formula)
print(str(frac.numerator) + " / " + str(frac.denominator))
4. UVa 11718 - Fantasy of a Summation * (convert loops to a closed form
formula; use modPow to compute the results)
Source code: ch5/UVa10814.java|py 5. Kattis - mortgage * (geometric progression; divergent but finite; special case
when r = 1.0 (no interest))
6. Kattis - neighborhoodwatch * (sum of AP; inclusion-exclusion)
• Really Ad Hoc 7. Kattis - nine * (find the required formula)
These are other mathematics-related problems outside the sub-categories above.
Extra UVa: 00651, 00913, 10493, 10509, 10666, 10693, 10710, 10882, 10970,
10994, 11170, 11246, 11296, 11298, 11387, 12909, 13096, 13140.
We suggest that the readers—especially those who are new to mathematical problems—kick-
start their training programme on mathematical problems by solving at least 2 or 3 problems Extra Kattis: appallingarchitecture, beautifulprimes, dickandjane, doorman,
from each sub-category, especially the ones that we highlighted as must try *. eatingout, limbo2, loorolls, otherside, rectangularspiral, sequence.
277 278
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.2. AD HOC MATHEMATICAL PROBLEMS c Steven, Felix, Suhendry
279 280
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
281 282
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
Sieve of Eratosthenes: Generating List of Prime Numbers 5.3.2 Probabilistic Prime Testing (Java Only)
If we want to generate a list of prime numbers within the range [0..N ], there is a better We have just discussed the Sieve of Eratosthenes algorithm and a deterministic prime testing
algorithm than testing each number in the range for primality. The algorithm is called ‘Sieve algorithm that is good enough for many contest problems. However, you have to type in a
of Eratosthenes’ invented by Eratosthenes of Cyrene. few lines of C++/Java/Python code to do that. If you just need to check whether a single
First, this Sieve algorithm sets all integers in the range to be ‘probably prime’ but sets 0 (or at most, a few9 ) and usually (very) large integer (beyond the limit of 64-bit integer) is a
and 1 to be not prime. Then, it takes 2 as prime and crosses out all multiples7 of 2 starting prime, e.g., UVa 10235 below to decide if the given N is not a prime, an ‘emirp’ (the reverse
from 2 ⇥ 2 = 4, 6, 8, 10, . . . until the multiple is greater than N . Then it takes the next of its digits is also a prime), or just a normal prime, then there is an alternative and shorter
non-crossed 3 as a prime and crosses out all multiples of 3 starting from 3⇥3 = 9, 12, 15, . . .. approach with the function isProbablePrime in Java10 BigInteger11 —a probabilistic prime
Then it takes 5 and crosses out all multiples of 5 starting from 5 ⇥ 5 = 25, 30, 35, . . .. And testing function based on Miller-Rabin algorithm [26, 32]. There is an important parameter
so on . . .. After that, integers that remain uncrossed within the range [0..N ] are primes. of this function: certainty. If this function returns true, then the probability that the tested
certainty
This algorithm does approximately (N ⇥ (1/2 + 1/3 + 1/5 + 1/7 + . . . + 1/last prime in BigInteger is a prime exceeds 1 12 . Usually, certainty = 10 should be enough12
range N )) operations. Using ‘sum of reciprocals8 of primes up to N ’, we end up with the as 1 ( 12 )10 = 0.9990234375 is ⇡ 1.0. Note that using larger value of certainty obviously
time complexity of roughly O(N log log N ). decreases the probability of WA but doing so slows down your program and thus increases
Since generating a list of primes 10K using the sieve is fast (our code below can go up the risk of TLE13 .
to 107 in ⇡ 1s), we opt to use the sieve for smaller primes and reserve the optimized prime
testing function for larger primes—see previous discussion. class Main {
public static void main(String[] args) {
typedef long long ll; Scanner sc = new Scanner(System.in);
while (sc.hasNext()) {
ll _sieve_size; int N = sc.nextInt(); System.out.printf("%d is ", N);
bitset<10000010> bs; // 10^7 is the rough limit BigInteger BN = BigInteger.valueOf(N);
vll p; // compact list of primes String R = new StringBuffer(BN.toString()).reverse().toString();
int RN = Integer.parseInt(R);
void sieve(ll upperbound) { // range = [0..upperbound] BigInteger BRN = BigInteger.valueOf(RN);
_sieve_size = upperbound+1; // to include upperbound if (!BN.isProbablePrime(10)) // certainty 10 is enough
bs.set(); // all 1s System.out.println("not prime.");
bs[0] = bs[1] = 0; // except index 0+1 else if ((N != RN) && BRN.isProbablePrime(10))
for (ll i = 2; i < _sieve_size; ++i) if (bs[i]) { System.out.println("emirp.");
// cross out multiples of i starting from i*i else
for (ll j = i*i; j < _sieve_size; j += i) bs[j] = 0; System.out.println("prime.");
p.push_back(i); // add prime i to the list }
} }
} }
283 284
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
A naı̈ve algorithm generates a list of primes (e.g., with sieve) and checks which prime(s) 5.3.4 Functions Involving Prime Factors
can actually divide the integer N —without changing N . This can be improved!
There are other well-known number
p ptheoretic functions involving prime factors shown below.
A better algorithm utilizes a kind of Divide and Conquer spirit. An integer N can be
All variants have similar O( N /ln N ) time complexity with the basic prime factoring via
expressed as: N = p ⇥ N 0 , where p is a prime factor and N 0 is another number which is
trial division. Interested readers can read Chapter 7: “Multiplicative Functions” of [33].
N/p—i.e., we can reduce the size of N by taking out its prime factor p. We can keep doing
this until eventually N 0 = 1. To speed up the process even further, we putilize the divisibility 1. numPF(N): Count the number of prime factors of integer N.
property that there is no more than one prime p divisor greater than
p N , so we only repeat
the process of finding prime factors until p > N . Stopping at N entails a special case: if For example: N = 60 has 4 prime factors: {2, 2, 3, 5}. The solution is a simple tweak
2
(current p) > N and N is still not 1, then N is the last prime factor. The code below takes of the trial division algorithm to find prime factors shown earlier.
in an integer N and returns the list of prime factors.
In the worst case, when N is prime, this p prime factoring algorithm with trialpdivision int numPF(ll N) {
requires
p testing
p all smaller primes up to N , mathematically denoted as O(⇡( N )) = int ans = 0;
O( N /ln N ) can be very slow14 —see the example of factoring a large composite number for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i)
136 117 223 861 into two large prime factors: 104 729 ⇥ 1 299 709 in the code below. However, while (N%p[i] == 0) { N /= p[i]; ++ans; }
if given composite numbers with lots of small prime factors, this algorithm is reasonably return ans + (N != 1);
fast15 —see 142 391 208 960 which is 210 ⇥ 34 ⇥ 5 ⇥ 74 ⇥ 11 ⇥ 13. }
vll primeFactors(ll N) { // pre-condition, N >= 1
vll factors; 2. numDiv(N): Count the number of divisors of integer N.
for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i) A divisor of N is defined as an integer that divides N without leaving a remainder. If
while (N%p[i] == 0) { // found a prime for N a number N = ai ⇥ bj ⇥ . . . ⇥ ck , then N has (i + 1) ⇥ (j + 1) ⇥ . . . ⇥ (k + 1) divisors.
N /= p[i]; // remove it from N This is because there are i + 1 ways to choose prime factor a (0, 1, . . . , i 1, i times),
factors.push_back(p[i]); j + 1 ways to choose prime factor b, . . ., and k + 1 ways to choose prime factor c. The
} total number of ways is the multiplication of these numbers.
if (N != 1) factors.push_back(N); // remaining N is a prime
return factors; Example: N = 60 = 22 ⇥ 31 ⇥ 51 has (2 + 1) ⇥ (1 + 1) ⇥ (1 + 1) = 3 ⇥ 2 ⇥ 2 = 12
} divisors. The 12 divisors are: {1, 2, 3, 4, 5, 6, 10, 12, 15, 20, 30, 60}. The prime factors
of 60 are highlighted. See that N has more divisors than prime factors.
// inside int main()
sieve(10000000); int numDiv(ll N) {
vll r; int ans = 1; // start from ans = 1
for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i) {
r = primeFactors((1LL<<31)-1); // Mersenne prime int power = 0; // count the power
for (auto &pf : r) printf("> %lld\n", pf); while (N%p[i] == 0) { N /= p[i]; ++power; }
ans *= power+1; // follow the formula
r = primeFactors(136117223861LL); // large prime factors }
for (auto &pf : r) printf("> %lld\n", pf); // 104729*1299709 return (N != 1) ? 2*ans : ans; // last factor = N^1
}
r = primeFactors(5000000035LL); // large prime factors
for (auto &pf : r) printf("> %lld\n", pf); // 5*1000000007 3. sumDiv(N): Sum the divisors of integer N.
r = primeFactors(142391208960LL); // large composite In the previous example, N = 60 has 12 divisors. The sum of these divisors is 168.
for (auto &pf : r) printf("> %lld\n", pf); // 2^10*3^4*5*7^4*11*13 This can be computed via prime factors too. If a number N = ai ⇥ bj ⇥ . . . ⇥ ck ,
i+1 j+1 k+1
then the sum of divisors of N is a a 1 1 ⇥ b b 1 1 ⇥ ... ⇥ c c 1 1 . This closed form is
i+1
r = primeFactors(100000380000361LL); // 10000019^2 derived from summation of geometric progression series. a a 1 1 is the summation of
0 1 i 1 i
for (auto &pf : r) printf("> %lld\n", pf); // fail to factor! (why?) a , a , . . . , a , a . The total sum of divisors is the multiplication of these summation
of geometric progression series of each prime factor.
22+1 1 1+1 1+1
14
In real life applications, very large primes are commonly used in cryptography and encryption (e.g., RSA Example: N = 60 = 22 ⇥31 ⇥51 , sumDiv(60) = 2 1
⇥3 3 1 1⇥5 5 1 1 = 7⇥8⇥24
1⇥2⇥4
= 168.
algorithm) because it is computationally challenging to factor a very large number into its prime factors,
i.e., x = p1 p2 where both p1 and p2 are very large primes. We can avoid raising a prime factor pi to a certain power k using O(log k) exponenti-
15
Also see Section 9.12 for a faster (but rare) integer factoring algorithm. ation (see Section 5.8) by writing this sumDiv(N) function iteratively:
285 286
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
int EulerPhi[MAX_N+10];
4. EulerPhi(N): Count the number of positive integers < N that are relatively prime for (int i = 1; i <= MAX_N; ++i) EulerPhi[i] = i;
to N . Recall: Two integers a and b are said to be relatively prime (or coprime) if for (int i = 2; i <= MAX_N; ++i)
gcd(a, b) = 1, e.g., 25 and 42. A naı̈ve algorithm to count the number of positive if (EulerPhi[i] == i) // i is a prime number
integers < N that are relatively prime to N starts with counter = 0, iterates through for (int j = i; j <= MAX_N; j += i)
i 2 [1..N -1], and increases the counter if gcd(i, N ) = 1. This is slow for large N . EulerPhi[j] = (EulerPhi[j]/i) * (i-1);
Q
A better algorithm is the Euler’s Phi (Totient) function '(N ) = N ⇥ pi (1 p1i ),
where pi is prime factor of N . These O(Nplog logp N ) modified sieve algorithms should be preferred over (up to) N individual
Example: N = 36 = 22 ⇥ 32 . '(36) = 36 ⇥ (1 12 ) ⇥ (1 13 ) = 12. Those 12 positive calls to O( N /ln N ) numDiffPF(N) or EulerPhi(N) if there are many queries over a large
integers that are relatively prime to 36 are {1, 5, 7, 11, 13, 17, 19, 23, 25, 29, 31, 35}. range, e.g., [1..n], but MAX N is at most 107 (note that we need to prepare a rather big array
in a sieve method). However, if we just need to compute the number of di↵erent prime
factors or Euler Phi for a single (or a few) but (very) large integer N , it may be faster to
ll EulerPhi(ll N) {
just use individual calls of numDiffPF(N) or EulerPhi(N).
ll ans = N; // start from ans = N
for (int i = 0; (i < (int)p.size()) && (p[i]*p[i] <= N); ++i) {
if (N%p[i] == 0) ans -= ans/p[i]; // count unique
while (N%p[i] == 0) N /= p[i]; // prime factor Exercise 5.3.5.1*: Can we write the modified sieve code for the other functions listed in
} Section 5.3.4 (i.e., other than numDiffPF(N) and EulerPhi(N)) without increasing the time
if (N != 1) ans -= ans/N; // last factor complexity of sieve? If we can, write the required code! If we cannot, explain why!
return ans;
}
Source code: ch5/primes.cpp|java|py|ml 5.3.6 Greatest Common Divisor & Least Common Multiple
The Greatest Common Divisor (GCD) of two integers: a, b denoted by gcd(a, b), is the
largest positive integer d such that d | a and d | b where x | y means that x divides y.
Exercise 5.3.4.1: Implement numDiffPF(N) and sumPF(N) that are similar to numPF(N)! Example of GCD: gcd(4, 8) = 4, gcd(6, 9) = 3, gcd(20, 12) = 4. One practical usage of GCD
numDiffPF(N): Count the number of di↵erent prime factors of N. is to simplify fractions (see UVa 10814 in Section 5.2), e.g., 69 = 6/gcd(6,9)
9/gcd(6,9)
= 6/3
9/3
= 23 .
sumPF(N): Sum the prime factors of N. Finding the GCD of two integers is an easy task with an e↵ective Divide and Conquer
Exercise 5.3.4.2: What are the answers for numPF(N), numDiffPF(N), sumPF(N), numDiv(N), Euclid algorithm [33, 7] which can be implemented as a one liner code (see below). Thus
sumDiv(N), and EulerPhi(N) when N is a prime? finding the GCD of two integers is usually not the main issue in a Mathematics-related
contest problem, but just part of a bigger solution.
287 288
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
The GCD is closely related to Least (or Lowest) Common Multiple (LCM). The LCM Kattis - factovisors/UVa 10139 - Factovisors
of two integers (a, b) denoted by lcm(a, b), is defined as the smallest positive integer l such
Abridged problem description: “Does m divide n! (0 n, m 231 -1)?”. Recall that in
that a | l and b | l. Example of LCM: lcm(4, 8) = 8, lcm(6, 9) = 18, lcm(20, 12) = 60.
Section 5.3.7, we note that n!, i.e., f ac(n), grows very fast. We mention that with built-in
It has been shown (see [33]) that: lcm(a, b) = a ⇥ b/gcd(a, b) = a/gcd(a, b) ⇥ b. This can
data types, the largest factorial that we can still compute precisely is only 20!. In Book 1,
also be implemented as a one liner code (see below). Both GCD and LCM algorithms run
we show that we can compute large integers with Big Integer technique. However, it is very
in O(log10 n) = O(log n), where n = min(a, b).
slow to precisely compute the exact value of n! for large n.
The solution for this problem is to work with the prime factors of m and check if each
int gcd(int a, int b) { return b == 0 ? a : gcd(b, a%b); }
of those prime factors has ‘support’ in n!. This check is called the Legendre’s
P formula. Let
int lcm(int a, int b) { return a / gcd(a, b) * b; }
vp (n!) be the highest power of p that divides n. We can compute vp (n!) via 1 n
i=1 b pi c.
For example, when n = 6, we have 6! = 2⇥3⇥4⇥5⇥6 = 2⇥3⇥(22 )⇥5⇥(2⇥3) = 24 ⇥32 ⇥5
Note16 that since C++17, both gcd and lcm functions are already built-in <numeric> when expressed as its prime power factorization (we do not actually need to do this). Now
library. In Java, we can use method gcd(a, b) in BigInteger class. In Python, we can use if m1 = 9 = 32 , then this prime factor 32 has support in 6! because v3 (6!) = 2 and 32 32 .
gcd(a, b) in math module. Thus, m1 = 9 divide 6!. However, m2 = 54 = 21 ⇥ 33 has no support because although
The GCD of more than 2 numbers can be found via multiple calls of gcd of 2 numbers, v2 (6!) = 4 and 21 24 , we have v3 (6!) = 2 and 33 > 32 . Thus m2 = 54 does not divide 6!.
e.g., gcd(a, b, c) = gcd(a, gcd(b, c)). The strategy to find the LCM of more than 2 numbers
is similar. Source code: ch5/factovisors UVa10139.cpp|java|py
Exercise 5.3.6.1: The LCM formula is lcm(a, b) = a⇥b / gcd(a, b) but why do we
use a / gcd(a, b) ⇥ b instead? Try a = 2 ⇥ 109 and b = 8 using 32-bit signed integers. Exercise 5.3.8.1: Determine what is the GCD and LCM of (26 ⇥ 33 ⇥ 971 , 25 ⇥ 52 ⇥ 112 )?
Exercise 5.3.6.2: Please write the gcd(a, b) routine in iterative fashion! Exercise 5.3.8.2: Count the number of trailing zeroes of n! (assume 1 n 200 000).
Exercise 5.3.6.3*: Study alternative ‘binary gcd’ computation that replaces division (inside
modulo operation) with bit shift operations, subtractions, and comparisons. This version is
known as Stein’s algorithm.
5.3.9 Modular Arithmetic
Some (mathematical) computations in programming problems can end up having very large
positive (or very small negative) intermediate/final integer results that are beyond the range
5.3.7 Factorial of the largest built-in integer data type (currently the 64-bit long long in C++ or long
in Java). In Book 1, we have shown a way to compute Big Integers precisely. In Section
Factorial17 of n, i.e., n! or f ac(n) is defined as 1 if n = 0 and n⇥f ac(n-1) if n > 0. However, it 5.3.8, we have shown another way to work with Big Integers via its prime factors. For some
is usually more convenient to work with the iterative version, i.e., f ac(n) = 2 ⇥ 3 ⇥ 4 ⇥ . . . ⇥ other problems18 , we are only interested in the result modulo a number (usually a prime,
(n-1) ⇥ n (loop from 2 to n, skipping 1). The value of f ac(n) grows very fast. We are to minimize collision) so that the intermediate/final results always fit inside built-in integer
only able to use C/C++ long long/Java long/OCaml Int64 for up to f ac(20). Beyond data type. In this subsection, we discuss these types of problems.
that, we may need to work with the prime factors of a factorial (see Section 5.3.8), get the In UVa 10176 - Ocean Deep! Make it shallow!!, we are asked to convert a long binary
intermediate and final results modulo a smaller (usually a prime) number (see Section 5.3.9), number (up to 100 digits) to decimal. A quick calculation shows that the largest possible
or to use either Python or Java BigInteger for precise but slow computation (see Book 1). number is 2100 -1 which is beyond the range of a 64-bit integer. But the problem only asks
if the result is divisible by 131 071 (a prime number). So what we need to do is to convert
5.3.8 Working with Prime Factors binary to decimal digit by digit, while performing % 131 071 operation to the intermediate
result (note that ‘%’ is a symbol of modulo operation). If the final result is 0, then the actual
Other than using the Big Integer technique (see Book 1) which is ‘slow’, we can work with the number in binary (which we never compute in its entirety), is divisible by 131 071.
intermediate computations of large integers accurately by working with the prime factors of Important: The modulo of a negative integer can be surprising to some who are not
the integers instead of the actual integers themselves. Therefore, for some non-trivial number aware of their programming language specific behavior, e.g., 10 % 7 = 4 (in Python) but
theoretic problems, we have to work with the prime factors of the input integers even if the C++/Java % operator and OCaml mod operator produces 3 instead. To be safer if we
main problem is not really about prime numbers. After all, prime factors are the building need to find a non-negative integer a (mod m), we use ((a % m) + m) % m. For the given
blocks of integers. Let’s see the next case study. example, we have (( 10 % 7) + 7) % 7 = ( 3 + 7) % 7 = 4 % 7 = 4.
16
There is no built-in gcd function in OCaml.
17 18
We can also have multifactorial. The most common form of multifactorial is the double factorial, denoted As of year 2020, we observe that the number of problems that require Big Integer technique is decreasing
as n!!, e.g., 14!! = 14 ⇥ 12 ⇥ 10 ⇥ . . . ⇥ 2 = 645 120. This is used in Section 8.2.1. whereas the number of problems that require modular arithmetic technique is increasing.
289 290
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
The following are true involving modular arithmetic: 5.3.10 Extended Euclidean Algorithm
1. (a + b) % m = ((a % m) + (b % m)) % m In Section 5.3.6, we have seen that gcd(a, 0) = a and gcd(a, b) = gcd(b, a%b) but this
Example: (15 + 29) % 8 Euclid’s algorithm can be extended. On top of computing the gcd(a, b) = d, the Extended
= ((15 % 8) + (29 % 8)) % 8 = (7 + 5) % 8 = 4 Euclidean algorithm can also computes the coefficients of Bézout identity (lemma), i.e.,
integers x and y such that ax + by = gcd(a, b). The implementation is as follows:
2. (a b) % m = ((a % m) (b % m)) % m
Example: (37 - 15) % 6
int extEuclid(int a, int b, int &x, int &y) { // pass x and y by ref
= ((37 % 6) - (15 % 6)) % 6 = (1 - 3) % 6 = -2 or 4
int xx = y = 0;
3. (a ⇥ b) % m = ((a % m) ⇥ (b % m)) % m int yy = x = 1;
Example: (23 ⇥ 12) % 5 while (b) { // repeats until b == 0
= ((23 % 5) ⇥ (12 % 5)) % 5 = (3 ⇥ 2) % 5 = 1 int q = a/b;
int t = b; b = a%b; a = t;
t = xx; xx = x-q*xx; x = t;
Modular Multiplicative Inverse t = yy; yy = y-q*yy; y = t;
Now, (a / b) % m is harder to compute assuming a is very large, otherwise, simply divide a }
by b and modulo the result by b. Note that a might appear in the form of a = a1 ⇥a2 ⇥· · ·⇥an return a; // returns gcd(a, b)
where each ai is small enough to fit in a built-in integer data type. Thus, it might be tempting }
to modulo a and b to m independently, perform the division, and modulo the result again.
However, this approach is wrong! (((a1 ⇥ a2 ⇥ · · · ⇥ an ) % m) / (b % m)) % m does not
For example: a = 25, b = 18
necessarily equal to (a / b) % m, i.e., the previous modular arithmetic does not work for
extendedEuclid(25, 18, x, y) updates x = 5, y = 7, and returns d = 1.
division. For example, (30 / 5) % 10 = 6 is not equal to ((30 % 10) / (5 % 10)) % 10 = 0.
This means 25 ⇥ 5 + 18 ⇥ 7 = gcd(25, 18) = 1.
Another example, (27 / 3) % 13 = 9 is not equal to ((27 % 13) / (3 % 13)) % 13 = 13 .
Fortunately, we can rewrite (a / b) % m as (a ⇥ b 1 ) % m where b 1 is the modular
multiplicative inverse of b with respect to modulus m. In other words, b 1 is an integer such Solving Linear Diophantine Equation
that (b ⇥ b 1 ) % m = 1. Then, all we have to do is solving (a ⇥ b 1 ) % m using the previous
Problem: Suppose a housewife buys apples and oranges with cost of 8.39 dollars.
modular arithmetic (for multiplication). So, how do we find b 1 % m?
An apple costs 25 cents. An orange costs 18 cents. How many of each fruit does she buy?
If m is a prime number, then we can use Fermat’s little theorem for b and m where
This problem can be modeled as a linear equation with two variables: 25x + 18y = 839.
gcd(b, m) = 1, i.e., bm 1 ⌘ 1 (mod m). If we multiply both sides with b 1 , then we will
Since we know that both x and y must be integers, this linear equation is called the Linear
obtain bm 1 · b 1 ⌘ 1 · b 1 (mod m) or simply bm 2 ⌘ b 1 (mod m). Then, to find the
Diophantine Equation. We can solve Linear Diophantine Equation with two variables even
modular multiplicative inverse of b (i.e., b 1 % m), simply compute bm 2 % m, e.g., us-
if we only have one equation! The solution is as follows:
ing efficient modular exponentiation discussed in Section 5.8.2 combined with the previous
modular arithmetic for multiplication. Therefore, (a ⇥ b 1 ) % m when m is a prime number Let a and b be integers with d = gcd(a, b). The equation ax + by = c has no integral
equals to ((a % m) ⇥ (bm 2 % m)) % m. solutions if d | c is not true. But if d | c, then there are infinitely many integral solutions.
If m is not necessarily a prime number but gcd(b, m) = 1, then we can use Euler’s The first solution (x0 , y0 ) can be found using the Extended Euclidean algorithm and the rest
Theorem, i.e., b'(m) ⌘ 1 (mod m) where '(m) is the Euler’s Phi (Totient) of m, the number can be derived from x = x0 + (b/d)n, y = y0 (a/d)n, where n is an integer. Programming
of positive integers < m which are relative prime to m. Observe that when m is a prime contest problems may have additional constraints to make the output finite (and unique).
number, Euler’s Theorem reduces to Fermat’s little theorem, i.e., '(m) = m 1. Similar Using extendedEuclid, we can solve the motivating problem shown earlier above:
to the previous, we simply need to compute b'(m) 1 % m to get the modular multiplicative The Linear Diophantine Equation with two variables 25x + 18y = 839.
inverse of b. Therefore, (a ⇥ b 1 ) % m equals to ((a % m) ⇥ (b'(m) 1 % m)) % m. Recall that extendedEuclid(25, 18) helps us get:
Example 1: a = 27, b = 3, m = 13. (27 / 3) % 13 = ((27 % 13) ⇥ (3 1 % 13)) % 13 25 ⇥ 5 + 18 ⇥ 7 = gcd(25, 18) = 1.
= ((27 % 13) ⇥ (311 % 13)) % 13 = (1 ⇥ 9) % 13 = 9.
We multiply the left and right hand side of the equation above by 839/gcd(25, 18) = 839:
Example 2: a = 27, b = 3, m = 10. (27 / 3) % 10 = ((27 % 10) ⇥ (3 1 % 10)) % 10
25 ⇥ 4195 + 18 ⇥ 5873 = 839.
= ((27 % 10) ⇥ (33 % 10)) % 10 = (1 ⇥ 9) % 10 = 9.
Thus x = 4195 + (18/1)n and y = 5873 (25/1)n.
Alternatively, we can also use the Extended Euclid algorithm to compute the modular
multiplicative inverse of b (while still assuming gcd(b, m) = 1). We discuss this version in the Since we need to have non-negative x and y (non-negative number of apples and oranges),
next Section 5.3.10. Note that if gcd(b, m) 6= 1, then b does not have a modular multiplicative we have two more additional constraints:
inverse with respect to modulus m. 4195 + 18n 0 and 5873 25n 0, or
4195/18 n 5873/25, or
233.05 n 234.92.
291 292
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
The only possible integer n is 234. Thus the unique solution is x = 4195 + 18 ⇥ 234 = 17
and y = 5873 25 ⇥ 234 = 23, i.e., 17 apples (of 25 cents each) and 23 oranges (of 18 cents
each) for a total of 8.39 dollars. Programming Exercises related to Number Theory:
a. Prime Numbers
Modular Multiplicative Inverse with Extended Euclidean Algorithm
1. Entry Level: UVa 00543 - Goldbach’s Conjecture * (sieve; complete
Now let’s compute x such that b ⇥ x = 1 (mod m). This b ⇥ x = 1 (mod m) is equivalent to search; Goldbach’s conjecture19 ; similar to UVa 00686, 10311, and 10948)
b⇥x = 1+m⇥y where y can be any integer. We rearrange the formula into b⇥x m⇥y = 1 2. UVa 01644 - Prime Gap * (LA 3883 - Tokyo07; sieve; prime check, upper
or b ⇥ x + m ⇥ y = 1 as y is a variable that can absorb the negative sign. This is a Linear bound - lower bound)
Diophantine Equation that can be solved with the Extended Euclidean algorithm to obtain 3. UVa 10650 - Determinate Prime * (3 uni-distance consecutive primes)
the value of x (and y—ignored). This x = b 1 (mod m). 4. UVa 11752 - The Super ... * (try base 2 to 216 ; composite power; sort)
Note that the result b 1 (mod m) can only be found if b and m are relatively prime, i.e., 5. Kattis - enlarginghashtables * (use sieve up to 40 000; prime test numbers
gcd(b, m) = 1. It can be implemented as follows (notice our safeguard mod sub-routine to greater than 2n; check primality of n itself)
deal with the case when a % m is negative): 6. Kattis - primesieve * (use sieve up to 108 ; it is fast enough)
7. Kattis - reseto * (sieve of Eratosthenes until the k-th crossing)
int mod(int a, int m) { // returns a (mod m)
return ((a%m) + m) % m; // ensure positive answer Extra UVa: 00406, 00686, 00897, 00914, 10140, 10168, 10311, 10394, 10490,
} 10852, 10948.
b. (Probabilistic) Prime Testing
int modInverse(int b, int m) { // returns b^(-1) (mod m) 1. Entry Level: Kattis - pseudoprime * (yes if !isPrime(p) && a.modPow(p,
int x, y; p) = a; Big Integer; also available at UVa 11287 - Pseudoprime Numbers)
int d = extEuclid(b, m, x, y); // to get b*x + m*y == d 2. UVa 01180 - Perfect Numbers * (LA 2350 - Dhaka01; small prime check)
if (d != 1) return -1; // to indicate failure 3. UVa 01210 - Sum of Consecutive ... * (LA 3399 - Tokyo05; simple)
// b*x + m*y == 1, now apply (mod m) to get b*x == 1 (mod m)
4. UVa 10235 - Simply Emirp * (case analysis: prime/emirp/not prime;
return mod(x, m); emirp is prime number that if reversed is still a prime number)
}
5. Kattis - flowergarden * (Euclidean dist; small prime check; use isProba-
blePrime; simulation; faster solutions exist)
Now we can compute (a ⇥ b 1 ) % m even if m is not a prime but gcd(b, m) == 1 via ((a
6. Kattis - goldbach2 * (simple brute force problem; use isProbablePrime; faster
% m) ⇥ modInverse(b, m)) % m. solutions exist)
Example 1: ((27 * 3 1 ) % 7 7. Kattis - primes2 * (convert input to either base 2/8/10/16; skip those that
= ((27 % 7) ⇥ modInverse(3, 7)) % 7 = (6 ⇥ 5) % 7 = 30 % 7 = 2. cause NumberFormatException error; use isProbablePrime test and gcd)
Example 2: ((27 * 4 1 ) % 7 Extra UVa: 00960, 10924, 12542.
= ((27 % 7) ⇥ modInverse(4, 7)) % 7 = (6 ⇥ 2) % 7 = 12 % 7 = 2. c. Finding Prime Factors
Example 3 (m is not a prime but gcd(b, m) == 1: ((520 * 25 1 ) % 18 1. Entry Level: UVa 00583 - Prime Factors * (basic factorization problem)
= ((520 % 18) ⇥ modInverse(25, 18) % 18 = (16 ⇥ 13) % 18 = 208 % 18 = 10. 2. UVa 11466 - Largest Prime Divisor * (use efficient sieve implementation
This is because extendedEuclid(25, 18, x, y) updates x = 5, y = 7, and returns d = 1, to get the largest prime factors)
so we have x = ((-5%18) + 18) % 18 = (-5 + 18) % 18 = 13 % 18 = 13. 3. UVa 12703 - Little Rakin * (uses small Fibonacci numbers up to 40 and
simple prime factorization as a and b can be non primes)
Source code: ch5/modInverse.cpp|java|py 4. UVa 12805 - Raiders of the Lost Sign * (prime check; primes of format
4m 1 and 4m + 1; simple prime factorization)
5. Kattis - pascal * (find lowest prime factor of N ; special case: N = 1)
5.3.11 Number Theory in Programming Contests
6. Kattis - primalrepresentation * (factorization problem; use sieve to avoid
We will discuss Pollard’s rho (a faster integer factoring algorithm than the one shown in TLE; use long long; 231 1 is a prime)
Section 5.3.3) in Section 9.12. We will also discuss Chinese Remainder Theorem (CRT) 7. Kattis - primereduction * (factorization problem)
(that uses the Extended Euclidean algorithm in Section 5.3.10) in Section 9.13. Extra UVa: 00516, 10392.
However, there are many other number theoretic problems that cannot be discussed one
by one in this book (e.g., the various divisibility properties). Based on our experience, Also see Section 9.12 for a faster (but rare) integer factoring algorithm.
number theory problems frequently appear in ICPCs especially in Asia. It is a good idea for 19
Christian Goldbach’s conjecture (updated by Leonhard Euler) is as follows: Every even number 4 can
one team member to specifically study number theory listed in this book and beyond. be expressed as the sum of two prime numbers
293 294
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.3. NUMBER THEORY c Steven, Felix, Suhendry
20 21
GCD and/or LCM problems that requires factorization are in ‘Working with Prime Factors’ category. Factorial problems that requires factorization are categorized in ‘Working with Prime Factors’ category.
295 296
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry
297 298
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry
ll Cat[MAX_N];
typedef long long ll;
const int MAX_N = 100010;
// inside int main()
const int p = 1e9+7; // p is a prime > MAX_N
Cat[0] = 1;
for (int n = 0; n < MAX_N-1; ++n) // O(MAX_N log p)
ll inv(ll a) { // Fermat’s little theorem
Cat[n+1] = ((4*n+2)%p * Cat[n]%p * inv(n+2)) % p;
return modPow(a, p-2, p); // modPow in Section 5.8
cout << Cat[100000] << "\n"; // the answer is 945729344
} // that runs in O(log p)
ll fact[MAX_N]; We provide our modular arithmetic-style implementations in the source code below:
23
Binomial is a special case of polynomial that only has two terms. Source code: ch5/combinatorics.cpp|java|py
299 300
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry
Catalan numbers are (surprisingly) found in various combinatorial problems. Here, we list In online programming contests where contestant can access the Internet, there is one
down some of the more interesting ones (there are several others). All examples below use more technique that may be useful. First, generate the output for small instances and
n = 3 and Cat(3) = ((2⇥3) C3 )/(3 + 1) = (6 C3 )/4 = 20/4 = 5. then search for that sequence at OEIS (The On-Line Encyclopedia of Integer Sequences)
hosted at https://oeis.org/. If you are lucky, OEIS can tell you the name of the sequence
1. Cat(n) counts the number of distinct binary trees with n vertices, e.g., for n = 3: and/or the required general formula for the larger instances. Moreover, you can also use
https://wolframalpha.com/ to help you process/simplify mathematical formulas.
* * * * * There are still many other counting principles and formulas, too many to be discussed
/ / / \ \ \ in this book. As this is not a pure (discrete) mathematics book, we close this section by
* * * * * * giving a quick revision on some combinatorics techniques and give a few written exercises to
/ \ / \ test/further improve your combinatorics skills.
* * * *
• Fundamental counting principle (rule of sum): If there are n ways to do one action,
2. Cat(n) counts the number of expressions containing n pairs of parentheses which are m ways to do another action, and these two actions cannot be done at the same time,
correctly matched, e.g., for n = 3, we have: ()()(), ()(()), (())(), ((())), and (()()). For then there are n + m ways to choose one of these combined actions. We can classify
more details about this problem, see Book 1. Counting Paths on DAG (review Book 1 and also see Section 8.3) as this.
3. Cat(n) counts the number of di↵erent ways n + 1 factors can be completely parenthe- • Fundamental counting principle (rule of product): If there are n ways to do one action
sized, e.g., for n = 3 and 3 + 1 = 4 factors: {a, b, c, d}, we have: (ab)(cd), a(b(cd)), and m ways to do another action afterwards, then there are n ⇥ m ways to do both.
((ab)c)d, (a(bc))d, and a((bc)d).
• A permutation is an arrangement of objects without repetition and the order is impor-
4. Cat(n) counts the number of ways a convex polygon (see Section 7.3) of n + 2 sides tant. There are n! permutations of a set of size n distinct elements.
can be triangulated. See Figure 5.2—left.
• If the set is actually a multiset (with duplicates), then there are fewer than n! per-
5. Cat(n) counts the number of monotonic paths along the edges of an n ⇥ n grid, which mutations. Suppose that there are k distinct elements, then the actual number of
do not pass above the diagonal. A monotonic path is one which starts in the lower permutations is : (n1 )!⇥(n2n! where ni is the frequency of each distinct element i
)!⇥...⇥(nk )!
left corner, finishes in the upper right corner, and consists entirely of edges pointing and n1 + n2 + . . . + nk = n. This formula is also called as the multinomial coefficients,
rightwards or upwards. See Figure 5.2—right. the generalization of the binomial coefficients discussed in Section 5.4.2.
• There are C(n, k) number of ways to take k items out of a set of n distinct elements.
Exercise 5.4.3.1*: Which one is the hardest to factorize (see Section 5.3.3) assuming that
n is an arbitrary large integer: f ib(n), C(n, k) (assume that k = n/2), or Cat(n)? Why?
Exercise 5.4.3.2*: Catalan numbers Cat(n) appear in some other interesting problems Exercise 5.4.4.1: Count the number of di↵erent possible outcomes if you roll two 6-sided
other than the ones shown in this section. Investigate! dices and flip three 2-sided coins? Will the answer be di↵erent if we do this (rolling and
flipping) one by one in some order versus if we do this in one go?
Exercise 5.4.4.2: How many ways to form a three digits number from {0, 1, 2, . . . , 9},
5.4.4 Combinatorics in Programming Contests each digit can only be used once, 0 cannot be used as the leading digit, and one of the digit
must be 7?
The classic combinatorics-related problems involving (pure) Fibonacci and Catalan numbers
are getting rare as of year 2020. However, there are still many other combinatorics problems Exercise 5.4.4.3: How many possible passwords are there if the length of the password
involving permutations (Section 5.3.7) and combinations (that is, Binomial Coefficients, is between 1 to 10 characters and each character can either be alphabet letters [‘a’..‘z’] or
Section 5.4.2). Some of the basic ones are listed in the programming exercises below and [‘A’..‘Z’] or digits [0..9]? Please output the answer modulo 1e9+7.
the more interesting ones (but (very) rare) are listed in Section 9.15. Note that a pure Exercise 5.4.4.4: Suppose you have a 6-letter word ‘FACTOR’. If we take 3 letters from
and/or classic combinatorics problem is rarely used in modern IOI/ICPC but combinatorics this word ‘FACTOR’, we may have another word, like ‘ACT’, ‘CAT’, ‘ROT’, etc. What is
is usually a subproblem of a bigger problem (Section 8.7). the number of di↵erent 3-letter words that can be formed with the letters from ‘FACTOR’ ?
301 302
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.4. COMBINATORICS c Steven, Felix, Suhendry
Exercise 5.4.4.5: Given the 5-letter word ‘BOBBY’, rearrange the letters to get another c. Catalan Numbers
word, e.g., ‘BBBOY’, ‘YOBBB’, etc. How many di↵erent permutations are possible? 1. Entry Level: UVa 10223 - How Many Nodes? * (you can precalculate
Exercise 5.4.4.6: Using the principle of inclusion-exclusion, count this: how many integers the answers as there are only 19 Catalan Numbers < 232 -1)
in [1..1M] that are multiples of 5 and 7? 2. UVa 00991 - Safe Salutations * (Catalan Numbers)
3. UVa 10007 - Count the Trees * (answer is Cat(n) ⇥ n!; Big Integer)
Exercise 5.4.4.7: Solve UVa 11401 - Triangle Counting! “Given n rods of length 1, 2,
4. UVa 10312 - Expression Bracketing * (number of binary bracketing =
. . . , n, pick any 3 of them and build a triangle. How many distinct triangles can you make
Cat(n); number of bracketing = Super-Catalan numbers)
(consider triangle inequality, see Section 7.2)? (3 n 1M ) ”.
5. Kattis - catalan * (basic Catalan Numbers)
Exercise 5.4.4.8*: There are A boys and B girls. Count the number of ways to select a 6. Kattis - catalansquare * (Catalan Numbers++; follow the description)
group of people such that the number of boys is equal to the number of girls in the chosen 7. Kattis - fiat * (N -th Catalan Number; use Fermat’s little theorem)
group, e.g., A = 3 and B = 2, then there are 1/6/3 way(s) to select a group with 0/2/4
people, respectively, with a total of 1+6+3 = 10 ways. Extra UVa: 10303, 10643.
c. Others, Easier
1. Entry Level: UVa 11401 - Triangle Counting * (spot the pattern)
2. UVa 11310 - Delivery Debacle * (requires DP: let dp[i] be the number
Programming Exercises related to Combinatorics: of ways the cakes can be packed for a box 2⇥ i)
3. UVa 11597 - Spanning Subtree * (graph theory; trivial)
a. Fibonacci Numbers 4. UVa 12463 - Little Nephew * (double the socks and the shoes first)
1. Entry Level: UVa 00495 - Fibonacci Freeze * (O(n) DP; Big Integer) 5. Kattis - character * (OEIS A000295)
2. UVa 00763 - Fibinary Numbers * (Zeckendorf representation; greedy; 6. Kattis - honey * (OEIS A002898)
Big Integer) 7. Kattis - integerdivision * (count frequencies of each remainder of [0..d-1]; add
3. UVa 10334 - Ray Through Glasses * (combinatorics; Big Integer) C(freq, 2) per such remainder)
4. UVa 10689 - Yet Another Number ... * (easy; Pisano period) Extra UVa: 10079, 11115, 11480, 11609.
5. Kattis - anti11 * (this problem is a modified Fibonacci numbers)
c. Others, Harder
6. Kattis - batmanacci * (Fibonacci; observation on N ; Divide and Conquer)
1. Entry Level: UVa 10784 - Diagonal * (the number of diagonals in n-gon
7. Kattis - rijeci * (simple simulation with a single loop; Fibonacci)
= n ⇤ (n 3)/2; use it to derive the solution)
Extra UVa: 00580, 00900, 00948, 01258, 10183, 10450, 10497, 10579, 10862, 2. UVa 01224 - Tile Code * (LA 3904 - Seoul07; derive formula from ob-
11000, 11089, 11161, 11780, 12281, 12620. serving the small instances first)
Extra Kattis: interestingintegers. 3. UVa 11069 - A Graph Problem * (use Dynamic Programming)
b. Binomial Coefficients: 4. UVa 11538 - Chess Queen * (count along rows/columns/diagonals)
5. Kattis - anagramcounting * (use Java BigInteger)
1. Entry Level: UVa 00369 - Combinations * (be careful with overflow issue)
6. Kattis - incognito * (count frequencies; combinatorics; minus one)
2. UVa 10541 - Stripe * (a good combinatorics problem)
7. Kattis - tritiling * (there are two related recurrences here; also available at
3. UVa 11955 - Binomial Theorem * (pure application; DP) UVa 10918 - Tri Tiling)
P
4. UVa 12712 - Pattern Locker * (the answer is N i=M C(L ⇤ L, i) ⇤ i!, but
Extra UVa: 00153, 00941, 10359, 10733, 10790, 11204, 11270, 11554, 12001,
simplify the computation of this formula instead of running it directly)
12022.
5. Kattis - election * (compute the answers with help of binomial coefficients)
6. Kattis - lockedtreasure * (the answer is n Cm 1 ) Extra Kattis: kitchencombinatorics.
7. Kattis - oddbinom * (OEIS A006046) c. Also see Section 9.15 for a few rare (combinatorics) formulas and theorems.
303 304
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.5. PROBABILITY THEORY c Steven, Felix, Suhendry
5.5 Probability Theory – Abridged problem description of UVa 10759 - Dice Throwing: n common cubic
dice are thrown. What is the probability that the sum of all thrown dices is at
Probability Theory is a branch of mathematics dealing with the analysis of random phe- least x? (constraints: 1 n 24, 0 x < 150).
nomena. Although an event like an individual (fair) coin toss is random, the sequence of
random events will exhibit certain statistical patterns if the event is repeated many times. The sample space (the denominator of the probability value) is very simple to
This can be studied and predicted. For example, the probability of a head appearing is 1/2 compute. It is 6n .
(similarly with a tail). Therefore, if we flip a (fair) coin n times, we expect that we see heads The number of events is slightly harder to compute. We need a (simple) DP
n/2 times. because there are lots of overlapping subproblems. The state is (dice lef t, score)
In programming contests, problems involving probability are either solvable with: where dice lef t keeps track of the remaining dice that we can still throw (starting
from n) and score counts the accumulated score so far (starting from 0). DP can
• Closed-form formula. For these problems, one has to derive the required (usually O(1)) be used as there are only n ⇥ (n ⇥ 6) = 6n2 distinct states for this problem.
formula. For example, let’s discuss how to derive the solution for UVa 10491 - Cows
and Cars24 , which is a generalized version of a TV show: ‘The Monty Hall problem’25 . When dice lef t = 0, we return 1 (event) if score x, or return 0 otherwise;
When dice lef t > 0, we try throwing one more dice. The outcome v for this dice
You are given NCOW S number of doors with cows, NCARS number of doors with cars,
can be one of six values and we move to state (dice lef t-1, score+v). We sum all
and NSHOW number of doors (with cows) that are opened for you by the presenter.
the events. The time complexity is O(6n2 ⇥ 6) = O(36n2 ) which is very small as
Now, you need to count the probability of winning a car (by opening a door that has
n 24 in this problem.
a car behind it) assuming that you will always switch to another unopened door.
The first step is to realize that there are two ways to get a car. Either you pick a cow One final requirement is that we have to use gcd (see Section 5.3.6) to simplify the
first and then switch to a car, or you pick a car first, and then switch to another car. probability fraction (see Section 5.2). In some other problems, we may be asked to
The probability of each case can be computed as shown below. output the probability value correct to a certain digit after decimal point (either
between [0.0..1.0] or as percentages [0.0..100.0]).
In the first case, the chance of picking a cow first is (NCOW S /(NCOW S + NCARS )).
Then, the chance of switching to a car is (NCARS /(NCARS + NCOW S NSHOW 1)). – Abridged problem description of Kattis - bobby: Betty has an S-sided fair dice
Multiply these two values together to get the probability of the first case. The -1 is to (having values 1 through S). Betty challenges Bobby to obtain a total value R
account for the door that you have already chosen, as you cannot switch to it. on at least X out of Y rolls. If Bobby is successful, Betty will give Bobby W
The probability of the second case can be computed in a similar manner. The chance times of his initial bet. Should Bobby take the bet? Or in another word, is his
of picking a car first is (NCARS /(NCARS + NCOW S )). Then, the chance of switching to expected return greater than his original bet?
a car is ((NCARS 1)/(NCARS + NCOW S NSHOW 1)). Both -1 accounts for the car To simplify, let’s assume that Bobby bets 1 unit of currency, is his expected return
that you have already chosen. strictly greater than 1 unit?
Sum the probability values of these two cases together to get the final answer.
For a single roll of an S-sided fair dice, Bobby’s chance to hit R or higher (a
• Exploration of the search (sample) space to count number of events (usually harder to success) is psuccess = S SR+1 and consequently Bobby’s chance to hit R-1 or lower
count; may deal with combinatorics—see Section 5.4, Complete Search—see Book 1, (a failure) is RS 1 (or 1 psuccess ).
or Dynamic Programming–see Book 1) over the countable sample space (usually much
We can then write a recursive function exp val(num roll, num success). We
simpler to count). Examples:
simulate the roll one by one. The base case is when num roll == Y where
– ‘UVa 12024 - Hats’ is a problem of n people who store their n hats in a cloakroom we return W if num success X or 0 otherwise. In general case, we do one
for an event. When the event is over, these n people take their hats back. Some more throw that can be either a success with probability psuccess or a failure
take a wrong hat. Compute how likely is that everyone takes a wrong hat. with probability (1 psuccess ) and add both expected values due to linearity of
This problem can be solved via brute-force and pre-calculation by trying all n! expectation. The time complexity is O(Y 2 ) which is very small as Y 10.
permutations and see how many times the required events appear over n! because
n 12 in this problem and such O(n! ⇥ n) naı̈ve solution will only take about a
minute to run. However, a more math-savvy contestant can use this Derangement Exercise 5.5.1: Instead of memorizing the formula, show how to derive the Derangement
(DP) formula instead: An = (n-1) ⇥ (An 1 + An 2 ) that will be fast enough for DP formula An = (n-1) ⇥ (An 1 + An 2 ).
much higher n, possibly combined with modular arithmetic. Exercise 5.5.2: There are 15 students in a class. 8 of them are boys and the other 7 are
24
girls. The teacher wants to form a group of 5 students in random fashion. What is the
You may be interested to attempt an interactive problem : Kattis - askmarilyn too.
25 probability that the formed group consists of all girls?
This is an interesting probability puzzle. Readers who have not heard this problem before are encouraged
to do some Internet search and read the history of this problem. In the original problem, NCOW S = 2,
NCARS = 1, and NSHOW = 1. The probability of staying with your original choice is 13 and the probability
of switching to another unopened door is 23 and therefore it is always beneficial to switch.
305 306
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.6. CYCLE-FINDING c Steven, Felix, Suhendry
5.6 Cycle-Finding
Programming Exercises about Probability Theory: 5.6.1 Problem Description
a. Probability Theory, Easier Given a function f : S ! S (that maps a natural number from a finite set S to another
natural number in the same finite set S) and an initial value x0 2 N , the sequence of iterated
1. Entry Level: UVa 10491 - Cows and Cars * (2 ways: either pick a cow function values: {x0 , x1 = f (x0 ), x2 = f (x1 ), . . . , xi = f (xi 1 ), . . .} must eventually use
first, then switch to a car; or pick a car first, and then switch to another car)
the same value twice, i.e., 9i < j such that xi = xj . Once this happens, the sequence must
2. UVa 01636 - Headshot * (LA 4596 - NorthEasternEurope09; ad hoc then repeat the cycle of values from xi to xj 1 . Let µ (the start of cycle) be the smallest
probability question, one tricky special case involving all zeroes)
index i and (the cycle length) be the smallest positive integer such that xµ = xµ+ . The
3. UVa 10238 - Throw the Dice * (DP; s: (dice left, score); try F values; cycle-finding problem26 is defined as the problem of finding µ and given f (x) and x0 .
Big Integer; no need to simplify the fraction; see UVa 10759) For example, in UVa 00350 - Pseudo-Random Numbers, we are given a pseudo-random
4. UVa 11181 - Probability (bar) Given * (iterative brute force; try all number generator f (x) = (Z ⇥ x + I)%M with x0 = L and we want to find out the sequence
possibilities) length before any number is repeated (i.e., the ). A good pseudo-random number generator
5. Kattis - bobby * (computation of expected value) should have a large . Otherwise, the numbers generated will not look ‘random’.
6. Kattis - dicebetting * (s: (dice left, distinct numbers so far); each throw can Let’s try this process with the sample test case Z = 7, I = 5, M = 12, L = 4, so we
increase distinct numbers so far or not) have f (x) = (7 ⇥ x + 5)%12 and x0 = 4. The sequence of iterated function values is
7. Kattis - odds * (complete search; simple probability) {4, 9, 8, 1, 0, 5, 4, . . .}. We have µ = 0 and = 6 as x0 = xµ+ = x0+6 = x6 = 4. The
Extra UVa: 10328, 10759, 12024, 12114, 12230, 12457, 12461. sequence of iterated function values cycles from index 6 onwards.
On another test case Z = 26, I = 11, M = 80, L = 7, we have f (x) = (26 ⇥ x + 11)%80
Extra Kattis: dicegame, orchard, password, secretsanta. and x0 = 7. The sequence of iterated function values is {7, 33, 69, 45, 61, 77, 13, 29, 45, . . .}.
b. Probability Theory, Harder This time, we have µ = 3 and = 5.
1. Entry Level: UVa 11628 - Another lottery * (p[i] = ticket bought by i
at the last round/total tickets bought at the last round by all n; gcd) 5.6.2 Solutions using Efficient Data Structures
2. UVa 10056 - What is the Probability? * (get the closed form formula) A simple algorithm that will work for many cases and/or variants of this cycle-finding
3. UVa 10648 - Chocolate Box * (DP; s: (rem boxes, num empty)) problem uses an efficient data structure to store key to value information: a number xi
4. UVa 11176 - Winning Streak * (DP, s: (rem games, streak); t: lose this (the key) has been first encountered at iteration i (the value) in the sequence of iterated
game, or win the next W = [1..n] games and lose the (W+1)-th game) function values. Then for xj that is encountered later (j > i), we test if xj is already stored
5. Kattis - anthony * (DP probability; need to drop one parameter (N or M ) in the data structure. If it is, it implies that xj = xi , µ = i, = j i. This algorithm
and recover it from the other one) runs in O((µ + ) ⇥ DS cost) where DS cost is the cost per one data structure operation
6. Kattis - goodcoalition * (DP probability; like Knapsack) (insert/search). This algorithm requires at least O(µ + ) space to store past values.
7. Kattis - lostinthewoods * (simulate random walks of various lengths and dis- For many cycle-finding problems with rather large S (and likely large µ + ), we can use
tribute the probabilities per iteration; the answer will converge eventually) O(µ + + buf f er) space C++ STL unordered map/Java HashMap/Python dict/OCaml
Hashtbl to store/check the iteration indices of past values in O(1) time. But if we just need
Extra UVa: 00542, 00557, 10218, 10777, 11021, 11346, 11500, 11762.
to stop the algorithm upon encountering the first repeated number, we can use C++ STL
Extra Kattis: 2naire, anotherdice, bond, bribe, explosion, genius, gnollhypoth- unordered set/Java HashSet/Python set (curly braces {}) instead.
esis, pollygone, ra✏e, redsocks. For other cycle-finding problems with relatively small S (and likely small µ + ), we
may even use the O(|S|) space Direct Addressing Table (DAT) to store/check the iteration
indices of past values also in O(1) time.
Note that by trading-o↵ (large, up to O(µ + )) memory space, we can actually solve
Profile of Algorithm Inventors this cycle-finding problem in efficient O(µ + ) runtime.
John M. Pollard (born 1941) is a British mathematician who has invented algorithms for
the factorization of large numbers (the Pollard’s rho algorithm, see Section 9.12) and for the Exercise 5.6.2.1: Notice that on many random test cases of UVa 00350, the values of µ
calculation of discrete logarithms (not discussed in this book). and are close to 0. However, generate a simple test case (choose Z, I, M , and L) for UVa
00350 so that even an O(µ + ) algorithm really runs in O(M ), i.e., almost, if not all possible
Richard Peirce Brent (born 1946) is an Australian mathematician and computer scientist. integers 2 [0..M -1] are used before a cycle is detected.
His research interests include number theory (in particular factorization), random number
generators, computer architecture, and analysis of algorithms. He has invented or co-invented
various mathematics algorithms. 26
We can also view this problem as a graph problem, i.e., finding the start and length of a cycle in a
functional graph/pseudo tree.
307 308
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.6. CYCLE-FINDING c Steven, Felix, Suhendry
3. Finding
309 310
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.7. GAME THEORY (BASIC) c Steven, Felix, Suhendry
For more examples, visit the VisuAlgo, cycle-finding visualization and define your own29 Decision Tree
f (x) = (a ⇥ x2 + b ⇥ x + c)%M and your own x0 to see this algorithm in action. One way is to write a recursive code to explore the Decision Tree of the game (a.k.a. the
Game Tree). If there is no overlapping subproblem, pure recursive backtracking is suitable.
Visualization: https://visualgo.net/en/cyclefinding
Otherwise, Dynamic Programming is needed. Each vertex describes the current player and
Source code: ch5/UVa00350.cpp|java|py|ml the current state of the game. Each vertex is connected to all other vertices legally reachable
from that vertex according to the game rules. The root vertex describes the starting player
and the initial game state. If the game state at a leaf vertex is a winning state, it is a win
Exercise 5.6.3.1*: Richard Peirce Brent invented an improved version of Floyd’s cycle- for the current player (and a lose for the other player). At an internal vertex, the current
finding algorithm shown above. Study and implement Brent’s algorithm [4]. player chooses a vertex that guarantees a win with the largest margin (or if a win is not
possible, chooses a vertex with the least loss). This is called the Minimax strategy.
For example, in UVa 10368 - Euclid’s Game, there are two players: Stan (player 0)
and Ollie (player 1). The state of the game is a triple of integers (id, a, b). The current
player id can subtracts any positive multiple of the lesser of the two numbers, integer b,
Programming Exercises related to Cycle-Finding:
from the greater of the two numbers, integer a, provided that the resulting number must be
nonnegative. We always maintain that a b. Stan and Ollie plays alternately, until one
1. Entry Level: UVa 00350 - Pseudo-Random Numbers * (very basic cycle-
finding problem; simply run Floyd’s cycle-finding algorithm) player is able to subtract a multiple of the lesser number from the greater to reach 0, and
thereby wins. The first player is Stan. The decision tree for a game with initial state id = 0,
2. UVa 11036 - Eventually periodic ... * (cycle-finding; evaluate Reverse Polish
a = 34, and b = 12 is shown in Figure 5.5.
f with a stack)
Let’s trace what happens in Figure 5.5. At the root (initial state), we have triple
3. UVa 11053 - Flavius Josephus ... * (cycle-finding; the answer is N - ) (0, 34, 12). At this point, player 0 (Stan) has two choices: either to subtract a b = 34 12 =
4. UVa 11511 - Frieze Patterns * (cycle-finding on vectors; notice that the 22 and move to vertex (1, 22, 12) (the left branch) or to subtract a 2 ⇥ b = 34 2 ⇥ 12 = 10
pattern will cycle fast) and move to vertex (1, 12, 10) (the right branch). We try both choices recursively.
5. Kattis - dragondropped * (interactive cycle finding problem; tight constraints) Let’s start with the left branch. At vertex (1, 22, 12)—(Figure 5.5—B), the current player
1 (Ollie) has no choice but to subtract a b = 22 12 = 10. We are now at vertex (0, 12, 10)—
6. Kattis - fibonaccicycles * (detect cycle of f ib(n)%k using fast data structure)
(Figure 5.5—C). Again, Stan only has one choice which is to subtract a b = 12 10 = 2.
7. Kattis - rats * (string processing plus cycle-finding; unordered set) We are now at leaf vertex (1, 10, 2)—(Figure 5.5—D). Ollie has several choices but Ollie can
Extra UVa: 00202, 00275, 00408, 00547, 00942, 00944, 10162, 10515, 10591, definitely win as a 5 ⇥ b = 10 5 ⇥ 2 = 0 and it implies that vertex (0, 12, 10) is a losing
11549, 11634, 12464, 13217. state for Stan and vertex (1, 22, 12) is a winning state for Ollie.
Now we explore the right branch. At vertex (1, 12, 10)—(Figure 5.5—E), the current
Extra Kattis: cool1, happyprime, partygame.
player 1 (Ollie) has no choice but to subtract a b = 12 10 = 2. We are now at leaf
vertex (0, 10, 2)—(Figure 5.5—F). Stan has several choices but Stan can definitely win as
29
a 5 ⇥ b = 10 5 ⇥ 2 = 0 and it implies that vertex (1, 12, 10) is a losing state for Ollie.
This is slightly more generic than the f (x) = (Z ⇥ x + I)%M shown in this section.
311 312
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.7. GAME THEORY (BASIC) c Steven, Felix, Suhendry
Mathematical Insights to Speed-up the Solution 1. Entry Level: Kattis - euclidsgame * (minimax; backtracking; also available at
UVa 10368 - Euclid’s Game)
Not all game theory problems can be solved by exploring the entire decision tree of the game,
2. UVa 10111 - Find the Winning ... * (Tic-Tac-Toe; minimax; backtracking)
especially if the size of the tree is large. If the problem involves numbers, we may need to
come up with some mathematical insights to speed up the computation. 3. UVa 10536 - Game of Euler * (model the 4 ⇥ 4 board and 48 possible pins
For example, in UVa 00847 - A multiplication game, there are two players: Stan (player as bitmask; then this is a simple two player game)
0) and Ollie (player 1) again. The state of the game30 is an integer p. The current player 4. UVa 11489 - Integer Game * (game theory; reducible to simple math)
can multiply p with any number between 2 to 9. Stan and Ollie again play alternately, until 5. Kattis - bachetsgame * (2 players game; Dynamic Programming; also available at
one player is able to multiply p with a number between 2 to 9 such that p n (n is the UVa 10404 - Bachet’s Game)
target number), and thereby win. The first player is Stan with p = 1.
6. Kattis - blockgame2 * (observe the pattern; 2 winnable cases if N == M and
Figure 5.6 shows an instance of this multiplication game with n = 17. Initially, player 0 N %M == 0; only 1 move if M < N < 2M ; we can always win if N > 2M )
(Stan) has up to 8 choices (to multiply p = 1 by [2..9]). However, all of these 8 states are
7. Kattis - linije * (game theory; check conditions on how Mirko can win and when
winning states of player 1 as player 1 can always multiply the current p by [2..9] to make
Slavko can win; involves MCBM)
p 17—(Figure 5.6—B). Therefore player 0 (Stan) will surely lose—(Figure 5.6—A).
As 1 < n < 4 294 967 295, the resulting decision tree on the largest test case can be Extra UVa: 10578, 12293, 12469.
extremely huge. This is because each vertex in this decision tree has a huge branching factor Extra Kattis: amultiplicationgame, cuttingbrownies, irrationaldivision, ivana, joy-
of 8 (as there are 8 possible numbers to choose from between 2 to 9). It is not feasible to lessgame, peggamefortwo.
actually explore the decision tree.
It turns out that the optimal strategy for Stan to win is to always multiply p with
9 (the largest possible) while Ollie will always multiply p with 2 (the smallest possible).
Such optimization insights can be obtained by observing the pattern found in the output of
smaller instances of this problem. Note that math-savvy contestant may want to prove this
observation first before coding the solution.
30
This time we omit the player id. However, this parameter id is still shown in Figure 5.6 for clarity.
313 314
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.8. MATRIX POWER c Steven, Felix, Suhendry
The graph: 0->1 with length 1: 0->1 (only 1 path) int modPow(int b, int p, int m) { // assume 0 <= b < m
0->1 with length 2: impossible if (p == 0) return 1;
0--1 0->1 with length 3: 0->1->2->1 (and 0->1->0->1) int ans = modPow(b, p/2, m); // this is O(log p)
| 0->1 with length 4: impossible ans = mod(ans*ans, m); // double it first
2--3 0->1 with length 5: 0->1->2->3->2->1 (and 4 others) if (p&1) ans = mod(ans*b, m); // *b if p is odd
return ans; // ans always in [0..m-1]
2 3 2 3 2 3 2 3 }
0 1 0 0 1 0 1 0 0 2 0 1 0 5 0 3
6 1 0 1 0 7 6 0 2 0 1 7 6 2 0 3 0 7 6 5 0 8 0 7
6
M =4 7 M2 = 6 7 M3 = 6 7 M5 = 6 7 int main() {
0 1 0 1 5 4 1 0 2 0 5 4 0 3 0 2 5 4 0 8 0 5 5
0 0 1 0 0 1 0 1 1 0 2 0 3 0 5 0 ios::sync_with_stdio(false); cin.tie(NULL);
int c; cin >> c;
• Speed-up some DP problems as shown later in this section. while (c--) {
int x, y, n; cin >> x >> y >> n;
31
A matrix is a rectangular (2D) array of numbers. Matrix of size m ⇥ n has m rows and n columns. The cout << modPow(x, y, n) << "\n";
elements of the matrix is usually denoted by the matrix name with two subscripts.
32 }
Identity matrix is a square matrix with all zeroes except that cells along the main diagonal are all ones.
33
If we need f ib(n) for all n 2 [0..n], use O(n) DP solution mentioned in Section 5.4.1 instead. return 0;
34
If you encounter input size of ‘gigantic’ value in programming contest problems, like 1B, the problem }
author is usually looking for a logarithmic solution. Notice that log2 (1B) ⇡ log2 (230 ) is still just 30!
35 36
The derivation of this Fibonacci matrix is shown in Section 5.8.4. Technically, an integer is a 1 ⇥ 1 square matrix.
315 316
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.8. MATRIX POWER c Steven, Felix, Suhendry
317 318
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.8. MATRIX POWER c Steven, Felix, Suhendry
319 320
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.9. SOLUTION TO NON-STARRED EXERCISES c Steven, Felix, Suhendry
321 322
CHAPTER 5. MATHEMATICS c Steven, Felix, Suhendry 5.10. CHAPTER NOTES c Steven, Felix, Suhendry
Exercise 5.4.4.7: The answers for few smallest n = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, . . .} are 5.10 Chapter Notes
{1, 3, 7, 13, 22, 34, 50, 70, 95, 125}. You can generate these numbers using brute force solution
first. Then find the pattern and use it. Notice that the 9 di↵erences between these 10 This chapter has grown significantly since the first edition of this book. However, even
numbers are {+2, +4, +6, +9, +12, +16, +20, +25, +30, . . .}. The 8 di↵erences of these 9 after we reach the fourth edition, we are aware that there are still many more mathematical
di↵erences are {+2, +2, +3, +3, +4, +4, +5, +5, . . .}, which can be exploited. problems and algorithms that have not been discussed in this chapter, e.g.,
Exercise 5.5.1: Let’s label the people with p1 , p2 , . . . , pn and the hats with h1 , h2 , . . . , hn . • There are many more rare combinatorics problems and formulas,
Now consider the first person p1 . This person has n-1 choices of taking someone else’s hat • There are other theorems, hypotheses, and conjectures,
(hi not h1 ). Now consider the follow up action of the original owner of hi , which is pi . There
• (Computational) Geometry is also part of Mathematics, but since we have a special
are two possibilities for pi :
chapter for that, we reserve the discussions about geometry problems in Chapter 7.
• pi does not take h1 , then this problem reduces to derangement problem with n-1 people • Later in Chapter 9, we discuss more rare mathematics algorithms/problems, e.g.,
and n-1 hats because each of the other n-1 people has 1 forbidden choice from among
– Fast Fourier Transform for fast polynomial multiplication (Section 9.11),
the remaining n-1 hats (pi is forbidden to take h1 ).
– Pollard’s rho algorithm for fast integer factorization (Section 9.12),
• pi somehow takes h1 , then this problem reduces to derangement problem with n-2 – Chinese Remainder Theorem to solve system of congruences (Section 9.13),
people and n-2 hats. – Lucas’ Theorem to compute C(n, k)%p (Section 9.14),
– Rare Formulas or Theorems (Section 9.15),
Hence, An = (n-1) ⇥ (An 1 + An 2 ).
7⇥6 42
– Sprague-Grundy Theorem in Combinatorial Game Theory (Section 9.16),
Exercise 5.5.2: We need to use Combinatorics. C(7, 5)/C(15, 5) = = = 0.2.
15⇥14 210 – Gaussian Elimination for solving systems of linear equations (Section 9.17).
Exercise 5.6.2.1: Simply set Z = 1, I = 1, M as large as possible, e.g., M = 108 , and
There are really many topics about mathematics. This is not surprising since various math-
L = 0. Then the sequence of iterated function values is {0, 1, 2, . . . , M -2, M -1, 0, . . .}.
ematical problems have been investigated by people since hundreds of years ago. Some of
Exercise 5.8.4.1: For Tribonacci with N = 3, a = {0, 1, 1, 1} and x = {0, 0, 1}, them are discussed in this chapter and in Chapter 7-9, many others are not, and yet only 1 or
we have xt = 0 + 1 ⇥ xt 1 + 1 ⇥ xt 2 + 1 ⇥ xt 3 that can be written in matrix form as: 2 will actually appear in a problem set. To do well in ICPC, it is a good idea to have at least
2 3 2 3 2 3 one strong mathematician in your ICPC team in order to have those 1 or 2 mathematical
1 0 0 0 1 a0 = 1 problems solved. Mathematical prowess is also important for IOI contestants. Although the
6 0 1 1 1 7 6 Xi 7 6 Xi+1 7
6 7 6 7 6 7 amount of problem-specific topics to be mastered is smaller, many IOI tasks require some
4 0 1 0 0 5 ⇥ 4 Xi 1 5 = 4 Xi 5
form of ‘mathematical insights’.
0 0 1 0 Xi 2 Xi 1 We end this chapter by listing some pointers that may be of interest: read number theory
books, e.g., [33], investigate mathematical topics in https://www.wolframalpha.com or
Wikipedia, and attempt programming exercises related to mathematical problems like the
ones in https://projecteuler.net [14] and https://brilliant.org [5].
The breakdown of the number of programming exercises from each section is shown below:
323 324
6.2. AD HOC STRING (HARDER) c Steven, Felix, Suhendry
• Cipher/Encode/Encrypt/Decode/Decrypt (Harder)
Chapter 6 This is the harder form of this big category.
In this chapter, we present one more topic that appears in ICPC—although not as frequently1 1. In UVa 00325 - Identifying Legal Pascal Real Constants, we are asked to decide if
as graph and mathematics problems—string processing. String processing is common in the the given line of input is a legal Pascal Real constant. Suppose the line is stored
research field of bioinformatics. As the strings (e.g., DNA strings) that the researchers in String s, then the following one-liner Java code is the required solution:
deal with are usually (very) long, efficient string-specific data structures and algorithms are
s.matches("[-+]?\\d+(\\.\\d+([eE][-+]?\\d+)?|[eE][-+]?\\d+)")
necessary. Some of these problems are presented as contest problems in ICPCs. By mastering
the content of this chapter, ICPC contestants will have a better chance at tackling those 2. In UVa 00494 - Kindergarten Counting Game, we are asked to count how many
string processing problems. words are there in a given line. Here, a word is defined as a consecutive sequence
String processing tasks also appear in IOI, but usually they do not require advanced of letters (upper and/or lower case). Suppose the line is stored in String s, then
string data structures or algorithms due to syllabus [15] restrictions. Additionally, the input the following one-liner Java code is the required solution:
and output format of IOI tasks are usually simple2 . This eliminates the need to code tedious
s.replaceAll("[^a-zA-Z]+", " ").trim().split(" ").length
input parsing or output formatting commonly found in the ICPC problems. IOI tasks that
require string processing are usually still solvable using basic problem solving paradigms • Output Formatting
(Complete Search, D&C, Greedy, or DP). It is sufficient for IOI contestants to skim through This is the harder form of this big category.
all sections in this chapter except Section 6.3 which is about string processing with DP.
However, we believe that it may be advantageous for some IOI contestants to learn some of • String Comparison
the more advanced materials outside of their syllabus ahead of time. In this group of problems, the contestants are asked to compare strings with various
This chapter is structured as follows: it starts with a list of medium to hard/tedious Ad criteria. This sub-category is similar to the string matching problems in Section 6.4,
Hoc string problems solvable with just basic string processing skills (but harder than the but these problems mostly use strcmp-related functions.
ones discussed in Book 1). Solving many of them will definitely improve your programming
skills, but we have to make a remark that recent contest problems in ICPC (and also IOI) • Really Ad Hoc
usually do not ask for basic string processing solutions except for the ‘giveaway’ problem These are other Ad Hoc string related problems that cannot be classified into one of
that most teams (contestants) should be able to solve. The more important sections are the the other sub categories above.
string processing problems solvable with Dynamic Programming (DP) (Section 6.3), string
matching problems (Section 6.4), an extensive discussion on string processing problems where
we have to deal with reasonably long strings using Trie/Suffix Trie/Tree/Array (Section Profile of Algorithm Inventor
6.5), an alternative string matching algorithm using hashing (Section 6.6), and finally a
discussion of medium Ad Hoc string problems that uses various string techniques: Anagram Donald Ervin Knuth (born 1938) is a computer scientist and Professor Emeritus at Stan-
and Palindrome (Section 6.7). ford University. He is the author of the popular Computer Science book: “The Art of
Computer Programming”. Knuth has been called the ‘father’ of the analysis of algorithms.
1
One potential reason: String input is harder to parse correctly (due to issues like whitespaces, newlines, Knuth is also the creator of the TEX, the computer typesetting system used in this book.
etc) and string output is harder to format correctly, making such string-based I/O less preferred over the
more precise integer-based I/O.
2
IOI 2010-2019 require contestants to implement functions instead of coding I/O routines.
325 326
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.2. AD HOC STRING (HARDER) c Steven, Felix, Suhendry
Extra Kattis: selectgroup. 1. Entry Level: Kattis - raggedright * (just simulate the requirement)
2. UVa 10393 - The One-Handed Typist * (follow problem description)
c. Regular Expression3
3. UVa 11483 - Code Creator * (straightforward; use ‘escape character’)
1. Entry Level: UVa 00494 - Kindergarten ... * (trivial with regex)
4. UVa 12916 - Perfect Cyclic String * (factorize n; string period; also see
2. UVa 00325 - Identifying Legal ... * (trivial with regex) UVa 11452)
3. UVa 00576 - Haiku Review * (solvable with regex) 5. Kattis - irepeatmyself * (string period; complete search)
4. UVa 10058 - Jimmi’s Riddles * (solvable with regex) 6. Kattis - periodicstrings * (brute force; skip non divisor)
5. Kattis - apaxiaaans * (solvable with regex) 7. Kattis - zipfslaw * (sort the words to simplify this problem; also available at
6. Kattis - hidden * (just 1D array manipulation; we can also use regex) UVa 10126 - Zipf’s Law)
7. Kattis - lindenmayorsystem * (DAT; map char to string; simulation; max Extra UVa: 00263, 00892, 00943, 01215, 10045, 10115, 10197, 10361, 10391,
answer 30 ⇥ 55 ; we can also use regex) 10508, 10679, 11452, 11839, 11962, 12243, 12414.
Extra Kattis: apaxianparent, help2, kolone, nimionese, orderlyclass, quickes-
timate, rotatecut, textureanalysis, thore, tolower.
3
There are a few other string processing problems that are solvable with regex too. However, since almost
every string processing problems that can be solved with regex can also be solved with standard ways, it is
not crucial to use regex in competitive programming.
327 328
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.3. STRING PROCESSING WITH DP c Steven, Felix, Suhendry
329 330
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.3. STRING PROCESSING WITH DP c Steven, Felix, Suhendry
Exercise 6.3.2.3: The LCS problem can be solved in O(n log k) when all characters are Extra UVa: 00164, 00526, 00531, 01207, 01244, 10066, 10100, 10192.
distinct, e.g., if you are given two permutations of length n as in UVa 10635. k is the length Extra Kattis: declaration, ls, signals.
of the answer. Solve this variant! b. Non Classic
1. Entry Level: Kattis - stringfactoring * (s: the min weight of substring [i..j];
also available at UVa 11022 - String Factoring)
6.3.3 Non Classical String Processing with DP 2. UVa 11258 - String Partition * (dp(i) = int from substring [i..k] + dp(k))
In this section, we discuss Kattis - hillnumbers. A hill number is a positive integer, the 3. UVa 11361 - Investigating Div-Sum ... * (counting paths in DAG; need
digits of which possibly rise and then possibly fall, but never fall and then rise, like 12321, insights for efficient implementation; K > 90 is useless; digit DP)
12223, and 33322111. However, 1232321 is not a hill number. Verifying if a given number is 4. UVa 11552 - Fewest Flops * (dp(i, c) = minimum number of chunks after
a hill number or not is trivial. The hard part of the problem is this: Given a single integer considering the first i segments ending with character c)
n (assume it is already vetted as a hill number), count the number of positive hill numbers 5. Kattis - exam * (s: (pos, correct left); t: either your friend is wrong or your
less than or equal to n. The main issue is 1 n 1018 . friend is right, process accordingly; easier solution exists)
Initially, it may seem impossible to try all numbers n (TLE) or create a DP table 6. Kattis - heritage * (s: (cur pos); t: try all N words in dictionary; output
up to 1018 cells (MLE). However, if we realize that there are only up to 19 digits in 1018 , final answer modulo a prime)
then we can actually treat the numbers as strings of at most 20 digits and process the digits 7. Kattis - hillnumbers * (digit DP; s: (pos, prev digit, is rising, is lower); try
one by one. This is called ‘Digit DP’ in the competitive programming community and not digit by digit; see the discussion in this section)
considered as a classic solution yet. Basically, there are some big numbers and the problem Extra UVa: 11081, 11084, 12855,
is asking for some property of the number that is decomposable to its individual digits.
Extra Kattis: chemistsvows, cudak, digitsum, haiku, zapis.
Realizing this, we can then quickly come up with the initial state s: (pos) and the initial
transition of trying all possible next digit [0..9] one by one. However, we will quickly realize Also see Section 6.7.2 for a classic string problem: Palindrome that has a
that we need to remember what was the previous used digit so we update our state to s: few interesting variants that require DP solutions.
(pos, prev digit). Now we can check if prev digit and next digit is rising, plateau, or
falling as per requirement. However, we will quickly realize that we also need to remember if
we have reached the peak before and are now concentrating on the falling part, so we update
our state to s: (pos, prev digit, is rising). We start with is rising = true and Profile of Algorithm Inventors
can only set is rising at most once in a valid hill number.
Up to here, this state is almost complete but after some initial testing, we will then James Hiram Morris (born 1941) is a Professor of Computer Science. He is a co-discoverer
realize that we count the answer wrongly. It turns out that we still need one more parameter of the Knuth-Morris-Pratt algorithm for string search.
is lower to have this complete state s: (pos, prev digit, is rising, is lower) where Vaughan Ronald Pratt (born 1944) is a Professor Emeritus at Stanford University. He was
is lower = false initially and we set is lower = true once we use next digit that is one of the earliest pioneers in the field of computer science. He has made several contributions
strictly lower than the actual digit of n at that pos. With this state, we can correctly to foundational areas such as search algorithms, sorting algorithms, and primality testing.
compute the required answer and the details are left behind for the readers. He is also a co-discoverer of the Knuth-Morris-Pratt algorithm for string-search.
331 332
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.4. STRING MATCHING c Steven, Felix, Suhendry
333 334
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.4. STRING MATCHING c Steven, Felix, Suhendry
... at i = 43 and j = 5, we have matches from i = 43 to i = 50 ... We provide our source code that compares the library solution, naı̈ve matching, and one
So P = ‘SEVENTY SEVEN’ is found again at index i = 38. other string matching algorithm: Rabin-Karp that will be discussed in Section 6.6 with the
1 2 3 4 5 KMP algorithm discussed in this section.
012345678901234567890123456789012345678901234567890
T = I DO NOT LIKE SEVENTY SEV BUT SEVENTY SEVENTY SEVEN Source code: ch6/string matching.cpp|java|py|ml
P = SEVENTY SEVEN
0123456789012
Exercise 6.4.1*: Run kmpPreprocess() on P = “ABABA” and show the reset table b!
1
Exercise 6.4.2*: Run kmpSearch() with P = “ABABA” and T = “ACABAABABDABABA”.
To get such speed up, KMP has to preprocess the pattern string and get the ‘reset table’ b
Explain how the KMP search looks like?
(back). If given pattern string P = “SEVENTY SEVEN”, then table b will look like this:
1
0 1 2 3 4 5 6 7 8 9 0 1 2 3
P = S E V E N T Y S E V E N 6.4.3 String Matching in a 2D Grid
b = -1 0 0 0 0 0 0 0 0 1 2 3 4 5
The string matching problem can also be posed in 2D. Given a 2D grid/array of characters
This means, if mismatch happens in j = 11 (see the example above), i.e., after finding a (instead of the well-known 1D array of characters), find the occurrence(s) of pattern P in
match for “SEVENTY SEV”, then we know that we have to retry matching P from index j = the grid. Depending on the problem requirement, the search direction can be up to 4 or 8
b[11] = 3, i.e., KMP now assumes that it has matched only the first three characters of cardinal directions, and either the pattern must be in a straight line or it can bend.
“SEVENTY SEV”, which is “SEV”, because the next match can start with that prefix “SEV”.
The relatively short implementation of the KMP algorithm with comments is shown below. For the example from Kattis - boggle below, the pattern can bend. The solution for such
This implementation has a time complexity of O(n + m), or usually just O(n) as n > m. ‘bendable’ string matching in a 2D grid is usually recursive backtracking (see Book 1). This
is because unlike the 1D counterpart where we always go to the right, at every coordinate
const int MAX_N = 200010; (row, col) of the 2D grid, we have more than one choice to explore. The time complexity is
exponential thus this can only work for a small grid.
char T[MAX_N], P[MAX_N]; // T = text, P = pattern To speed up the backtracking process, usually we employ this simple pruning strategy:
int n, m; // n = |T|, m = |P| once the recursion depth exceeds the length of pattern P, we can immediately prune that
int b[MAX_N], n, m; // b = back table recursive branch. This is also called as depth-limited search (see Section 9.20).
ACMA // From Kattis - boggle
void kmpPreprocess() { // call this first
APcA // We can go to 8 directions and the pattern can bend
int i = 0, j = -1; b[0] = -1; // starting values
toGI // ‘contest’ is highlighted as lowercase in the grid
while (i < m) { // pre-process P
nest // can you find ‘CONTEST’, ‘ICPC’, ‘ACM’, and ‘GCPC’?
while ((j >= 0) && (P[i] != P[j])) j = b[j]; // different, reset j
++i; ++j; // same, advance both For the example from UVa 10010, the pattern can must be in a straight line. If the grid is
b[i] = j; small we can still use the easier to code recursive backtracking mentioned earlier. However
} if the grid is large, we probably need to do multiple O(n + m) string matchings, one for each
} row/column/diagonal and their reverse directions.
void kmpSearch() { // similar as above abcdefghigg // From UVa 10010 - Where’s Waldorf?
int i = 0, j = 0; // starting values hebkWaldork // We can go to 8 directions, but must be straight
while (i < n) { // search through T ftyawAldorm // ‘WALDORF’ is highlighted as UPPERCASE in the grid
while ((j >= 0) && (T[i] != P[j])) j = b[j]; // if different, reset j ftsimrLqsrc
++i; ++j; // if same, advance both byoarbeDeyv // Can you find ‘BAMBI’ and ‘BETTY’?
if (j == m) { // a match is found klcbqwikOmk
printf("P is found at index %d in T\n", i-j); strebgadhRb // Can you find ‘DAGBERT’ in this row?
j = b[j]; // prepare j for the next yuiqlxcnbjF
}
} Note that the topic of String Matching will be revisited two more times. In Section 6.5, we
} will discuss how to solve this problem using string-specific data structures. In Section 6.6,
we will discuss how to solve this problem using a probabilistic algorithm.
335 336
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
337 338
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
339 340
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
6.5.3 Applications of Suffix Tree be > 1 terminating vertices in the subtree rooted at x) and these suffixes share a common
prefix (which implies a repeated substring). The fact that x is the deepest internal vertex
Assuming that the Suffix Tree of a string T is already built, we can use it for these applications
(from root) implies that its path label is the longest repeated substring.
(this list is not exhaustive):
Example: In the Suffix Tree of T = “GATAGACA$” in Figure 6.5, the LRS is “GA” as it is
String Matching in O(m + occ) the path label of the deepest internal vertex x—“GA” is repeated twice in “GATAGACA$”. The
answer can be found with O(n) pass through the Suffix Tree. To deepen your understanding
With Suffix Tree, we can find all (exact) occurrences of a pattern string P in T in O(m + occ) of this application, visit VisuAlgo, Suffix Tree visualization, to create your own Suffix Tree
where m is the length of the pattern string P itself and occ is the total number of occurrences (on small string T with unique longest repeat substring or several equally-longest repeat
of P in T—no matter how long the string T (of length n) is13 . When the Suffix Tree is already substrings) and test this Longest Repeated Substring application.
built, this approach is much faster than the string matching algorithms discussed earlier in
Section 6.4.
Given the Suffix Tree of T, our task is to search for the vertex x in the Suffix Tree whose
path label represents the pattern string P. Note that a matching is simply a common prefix
between the pattern string P and some suffixes of string T. This is done by just one root to
(at worst) leaf traversal of the Suffix Tree of T following the edge labels. The vertex closest
to the root with path label that starts with P is the desired vertex x. Then, the suffix indices
stored in the terminating vertices (leaves) of the subtree rooted at x are the occurrences of
P in T.
Example: In the Suffix Tree of T = “GATAGACA$” shown in Figure 6.4 and P = “A”, we
can simply traverse from root, go along the edge with edge label ‘A’ to find vertex x with Figure 6.5: Longest Repeated Substring of T = “GATAGACA$”
the path label ‘A’. There are 4 occurrences14 of ‘A’ in the subtree rooted at x. They are
suffix 7: “A$”, suffix 5: “ACA$”, suffix 3: “AGACA$”, and suffix 1: “ATAGACA$”. If P = “Z”,
then the Suffix Tree traversal will not be able to find a suitable vertex x and reports that Finding the Longest Common Substring in O(n)
“P is not found”. To deepen your understanding of this application, visit VisuAlgo, Suffix
Tree visualization, to create your own Suffix Tree (on a small string T) and test this String
Matching application using a pattern string P of your choice.
341 342
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
For example, with T1 = “GATAGACA$” and T2 = “CATA#”, The Longest Common Substring
is “ATA” of length 3. In Figure 6.6, we see the vertices with path labels “A”, “ATA”, “CA”,
and “TA” have two di↵erent terminating symbols (notice that vertex with path label “GA”
is not considered as both suffix “GACA$” and “GATAGACA$” come from T1 ). These are the
common substrings between T1 and T2 . The deepest marked vertex is “ATA” and this is
the longest common substring between T1 and T2 . To deepen your understanding of this
application, visit VisuAlgo, Suffix Tree visualization, to create your own Suffix Tree (on two
small strings: T1 and T2 ) and test this Longest Common Substring application.
Exercise 6.5.3.1: Use the Suffix Tree in Figure 6.4; Find P1 = “C” and P2 = “CAT”!
Exercise 6.5.3.2: Find the LRS in T = “CGACATTACATTA$”! Build the Suffix Tree first. Figure 6.7: Sorting the Suffixes of T = “GATAGACA$”
Exercise 6.5.3.3: Find the LCS of T1 = “STEVEN$” and T2 = “SEVEN#”!
Exercise 6.5.3.4*: Instead of finding the LRS, we now want to find the repeated substring Basically, Suffix Array is an integer array that stores a permutation of n indices of sorted
that occurs the most. Among several possible candidates, pick the longest one. For example, suffixes. For example, consider the same19 T = “GATAGACA$” with n = 9. The Suffix Array
if T = “DEFG1ABC2DEFG3ABC4ABC$”, the answer is “ABC” of length 3 that occurs three times of T is a permutation of integers [0..n-1] = {8, 7, 5, 3, 1, 6, 4, 0, 2} as shown in
(not “BC” of length 2 or “C” of length 1 which also occur three times) instead of “DEFG” of Figure 6.7. That is, the suffixes in sorted order are suffix SA[0] = suffix 8 = “$”, suffix
length 4 that occurs only two times. Outline the strategy to find the solution! SA[1] = suffix 7 = “A$”, suffix SA[2] = suffix 5 = “ACA$”, . . . , and finally suffix SA[8] =
Exercise 6.5.3.5*: The Longest Repeated Substring (LRS) problem presented in this sec- suffix 2 = “TAGACA$”.
tion allows overlap. For example, the LRS of T = “AAAAAAAA$” is “AAAAAAA” of length 7.
What should we do if we do not allow the LRS to overlap? For example, the LRS without Suffix Tree versus Suffix Array
overlap of T = “AAAAAAAA$” should be “AAAA” of length 4.
Exercise 6.5.3.6*: Think of how to generalize this approach to find the LCS of more than
two strings. For example, given three strings T1 = “STEVEN$”, T2 = “SEVEN#”, and T3 =
“EVE@”, how to determine that their LCS is “EVE”?
Exercise 6.5.3.7*: Customize the solution further so that we find the LCS of k out of n
strings, where k n. For example, given the same three strings T1 , T2 , and T3 as above,
how to determine that the LCS of 2 out of 3 strings is “EVEN”?
Exercise 6.5.3.8*: The Longest Common Extension (LCE) problem is as follows: Given
a string T and two indices i and j, compute the longest substring of T that starts at both
i and j. Examples assuming T = “CGACATTACATTA$”. If i = 4, and j = 9, the answer is
“ATTA”. If i = 7, and j = 9, the answer is “A”. How to solve this with Suffix Tree?
Figure 6.8: Suffix Tree (Left) and Suffix Array (Right) of T = “GATAGACA$”
6.5.4 Suffix Array Suffix Tree and Suffix Array are closely related20 . As we can see in Figure 6.8, the DFS tree
In the previous subsection, we have shown several string processing problems that can be traversal (neighbors are ordered based on sorted edge labels) of the Suffix Tree visits the
solved if the Suffix Tree is already built. However, the efficient implementation of linear time terminating vertices (the leaves) in Suffix Array order. An internal vertex in the Suffix
Suffix Tree construction (see [35]) is complex and thus risky under a programming contest Tree corresponds to a range in the Suffix Array (a collection of sorted suffixes that share a
setting. Fortunately, the next data structure that we are going to describe—the Suffix Longest Common Prefix (LCP)—to be computed below). A terminating vertex (always
Array invented by Udi Manber and Gene Myers [25]—has similar functionalities as the at leaf due to the usage of a terminating character) in the Suffix Tree corresponds to an
Suffix Tree but is (much) simpler to construct and use, especially in a programming contest individual index in the Suffix Array (a single suffix). Keep these similarities in mind.
setting. Thus, we will skip the discussion on O(n) Suffix Tree construction (see [35]) and They will be useful in the next subsection when we discuss applications of Suffix Array.
instead focus on the O(n log n) Suffix Array construction (see [37]) which is easier to use18 .
19
Then, in the next subsection, we will show that we can apply Suffix Array to solve problems Notice that we also use the terminating symbol ‘$’ to simplify Suffix Array discussion.
20
Memory usage: Suffix Tree has n|⌃| pointers where |⌃| is the number of di↵erent characters in T thus
that have been shown to be solvable with Suffix Tree. it requires O(n|⌃| log n) bits to store its data. On the other hand, Suffix Array is just an array of n indices
18
The di↵erence between O(n) and O(n log n) algorithms in programming contest setup is not much. thus it only needs O(n log n) bits to store its data, slightly more memory efficient.
343 344
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
When applied to string T = “GATAGACA$”, the naı̈ve SA construction code above that sorts
all suffixes with built-in sorting and string comparison library really produces the correct
Suffix Array = {8, 7, 5, 3, 1, 6, 4, 0, 2}. However, this is barely useful except for
contest problems with n 2500. The overall runtime of this algorithm is O(n2 log n) because
the strcmp operation that is used to determine the order of two (possibly long) suffixes is Table 6.1: L/R: Before/After Sorting; k = 1; the initial sorted order appears
too costly, up to O(n) per pair of suffix comparison.
Example 1: The rank of suffix 5 “ACA$” is (‘A’, ‘C’) = (65, 67).
Computing Longest Common Prefix Between Consecutive Sorted Suffixes
Example 2: The rank of suffix 3 “AGACA$” is (‘A’, ‘G’) = (65, 71).
Given the Suffix Array of T, we can compute the Longest Common Prefix (LCP) between After we sort these ranking pairs, the order of suffixes is now like Table 6.1—right,
consecutive sorted suffixes in Suffix Array order. By definition, LCP[0] = 0 as suffix SA[0] where suffix 5 “ACA$” comes before suffix 3 “AGACA$”, etc.
is the first suffix in Suffix Array order without any other suffix preceding it. For i > 0,
LCP[i] = the length of LCP between suffix SA[i] and suffix SA[i-1]. For example, in • At iteration k = 2, the ranking pair of suffix SA[i] is (RA[SA[i]], RA[SA[i]+2]).
Figure 6.8—right, we see that suffix SA[7] = suffix 0 = “GACAGATA$” has an LCP “GA” of This ranking pair is now obtained by looking at the first pair and the second pair of
length 2 with its previous sorted suffix SA[6] = suffix 4 = “GACA$”. We can compute LCP characters only. To get the new ranking pairs, we do not have to recompute many
directly by definition by using the code below. However, this approach is slow as it can things. We set the first one, i.e., Suffix 8 “$” to have new rank r = 0. Then, we iterate
increase the value of L up to O(n2 ) times, e.g., try T = “AAAAAAA$”. from i = [1..n-1]. If the ranking pair of suffix SA[i] is di↵erent from the ranking
pair of the previous suffix SA[i-1] in sorted order, we increase the rank r = r + 1.
// continuation from above Otherwise, the rank stays at r (see Table 6.2—left).
vi LCP(n);
LCP[0] = 0; // default value
for (int i = 1; i < n; ++i) { // compute by def, O(n^2)
int L = 0; // always reset L to 0
while ((SA[i]+L < n) && (SA[i-1]+L < n) &&
(T[SA[i]+L] == T[SA[i-1]+L])) ++L; // same L-th char, ++L
LCP[i] = L;
}
printf("T = ’%s’\n", T);
printf(" i SA[i] LCP[i] Suffix SA[i]\n");
Table 6.2: L/R: Before/After Sorting; k = 2; “GATAGACA” and “GACA” are swapped
for (int i = 0; i < n; ++i)
printf("%2d %2d %2d %s\n", i, SA[i], LCP[i], T+SA[i]); Example 1: In Table 6.1—right, the ranking pair of suffix 7 “A$” is (65, 36) which is
di↵erent with the ranking pair of previous suffix 8 “$-” which is (36, 0). Therefore in
The source code of this slow algorithm is given below using the fastest language (C++), but Table 6.2—left, suffix 7 has a new rank 1.
it is probably not that useful to be used in a modern programming contest.
Example 2: In Table 6.1—right, the ranking pair of suffix 4 “GACA$” is (71, 65) which
Source code: ch6/sa lcp slow.cpp is similar with the ranking pair of previous suffix 0 “GATAGACA$” which is also (71, 65).
345 346
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
Therefore in Table 6.2—left, since suffix 0 is given a new rank 6, then suffix 4 is also theorem [20]. The idea is simple: it is easier to compute the LCP in the original position
given the same new rank 6. order of the suffixes instead of the lexicographic order of the suffixes. In Table 6.4—right, we
Once we have updated RA[SA[i]] 8i 2 [0..n-1], the value of RA[SA[i]+k] can be have the original position order of the suffixes of T = ‘GATAGACA$’. Observe that column
easily determined too. In our explanation, if SA[i]+k n, we give a default rank 0. PLCP[i] forms a pattern: decrease-by-1 block (2 ! 1 ! 0); increase to 1; decrease-by-1
See Exercise 6.5.4.1 for more details on the implementation aspect of this step. block again (1 ! 0); increase to 1 again; decrease-by-1 block again (1 ! 0), etc.
At this stage, the ranking pair of suffix 0 “GATAGACA$” is (6, 7) and suffix 4 “GACA$”
is (6, 5). These two suffixes are still not in sorted order whereas all the other suffixes
are already in their correct order. After another round of sorting, the order of suffixes
is now like Table 6.2—right.
347 348
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
349 350
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
Exercise 6.5.4.5*: Show how to extend the computation of LCP between two consecutive We start by finding the lower bound. The current range is i = [0..8] and thus the middle
sorted suffixes into computation of LCP between a range of sorted suffixes, i.e., answer one is i = 4. We compare the first two characters of suffix SA[4], which is “ATAGACA$”, with
LCP(i, j). For example in Figure 6.8, LCP(1, 4) = 1 (“A”), LCP(6, 7) = 2 (“GA”), and P = ‘GA’. As P = ‘GA’ is larger, we continue exploring i = [5..8]. Next, we compare the
LCP(0, 8) = 0 (nothing in common). first two characters of suffix SA[6], which is “GACA$”, with P = ‘GA’. It is a match. As we
Exercise 6.5.4.6*: Show how to use LCP information to compute the number of distinct are currently looking for the lower bound, we do not stop here but continue exploring i =
substrings in T in O(n log n) time. [5..6]. P = ‘GA’ is larger than suffix SA[5], which is “CA$”. We stop after checking that
SA[8] doesn’t start with prefix P = ‘GA’. Index i = 6 is the lower bound, i.e., suffix SA[6],
which is “GACA$”, is the first time pattern P = ‘GA’ appears as a prefix of a suffix in the
list of sorted suffixes.
6.5.5 Applications of Suffix Array
We have mentioned earlier that Suffix Array is closely related to Suffix Tree. In this subsec-
tion, we show that with Suffix Array (which is easier to construct), we can solve the string
processing problems shown in Section 6.5.3 that are solvable using Suffix Tree.
// extension of class Suffix Array above Next, we search for the upper bound. The first step is the same as above. But at the second
ii stringMatching(const char *P) { // in O(m log n) step, we have a match between suffix SA[6], which is “GACA$”, with P = ‘GA’. Since now
int m = (int)strlen(P); // usually, m < n we are looking for the upper bound, we continue exploring i = [7..8]. We find another
int lo = 0, hi = n-1; // range = [0..n-1] match when comparing suffix SA[7], which is “GATAGACA$”, with P = ‘GA’. We stop here.
while (lo < hi) { // find lower bound This i = 7 is the upper bound in this example, i.e., suffix SA[7], which is “GATAGACA$”, is
int mid = (lo+hi) / 2; // this is round down the last time pattern P = ‘GA’ appears as a prefix of a suffix in the list of sorted suffixes.
int res = strncmp(T+SA[mid], P, m); // P in suffix SA[mid]?
(res >= 0) ? hi = mid : lo = mid+1; // notice the >= sign
} Finding the Longest Repeated Substring in O(n)
if (strncmp(T+SA[lo], P, m) != 0) return {-1, -1}; // if not found If we have computed the Suffix Array in O(n log n) and the LCP between consecutive suffixes
ii ans; ans.first = lo; in Suffix Array order in O(n), then we can determine the length of the Longest Repeated
hi = n-1; // range = [lo..n-1] Substring (LRS) of T in O(n).
while (lo < hi) { // now find upper bound The length of the LRS is just the highest number in the LCP array. In Table 6.4—left
int mid = (lo+hi) / 2; that corresponds to the Suffix Array and the LCP of T = “GATAGACA$”, the highest number
int res = strncmp(T+SA[mid], P, m); is 2 at index i = 7. The first 2 characters of the corresponding suffix SA[7] (suffix 0) is
(res > 0) ? hi = mid : lo = mid+1; // notice the > sign “GA”. This is the LRS in T.
}
if (strncmp(T+SA[hi], P, m) != 0) --hi; // special case
ans.second = hi; Finding the Longest Common Substring in O(n)
return ans; // returns (lb, ub) Without loss of generality, let’s consider the case with only two strings. We use the same
} // where P is found example as in the Suffix Tree section earlier: T1 = “GATAGACA$” and T2 = “CATA#”. To solve
the Longest Common Substring (LCS) problem using Suffix Array, first we have to concate-
A sample execution of this string matching algorithm on the Suffix Array of T = “GATAGACA$” nate both strings (note that the terminating characters of both strings must be di↵erent)
with P = “GA” is shown in Table 6.5. to produce T = “GATAGACA$CATA#”. Then, we compute the Suffix and LCP array of T as
22
This is achievable by using the strncmp function to compare only the first m characters of both suffixes. shown in Table 6.6.
351 352
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.5. SUFFIX TRIE/TREE/ARRAY c Steven, Felix, Suhendry
Then, we go through consecutive suffixes in O(n). If two consecutive suffixes belong to Extra Kattis: aliens, burrowswheeler, dvaput, lifeforms, repeatedsubstrings, string-
di↵erent owners (can be easily checked23 , for example we can test if suffix SA[i] belongs to multimatching, substrings.
T1 by testing if SA[i] < the length of T1 ), we look at the LCP array and see if the maximum Others: SPOJ SARRAY - Suffix Array (problem author: Felix Halim), IOI 2008
LCP found so far can be increased. After one O(n) pass, we will be able to determine the - Type Printer (DFS traversal of Suffix Trie).
LCS. In Figure 6.6, this happens when i = 7, as suffix SA[7] = suffix 1 = “ATAGACA$CATA#” Also see Section 8.7 for some harder problems that uses (Suffix) Trie data struc-
(owned by T1 ) and its previous suffix SA[6] = suffix 10 = “ATA#” (owned by T2 ) have a ture as sub-routine.
common prefix of length 3 which is “ATA”. This is the LCS.
Finally, we close this section and this chapter by highlighting the availability of our source
code. Please spend some time understanding the source code which may not be trivial for
those who are new with Suffix Array. Profile of Data Structure Inventors
Source code: ch6/sa lcp.cpp|java|py|ml Udi Manber is an Israeli computer scientist. He works in Google as one of their vice
presidents of engineering. Along with Gene Myers, Manber invented Suffix Array data
Exercise 6.5.5.1*: Suggest some possible improvements to the stringMatching() function structure in 1991.
shown in this section so that the time complexity improves to O(m + log n)! Eugene “Gene” Wimberly Myers, Jr. is an American computer scientist and bioin-
Exercise 6.5.5.2*: Compare the KMP algorithm shown in Section 6.4 and Rabin-Karp formatician, who is best known for his development of the BLAST (Basic Local Alignment
algorithm in Section 6.6 with the string matching feature of Suffix Array, then decide a rule Search Tool) tool for sequence analysis. His 1990 paper that describes BLAST has received
of thumb on when it is better to use Suffix Array to deal with string matching and when it over 24 000 citations making it among the most highly cited paper ever. He also invented
is better to use KMP, Rabin-Karp, or just standard string libraries. Suffix Array with Udi Manber.
Exercise 6.5.5.3*: Solve the exercises on Suffix Tree applications using Suffix Array instead:
• Exercise 6.5.3.4* (repeated substrings that occurs the most, and if ties, the longest),
• Exercise 6.5.3.5* (LRS with no overlap),
• Exercise 6.5.3.6* (LCS of n 2 strings),
• Exercise 6.5.3.7* (LCS of k out of n strings where k n), and
24
• Exercise 6.5.3.8* (LCE of T given i and j). You can try solving these problems with Suffix Tree, but you have to learn how to code the Suffix Tree
construction algorithm by yourself. The programming problems listed here are solvable with Suffix Array.
25
Min Lexicographic Rotation is a problem of finding the rotation of a string with the lowest lexicographical
order of all possible rotations. For example, the lexicographically minimal rotation of “CGAGTC][AGCT”
23
With three or more strings, this check will have more ‘if statements’. (emphasis of ‘][’ added) is “AGCTCGAGTC”.
353 354
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.6. STRING MATCHING WITH HASHING c Steven, Felix, Suhendry
355 356
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry 6.6. STRING MATCHING WITH HASHING c Steven, Felix, Suhendry
357 358
CHAPTER 6. STRING PROCESSING c Steven, Felix, Suhendry
6.7.1 Anagram
An anagram is a word (or phrase/string) whose letters (characters) can be rearranged
to obtain another word, e.g., ‘elevenplustwo’ is an anagram of ‘twelveplusone’. Two
words/strings that have di↵erent lengths are obviously not anagram.
Sorting Solution
The common strategy to check if two equal-length words/strings of n characters are anagram
is to sort the letters of the words/strings and compare the results. For example, take wordA
= ‘cab’, wordB = ‘bca’. After sorting, wordA = ‘abc’ and wordB = ‘abc’ too, so they
are anagram. Review Book 1 for various sorting techniques. This runs in O(n log n).
6.7.2 Palindrome
A palindrome is a word (or a sequence/string) that can be read the same way in either
direction. For example, ‘ABCDCBA’ is a palindrome.
359