Better Data Science | Generate PDF Reports with
Python
● Install any library you don't have with the pip install <libraryname> command
● It's likely you won't have FPDF installed, so install it with:
○ pip install fpdf
In [1]:
# Standard library
import calendar
import os
import shutil
from datetime import datetime

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fpdf import FPDF
from matplotlib import rcParams

# Chart styling applied globally through matplotlib's rcParams.
# NOTE(review): the pyplot import path and both rcParams keys were garbled in
# the extracted text; 'axes.spines.top' / 'axes.spines.right' (hide the top
# and right axes borders) are reconstructions -- confirm against the notebook.
rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False
Data generation
● The generate_sales_data() function returns a pandas DataFrame with dummy data for
a given month
● The month is passed as an integer
In [2]:
def generate_sales_data(month: int) -> pd.DataFrame:
    """Return dummy daily sales data for one month of 2020.

    Args:
        month: Calendar month number, 1 (January) through 12 (December).

    Returns:
        A DataFrame with one row per day of the month and two columns:
        ``Date`` (the day) and ``ItemsSold`` (a random integer).

    Raises:
        ValueError: If ``month`` is outside 1..12 (raised by ``datetime``).
    """
    # calendar.monthrange returns (weekday_of_first_day, days_in_month);
    # the second element is the last day of the month.
    last_day = calendar.monthrange(2020, month)[1]
    dates = pd.date_range(
        start=datetime(year=2020, month=month, day=1),
        end=datetime(year=2020, month=month, day=last_day),
    )

    # `high` is exclusive, so values fall in 1000..1999.
    sales = np.random.randint(low=1000, high=2000, size=len(dates))

    return pd.DataFrame({
        'Date': dates,
        'ItemsSold': sales,
    })
In [10]:
# Generate the dummy sales data for month 3 (March); as the only expression in
# the cell, its value is presumably echoed by the notebook.
generate_sales_data(month=3)
Visualizing sales data
● plot() function visualizes a single sales month (time series)
● Instead of showing the figure, the function saves it to a file
○ Filename is specified by a parameter
In [3]:
def plot(data: pd.DataFrame, filename: str) -> None:
    """Save a line chart of one month of sales to an image file.

    The figure is written to disk instead of being shown.

    Args:
        data: DataFrame with a datetime ``Date`` column and an ``ItemsSold``
            column, e.g. the output of ``generate_sales_data``.
        filename: Path of the image file to write.
    """
    # Read the month from the first row by position, so the title still works
    # when the DataFrame index does not start at label 0.
    month = data['Date'].dt.month.iloc[0]

    plt.figure(figsize=(12, 4))
    try:
        plt.grid(color='#F2F2F2', alpha=1, zorder=0)
        # zorder=5 keeps the line drawn above the grid (zorder=0).
        plt.plot(data['Date'], data['ItemsSold'], color='#087E8B', lw=3, zorder=5)
        plt.title(f'Sales 2020/{month}', fontsize=17)
        plt.xlabel('Period', fontsize=13)
        plt.xticks(fontsize=9)
        plt.ylabel('Number of items sold', fontsize=13)
        plt.yticks(fontsize=9)
        plt.savefig(filename, dpi=300, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close()
In [4]:
# Generate December's data and save its chart as a quick check of plot().
# NOTE(review): the output file name was garbled in the extracted text;
# 'december.png' is a reconstruction -- confirm against the notebook.
december = generate_sales_data(month=12)
plot(data=december, filename='december.png')
Construct page elements
● The construct() function makes a directory for plots and then makes a sales chart for
every month in 2020 except January
○ January was excluded because we want to show how you can
have a different number of elements on a report page
○ Feel free to include it
■ Change for i in range(2, 13) to for i in range(1, 13)
● Once the visualizations are saved, they are appended to a list of lists
(matrix)
○ Max of 3 elements per row
○ Can be lower
○ A single row in this matrix represents a single page
■ If the row has 3 elements, the report page will
have 3 visualizations
In [5]:
PLOT_DIR = 'plots'


def construct():
    """Create the monthly sales charts and group their paths into pages.

    Recreates ``PLOT_DIR`` from scratch, saves one chart per month of 2020
    except January as ``<PLOT_DIR>/<month>.png``, and groups the chart paths
    three per page in month order.

    Returns:
        A list of pages, each a list of up to three image paths. With the
        eleven charts produced here that is three pages of three and a last
        page of two.
    """
    # Start from an empty plot directory: remove it if present, then create it.
    try:
        shutil.rmtree(PLOT_DIR)
    except FileNotFoundError:
        pass
    os.mkdir(PLOT_DIR)

    # Every month in 2020 except January.
    for month in range(2, 13):
        plot(data=generate_sales_data(month=month), filename=f'{PLOT_DIR}/{month}.png')

    # File names are strings, so sort on the integer month before the
    # extension; a plain string sort would put '10.png' ahead of '2.png'.
    names = sorted(os.listdir(PLOT_DIR), key=lambda name: int(name.split('.')[0]))
    paths = [f'{PLOT_DIR}/{name}' for name in names]

    # One inner list per report page, at most three charts each.
    per_page = 3
    return [paths[i:i + per_page] for i in range(0, len(paths), per_page)]
In [6]:
# Build the charts and keep the returned page layout (a list of lists of
# image paths) for the report step.
plots_per_page = construct()
● 4 pages in total
● First 3 have 3 plots per page
● The last one has only 2
In [7]:
# Bare expression: presumably echoes the page layout in the notebook output.
plots_per_page
PDF class
● Inherits from FPDF
○ All methods and properties are inherited
○ Don't forget to call super() in the constructor
● Class is used to generate report from the plots_per_page matrix
In [8]:
class PDF(FPDF):
    """Sales report document: a logo/title header, a page-number footer and
    up to three stacked chart images per page."""

    def __init__(self):
        super().__init__()
        # Page dimensions used for layout maths (A4 in millimetres --
        # presumably matching FPDF's default page format; confirm).
        self.WIDTH = 210
        self.HEIGHT = 297

    def header(self):
        """Draw the logo and the right-aligned report title.

        Per the FPDF documentation, this hook is called automatically for
        every page added with ``add_page``.
        """
        # Custom logo and positioning.
        # Create an `assets` folder and put any wide and short image inside.
        # NOTE(review): the image name was garbled in the extracted text;
        # 'assets/logo.png' is a reconstruction -- confirm.
        self.image('assets/logo.png', 10, 8, 33)
        self.set_font('Arial', 'B', 11)
        # Empty spacer cell that pushes the title towards the right edge.
        self.cell(self.WIDTH - 80)
        self.cell(60, 1, 'Sales report', 0, 0, 'R')
        self.ln(20)

    def footer(self):
        """Draw the centred page number at the bottom of the page."""
        # A negative y is measured up from the bottom of the page.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128)
        self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C')

    def page_body(self, images):
        """Place the page's chart images one below another.

        Args:
            images: Image paths for this page. The first three are drawn;
                an empty sequence draws nothing.
        """
        image_width = self.WIDTH - 30
        # Top edge of the first, second and third slot. The offsets are
        # derived from WIDTH (not HEIGHT), exactly as in the original layout.
        slot_tops = (25, self.WIDTH / 2 + 5, self.WIDTH / 2 + 90)
        # zip stops at the shorter input, so pages with one, two or three
        # images all use the same code path.
        for path, top in zip(images, slot_tops):
            self.image(path, 15, top, image_width)

    def print_page(self, images):
        """Add a new page to the report and draw ``images`` on it."""
        self.add_page()
        self.page_body(images)
● Instantiate it and create a report:
In [9]:
# Build the report: one PDF page per row of the plots_per_page matrix.
pdf = PDF()
for page_images in plots_per_page:
    pdf.print_page(page_images)

# 'F' writes the document to a local file.
# NOTE(review): the output file name was garbled in the extracted text;
# 'SalesReport.pdf' is a reconstruction -- confirm against the notebook.
pdf.output('SalesReport.pdf', 'F')