Python String Functions: Comprehensive Cheat Sheet
Python Built-in String Methods
Case Conversion
s = "Hello World"
s.lower() # "hello world"
s.upper() # "HELLO WORLD"
s.title() # "Hello World"
s.capitalize() # "Hello world"
s.swapcase() # "hELLO wORLD"
s.casefold() # "hello world" (aggressive lowercase)
String Testing/Validation
s = "Hello123"
s.isalpha() # False (contains numbers)
s.isdigit() # False (contains letters)
s.isalnum() # True (alphanumeric)
s.isascii() # True (ASCII characters only)
s.isdecimal() # False
s.isnumeric() # False
s.islower() # False
s.isupper() # False
s.istitle() # True (uppercase "H" follows no cased character; digits are uncased)
s.isspace() # False
s.isprintable() # True
s.isidentifier() # True (letters/digits only, does not start with a digit)
Whitespace & Trimming
s = " Hello World "
s.strip() # "Hello World" (both ends)
s.lstrip() # "Hello World " (left end)
s.rstrip() # " Hello World" (right end)
s.strip('Hd') # " Hello World " (unchanged: both ends are spaces, not 'H'/'d'; "Hello World".strip('Hd') gives "ello Worl")
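# Padding (results shown for s = "Hello World", 11 characters, padded to width 20)
s.center(20) # "    Hello World     " (4 spaces left, 5 right)
s.ljust(20) # "Hello World         " (9 trailing spaces)
s.rjust(20) # "         Hello World" (9 leading spaces)
s.zfill(20) # "000000000Hello World" (9 leading zeros)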
Finding & Searching
s = "Hello World Hello"
s.find('World') # 6 (first occurrence)
s.rfind('Hello') # 12 (last occurrence)
s.index('World') # 6 (like find, but raises exception if not found)
s.rindex('Hello') # 12 (like rfind, but raises exception)
s.count('Hello') # 2 (count occurrences)
s.startswith('Hello') # True
s.endswith('World') # False
s.startswith(('Hi', 'Hello')) # True (tuple of prefixes)
Replacement & Modification
s = "Hello World"
s.replace('World', 'Python') # "Hello Python"
s.replace('l', 'L', 1) # "HeLlo World" (replace first occurrence)
# Translation
trans = str.maketrans('aeiou', '12345')
s.translate(trans) # "H2ll4 W4rld"
# Remove characters
s.translate(str.maketrans('', '', 'aeiou')) # "Hll Wrld"
Splitting & Joining
s = "apple,banana,cherry"
s.split(',') # ['apple', 'banana', 'cherry']
s.split(',', 1) # ['apple', 'banana,cherry'] (max splits)
s.rsplit(',', 1) # ['apple,banana', 'cherry'] (right split)
# Advanced splitting
s = "apple\nbanana\tcherry"
s.splitlines() # ['apple', 'banana\tcherry']
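s.partition(',') # ('apple\nbanana\tcherry', '', '') here (no comma in s); for "apple,banana,cherry": ('apple', ',', 'banana,cherry')
s.rpartition(',') # ('', '', 'apple\nbanana\tcherry') here (no comma in s); for "apple,banana,cherry": ('apple,banana', ',', 'cherry')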
# Joining
','.join(['a', 'b', 'c']) # "a,b,c"
''.join(['H', 'e', 'l', 'l', 'o']) # "Hello"
Encoding & Decoding
s = "Hello"
s.encode('utf-8') # b'Hello'
s.encode('ascii') # b'Hello'
b'Hello'.decode('utf-8') # "Hello"
# Handle errors
s.encode('ascii', errors='ignore') # Ignore non-ASCII
s.encode('ascii', errors='replace') # Replace with ?
String Formatting
Old Style (% formatting)
name = "John"
age = 30
"Hello %s, you are %d years old" % (name, age)
"Hello %(name)s, age: %(age)d" % {'name': name, 'age': age}
# Format specifiers
"%d" % 42 # "42" (integer)
"%f" % 3.14159 # "3.141590" (float)
"%.2f" % 3.14159 # "3.14" (2 decimal places)
"%s" % "hello" # "hello" (string)
"%r" % "hello" # "'hello'" (repr)
New Style (.format())
name = "John"
age = 30
# Positional arguments
"Hello {}, you are {} years old".format(name, age)
"Hello {0}, you are {1} years old".format(name, age)
"Hello {1}, you are {0} years old".format(age, name) # Reorder
# Keyword arguments
"Hello {name}, you are {age} years old".format(name=name, age=age)
# Format specifications
"{:.2f}".format(3.14159) # "3.14"
"{:>10}".format("hello") # "     hello" (right align)
"{:<10}".format("hello") # "hello     " (left align)
"{:^10}".format("hello") # "  hello   " (center align)
"{:0>10}".format("hello") # "00000hello" (pad with zeros)
F-strings (Python 3.6+)
name = "John"
age = 30
price = 19.99
f"Hello {name}, you are {age} years old"
f"Price: ${price:.2f}" # "Price: $19.99"
f"Age in hex: {age:x}" # "Age in hex: 1e"
f"Name uppercase: {name.upper()}"
f"Expression: {2 + 3}" # "Expression: 5"
# Alignment and padding
f"{name:>10}" # "      John" (right align)
f"{name:<10}" # "John      " (left align)
f"{name:^10}" # "   John   " (center align)
f"{age:04d}" # "0030" (pad with zeros)
# Date formatting
from datetime import datetime
now = datetime.now()
f"Date: {now:%Y-%m-%d %H:%M:%S}"
Regular Expressions with Strings
Import and Basic Usage
import re
text = "The phone number is 123-456-7890"
pattern = r"\d{3}-\d{3}-\d{4}"
re.search(pattern, text) # Match object or None
re.match(pattern, text) # Match from start only
re.findall(pattern, text) # List of all matches
re.finditer(pattern, text) # Iterator of match objects
re.sub(pattern, "XXX", text) # Replace matches
re.split(pattern, text) # Split by pattern
Pattern Compilation
pattern = re.compile(r"\d+")
pattern.search(text)
pattern.findall(text)
pattern.sub("X", text)
# Flags
pattern = re.compile(r"hello", re.IGNORECASE | re.MULTILINE)
Common Patterns
# Email validation
email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
# Phone numbers
phone_pattern = r"\(\d{3}\)\s*\d{3}-\d{4}" # (123) 456-7890
# URLs
url_pattern = r"https?://[^\s]+"
# Dates
date_pattern = r"\d{1,2}/\d{1,2}/\d{4}" # MM/DD/YYYY
# Words only
word_pattern = r"\b[a-zA-Z]+\b"
# Numbers
number_pattern = r"-?\d+\.?\d*"
Pandas String Functions (.str accessor)
Basic Operations
import pandas as pd
df = pd.DataFrame({'text': ['Hello', 'World', 'Python']})
df['text'].str.lower() # Lowercase
df['text'].str.upper() # Uppercase
df['text'].str.title() # Title case
df['text'].str.capitalize() # Capitalize first letter
df['text'].str.swapcase() # Swap case
df['text'].str.len() # String length
String Testing
df['text'].str.isalpha() # All alphabetic
df['text'].str.isdigit() # All digits
df['text'].str.isalnum() # Alphanumeric
df['text'].str.islower() # All lowercase
df['text'].str.isupper() # All uppercase
df['text'].str.isspace() # All whitespace
df['text'].str.isnumeric() # Numeric
df['text'].str.isdecimal() # Decimal
Finding & Searching
df['text'].str.contains('hello', case=False) # Boolean mask
df['text'].str.contains('hello|world', regex=True) # Regex pattern
df['text'].str.startswith('He') # Starts with
df['text'].str.endswith('lo') # Ends with
df['text'].str.find('l') # Position of substring
df['text'].str.count('l') # Count occurrences
df['text'].str.match(r'[A-Z]') # Match regex at start
df['text'].str.fullmatch(r'[A-Za-z]+') # Full string match
Replacement & Extraction
df['text'].str.replace('l', 'L') # Replace substring
df['text'].str.replace(r'[aeiou]', 'X', regex=True) # Regex replace
df['text'].str.extract(r'([A-Z])') # Extract with groups
df['text'].str.extractall(r'([a-z])') # Extract all matches
df['text'].str.findall(r'[aeiou]') # Find all pattern matches
Splitting & Slicing
df['text'].str.split() # Split on whitespace
df['text'].str.split('l') # Split on character
df['text'].str.split(expand=True) # Split into columns
df['text'].str.rsplit('l', n=1) # Right split, max splits
df['text'].str.partition('l') # Partition into 3 parts
df['text'].str.rpartition('l') # Right partition
# Slicing
df['text'].str[0] # First character
df['text'].str[0:3] # First 3 characters
df['text'].str[-1] # Last character
df['text'].str[::2] # Every 2nd character
Padding & Alignment
df['text'].str.pad(10, side='left', fillchar='0') # Left pad
df['text'].str.pad(10, side='right', fillchar='-') # Right pad
df['text'].str.pad(10, side='both', fillchar='*') # Center pad
df['text'].str.center(10, fillchar='=') # Center align
df['text'].str.ljust(10, fillchar=' ') # Left justify
df['text'].str.rjust(10, fillchar=' ') # Right justify
df['text'].str.zfill(10) # Zero fill
Cleaning & Trimming
df['text'].str.strip() # Strip whitespace both ends
df['text'].str.lstrip() # Strip left whitespace
df['text'].str.rstrip() # Strip right whitespace
df['text'].str.strip('He') # Strip specific characters
df['text'].str.normalize('NFKD') # Unicode normalization
df['text'].str.encode('utf-8') # Encode to bytes
df['text'].str.decode('utf-8') # Decode from bytes
String Constants & Utilities
String Module Constants
import string
string.ascii_letters # 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
string.ascii_lowercase # 'abcdefghijklmnopqrstuvwxyz'
string.ascii_uppercase # 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
string.digits # '0123456789'
string.hexdigits # '0123456789abcdefABCDEF'
string.octdigits # '01234567'
string.punctuation # '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
string.printable # All printable characters
string.whitespace # ' \t\n\r\x0b\x0c'
String Template
from string import Template
template = Template('Hello $name, you are $age years old')
result = template.substitute(name='John', age=30)
# Safe substitute (leaves placeholders with missing keys, e.g. $age, unchanged instead of raising KeyError)
result = template.safe_substitute(name='John')
Advanced String Operations
Multiple String Operations
# Method chaining
text = " HELLO WORLD "
result = text.strip().lower().replace('world', 'python').title()
# "Hello Python"
# Multiple replacements
def multiple_replace(text, replacements):
    """Replace every key of *replacements* found in *text* with its value.

    All substitutions are applied in one left-to-right pass over the
    original text, so the output of one replacement is never rewritten by
    another. Chained ``str.replace`` calls would turn ``'ab'`` into ``'aa'``
    for ``{'a': 'b', 'b': 'a'}``; this returns ``'ba'``.

    Args:
        text: The string to transform.
        replacements: Mapping of substring -> replacement string.

    Returns:
        A new string with the substitutions applied. ``text`` is returned
        unchanged when there is nothing to replace.
    """
    import re  # local import so the snippet works when copied on its own

    # Empty keys would match at every position, so they are ignored.
    # Longest keys go first so an overlapping longer key wins over its
    # prefix; ``sorted`` is stable, so equal-length keys keep mapping order.
    keys = sorted((key for key in replacements if key), key=len, reverse=True)
    if not keys:
        return text

    # Keys are escaped so they match literally, not as regex syntax.
    pattern = re.compile('|'.join(re.escape(key) for key in keys))
    return pattern.sub(lambda match: replacements[match.group(0)], text)
replacements = {'hello': 'hi', 'world': 'python'}
multiple_replace('hello world', replacements)
String Performance Tips
# Use join for concatenating many strings
words = ['apple', 'banana', 'cherry']
result = ''.join(words) # Efficient
# Avoid: result = word1 + word2 + word3 # Inefficient for many strings
# Use f-strings for modern Python
name = "John"
f"Hello {name}" # Fast and readable
# Use in operator for membership testing
'hello' in 'hello world' # Efficient
'hello world'.find('hello') != -1 # Less efficient
Common String Patterns
# Remove non-alphanumeric characters
import re
clean_text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
# Title case with exceptions
def smart_title(text):
    """Title-case *text*, keeping short connecting words in lowercase.

    The text is split on whitespace. The first word is always capitalized;
    any later word found in the minor-word set is lowercased, and every
    other word is capitalized. Words are re-joined with single spaces.
    """
    minor_words = {'and', 'or', 'but', 'the', 'a', 'an', 'in', 'on', 'at', 'to'}

    def _style(position, token):
        # Only words after the first may stay lowercase.
        lowered = token.lower()
        if position > 0 and lowered in minor_words:
            return lowered
        return token.capitalize()

    return ' '.join(
        _style(position, token)
        for position, token in enumerate(text.split())
    )
# Reverse string
text[::-1] # Simple reverse
''.join(reversed(text)) # Alternative method
# Check if palindrome
def is_palindrome(s):
    """Return True if *s* reads the same forwards and backwards.

    Only letters and digits are compared; whitespace and punctuation are
    ignored, and the comparison is case-insensitive. Filtering uses
    ``str.isalnum`` rather than an ASCII-only character class, so
    non-ASCII letters (e.g. accented characters) take part in the
    comparison instead of being silently dropped.

    Args:
        s: The string to test.

    Returns:
        True when the alphanumeric characters of *s* form a palindrome.
        A string with no alphanumeric characters (including ``""``) is
        treated as a palindrome.
    """
    # casefold() is the aggressive, Unicode-aware form of lower().
    cleaned = ''.join(ch for ch in s if ch.isalnum()).casefold()
    return cleaned == cleaned[::-1]
Quick Reference - Most Used String Operations
# Essential String Methods (Top 24)
s.lower() s.upper() s.strip()
s.split() s.replace() s.find()
s.startswith() s.endswith() s.join()
s.isdigit() s.isalpha() s.count()
f"Hello {name}" s.format() s.encode()
b.decode() s.ljust() s.rjust() # decode() is a bytes method (b = s.encode()); str has no decode()
s.center() s.zfill() s.partition()
s.translate() s.casefold() len(s)