

Python 正则表达式

章鱼出海

2025-10-06

导读：正则表达式是处理字符串的强力工具，它使用特定的语法规则来描述字符串的模式，可以用于验证、搜索、替换和提取字符串内容。

正则表达式是处理字符串的强力工具，它使用特定的语法规则来描述字符串的模式，可以用于验证、搜索、替换和提取字符串内容。

正则表达式基础语法

1. 基本匹配规则

import re

def basic_patterns():
    """Show four elementary pattern kinds and print what each one finds.

    Covers a literal search, a fixed-count digit match, a run of word
    characters, and the ``.`` wildcard. Results are printed; nothing is
    returned.
    """
    # Literal text: search() yields the first occurrence, or None.
    literal = r'hello'
    sentence = 'hello world'
    found = re.search(literal, sentence)
    shown = found.group() if found else '不匹配'
    print(f"精确匹配 '{literal}' 在 '{sentence}': {shown}")

    # The remaining demos all collect every hit with findall().
    findall_cases = (
        # exactly three consecutive digits
        ('数字匹配', r'\d{3}', '我的电话是123，你的电话是456'),
        # one or more word characters (letters, digits, underscore)
        ('字母数字匹配', r'\w+', 'user_name123 和 test-var'),
        # "py" followed by any single character
        ('任意字符匹配', r'py.', 'pyc, pyo, py!, python'),
    )
    for label, regex, subject in findall_cases:
        hits = re.findall(regex, subject)
        print(f"{label} '{regex}' 在 '{subject}': {hits}")

def quantifiers():
    """Apply a table of quantifier patterns to a fixed set of sample strings.

    For every pattern a header is printed, followed by one line per sample
    for which ``findall`` returned a non-empty list.
    """
    samples = (
        'a', 'ab', 'abc', 'abcd',
        'a b c', 'a  b   c',
        '100', '1000', '10000',
    )

    # (pattern, human-readable description shown in the header)
    cases = (
        (r'a?', '0或1个a'),
        (r'a*', '0或多个a'),
        (r'a+', '1或多个a'),
        (r'a{2}', '2个a'),
        (r'a{2,3}', '2-3个a'),
        (r'\s+', '1或多个空格'),
        (r'\d{3,5}', '3-5个数字'),
    )

    for regex, label in cases:
        print(f"\n模式 '{regex}' ({label}):")
        for sample in samples:
            hits = re.findall(regex, sample)
            if not hits:
                continue
            print(f"  '{sample}' -> {hits}")

if __name__ == "__main__":
    # Run this section's demos in order.
    for demo in (basic_patterns, quantifiers):
        demo()

2. 字符集和范围

import re

def character_sets():
    """Print which characters of each sample fall into various character classes.

    Every class in the table is applied to every sample with ``findall``;
    samples without any hit are skipped.
    """
    samples = (
        'abc', '123', 'a1b2', 'A_B',
        'hello-world', 'test@example.com',
    )

    # (character class, description shown in the header)
    classes = (
        (r'[abc]', '匹配a,b,c任意字符'),
        (r'[0-9]', '匹配数字'),
        (r'[a-z]', '匹配小写字母'),
        (r'[A-Za-z]', '匹配所有字母'),
        (r'[0-9a-zA-Z_]', '匹配字母数字下划线'),
        (r'[^0-9]', '匹配非数字字符'),
        (r'[\w\-]', '匹配字母数字下划线和横线'),
    )

    for regex, label in classes:
        print(f"\n模式 '{regex}' ({label}):")
        per_sample = ((sample, re.findall(regex, sample)) for sample in samples)
        for sample, hits in per_sample:
            if hits:
                print(f"  '{sample}' -> {hits}")

def word_and_boundary():
    """Print where word-boundary and anchor patterns first match in each sample.

    ``search`` is used, so only the first match per sample is reported, as a
    ``(start, end)`` span. Samples that do not match are skipped.
    """
    samples = (
        'hello world', 'helloworld',
        'the theme', 'them',
        'cat category catfish',
    )

    # (pattern, description shown in the header)
    probes = (
        (r'\bhello\b', '完整单词hello'),
        (r'\bthe\b', '完整单词the'),
        (r'\bcat\b', '完整单词cat'),
        (r'^hello', '行首的hello'),
        (r'world$', '行尾的world'),
    )

    for regex, label in probes:
        print(f"\n模式 '{regex}' ({label}):")
        for sample in samples:
            hit = re.search(regex, sample)
            if hit is None:
                continue
            print(f"  '{sample}' -> 匹配位置: {hit.span()}")

def alternation():
    """Print the first match of several alternation patterns in each sample.

    Demonstrates ``(A|B)`` alternatives and an optional group. Samples that
    do not match a pattern are skipped.
    """
    samples = (
        'python', 'Python', 'PYTHON',
        'java', 'javascript', 'javac',
    )

    # (pattern, description shown in the header)
    choices = (
        (r'(P|p)ython', 'Python或python'),
        (r'java(script)?', 'java或javascript'),
        (r'(java|python)', 'java或python'),
    )

    for regex, label in choices:
        print(f"\n模式 '{regex}' ({label}):")
        for sample in samples:
            hit = re.search(regex, sample)
            if hit is not None:
                print(f"  '{sample}' -> 匹配: {hit.group()}")

if __name__ == "__main__":
    # Run this section's demos in order.
    for demo in (character_sets, word_and_boundary, alternation):
        demo()

re 模块核心功能

1. 匹配和搜索

import re

def match_vs_search():
    """Contrast ``re.match`` and ``re.search`` on the same sentence.

    ``match`` only succeeds at the start of the string, so it reports a miss
    for every word but the first; ``search`` scans the whole string and
    prints only the words it finds.
    """
    sentence = 'hello world, welcome to python programming'
    words = (r'hello', r'world', r'python', r'programming')

    print("=== match() 方法 ===")
    for word in words:
        anchored = re.match(word, sentence)
        if anchored is None:
            print(f"'{word}' 不匹配")
            continue
        print(f"'{word}' 匹配: {anchored.group()} (位置: {anchored.span()})")

    print("\n=== search() 方法 ===")
    for word in words:
        anywhere = re.search(word, sentence)
        if anywhere is not None:
            print(f"'{word}' 找到: {anywhere.group()} (位置: {anywhere.span()})")

def findall_finditer():
    """Extract phone numbers with ``findall`` and again with ``finditer``.

    ``findall`` gives the matched strings as a list; ``finditer`` yields
    match objects, from which the text and span of each hit are printed.
    """
    text = '我的电话是123-4567，工作电话是890-1234，紧急联系是555-6789'

    # Three digits, a hyphen, four digits.
    phone_re = re.compile(r'\d{3}-\d{4}')

    print("=== findall() 返回匹配字符串列表 ===")
    print(f"找到的电话号码: {phone_re.findall(text)}")

    print("\n=== finditer() 返回匹配对象迭代器 ===")
    ordinal = 0
    for hit in phone_re.finditer(text):
        ordinal += 1
        print(f"电话 {ordinal}: {hit.group()} (位置: {hit.span()})")

def fullmatch_demo():
    """Show that ``re.fullmatch`` succeeds only when the whole string matches.

    Each (text, pattern) pair is checked and one line is printed saying
    whether the text matched in full.
    """
    # (text to test, pattern it must match from start to end)
    cases = (
        ('123-4567', r'\d{3}-\d{4}'),
        ('123-456', r'\d{3}-\d{4}'),
        ('abc123', r'[a-z]+\d{3}'),
        ('abc12', r'[a-z]+\d{3}'),
    )

    print("=== fullmatch() 完全匹配 ===")
    for subject, regex in cases:
        whole = re.fullmatch(regex, subject)
        if whole is None:
            print(f"'{subject}' 不完全匹配 '{regex}'")
        else:
            print(f"'{subject}' 完全匹配 '{regex}': {whole.group()}")

if __name__ == "__main__":
    # Run this section's demos in order.
    for demo in (match_vs_search, findall_finditer, fullmatch_demo):
        demo()

2. 字符串分割和替换

import re

def split_demo():
    """Split several strings on regex delimiters and print the pieces.

    Each case supplies the input text, the delimiter pattern, and the label
    printed in front of the result.
    """
    # (input text, delimiter pattern, label)
    cases = (
        ('a b   c', r'\s+', '分割空格'),
        ('a,b, c  d', r'[\s,]+', '分割空格和逗号'),
        ('a,b;; c  d', r'[\s,;]+', '分割空格、逗号和分号'),
        ('2023-08-15', r'-', '按横线分割日期'),
        ('name=John&age=30&city=NY', r'[&=]', '分割URL参数'),
    )

    print("=== 正则表达式分割字符串 ===")
    for subject, delimiter, label in cases:
        pieces = re.split(delimiter, subject)
        print(f"{label}: '{subject}' -> {pieces}")

def sub_demo():
    """Mask dates, times, phone numbers and digit runs in a sample sentence.

    Every substitution is applied independently to the original text (they
    are not chained), and each result is printed with its label.
    """
    text = '今天是2023-08-15，会议在10:30开始，联系电话：123-456-7890'

    # (pattern to find, replacement text, label)
    rules = (
        (r'\d{4}-\d{2}-\d{2}', 'XXXX-XX-XX', '替换日期'),
        (r'\d{2}:\d{2}', 'XX:XX', '替换时间'),
        (r'\d{3}-\d{3}-\d{4}', 'XXX-XXX-XXXX', '替换电话号码'),
        (r'\d+', '#', '替换所有数字'),
    )

    print("=== 正则表达式替换 ===")
    print(f"原始文本: {text}\n")

    for regex, mask, label in rules:
        masked = re.sub(regex, mask, text)
        print(f"{label}: {masked}")

def sub_with_callback():
    """Replace dollar prices with yuan prices using a function as the replacement.

    ``re.sub`` calls the callback once per match and substitutes whatever
    string it returns.
    """
    text = '价格分别是 $10.50, $20.00, 和 $15.75'

    # A dollar sign followed by digits, a dot and exactly two decimals;
    # group 1 is the numeric part.
    price_re = r'\$(\d+\.\d{2})'

    def to_rmb(hit):
        """Convert the captured dollar amount at an assumed rate of 7.2."""
        dollars = float(hit.group(1))
        return f'¥{dollars * 7.2:.2f}'

    converted = re.sub(price_re, to_rmb, text)
    print(f"原始: {text}")
    print(f"转换: {converted}")

def subn_demo():
    """Show ``re.subn``, which returns the new string and the replacement count."""
    text = '苹果 香蕉 苹果 橙子 苹果 葡萄'

    # subn() returns a (new_string, number_of_substitutions) pair.
    outcome = re.subn(r'苹果', '水果', text)
    replaced, how_many = outcome

    print(f"原始文本: {text}")
    print(f"替换结果: {replaced}")
    print(f"替换次数: {how_many}")

if __name__ == "__main__":
    # Run the demos in order, printing a rule between consecutive ones.
    divider = "\n" + "="*50 + "\n"
    demos = (split_demo, sub_demo, sub_with_callback, subn_demo)
    for position, demo in enumerate(demos):
        if position:
            print(divider)
        demo()

3. 分组提取

import re

def basic_groups():
    """Pull an area code and a number out of a phone string with two groups.

    Prints the whole match, each group, the tuple of groups, and the
    (empty, since no group is named) group dictionary.
    """
    text = '联系电话：010-12345678'

    # Group 1: three-digit area code; group 2: eight-digit number.
    found = re.search(r'(\d{3})-(\d{8})', text)
    if found is None:
        return

    area_code, number = found.groups()
    print(f"完整匹配: {found.group(0)}")
    print(f"区号: {area_code}")
    print(f"号码: {number}")
    print(f"所有分组: {found.groups()}")
    print(f"分组字典: {found.groupdict()}")

def named_groups():
    """Extract name, age and address from a record using named groups.

    The captured values are read back by group name and printed, followed
    by the full group dictionary.
    """
    text = '张三,30岁,住在北京市朝阳区'

    # (?P<label>...) lets each captured part be retrieved by name.
    record_re = r'(?P<name>\w+),(?P<age>\d+)岁,住在(?P<address>.+)'
    found = re.search(record_re, text)
    if found is None:
        return

    fields = found.groupdict()
    print("=== 命名分组 ===")
    print(f"完整匹配: {found.group(0)}")
    print(f"姓名: {fields['name']}")
    print(f"年龄: {fields['age']}")
    print(f"地址: {fields['address']}")
    print(f"分组字典: {fields}")

def email_parsing():
    """Split sample e-mail addresses into user, domain and suffix.

    Addresses that match the pattern are printed with their three captured
    parts; the rest are reported as invalid.
    """
    candidates = (
        'user@example.com',
        'john.doe@company.co.uk',
        'invalid-email',
        'name@domain',
    )

    # Group 1: local part; group 2: domain up to the last dot;
    # group 3: final label of at least two letters.
    email_re = r'^([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+)\.([a-zA-Z]{2,})$'

    for candidate in candidates:
        found = re.match(email_re, candidate)
        if found is None:
            print(f"无效邮箱: {candidate}")
            continue
        user, domain, suffix = found.groups()
        print(f"有效邮箱: {candidate}")
        print(f"  用户名: {user}")
        print(f"  域名: {domain}")
        print(f"  后缀: {suffix}")

def time_parsing():
    """Validate ``H:M:S`` strings and print their hour, minute and second parts.

    Hours must be 0-23 and minutes/seconds 0-59; a leading zero is optional
    in every field. Strings outside those ranges are reported as invalid.
    """
    candidates = (
        '19:05:30',
        '9:5:3',
        '23:59:59',
        '24:00:00',  # hour out of range, expected to be rejected
    )

    # Hours: 0-19 with optional leading digit, or 20-23.
    # Minutes and seconds: 0-59 with optional leading digit.
    clock_re = r'^([01]?[0-9]|2[0-3]):([0-5]?[0-9]):([0-5]?[0-9])$'

    for candidate in candidates:
        found = re.match(clock_re, candidate)
        if found is None:
            print(f"无效时间: {candidate}")
            continue
        hh, mm, ss = found.group(1, 2, 3)
        print(f"有效时间: {candidate}")
        print(f"  时: {hh}, 分: {mm}, 秒: {ss}")

def nested_groups():
    """Take apart a single HTML element into tag name, attributes and content.

    The pattern uses a back-reference (``\\1``) so the closing tag must repeat
    the opening tag's name, and a non-capturing group around the optional
    attribute section.
    """
    text = '<div class="header">标题</div>'

    # Group 1: tag name; group 2: attribute text (optional); group 3: inner
    # text without any nested tag.
    element_re = r'<(\w+)(?:\s+([^>]*))?>([^<]*)</\1>'
    found = re.search(element_re, text)
    if found is None:
        return

    tag, attributes, content = found.groups()
    print("=== HTML标签解析 ===")
    print(f"完整匹配: {found.group(0)}")
    print(f"标签名: {tag}")
    print(f"属性: {attributes}")
    print(f"内容: {content}")
    print(f"所有分组: {found.groups()}")

if __name__ == "__main__":
    # Run the demos in order, printing a rule between consecutive ones.
    divider = "\n" + "="*50 + "\n"
    demos = (basic_groups, named_groups, email_parsing, time_parsing, nested_groups)
    for position, demo in enumerate(demos):
        if position:
            print(divider)
        demo()

4. 贪婪 vs 非贪婪匹配

import re

def greedy_vs_lazy():
    """Compare ``.*`` with ``.*?`` on two adjacent ``<div>`` elements.

    The greedy form spans from the first opening tag to the last closing
    tag; the lazy form stops at the nearest closing tag, so ``findall``
    returns each element separately.
    """
    text = '<div>内容1</div><div>内容2</div>'
    print(f"原始文本: {text}")

    # Greedy: .* consumes as much as it can.
    greedy_re = r'<div>.*</div>'
    widest = re.search(greedy_re, text)
    widest_text = widest.group() if widest else '无匹配'
    print(f"\n贪婪匹配 '{greedy_re}':")
    print(f"匹配结果: {widest_text}")

    # Lazy: .*? consumes as little as it can.
    lazy_re = r'<div>.*?</div>'
    narrowest = re.findall(lazy_re, text)
    print(f"\n非贪婪匹配 '{lazy_re}':")
    print(f"匹配结果: {narrowest}")

def number_example():
    """Show how greediness decides which group gets a number's trailing zeros.

    With a greedy ``\\d+`` the first group swallows every digit and the
    second group is empty; with the lazy ``\\d+?`` the trailing zeros end up
    in the second group.
    """
    text = '102300'
    print(f"原始数字: {text}")

    # (label, pattern): greedy first, then the lazy variant.
    variants = (
        ('贪婪匹配', r'^(\d+)(0*)$'),
        ('非贪婪匹配', r'^(\d+?)(0*)$'),
    )
    for label, regex in variants:
        found = re.match(regex, text)
        if found is None:
            continue
        head, zeros = found.group(1), found.group(2)
        print(f"\n{label} '{regex}':")
        print(f"分组1: {head}")
        print(f"分组2: {zeros}")

def html_extraction():
    """Extract ``<div>`` contents from a small HTML snippet in three ways.

    1. A greedy pattern, where ``.*`` runs to the last possible ``>`` so the
       single match captures only the whitespace before the final ``</div>``.
    2. A lazy pattern, which captures the body of each ``<div>`` separately.
    3. A pattern pinned to the ``article`` class, which captures that one body.

    All searches use ``re.DOTALL`` so ``.`` also crosses line breaks.
    """
    html_text = '''
    <div class="article">
        <h1>标题</h1>
        <p>第一段内容</p>
        <p>第二段内容</p>
    </div>
    <div class="comments">
        <p>评论1</p>
        <p>评论2</p>
    </div>
    '''

    def show_captures(regex):
        # Print the first 50 characters of every capture, numbered from 1.
        captures = re.findall(regex, html_text, re.DOTALL)
        for number, captured in enumerate(captures, 1):
            print(f"匹配 {number}: {captured[:50]!r}...")

    print("=== HTML内容提取 ===")

    print("\n1. 贪婪匹配 (错误):")
    show_captures(r'<div.*>(.*)</div>')

    print("\n2. 非贪婪匹配 (正确):")
    show_captures(r'<div.*?>(.*?)</div>')

    print("\n3. 精确匹配:")
    article_re = r'<div class="article">(.*?)</div>'
    article = re.search(article_re, html_text, re.DOTALL)
    if article is not None:
        print(f"文章内容: {article.group(1)[:100]!r}...")

if __name__ == "__main__":
    # Run the demos in order, printing a rule between consecutive ones.
    divider = "\n" + "="*50 + "\n"
    for position, demo in enumerate((greedy_vs_lazy, number_example, html_extraction)):
        if position:
            print(divider)
        demo()

5. 预编译和性能优化

import re
import time

def compile_demo():
    """Validate sample e-mails and phone numbers with precompiled patterns.

    Each pattern is compiled once with ``re.compile`` and the resulting
    object is reused for every sample; one verdict line is printed per sample.
    """
    # Compile once, reuse for every candidate below.
    email_re = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
    # Three digits, a hyphen, then three to eight digits.
    phone_re = re.compile(r'^\d{3}-\d{3,8}$')

    emails = (
        'user@example.com',
        'invalid.email',
        'name@domain.org',
        'test@sub.domain.com',
    )
    phones = (
        '010-12345',
        '123-456789',
        'invalid-phone',
        '020-8086',
    )

    print("=== 邮箱验证 ===")
    for email in emails:
        verdict = "有效邮箱" if email_re.match(email) else "无效邮箱"
        print(f"{verdict}: {email}")

    print("\n=== 电话验证 ===")
    for phone in phones:
        verdict = "有效电话" if phone_re.match(phone) else "无效电话"
        print(f"{verdict}: {phone}")

def performance_comparison():
    """Time ``re.findall`` with a pattern string against a precompiled pattern.

    Runs the same number of passes of each variant over the same text and
    prints both elapsed times and their ratio. The ``re`` module caches
    recently used patterns, so the measured gap mostly reflects the
    per-call cache lookup rather than repeated compilation.
    """
    text = '这是一个测试文本，包含很多需要匹配的内容。' * 1000
    pattern = r'测试|匹配|内容'
    rounds = 1000

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() follows the wall clock and may jump.
    start_time = time.perf_counter()
    for _ in range(rounds):
        re.findall(pattern, text)
    direct_time = time.perf_counter() - start_time

    # Same workload through a pattern object compiled once up front.
    compiled_re = re.compile(pattern)
    start_time = time.perf_counter()
    for _ in range(rounds):
        compiled_re.findall(text)
    compiled_time = time.perf_counter() - start_time

    # Guard the ratio so a zero-length measurement cannot raise
    # ZeroDivisionError.
    speedup = direct_time / compiled_time if compiled_time > 0 else float('inf')

    print("=== 性能比较 ===")
    print(f"直接使用耗时: {direct_time:.4f}秒")
    print(f"预编译使用耗时: {compiled_time:.4f}秒")
    print(f"性能提升: {speedup:.2f}倍")

def flags_demo():
    """Show how ``re`` flags change what the same patterns match.

    The sample text has three lines. Each table entry is run through
    ``findall`` with its flags (or none) and the result list is printed.
    """
    text = '''Hello WORLD
    this is a MULTI-line
    TEXT example'''

    # Entries are (pattern, description) or (pattern, flags, description).
    cases = [
        (r'^hello', '默认标志 - 不匹配'),
        (r'^hello', re.IGNORECASE, '忽略大小写 - 匹配'),
        (r'^hello', re.IGNORECASE | re.MULTILINE, '多行模式 - 匹配每行开头'),
        (r'.+', re.DOTALL, '点号匹配所有字符（包括换行）'),
        (r'^[a-z ]+$', re.IGNORECASE | re.MULTILINE, '多行+忽略大小写'),
    ]

    print("原始文本:")
    print(text)
    print("\n=== 标志位演示 ===")

    for regex, *optional_flags, label in cases:
        # A missing flags slot means "no flags", i.e. 0.
        flags = optional_flags[0] if optional_flags else 0
        found = re.findall(regex, text, flags=flags)

        print(f"\n{label}: '{regex}'")
        print(f"匹配结果: {found}")

if __name__ == "__main__":
    # Run the demos in order, printing a rule between consecutive ones.
    divider = "\n" + "="*50 + "\n"
    for position, demo in enumerate((compile_demo, performance_comparison, flags_demo)):
        if position:
            print(divider)
        demo()

实践练习

1. 邮箱验证

import re

def is_valid_email(addr):
    """Return True if *addr* has the shape ``local@domain.tld``.

    The local part may contain letters, digits and ``._%+-``; the domain may
    contain letters, digits, dots and hyphens; the final label must be at
    least two letters.

    ``re.fullmatch`` is used instead of ``re.match`` with a trailing ``$``
    because ``$`` also matches just before a final newline, which would
    accept an address followed by a line break.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    return re.fullmatch(pattern, addr) is not None

def test_email_validation():
    """Print a valid/invalid verdict from ``is_valid_email`` for sample addresses."""
    candidates = (
        'someone@gmail.com',
        'bill.gates@microsoft.com',
        'bob#example.com',
        'mr-bob@example.com',
        'test.user+tag@sub.domain.co.uk',
        'invalid.email',
        'name@domain',
        '@domain.com',
        'user@.com',
    )

    print("=== 邮箱验证测试 ===")
    for candidate in candidates:
        verdict = "有效" if is_valid_email(candidate) else "无效"
        # Pad the address to 30 columns so the verdicts line up.
        print(f"{candidate:30} -> {verdict}")

def name_of_email(addr):
    """Extract a display name from an e-mail address string.

    Two forms are recognised, tried in this order:

    * ``<Name> user@host.tld`` -- returns the text between the angle brackets;
    * ``user@host.tld`` -- returns the local part before ``@``.

    The whole string must match. ``re.fullmatch`` is used instead of
    ``re.match`` with a trailing ``$`` because ``$`` also matches just before
    a final newline, which would accept an address followed by a line break.

    Returns the captured name, or None when *addr* has neither form.
    """
    forms = (
        # "<Name> address": capture the bracketed name.
        r'<([^>]+)>\s*[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
        # Bare address: capture the local part.
        r'([a-zA-Z0-9._%+-]+)@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
    )
    for form in forms:
        found = re.fullmatch(form, addr)
        if found is not None:
            return found.group(1)
    return None

def test_name_extraction():
    """Print what ``name_of_email`` extracts from each sample input."""
    samples = (
        '<Tom Paris> tom@voyager.org',
        'tom@voyager.org',
        '<John Doe> john.doe@example.com',
        'alice.smith@company.co.uk',
        'invalid-email',
    )

    print("\n=== 邮箱名字提取测试 ===")
    for sample in samples:
        extracted = name_of_email(sample)
        # Pad the input to 35 columns so the results line up.
        print(f"{sample:35} -> {extracted}")

if __name__ == "__main__":
    test_email_validation()
    test_name_extraction()

    # Sanity checks for the exercise.
    print("\n=== 练习验证 ===")
    assert is_valid_email('someone@gmail.com')
    assert is_valid_email('bill.gates@microsoft.com')
    assert not is_valid_email('bob#example.com')
    # The local-part character class in is_valid_email() includes '-', so a
    # hyphenated address is accepted. Asserting the opposite aborted the
    # script with AssertionError before the success message was printed.
    assert is_valid_email('mr-bob@example.com')
    print('邮箱验证测试通过!')

    assert name_of_email('<Tom Paris> tom@voyager.org') == 'Tom Paris'
    assert name_of_email('tom@voyager.org') == 'tom'
    print('名字提取测试通过!')

2. 实用正则表达式示例

import re

def practical_examples():
    """Run five small regex recipes and print the outcome of each.

    The recipes are: URL extraction, extraction of runs of CJK characters,
    a password strength check, HTML tag stripping, and a shape check for
    18-character ID card numbers.
    """

    def is_strong(candidate):
        # Three lookaheads demand a lowercase letter, an uppercase letter and
        # a digit somewhere in the string; .{8,} demands at least 8 characters.
        rule = r'^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$'
        return re.match(rule, candidate) is not None

    def looks_like_id(candidate):
        # Shape check only: six digits (first non-zero), a year starting with
        # 18/19/20, month 01-12, day 01-31, three digits, then a digit or X/x.
        # The final check character is not verified arithmetically.
        rule = r'^[1-9]\d{5}(18|19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]$'
        return re.match(rule, candidate) is not None

    # 1. URL extraction: scheme, "://", then everything up to whitespace.
    print("=== URL提取 ===")
    url_source = "访问 https://www.example.com 和 http://sub.domain.org/path?query=1"
    found_urls = re.findall(r'https?://[^\s]+', url_source)
    print(f"文本: {url_source}")
    print(f"提取的URL: {found_urls}")

    # 2. Runs of characters in the U+4E00..U+9FFF range.
    print("\n=== 中文提取 ===")
    mixed_source = "Hello 世界！这是中文文本。English words here."
    cjk_runs = re.findall(r'[\u4e00-\u9fff]+', mixed_source)
    print(f"文本: {mixed_source}")
    print(f"中文词汇: {cjk_runs}")

    # 3. Password strength.
    print("\n=== 密码强度验证 ===")
    for candidate in ('weak', 'Weak1', 'Strong123', 'VERYstrong123'):
        grade = "强" if is_strong(candidate) else "弱"
        print(f"密码 '{candidate}': {grade}")

    # 4. Strip anything that looks like a tag.
    print("\n=== HTML标签清理 ===")
    markup = '<div class="test">Hello <b>World</b>!</div>'
    stripped = re.sub(r'<[^>]+>', '', markup)
    print(f"原始HTML: {markup}")
    print(f"清理后: {stripped}")

    # 5. ID card number shape.
    print("\n=== 身份证号验证 ===")
    for candidate in ('11010119900307567X', '123456', '510101198501011234'):
        verdict = "有效" if looks_like_id(candidate) else "无效"
        print(f"身份证 {candidate}: {verdict}")

def advanced_email_parser():
    """Split sample e-mail addresses into named parts using a verbose pattern.

    Addresses that match are printed with their local part, domain and
    top-level label; the rest are reported as invalid.
    """

    def parse_email(email):
        """Return the named groups of *email* as a dict, or None on no match."""
        # Verbose-mode pattern: whitespace and the "#" notes inside the
        # string are ignored by the regex engine under re.VERBOSE.
        pattern = r'''
            ^
            (?P<local>[a-zA-Z0-9._%+-]+)   # 本地部分
            @
            (?P<domain>[a-zA-Z0-9.-]+)      # 域名
            \.
            (?P<tld>[a-zA-Z]{2,})           # 顶级域名
            $
        '''
        found = re.match(pattern, email, re.VERBOSE)
        return found.groupdict() if found else None

    samples = (
        'user.name+tag@sub.example.co.uk',
        'test@localhost',
        'invalid.email@com',
    )

    print("=== 高级邮箱解析 ===")
    for sample in samples:
        parts = parse_email(sample)
        if not parts:
            print(f"无效邮箱: {sample}")
            continue
        print(f"邮箱: {sample}")
        print(f"  本地部分: {parts['local']}")
        print(f"  域名: {parts['domain']}")
        print(f"  顶级域名: {parts['tld']}")

if __name__ == "__main__":
    # Run the demos in order, printing a rule between consecutive ones.
    divider = "\n" + "="*50 + "\n"
    for position, demo in enumerate((practical_examples, advanced_email_parser)):
        if position:
            print(divider)
        demo()

总结

正则表达式核心要点：

基本语法：

\d - 数字，\w - 字母、数字和下划线，\s - 空白字符
. - 除换行符外的任意字符（re.DOTALL 下也匹配换行符），* - 0或多个，+ - 1或多个，? - 0或1个
{n} - n个，{n,m} - n到m个

字符集：

[abc] - 匹配a,b,c
[a-z] - 匹配小写字母
[^0-9] - 匹配非数字

分组和引用：

() - 捕获分组
(?:) - 非捕获分组
(?P<name>) - 命名分组

边界匹配：

^ - 字符串开头（re.MULTILINE 下为每行行首），$ - 字符串结尾（re.MULTILINE 下为每行行尾）
\b - 单词边界

贪婪与非贪婪：

默认贪婪：.*
非贪婪：.*?

re模块方法：

match() - 从头匹配
search() - 搜索匹配
findall() - 查找所有
sub() - 替换
split() - 分割

性能优化：

预编译常用模式
使用合适的标志位
避免过度复杂的正则表达式

正则表达式是处理文本的强大工具，熟练掌握可以大大提高开发效率。建议在实践中不断练习，逐步掌握各种复杂模式的使用。

【声明】内容源于网络

章鱼出海

跨境分享坊 | 每天提供跨境参考

内容 47037

粉丝 3

章鱼出海跨境分享坊 | 每天提供跨境参考

总阅读259.4k

粉丝3

内容47.0k