正则表达式：实用指南

基本模式

// Email validation (pragmatic; not RFC 5321 compliant).
// Shape: local part of letters/digits/._%+- , "@", domain of letters/digits/.- ,
// a dot, then a final label of 2 or more ASCII letters.
const emailRegex = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/

// URL matching: "http://" or "https://", optional "www.", 1–256 host characters,
// a dot, a 1–6 character suffix, then any run of path/query/fragment characters.
// The pattern is not anchored, so it finds a URL anywhere inside a longer string.
const urlRegex = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/

// IP address (IPv4): four dot-separated groups of 1–3 digits.
// Shape check only — out-of-range values such as 999.999.999.999 pass.
const ipv4Regex = /^(\d{1,3}\.){3}\d{1,3}$/
// Strict version with range validation: every octet must be 0–255.
// NOTE(review): leading zeros (e.g. "01", "001") are still accepted.
const strictIpv4Regex = /^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/

// Phone number (US format): optional "+1", a 3-digit area code with or without
// parentheses, then 3 digits and 4 digits; each part may be separated by one
// whitespace character, dot, or hyphen.
const phoneRegex = /^(\+1)?\s?(\(\d{3}\)|\d{3})[\s.-]?\d{3}[\s.-]?\d{4}$/

// Strong password: 8 or more characters drawn only from letters, digits and @$!%*?&,
// containing at least one lowercase letter, one uppercase letter, one digit and
// one of those symbols (each requirement is its own lookahead).
const passwordRegex = /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$/

// Credit card (major card networks); digits only, no spaces or dashes.
// Accepted shapes: leading 4 with 13 or 16 digits in total; 51–55 with 16 digits;
// 34 or 37 with 15 digits; 6011 or 65xx with 16 digits.
// Shape check only — no checksum (Luhn) validation is performed here.
const creditCardRegex = /^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})$/

正则表达式：解决实际问题的实用模式插图

高级特性

// Named capture groups: (?<name>...) exposes each captured part as match.groups.name
const dateRegex = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/
const match = '2026-01-15'.match(dateRegex)
// `match` is null when nothing matches and `groups` is optional in the type, so
// fall back to an empty object rather than destructuring a possibly-null value.
const { year, month, day } = match?.groups ?? {}  // { year: '2026', month: '01', day: '15' }

// Lookahead and lookbehind
// Positive lookahead: foo(?=bar) - foo followed by bar
const priceRegex = /\d+(?=\s*USD)/  // digits followed by USD
'100 USD 200 EUR'.match(priceRegex)  // ['100']

// Negative lookahead: foo(?!bar) - foo not followed by bar
// The lookahead also rejects a following digit. Without that, the engine
// backtracks from "100" to "10" (which is followed by "0", not "USD") and
// wrongly reports part of a USD amount.
// The identifier is kept as-is for compatibility; it means "number not followed by USD".
const nonNegative = /\d+(?!\d|\s*USD)/  // digits not followed by USD

// Positive lookbehind: (?<=foo)bar - bar preceded by foo
const afterDollar = /(?<=\$)\d+(?:\.\d{2})?/  // number right after $
'$100.50'.match(afterDollar)  // ['100.50']

// Negative lookbehind: (?<!foo)bar - bar not preceded by foo
// The lookbehind covers "$" plus any digits/dots already consumed, so the match
// cannot start in the middle of a dollar amount (a plain (?<!\$) would still
// match "00" in "$100" and "50" in "$100.50").
const notAfterDollar = /(?<!\$[\d.]*)\d+/

// Non-capturing group: (?:...) groups the alternation (http, https or ftp)
// without creating a numbered capture; the scheme must be followed by "://".
const nonCapturing = /(?:https?|ftp):\/\//

正则表达式：解决实际问题的实用模式插图

实际模式

// Parse a log line of the form "<ISO timestamp>Z <LEVEL> <service> - <message>".
// Each part is exposed through a named capture group.
const logRegex = /^(?<timestamp>\d{4}-\d{2}-\d{2}T[\d:.]+Z)\s+(?<level>ERROR|WARN|INFO|DEBUG)\s+(?<service>\w+)\s+-\s+(?<message>.+)$/

const log = '2026-01-15T10:30:00Z ERROR auth - Login failed for user@example.com'
// A non-matching line yields null, so the empty-object fallback leaves every field undefined.
const {
  timestamp,
  level,
  service,
  message,
} = logRegex.exec(log)?.groups ?? {}

// Extract query parameters
/**
 * Parses a URL query string into a flat key/value map.
 *
 * A leading "?" is optional. Parameters without "=" (e.g. a bare `flag`) are
 * skipped, and when a key repeats the last occurrence wins. Keys and values are
 * percent-decoded with `decodeURIComponent`; "+" is left as-is, not turned into
 * a space.
 *
 * @param qs - Query string such as `?a=1&b=two` or `a=1&b=two`.
 * @returns An object mapping each decoded key to its decoded value.
 * @throws URIError if a key or value contains a malformed percent sequence.
 */
function parseQueryString(qs: string): Record<string, string> {
  const params: Record<string, string> = {}
  // The key excludes "&" as well as "=", so a valueless parameter ("?flag&b=1")
  // can no longer be glued onto the following key.
  const pairPattern = /[?&]?([^=&]+)=([^&]*)/g
  for (const [, rawKey = '', rawValue = ''] of qs.matchAll(pairPattern)) {
    params[decodeURIComponent(rawKey)] = decodeURIComponent(rawValue)
  }
  return params
}

// camelCase to kebab-case
const toKebab = (str: string) => {
  // Insert a hyphen at every lowercase→uppercase boundary: "camelCase" → "camel-Case".
  const wordsSplit = str.replace(/([a-z])([A-Z])/g, '$1-$2')
  // Separate a run of capitals from the capitalised word after it: "XMLHttp" → "XML-Http".
  const acronymsSplit = wordsSplit.replace(/([A-Z]+)([A-Z][a-z])/g, '$1-$2')
  return acronymsSplit.toLowerCase()
}
// toKebab('camelCaseString') => 'camel-case-string'

// Truncate by words (not characters)
/**
 * Keeps at most `maxWords` whitespace-separated words of `text`, joined by
 * single spaces, and appends "..." only when words were actually dropped.
 *
 * @param text - Input text; surrounding whitespace is ignored.
 * @param maxWords - Maximum number of words to keep; negative values act as 0.
 * @returns The truncated text.
 */
const truncateWords = (text: string, maxWords: number) => {
  // Trim first: splitting " a b " directly yields empty strings at the ends,
  // which would be counted as words and trigger a spurious "...".
  const trimmed = text.trim()
  const words = trimmed === '' ? [] : trimmed.split(/\s+/)
  const limit = Math.max(0, maxWords)
  const kept = words.slice(0, limit).join(' ')
  return words.length > limit ? kept + '...' : kept
}

// Collapse every run of whitespace into a single space and strip both ends
const normalize = (s: string) => {
  const collapsed = s.replace(/\s+/g, ' ')
  return collapsed.trim()
}

// Extract mentions and hashtags (returned without the leading "@" / "#")
// "@" must not directly follow a word character, so the domain of an email
// address such as user@example.com is not reported as a mention.
const mentions = (text: string) => [...text.matchAll(/(?<!\w)@(\w+)/g)].map(m => m[1])
// "#" must not directly follow a word character or "&", which skips fragments
// attached to a word (page.html#top) and HTML numeric entities (&#39;).
const hashtags = (text: string) => [...text.matchAll(/(?<![\w&])#(\w+)/g)].map(m => m[1])

正则表达式：解决实际问题的实用模式插图

Python 正则模式

import re

# Compile once so the same pattern object is reused on every call.
# Pragmatic email check: local part, "@", domain, a dot, then 2+ ASCII letters.
# Note: in Python "$" also matches just before a trailing newline; use
# email_pattern.fullmatch(...) when that must be rejected.
email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')

# All matches

def extract_ips(text: str) -> list[str]:
    """Return every dotted-quad substring of ``text`` in order of appearance.

    Only the shape is checked (four groups of 1-3 digits between word
    boundaries); octets are not range-validated, so ``999.1.1.1`` is returned too.
    """
    return [hit.group(0) for hit in re.finditer(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', text)]

# Substitution with a callback function

def mask_sensitive(text: str) -> str:
    """Mask the local part of every email address found in ``text``.

    Each address keeps at most the first two characters of its local part,
    followed by ``***`` and the unchanged ``@domain``. All other text is
    returned as-is.
    """
    def redact(found):
        # The pattern's character classes exclude "@", so a match holds exactly one.
        local, _, domain = found.group().partition('@')
        return f'{local[:2]}***@{domain}'

    return re.sub(r'\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b', redact, text)

# 命名组

def parse_log(line: str) -> dict | None:
    pattern = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+) .+ "(?P<method>GET|POST|PUT|DELETE) (?P<path>/\S*)'
    )
    m = pattern.search(line)
    return m.groupdict() if m else None

# Flags

text = "Hello World hello"
# IGNORECASE makes the literal match regardless of letter case.
re.findall(r'hello', text, re.IGNORECASE)  # ['Hello', 'hello']
# Sample input so the call below runs on its own (the name was otherwise undefined).
multiline_text = "first line\nsecond line"
# DOTALL lets "." match newlines, so ".*" consumes the whole text. MULTILINE only
# changes how "^" and "$" behave and therefore has no effect on this pattern.
re.match(r'.*', multiline_text, re.DOTALL | re.MULTILINE)

正则测试与调试

// Create a reusable validator
/**
 * Builds a validation function from a pattern and an error message.
 *
 * @param pattern - Regular expression the value must match.
 * @param message - Message reported when the value does not match.
 * @returns A function mapping a value to `{ valid, message }`, where `message`
 *   is the empty string for valid input.
 */
function createValidator(pattern: RegExp, message: string) {
  return (value: string) => {
    // A global or sticky RegExp carries `lastIndex` from one `test` call to the
    // next. Reset it and test exactly once so `valid` and `message` always
    // agree and repeated validations give the same answer.
    pattern.lastIndex = 0
    const valid = pattern.test(value)
    return {
      valid,
      message: valid ? '' : message,
    }
  }
}

// Email validator built on the pragmatic (non-RFC) email pattern.
const validateEmail = createValidator(
  /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
  'Invalid email format'
)

// Debug a complex regex
/**
 * Runs `pattern` once against `input` and reports what was matched.
 *
 * @param pattern - Regular expression to execute (via `exec`).
 * @param input - String to search.
 * @returns `{ matched: false }` when there is no match; otherwise the full
 *   match, the named groups, the numbered captures and the match index.
 */
function debugRegex(pattern: RegExp, input: string) {
  const result = pattern.exec(input)
  if (result === null) {
    return { matched: false }
  }

  // Element 0 is the whole match; the remaining elements are the captures.
  const [fullMatch, ...captures] = result
  const { groups, index } = result
  return {
    matched: true,
    fullMatch,
    groups,
    captures,
    index,
  }
}

常见陷阱

问题	解决方案
贪婪 vs 懒惰	使用 `.*?` 实现懒惰匹配
特殊字符	使用 `\` 转义
性能	只编译一次模式
灾难性回溯	避免嵌套量词（如 `(a+)+`）；在支持的引擎中使用原子组或占有量词（Python 3.11+ 支持，JS 不支持）
Unicode	在 JS 中使用 `/u` 标志

页面加载失败

正则表达式：解决实际问题的实用模式

正则表达式：实用指南

基本模式

高级特性

实际模式

Python 正则模式

正则测试与调试

常见陷阱