Regex

Character Classes

.         any character except newline
\d        digit [0-9]
\D        non-digit [^0-9]
\w        word character [a-zA-Z0-9_]
\W        non-word character
\s        whitespace (space, tab, newline)
\S        non-whitespace
[abc]     a, b, or c
[^abc]    not a, b, or c
[a-z]     lowercase letters a to z
[A-Z0-9]  uppercase letters or digits

Quantifiers

a*        zero or more
a+        one or more
a?        zero or one (optional)
a{3}      exactly 3
a{3,5}    between 3 and 5
a{3,}     3 or more
a*?       zero or more (lazy)
a+?       one or more (lazy)
a{3,5}?   between 3 and 5 (lazy)

Anchors & Boundaries

^abc      start of string/line
abc$      end of string/line
\A        start of string only
\z        end of string only
\b        word boundary
\B        non-word boundary

# examples
^\d+      string starts with digits
\.$       string ends with a dot
\bcat\b   match "cat" as whole word

Groups & References

(abc)       capturing group
(?:abc)     non-capturing group
(?<name>abc)  named capturing group (Python: (?P<name>abc))
\1          backreference to group 1
\2          backreference to group 2
$1          group 1 in replacement

# examples
(\w+)\s+\1           # match repeated word
(\d{4})-(\d{2})-(\d{2})   # date: $1-$2-$3
(?<year>\d{4})       # named group "year"

Alternation & Lookaround

cat|dog        cat or dog
(?=foo)        lookahead: followed by foo
(?!bar)        negative lookahead: not followed by bar
(?<=foo)       lookbehind: preceded by foo
(?<!bar)       negative lookbehind: not preceded by bar

# examples
\d+(?=px)           # digits followed by "px"
\d+(?!px)           # digits NOT followed by "px" (backtracks: "12px" still matches "1")
(?<=\$)\d+          # digits preceded by "$"
(?<!un)\w+          # word NOT preceded by "un"

Flags

/pattern/gi     g = global (all matches)
                i = case-insensitive
                m = multiline (^/$ match line start/end)
                s = dotall (. matches newline)
                x = verbose (ignore whitespace)
                u = unicode mode

# common usage
/foo/g          # find all "foo" in string
/^bar/m         # match "bar" at start of any line
/./s            # dot matches everything including \n

Escaping

\.       literal dot
\\       literal backslash
\*       literal asterisk
\+       literal plus
\?       literal question mark
\^       literal caret
\$       literal dollar sign
\[       literal bracket
\]       literal bracket
\(       literal parenthesis
\)       literal parenthesis
\|       literal pipe

Common Patterns

# email (basic)
[\w.+-]+@[\w-]+\.[\w.]+

# IPv4 address (format only; does not check that each octet is 0-255)
\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}

# date (YYYY-MM-DD)
\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])

# URL
https?:\/\/[\w\-]+(?:\.[\w\-]+)+(?:\/[\w\-._~:?#@!$&'()*+,;=]*)?

# hex color (loose: accepts any 3-8 hex digits, including invalid lengths 5 and 7)
#[0-9a-fA-F]{3,8}

# phone (US)
\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}

Usage in Code

# Python
import re
re.search(r'\d+', 'abc123')          # first match
re.findall(r'\d+', 'a1b22c333')      # all matches
re.sub(r'\d+', 'X', 'a1b22')         # replace
re.match(r'^\w+', 'hello world')     # match at start

// JavaScript
'abc123'.match(/\d+/)                // first match
'abc123'.match(/\d+/g)               // all matches
'abc123'.replace(/\d+/, 'X')         // replace
/^hello/.test('hello world')         // test match

// Go
import "regexp"
re := regexp.MustCompile(`\d+`)
re.FindString("abc123")              // "123"
re.FindAllString("a1b22c333", -1)    // ["1", "22", "333"]
re.ReplaceAllString("a1b22", "X")    // "aXbX"
re.MatchString("abc123")             // true

Tips & Tricks

# match everything between quotes
"[^"]*"

# match HTML/XML tag
<(\w+)[^>]*>.*?<\/\1>

# password (min 8, uppercase, lowercase, digit)
^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$

# trim whitespace
^\s+|\s+$

# capture filename without extension
^(.+?)(?:\.[^.]+)?$

# number with optional minus sign and decimals
-?\d+(?:\.\d+)?

字符类

.         除换行符外的任意字符
\d        数字 [0-9]
\D        非数字 [^0-9]
\w        单词字符 [a-zA-Z0-9_]
\W        非单词字符
\s        空白字符(空格、制表、换行)
\S        非空白字符
[abc]     a、b 或 c
[^abc]    非 a、b、c
[a-z]     小写字母 a 到 z
[A-Z0-9]  大写字母或数字

量词

a*        零次或多次
a+        一次或多次
a?        零次或一次(可选)
a{3}      恰好 3 次
a{3,5}    3 到 5 次
a{3,}     3 次及以上
a*?       零次或多次(懒惰模式)
a+?       一次或多次(懒惰模式)
a{3,5}?   3 到 5 次(懒惰模式)

锚点与边界

^abc      字符串/行首
abc$      字符串/行尾
\A        仅字符串开头
\z        仅字符串结尾
\b        单词边界
\B        非单词边界

# 示例
^\d+      以数字开头
\.$       以点号结尾
\bcat\b   精确匹配 "cat" 整个单词

分组与引用

(abc)       捕获分组
(?:abc)     非捕获分组
(?<name>abc)  命名捕获分组(Python 写法为 (?P<name>abc))
\1          反向引用第 1 组
\2          反向引用第 2 组
$1          替换中引用第 1 组

# 示例
(\w+)\s+\1           # 匹配重复单词
(\d{4})-(\d{2})-(\d{2})   # 日期: $1-$2-$3
(?<year>\d{4})       # 命名分组 "year"

分支与前后断言

cat|dog        cat 或 dog
(?=foo)        正向先行断言:后面跟着 foo
(?!bar)        负向先行断言:后面不跟 bar
(?<=foo)       正向后行断言:前面是 foo
(?<!bar)       负向后行断言:前面不是 bar

# 示例
\d+(?=px)           # 后面跟着 "px" 的数字
\d+(?!px)           # 后面不跟 "px" 的数字(注意回溯:"12px" 仍会匹配 "1")
(?<=\$)\d+          # 前面是 "$" 的数字
(?<!un)\w+          # 前面不是 "un" 的单词

修饰符

/pattern/gi     g = 全局匹配(匹配所有)
                i = 忽略大小写
                m = 多行模式(^/$ 匹配行首行尾)
                s = 点号通配(. 匹配换行符)
                x = 宽松模式(忽略空白)
                u = Unicode 模式

# 常见用法
/foo/g          # 查找所有 "foo"
/^bar/m         # 匹配每行开头的 "bar"
/./s            # 点号匹配包括 \n 的所有字符

转义字符

\.       匹配字面点号
\\       匹配字面反斜杠
\*       匹配字面星号
\+       匹配字面加号
\?       匹配字面问号
\^       匹配字面脱字符
\$       匹配字面美元符
\[       匹配字面左方括号
\]       匹配字面右方括号
\(       匹配字面左括号
\)       匹配字面右括号
\|       匹配字面管道符

常用模式

# 邮箱(基础)
[\w.+-]+@[\w-]+\.[\w.]+

# IPv4 地址(仅检查格式,不校验每段是否在 0-255 范围内)
\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}

# 日期 (YYYY-MM-DD)
\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])

# URL
https?:\/\/[\w\-]+(?:\.[\w\-]+)+(?:\/[\w\-._~:?#@!$&'()*+,;=]*)?

# 十六进制颜色(宽松:接受任意 3-8 位十六进制数字,包括无效的 5 位和 7 位)
#[0-9a-fA-F]{3,8}

# 手机号(中国)
1[3-9]\d{9}

编程语言用法

# Python
import re
re.search(r'\d+', 'abc123')          # 第一个匹配
re.findall(r'\d+', 'a1b22c333')      # 所有匹配
re.sub(r'\d+', 'X', 'a1b22')         # 替换
re.match(r'^\w+', 'hello world')     # 从开头匹配

// JavaScript
'abc123'.match(/\d+/)                // 第一个匹配
'abc123'.match(/\d+/g)               // 所有匹配
'abc123'.replace(/\d+/, 'X')         // 替换
/^hello/.test('hello world')         // 测试是否匹配

// Go
import "regexp"
re := regexp.MustCompile(`\d+`)
re.FindString("abc123")              // "123"
re.FindAllString("a1b22c333", -1)    // ["1", "22", "333"]
re.ReplaceAllString("a1b22", "X")    // "aXbX"
re.MatchString("abc123")             // true

实用技巧

# 匹配引号内的内容
"[^"]*"

# 匹配 HTML/XML 标签
<(\w+)[^>]*>.*?<\/\1>

# 密码(至少8位,含大小写和数字)
^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$

# 去除首尾空白
^\s+|\s+$

# 提取文件名(不含扩展名)
^(.+?)(?:\.[^.]+)?$

# 数字(含可选小数和负号)
-?\d+(?:\.\d+)?