Regular Expressions in Go
Go’s regexp package provides support for regular expressions using the RE2 syntax, implemented in pure Go with the same linear-time matching guarantee as the RE2 engine. Regular expressions are powerful for pattern matching, text validation, and string manipulation.
Basic Pattern Matching
Simple string matching:
package main
import (
"fmt"
"regexp"
)
func main() {
// Compile regex pattern
pattern := regexp.MustCompile(`\b\w{5}\b`) // 5-letter words
// Test if string matches
text := "Hello world from Go"
if pattern.MatchString(text) {
fmt.Println("Found 5-letter word(s)")
}
// Find first match
match := pattern.FindString(text)
fmt.Println("First match:", match) // "Hello"
// Find all matches
matches := pattern.FindAllString(text, -1)
fmt.Println("All matches:", matches) // ["Hello", "world"]
}Compiling Patterns
Different ways to compile regex:
package main
import (
"fmt"
"regexp"
)
func main() {
// Method 1: Compile and check for errors
pattern, err := regexp.Compile(`\d{3}-\d{2}-\d{4}`)
if err != nil {
fmt.Println("Invalid regex:", err)
return
}
// Method 2: MustCompile (panics on error)
phonePattern := regexp.MustCompile(`\d{3}-\d{2}-\d{4}`)
// Test patterns
test := "123-45-6789"
if phonePattern.MatchString(test) {
fmt.Println("Valid SSN format")
}
// Method 3: For one-time use
if regexp.MatchString(`^[a-zA-Z]+$`, "Hello") {
fmt.Println("Contains only letters")
}
}Common Patterns
Useful regex patterns:
package main
import (
"fmt"
"regexp"
)
func main() {
patterns := map[string]string{
"email": `^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`,
"phone": `^\+?[\d\s\-\(\)]+$`,
"date": `^\d{4}-\d{2}-\d{2}$`,
"url": `^https?://[^\s]+$`,
"ip": `^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`,
"hex_color": `^#[0-9A-Fa-f]{6}$`,
}
testStrings := map[string]string{
"email": "[email protected]",
"phone": "+1 (555) 123-4567",
"date": "2023-12-25",
"url": "https://golang.org",
"ip": "192.168.1.1",
"hex_color": "#FF5733",
}
for name, pattern := range patterns {
regex := regexp.MustCompile(pattern)
testStr := testStrings[name]
if regex.MatchString(testStr) {
fmt.Printf("%s: %s ✓\n", name, testStr)
} else {
fmt.Printf("%s: %s ✗\n", name, testStr)
}
}
}Finding and Extracting
Extracting data from text:
package main
import (
"fmt"
"regexp"
)
func main() {
// Extract email addresses
text := "Contact us at [email protected] or [email protected]"
emailPattern := regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)
emails := emailPattern.FindAllString(text, -1)
fmt.Println("Emails found:", emails)
// Extract phone numbers
text2 := "Call 555-123-4567 or (555) 987-6543"
phonePattern := regexp.MustCompile(`\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}`)
phones := phonePattern.FindAllString(text2, -1)
fmt.Println("Phones found:", phones)
// Extract key-value pairs
config := "name=John;age=30;city=New York"
kvPattern := regexp.MustCompile(`(\w+)=([^;]+)`)
matches := kvPattern.FindAllStringSubmatch(config, -1)
for _, match := range matches {
key := match[1]
value := match[2]
fmt.Printf("%s = %s\n", key, value)
}
}Groups and Capturing
Using capture groups:
package main
import (
"fmt"
"regexp"
"strconv"
)
func main() {
// Parse log entries
logPattern := regexp.MustCompile(`(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) (\w+) (.+)`)
logLine := "2023-01-15 10:30:45 INFO User login successful"
matches := logPattern.FindStringSubmatch(logLine)
if matches != nil {
date := matches[1]
time := matches[2]
level := matches[3]
message := matches[4]
fmt.Printf("Date: %s\n", date)
fmt.Printf("Time: %s\n", time)
fmt.Printf("Level: %s\n", level)
fmt.Printf("Message: %s\n", message)
}
// Parse HTML tags
htmlPattern := regexp.MustCompile(`<(\w+)([^>]*)>(.*?)</\1>`)
html := `<div class="container">Hello <span>world</span></div>`
tags := htmlPattern.FindAllStringSubmatch(html, -1)
for _, tag := range tags {
tagName := tag[1]
attributes := tag[2]
content := tag[3]
fmt.Printf("Tag: %s, Attributes: %s, Content: %s\n", tagName, attributes, content)
}
}Replacing Text
Search and replace with regex:
package main
import (
	"fmt"
	"regexp"
	"strings"
)
func main() {
// Simple replacement
text := "Hello, world! Hello, universe!"
pattern := regexp.MustCompile(`Hello`)
result := pattern.ReplaceAllString(text, "Hi")
fmt.Println("Simple replace:", result)
// Replacement with groups
phoneText := "Call me at 555-123-4567 or 555-987-6543"
phonePattern := regexp.MustCompile(`(\d{3})-(\d{3})-(\d{4})`)
// Mask middle digits
masked := phonePattern.ReplaceAllString(phoneText, "$1-***-$3")
fmt.Println("Masked phones:", masked)
// Replacement function
censorPattern := regexp.MustCompile(`\b\d{4}\b`) // 4-digit numbers
censored := censorPattern.ReplaceAllStringFunc("My card is 1234-5678-9012-3456", func(match string) string {
return strings.Repeat("*", len(match))
})
fmt.Println("Censored:", censored)
// Expand with groups
namePattern := regexp.MustCompile(`(\w+) (\w+)`)
greeting := namePattern.ReplaceAllString("John Doe", "Hello, $1 $2!")
fmt.Println("Greeting:", greeting)
}Splitting Strings
Split strings using regex:
package main
import (
"fmt"
"regexp"
)
func main() {
// Split on multiple delimiters
text := "apple, orange; banana|grape"
delimiters := regexp.MustCompile(`[,;|]`)
fruits := delimiters.Split(text, -1)
fmt.Println("Fruits:", fruits)
// Split CSV-like data (simple example)
csv := "John,Doe,30\nJane,Smith,25\nBob,Johnson,35"
lines := regexp.MustCompile(`\n`).Split(csv, -1)
for _, line := range lines {
fields := regexp.MustCompile(`,`).Split(line, -1)
fmt.Printf("Name: %s %s, Age: %s\n", fields[0], fields[1], fields[2])
}
// Split on word boundaries
sentence := "Hello, world! How are you?"
words := regexp.MustCompile(`\W+`).Split(sentence, -1)
fmt.Println("Words:", words)
}Performance Tips
Optimizing regex usage:
package main
import (
"fmt"
"regexp"
"time"
)
// benchmarkRegex compiles pattern once, runs MatchString against text
// iterations times, and returns the wall-clock time spent in the loop.
// Compilation happens before the clock starts. An invalid pattern panics,
// because MustCompile is used.
func benchmarkRegex(pattern, text string, iterations int) time.Duration {
	re := regexp.MustCompile(pattern)

	began := time.Now()
	for remaining := iterations; remaining > 0; remaining-- {
		re.MatchString(text)
	}
	elapsed := time.Since(began)

	return elapsed
}
func main() {
text := "The quick brown fox jumps over the lazy dog"
// Test different patterns
patterns := []string{
`fox`, // Literal match
`f..`, // Character class
`\bfox\b`, // Word boundary
`.*fox.*`, // Greedy match
`.*?fox.*?`, // Non-greedy match
}
iterations := 100000
for _, pattern := range patterns {
duration := benchmarkRegex(pattern, text, iterations)
fmt.Printf("Pattern: %-12s Time: %v\n", pattern, duration)
}
// Pre-compile for repeated use
emailPattern := regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)
emails := []string{
"[email protected]",
"[email protected]",
"invalid-email",
"[email protected]",
}
for _, email := range emails {
if emailPattern.MatchString(email) {
fmt.Printf("✓ %s\n", email)
} else {
fmt.Printf("✗ %s\n", email)
}
}
}Advanced Patterns
Complex regex patterns:
package main
import (
"fmt"
"regexp"
)
func main() {
// Password validation
passwordPattern := regexp.MustCompile(`^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$`)
passwords := []string{
"Password123!",
"weak",
"NoNumbers!",
"nouppercase123!",
}
for _, pwd := range passwords {
if passwordPattern.MatchString(pwd) {
fmt.Printf("✓ Strong: %s\n", pwd)
} else {
fmt.Printf("✗ Weak: %s\n", pwd)
}
}
// URL parsing
urlPattern := regexp.MustCompile(`^(https?)://([^:/]+)(?::(\d+))?(.*)$`)
urls := []string{
"https://www.example.com:8080/path?query=value",
"http://localhost/api/v1/users",
}
for _, url := range urls {
matches := urlPattern.FindStringSubmatch(url)
if matches != nil {
protocol := matches[1]
host := matches[2]
port := matches[3]
path := matches[4]
if port == "" {
if protocol == "https" {
port = "443"
} else {
port = "80"
}
}
fmt.Printf("URL: %s\n", url)
fmt.Printf(" Protocol: %s\n", protocol)
fmt.Printf(" Host: %s\n", host)
fmt.Printf(" Port: %s\n", port)
fmt.Printf(" Path: %s\n", path)
fmt.Println()
}
}
// Code parsing (simple)
code := `func main() { fmt.Println("Hello") }`
funcPattern := regexp.MustCompile(`func\s+(\w+)\s*\([^)]*\)\s*{([^}]*)}`)
if matches := funcPattern.FindStringSubmatch(code); matches != nil {
funcName := matches[1]
funcBody := matches[2]
fmt.Printf("Function: %s\n", funcName)
fmt.Printf("Body: %s\n", funcBody)
}
}Best Practices
- Pre-compile patterns: Use regexp.MustCompile for frequently used regex
- Use raw strings: Use backticks for regex patterns to avoid escaping
- Test patterns: Always test regex against various inputs
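- Mind pattern cost: Go's regexp never backtracks, so catastrophic backtracking cannot occur, but large or nested repetition counts still make a pattern slower to compile and match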
- Use anchors: ^ and $ for exact matches when appropriate
- Prefer simple patterns: Complex regex can be hard to maintain
- Document patterns: Comment what your regex is supposed to match
- Consider alternatives: Sometimes string functions are simpler
- Profile performance: Test regex performance with your data
- Handle errors: Check for compilation errors
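Regular expressions are powerful tools for text processing in Go. Go's regexp implementation follows the RE2 design and guarantees matching in time linear in the size of the input, making it safer than backtracking regex engines. The trade-off is that constructs which require backtracking, such as backreferences (\1) and lookaround ((?=...), (?!...)), are not supported and must be expressed in Go code instead. With proper understanding and testing, regex can solve many text processing challenges efficiently.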
For more on string manipulation, check our file I/O tutorial. If you need to work with structured data, see the JSON tutorial.