Regular Expressions in Go

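Go's regexp package provides regular-expression support based on the RE2 syntax, which guarantees linear-time matching but does not support backreferences (such as \1) or lookaround assertions (such as (?=...)). Regular expressions are powerful for pattern matching, text validation, and string manipulation.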

Basic Pattern Matching

Simple string matching:

package main

import (
    "fmt"
    "regexp"
)

// main demonstrates the three most common lookups on a compiled pattern:
// a yes/no match test, the first match, and every match.
func main() {
    const sentence = "Hello world from Go"

    // \b\w{5}\b matches a run of exactly five word characters
    // delimited by word boundaries, i.e. a 5-letter word.
    fiveLetters := regexp.MustCompile(`\b\w{5}\b`)

    // Does the sentence contain at least one such word?
    if found := fiveLetters.MatchString(sentence); found {
        fmt.Println("Found 5-letter word(s)")
    }

    // Leftmost match only.
    first := fiveLetters.FindString(sentence)
    fmt.Println("First match:", first)

    // Every non-overlapping match; -1 means "no limit".
    all := fiveLetters.FindAllString(sentence, -1)
    fmt.Println("All matches:", all)
}

Compiling Patterns

Different ways to compile regex:

package main

import (
    "fmt"
    "regexp"
)

// main shows the three ways to go from a pattern string to a match result:
// regexp.Compile, regexp.MustCompile, and the package-level
// regexp.MatchString.
func main() {
    test := "123-45-6789"

    // Method 1: Compile and check for errors. Prefer this when the pattern
    // is not a compile-time constant and may be invalid.
    pattern, err := regexp.Compile(`\d{3}-\d{2}-\d{4}`)
    if err != nil {
        fmt.Println("Invalid regex:", err)
        return
    }
    if pattern.MatchString(test) {
        fmt.Println("Compiled pattern matches:", test)
    }

    // Method 2: MustCompile (panics on error). Suitable for patterns that
    // are fixed in the source code.
    // The pattern is unanchored, so it reports whether the text contains an
    // SSN-shaped substring.
    ssnPattern := regexp.MustCompile(`\d{3}-\d{2}-\d{4}`)
    if ssnPattern.MatchString(test) {
        fmt.Println("Valid SSN format")
    }

    // Method 3: For one-time use. regexp.MatchString returns both the match
    // result and a compilation error, so it cannot be used directly as an
    // if condition.
    matched, err := regexp.MatchString(`^[a-zA-Z]+$`, "Hello")
    if err != nil {
        fmt.Println("Invalid regex:", err)
        return
    }
    if matched {
        fmt.Println("Contains only letters")
    }
}

Common Patterns

Useful regex patterns:

package main

import (
    "fmt"
    "regexp"
)

// main checks a set of commonly used validation patterns, each against one
// sample input, and prints whether the sample matched.
func main() {
    // A slice is used instead of a map so the output order is the same on
    // every run; Go randomizes map iteration order.
    checks := []struct {
        name    string
        pattern string
        sample  string
    }{
        {"email", `^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`, "user@example.com"},
        {"phone", `^\+?[\d\s\-\(\)]+$`, "+1 (555) 123-4567"},
        {"date", `^\d{4}-\d{2}-\d{2}$`, "2023-12-25"},
        {"url", `^https?://[^\s]+$`, "https://golang.org"},
        {"ip", `^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`, "192.168.1.1"},
        {"hex_color", `^#[0-9A-Fa-f]{6}$`, "#FF5733"},
    }

    for _, c := range checks {
        // Each pattern is compiled exactly once, since every entry is
        // visited a single time.
        regex := regexp.MustCompile(c.pattern)

        mark := "✗"
        if regex.MatchString(c.sample) {
            mark = "✓"
        }
        fmt.Printf("%s: %s %s\n", c.name, c.sample, mark)
    }
}

Finding and Extracting

Extracting data from text:

package main

import (
    "fmt"
    "regexp"
)

// main extracts structured pieces (email addresses, phone numbers, and
// key=value pairs) out of free-form text.
func main() {
    // Extract email addresses. The pattern is unanchored so it can find
    // addresses embedded in a longer sentence.
    text := "Contact us at support@example.com or sales@example.com"
    emailPattern := regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)

    emails := emailPattern.FindAllString(text, -1)
    fmt.Println("Emails found:", emails)

    // Extract phone numbers, with or without parentheses around the area
    // code and with '-', '.', or whitespace as optional separators.
    text2 := "Call 555-123-4567 or (555) 987-6543"
    phonePattern := regexp.MustCompile(`\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}`)

    phones := phonePattern.FindAllString(text2, -1)
    fmt.Println("Phones found:", phones)

    // Extract key-value pairs. Group 1 is the key and group 2 is everything
    // up to the next ';'.
    config := "name=John;age=30;city=New York"
    kvPattern := regexp.MustCompile(`(\w+)=([^;]+)`)

    // Each submatch slice holds the full match at index 0, then the groups.
    for _, match := range kvPattern.FindAllStringSubmatch(config, -1) {
        key, value := match[1], match[2]
        fmt.Printf("%s = %s\n", key, value)
    }
}

Groups and Capturing

Using capture groups:

package main

import (
    "fmt"
    "regexp"
    "strconv"
)

// main demonstrates capture groups: splitting a log line into fields,
// converting a captured field to a number, and pairing an HTML opening tag
// with its closing tag.
func main() {
    // Parse log entries: date, time, level, and free-form message.
    logPattern := regexp.MustCompile(`(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) (\w+) (.+)`)

    logLine := "2023-01-15 10:30:45 INFO User login successful"

    // Index 0 of the result is the whole match; groups start at index 1.
    if matches := logPattern.FindStringSubmatch(logLine); matches != nil {
        date, clock, level, message := matches[1], matches[2], matches[3], matches[4]

        fmt.Printf("Date: %s\n", date)
        fmt.Printf("Time: %s\n", clock)
        fmt.Printf("Level: %s\n", level)
        fmt.Printf("Message: %s\n", message)

        // Captured groups are always strings; use strconv when a number is
        // needed. The pattern guarantees date starts with four digits.
        year, err := strconv.Atoi(date[:4])
        if err != nil {
            fmt.Println("Invalid year:", err)
            return
        }
        fmt.Printf("Year: %d\n", year)
    }

    // Parse HTML tags. Go's RE2 syntax has no backreferences, so a pattern
    // ending in `</\1>` does not compile. Instead, match the opening tag
    // first and then search for the closing tag with the same name.
    // This is a demonstration only; regular expressions cannot handle
    // arbitrarily nested HTML.
    openTag := regexp.MustCompile(`<(\w+)([^>]*)>`)

    html := `<div class="container">Hello <span>world</span></div>`

    rest := html
    for {
        loc := openTag.FindStringSubmatchIndex(rest)
        if loc == nil {
            break
        }

        tagName := rest[loc[2]:loc[3]]
        attributes := rest[loc[4]:loc[5]]
        afterOpen := rest[loc[1]:]

        // QuoteMeta keeps the tag name literal inside the closing pattern.
        closeTag := regexp.MustCompile(`</` + regexp.QuoteMeta(tagName) + `>`)
        closeLoc := closeTag.FindStringIndex(afterOpen)
        if closeLoc == nil {
            // No closing tag for this element: skip it and keep scanning.
            rest = afterOpen
            continue
        }

        content := afterOpen[:closeLoc[0]]
        fmt.Printf("Tag: %s, Attributes: %s, Content: %s\n", tagName, attributes, content)

        // Resume after the closing tag so matches never overlap.
        rest = afterOpen[closeLoc[1]:]
    }
}

Replacing Text

Search and replace with regex:

package main

import (
    "fmt"
    "regexp"
    "strings"
)

// main demonstrates search-and-replace: literal replacement, replacement
// that reuses capture groups, and replacement computed by a function.
func main() {
    // Simple replacement
    text := "Hello, world! Hello, universe!"
    pattern := regexp.MustCompile(`Hello`)

    result := pattern.ReplaceAllString(text, "Hi")
    fmt.Println("Simple replace:", result)

    // Replacement with groups
    phoneText := "Call me at 555-123-4567 or 555-987-6543"
    phonePattern := regexp.MustCompile(`(\d{3})-(\d{3})-(\d{4})`)

    // Mask middle digits. The ${n} form is used because a bare $1 followed
    // by a letter, digit, or underscore would be read as a longer group
    // name (for example $1x means ${1x}, not ${1}x).
    masked := phonePattern.ReplaceAllString(phoneText, "${1}-***-${3}")
    fmt.Println("Masked phones:", masked)

    // Replacement function: called once per match with the matched text,
    // and its return value is inserted verbatim (no $ expansion).
    censorPattern := regexp.MustCompile(`\b\d{4}\b`) // 4-digit numbers

    censored := censorPattern.ReplaceAllStringFunc("My card is 1234-5678-9012-3456", func(match string) string {
        return strings.Repeat("*", len(match))
    })
    fmt.Println("Censored:", censored)

    // Expand with groups
    namePattern := regexp.MustCompile(`(\w+) (\w+)`)
    greeting := namePattern.ReplaceAllString("John Doe", "Hello, ${1} ${2}!")
    fmt.Println("Greeting:", greeting)
}

Splitting Strings

Split strings using regex:

package main

import (
    "fmt"
    "regexp"
)

// main demonstrates Regexp.Split with a set of delimiters, with line and
// field separators for CSV-like data, and with runs of non-word characters.
func main() {
    // Split on multiple delimiters. The surrounding \s* absorbs whitespace
    // next to a delimiter so the results carry no stray spaces.
    text := "apple, orange; banana|grape"
    delimiters := regexp.MustCompile(`\s*[,;|]\s*`)

    fruits := delimiters.Split(text, -1)
    fmt.Println("Fruits:", fruits)

    // Split CSV-like data (simple example). Both patterns are compiled once,
    // outside the loop, rather than once per line.
    csv := "John,Doe,30\nJane,Smith,25\nBob,Johnson,35"
    newline := regexp.MustCompile(`\n`)
    comma := regexp.MustCompile(`,`)

    for _, line := range newline.Split(csv, -1) {
        fields := comma.Split(line, -1)
        // Guard against short rows so indexing below cannot panic.
        if len(fields) < 3 {
            fmt.Printf("Skipping malformed line: %q\n", line)
            continue
        }
        fmt.Printf("Name: %s %s, Age: %s\n", fields[0], fields[1], fields[2])
    }

    // Split on runs of non-word characters. Split yields an empty string
    // when the input starts or ends with a separator (here the final "?"),
    // so empty pieces are dropped.
    sentence := "Hello, world! How are you?"
    nonWord := regexp.MustCompile(`\W+`)

    var words []string
    for _, piece := range nonWord.Split(sentence, -1) {
        if piece != "" {
            words = append(words, piece)
        }
    }
    fmt.Println("Words:", words)
}

Performance Tips

Optimizing regex usage:

package main

import (
    "fmt"
    "regexp"
    "time"
)

// benchmarkRegex compiles pattern once, runs MatchString against text
// iterations times, and returns the wall-clock time spent in the loop.
// Compilation happens before the clock starts, so it is not included in the
// result. An invalid pattern causes a panic, because MustCompile is used.
func benchmarkRegex(pattern, text string, iterations int) time.Duration {
    re := regexp.MustCompile(pattern)

    began := time.Now()
    for remaining := iterations; remaining > 0; remaining-- {
        // Only the cost of matching is of interest; the result is discarded.
        re.MatchString(text)
    }
    elapsed := time.Since(began)

    return elapsed
}

// main times several pattern shapes against the same text using
// benchmarkRegex, then shows a single pre-compiled pattern being reused to
// validate a list of email addresses.
func main() {
    text := "The quick brown fox jumps over the lazy dog"

    // Test different patterns
    patterns := []string{
        `fox`,       // Literal match
        `f..`,       // Wildcards: "f" followed by any two characters
        `\bfox\b`,   // Word boundary
        `.*fox.*`,   // Greedy match
        `.*?fox.*?`, // Non-greedy match
    }

    iterations := 100000

    for _, pattern := range patterns {
        duration := benchmarkRegex(pattern, text, iterations)
        fmt.Printf("Pattern: %-12s Time: %v\n", pattern, duration)
    }

    // Pre-compile for repeated use: the pattern is compiled once here and
    // reused for every address in the loop below.
    emailPattern := regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

    // A mix of well-formed addresses and one invalid entry.
    emails := []string{
        "user@example.com",
        "test.email@domain.org",
        "invalid-email",
        "another@test.co.uk",
    }

    for _, email := range emails {
        if emailPattern.MatchString(email) {
            fmt.Printf("✓ %s\n", email)
        } else {
            fmt.Printf("✗ %s\n", email)
        }
    }
}

Advanced Patterns

Complex regex patterns:

package main

import (
    "fmt"
    "regexp"
)

// passwordChecks lists the patterns a strong password must satisfy. Go's
// RE2 syntax has no lookahead assertions such as (?=...), so each
// requirement is expressed as its own simple pattern instead of being
// combined into a single expression.
var passwordChecks = []*regexp.Regexp{
    regexp.MustCompile(`[a-z]`),                     // at least one lowercase letter
    regexp.MustCompile(`[A-Z]`),                     // at least one uppercase letter
    regexp.MustCompile(`\d`),                        // at least one digit
    regexp.MustCompile(`[@$!%*?&]`),                 // at least one special character
    regexp.MustCompile(`^[A-Za-z\d@$!%*?&]{8,}$`),   // 8+ characters, allowed set only
}

// isStrongPassword reports whether pwd satisfies every pattern in
// passwordChecks.
func isStrongPassword(pwd string) bool {
    for _, check := range passwordChecks {
        if !check.MatchString(pwd) {
            return false
        }
    }
    return true
}

// main demonstrates more involved uses of regular expressions: password
// validation, splitting a URL into its parts, and pulling the name and body
// out of a simple function declaration.
func main() {
    // Password validation
    passwords := []string{
        "Password123!",
        "weak",
        "NoNumbers!",
        "nouppercase123!",
    }

    for _, pwd := range passwords {
        if isStrongPassword(pwd) {
            fmt.Printf("✓ Strong: %s\n", pwd)
        } else {
            fmt.Printf("✗ Weak: %s\n", pwd)
        }
    }

    // URL parsing: scheme, host, optional port, and the remainder.
    // (?:...) is a non-capturing group, so the port digits are group 3.
    urlPattern := regexp.MustCompile(`^(https?)://([^:/]+)(?::(\d+))?(.*)$`)

    urls := []string{
        "https://www.example.com:8080/path?query=value",
        "http://localhost/api/v1/users",
    }

    for _, url := range urls {
        matches := urlPattern.FindStringSubmatch(url)
        if matches == nil {
            continue
        }

        protocol, host, port, path := matches[1], matches[2], matches[3], matches[4]

        // An optional group that did not participate is reported as "";
        // fall back to the scheme's default port in that case.
        if port == "" {
            if protocol == "https" {
                port = "443"
            } else {
                port = "80"
            }
        }

        fmt.Printf("URL: %s\n", url)
        fmt.Printf("  Protocol: %s\n", protocol)
        fmt.Printf("  Host: %s\n", host)
        fmt.Printf("  Port: %s\n", port)
        fmt.Printf("  Path: %s\n", path)
        fmt.Println()
    }

    // Code parsing (simple): group 1 is the function name, group 2 is the
    // text between the braces. This only works for bodies that contain no
    // nested '}'.
    code := `func main() { fmt.Println("Hello") }`
    funcPattern := regexp.MustCompile(`func\s+(\w+)\s*\([^)]*\)\s*{([^}]*)}`)

    if matches := funcPattern.FindStringSubmatch(code); matches != nil {
        funcName := matches[1]
        funcBody := matches[2]
        fmt.Printf("Function: %s\n", funcName)
        fmt.Printf("Body: %s\n", funcBody)
    }
}

Best Practices

  1. Pre-compile patterns: Use regexp.MustCompile for frequently used regex
  2. Use raw strings: Use backticks for regex patterns to avoid escaping
  3. Test patterns: Always test regex against various inputs
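  4. Avoid catastrophic backtracking: Go's engine does not backtrack, so this mainly matters for patterns shared with other regex engines; still be careful with nested quantifiers, which make patterns slower and harder to read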
  5. Use anchors: ^ and $ for exact matches when appropriate
  6. Prefer simple patterns: Complex regex can be hard to maintain
  7. Document patterns: Comment what your regex is supposed to match
  8. Consider alternatives: Sometimes string functions are simpler
  9. Profile performance: Test regex performance with your data
  10. Handle errors: Check for compilation errors

Regular expressions are powerful tools for text processing in Go. The RE2 engine used by Go provides linear time guarantees, making it safer than some other regex engines. With proper understanding and testing, regex can solve many text processing challenges efficiently.

For more on string manipulation, check our file I/O tutorial. If you need to work with structured data, see the JSON tutorial.

Last updated on