RegexLib — Regular Expression Library

Comprehensive regular expression support for Plan9Basic. Pattern matching, searching, replacing, splitting, and capture group extraction. Built on Delphi’s System.RegularExpressions unit. 16 base functions (28 with option overloads).

Category | Count | Description
Validation | 2 | regex_isvalid, regex_error$
Matching | 2 | regex_match, regex_matchfull (+ option overloads)
Find / Search | 3 | regex_find$, regex_findpos, regex_findlen (+ option overloads)
Find All | 2 | regex_findall#, regex_count (+ option overloads)
Replace | 2 | regex_replace$, regex_replacefirst$ (+ option overloads)
Split | 1 | regex_split# (+ option overload)
Groups / Capture | 3 | regex_groups#, regex_group$, regex_groupcount (+ option overloads for regex_groups# and regex_group$)
Utility | 1 | regex_escape$

Feature | Description
Engine | Delphi System.RegularExpressions (PCRE-compatible)
Results | TStringList pointers (from StrListLib) with 0-based indexing
Positions | regex_findpos() returns 1-based positions
Backreferences | $1, $2, etc. in replacement strings
Error Handling | Invalid patterns return 0 or empty strings — no exceptions
Memory | All TStringList results are garbage-collected

Regex Options

Options are numeric values combined with +. Pass as the last parameter to functions that accept an options argument. Plan9Basic does not have predefined constants, so define your own or use numeric values directly.

Option | Value | Description
REGEX_IGNORECASE | 1 | Case-insensitive matching
REGEX_MULTILINE | 2 | ^ and $ match line boundaries (not just start/end of string)
REGEX_SINGLELINE | 4 | The dot (.) matches newlines (normally it doesn’t)
REGEX_EXPLICITCAPTURE | 8 | Only named or explicitly numbered groups capture
╯ plan9basic
' Define your own constants (Plan9Basic does not predefine them).
' Each value is a distinct power of two, so adding different options
' together never collides.
let REGEX_IGNORECASE = 1
let REGEX_MULTILINE = 2
let REGEX_SINGLELINE = 4
let REGEX_EXPLICITCAPTURE = 8

' Combine with + (add each option at most once, otherwise the sum
' turns into a different option value)
opts = REGEX_IGNORECASE + REGEX_MULTILINE
' text$ is the string being searched; it is not assigned in this snippet
found = regex_match("[a-z]+", text$, opts)

' Or use numeric values directly
found = regex_match("[a-z]+", text$, 3)  ' 1 + 2 = 3

Validation Functions

Function | Signature | Description
regex_isvalid(pattern$) | regex_isvalid@$ | Check if pattern is valid regex (1=valid, 0=invalid)
regex_error$(pattern$) | regex_error$@$ | Error message for invalid pattern (empty string if valid)
╯ plan9basic
' Validate before using a pattern
' regex_isvalid() returns 1 for a valid pattern and 0 for an invalid one
if regex_isvalid("[a-z]+") = 1 then
    println "Valid pattern"
endif

' Get error description
' "[invalid" opens a character class that is never closed
err$ = regex_error$("[invalid")
' regex_error$() returns an empty string when the pattern is valid
if err$ <> "" then
    println "Error: "; err$
endif
ⓘ Note: Always validate user-supplied patterns with regex_isvalid() before passing them to other regex functions.

Matching Functions

Function | Signature | Description
regex_match(pat$, text$) | regex_match@$$ | Match pattern anywhere in text (1/0)
regex_match(pat$, text$, opts) | regex_match@$$n | Match with options
regex_matchfull(pat$, text$) | regex_matchfull@$$ | Match pattern against entire text (1/0)
regex_matchfull(pat$, text$, opts) | regex_matchfull@$$n | Full match with options
╯ plan9basic
' Partial match - pattern found anywhere in string
if regex_match("\d+", "abc123def") = 1 then
    println "Contains numbers"
endif

' Case-insensitive (option 1 = REGEX_IGNORECASE)
if regex_match("hello", "HELLO WORLD", 1) = 1 then
    println "Found hello"
endif

' Full match - entire string must match
' (regex_matchfull() already requires the whole text to match; the ^ and $
' anchors only make that intent explicit in the pattern)
email$ = "user@example.com"
if regex_matchfull("^[\w.-]+@[\w.-]+\.\w+$", email$) = 1 then
    println "Valid email format"
endif
ⓘ Note: regex_match() succeeds if the pattern is found anywhere in the text. regex_matchfull() requires the entire text to match the pattern.

Find / Search Functions

Function | Signature | Description
regex_find$(pat$, text$) | regex_find$@$$ | First match as string (empty if none)
regex_find$(pat$, text$, opts) | regex_find$@$$n | First match with options
regex_findpos(pat$, text$) | regex_findpos@$$ | 1-based position of first match (0 if none)
regex_findpos(pat$, text$, opts) | regex_findpos@$$n | Find position with options
regex_findlen(pat$, text$) | regex_findlen@$$ | Length of first match (0 if none)
regex_findlen(pat$, text$, opts) | regex_findlen@$$n | Find length with options
╯ plan9basic
' Find first number in text
' \d+ stops at the "." so only the integer part of 42.99 is returned
num$ = regex_find$("\d+", "Price: $42.99")
println "Found: "; num$  ' 42

' Position of first digit (1-based!)
pos = regex_findpos("\d", "abc123")
println "First digit at: "; pos  ' 4

' Length of first match (the match here is "123")
ln = regex_findlen("\d+", "abc123def")
println "Match length: "; ln  ' 3
⚠ Warning: regex_findpos() returns 1-based positions (consistent with other Plan9Basic string functions). Returns 0 when no match is found.

Find All Functions

Function | Signature | Description
regex_findall#(pat$, text$) | regex_findall#@$$ | All matches as TStringList (0-based)
regex_findall#(pat$, text$, opts) | regex_findall#@$$n | Find all with options
regex_count(pat$, text$) | regex_count@$$ | Number of matches
regex_count(pat$, text$, opts) | regex_count@$$n | Count with options
╯ plan9basic
' Find all words
matches# = regex_findall#("\w+", "Hello World Test")
n = strings_count(matches#)
' The result list is 0-based, so the last valid index is n - 1
for i = 0 to n - 1
    println strings_strings$(matches#, i)
next
' Hello / World / Test

' Count vowels (case-insensitive, option 1)
' Matches in "Hello World": e, o, o
cnt = regex_count("[aeiou]", "Hello World", 1)
println "Vowel count: "; cnt  ' 3
ⓘ Note: regex_findall#() returns a TStringList pointer. Use strings_count() and strings_strings$() from StrListLib to access individual results (0-based indexing).

Replace Functions

Function | Signature | Description
regex_replace$(pat$, text$, repl$) | regex_replace$@$$$ | Replace all matches
regex_replace$(pat$, text$, repl$, opts) | regex_replace$@$$$n | Replace all with options
regex_replacefirst$(pat$, text$, repl$) | regex_replacefirst$@$$$ | Replace first match only
regex_replacefirst$(pat$, text$, repl$, opts) | regex_replacefirst$@$$$n | Replace first with options
╯ plan9basic
' Replace all digits with X
result$ = regex_replace$("\d", "abc123def", "X")
println result$  ' abcXXXdef

' Swap first and last name using capture groups
' $1 refers to the first (\w+) group, $2 to the second
name$ = "John Smith"
result$ = regex_replace$("(\w+) (\w+)", name$, "$2, $1")
println result$  ' Smith, John

' Replace only the first match; later digits are left untouched
result$ = regex_replacefirst$("\d", "a1b2c3", "X")
println result$  ' aXb2c3
ⓘ Note: Use $1, $2, etc. in the replacement string to reference capture groups from the pattern.

Split Functions

Function | Signature | Description
regex_split#(pat$, text$) | regex_split#@$$ | Split string by pattern (returns TStringList, 0-based)
regex_split#(pat$, text$, opts) | regex_split#@$$n | Split with options
╯ plan9basic
' Split by multiple delimiters (comma, semicolon, or whitespace)
' The + quantifier treats a run of consecutive delimiters as one separator
parts# = regex_split#("[,;\s]+", "apple,banana;cherry orange")
' The result list is 0-based, so iterate from 0 to count - 1
for i = 0 to strings_count(parts#) - 1
    println strings_strings$(parts#, i)
next
' apple / banana / cherry / orange

Group / Capture Functions

Function | Signature | Description
regex_groups#(pat$, text$) | regex_groups#@$$ | All capture groups from first match (TStringList, 0=full match)
regex_groups#(pat$, text$, opts) | regex_groups#@$$n | Groups with options
regex_group$(pat$, text$, index) | regex_group$@$$n | Specific capture group (0=full match, 1+=groups)
regex_group$(pat$, text$, index, opts) | regex_group$@$$nn | Group with options
regex_groupcount(pat$, text$) | regex_groupcount@$$ | Number of groups including full match
╯ plan9basic
' Parse date: capture year, month, day
groups# = regex_groups#("(\d{4})-(\d{2})-(\d{2})", "Date: 2025-01-03")
' Only index into the list when something was captured
' (presumably the list is empty when nothing matches — confirm)
if strings_count(groups#) > 0 then
    println "Full:  "; strings_strings$(groups#, 0)   ' 2025-01-03
    println "Year:  "; strings_strings$(groups#, 1)   ' 2025
    println "Month: "; strings_strings$(groups#, 2)   ' 01
    println "Day:   "; strings_strings$(groups#, 3)   ' 03
endif

' Extract domain from email
' Index 1 selects the first capture group, i.e. the text after "@"
domain$ = regex_group$("@([\w.-]+)", "user@example.com", 1)
println "Domain: "; domain$  ' example.com

' Count groups
cnt = regex_groupcount("(\w+)-(\w+)-(\w+)", "abc-def-ghi")
println "Groups: "; cnt  ' 4 (full match + 3 captures)
ⓘ Note: Group index 0 is always the full match. Numbered capture groups start at index 1.

Utility Functions

Function | Signature | Description
regex_escape$(text$) | regex_escape$@$ | Escape special regex characters for literal matching
╯ plan9basic
' Make user input safe for regex
' ( ) + and ? are regex metacharacters and would otherwise be read as
' pattern syntax rather than literal text
userInput$ = "What is (2+2)?"
safePattern$ = regex_escape$(userInput$)
' safePattern$ = "What is \(2\+2\)\?"

' The escaped pattern matches the original text literally
if regex_match(safePattern$, userInput$) = 1 then
    println "Found exact text"
endif

Complete Examples

Email Validator

╯ email_validator.bas
' Email validation program
' validateEmail$(email$): returns "Valid" when the whole of email$ matches
' the address pattern below, otherwise "Invalid".
function validateEmail$(email$) local pattern$, result$
    ' local part, "@", domain, ".", then a top-level domain of 2+ letters
    pattern$ = "^[\w.-]+@[\w.-]+\.[a-zA-Z]{2,}$"
    if regex_matchfull(pattern$, email$) = 1 then
        result$ = "Valid"
    else
        result$ = "Invalid"
    endif
    return result$
endfunction

' Comma-separated test addresses: two well-formed, two malformed
emails$ = "user@example.com,invalid@,test@site.org,bad email"
parts# = regex_split#(",", emails$)

' Print each address followed by its validation result
for i = 0 to strings_count(parts#) - 1
    email$ = strings_strings$(parts#, i)
    println email$; " -> "; validateEmail$(email$)
next

Log Parser

╯ log_parser.bas
' Parse Apache-style log entries
' NOTE(review): the \" sequences in the two string literals below assume that
' Plan9Basic accepts a backslash-escaped double quote inside a string — confirm.
' The other examples (e.g. "\d+") rely on the backslash reaching the regex
' engine unchanged, so the quote may need to be written another way.
logLine$ = "192.168.1.1 - - [03/Jan/2025:10:15:30 +0000] \"GET /index.html HTTP/1.1\" 200 1234"

' Capture groups: 1=IP, 2=timestamp, 3=method, 4=path, 5=status, 6=size
pattern$ = "^(\d+\.\d+\.\d+\.\d+).*\[([^\]]+)\].*\"(\w+) ([^ ]+).*\" (\d+) (\d+)"
groups# = regex_groups#(pattern$, logLine$)

' 7 entries = full match (index 0) + 6 capture groups
if strings_count(groups#) >= 7 then
    println "IP:     "; strings_strings$(groups#, 1)
    println "Date:   "; strings_strings$(groups#, 2)
    println "Method: "; strings_strings$(groups#, 3)
    println "Path:   "; strings_strings$(groups#, 4)
    println "Status: "; strings_strings$(groups#, 5)
    println "Size:   "; strings_strings$(groups#, 6)
else
    println "Failed to parse"
endif

Text Sanitizer

╯ sanitizer.bas
' Sanitize user input: strip HTML, normalize whitespace
' NOTE: only the tags themselves are removed. Text between tags, including
' the body of a <script> element, is kept in the result.
function sanitize$(text$) local result$
    ' 1. Remove anything shaped like a tag: "<", one or more non-">" chars, ">"
    result$ = regex_replace$("<[^>]+>", text$, "")
    ' 2. Collapse every run of whitespace into a single space
    result$ = regex_replace$("\s+", result$, " ")
    ' 3. Trim the whitespace left at the start and end of the string
    result$ = regex_replace$("^\s+|\s+$", result$, "")
    return result$
endfunction

' Sample input with outer padding, repeated spaces, and HTML tags
dirty$ = "  <b>Hello</b>   <script>alert('xss')</script>   World!  "
clean$ = sanitize$(dirty$)

' The brackets make leading/trailing whitespace visible in the output
println "Original: ["; dirty$; "]"
' Expected: [Hello alert('xss') World!]  (the script text itself is not removed)
println "Cleaned:  ["; clean$; "]"

Phone Number Formatter

╯ phone_format.bas
' Format phone numbers consistently
' formatPhone$(phone$): strips every non-digit from phone$ and returns
'   "(AAA) PPP-LLLL"     when exactly 10 digits remain,
'   "+1 (AAA) PPP-LLLL"  when 11 digits remain and the first one is 1,
'   phone$ unchanged     in every other case.
function formatPhone$(phone$) local digits$, area$, prefix$, line$, result$
    ' Keep digits only
    digits$ = regex_replace$("[^\d]", phone$, "")

    if len(digits$) = 10 then
        ' mid$() positions are 1-based: 3-digit area, 3-digit prefix, 4-digit line
        area$ = mid$(digits$, 1, 3)
        prefix$ = mid$(digits$, 4, 3)
        line$ = mid$(digits$, 7, 4)
        result$ = "(" + area$ + ") " + prefix$ + "-" + line$
    else
        if len(digits$) = 11 and left$(digits$, 1) = "1" then
            ' Skip the leading country code digit
            area$ = mid$(digits$, 2, 3)
            prefix$ = mid$(digits$, 5, 3)
            line$ = mid$(digits$, 8, 4)
            result$ = "+1 (" + area$ + ") " + prefix$ + "-" + line$
        else
            ' Unrecognised length: hand the input back untouched
            result$ = phone$
        endif
    endif
    return result$
endfunction

' The three 10-digit inputs below all normalise to (555) 123-4567
println formatPhone$("5551234567")
println formatPhone$("(555) 123-4567")
println formatPhone$("555.123.4567")
' 11 digits with a leading 1: +1 (555) 123-4567
println formatPhone$("1-555-123-4567")

Common Regex Patterns

Pattern | Description | Example Match
\d+ | One or more digits | 123
\w+ | One or more word characters | hello_123
[a-zA-Z]+ | One or more letters | Hello
^\s*$ | Empty or whitespace-only line | "   " (three spaces)
\b\w+\b | Whole word | word
[^,]+ | Anything except comma | abc def
\S+@\S+\.\S+ | Simple email pattern | a@b.com
\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} | IPv4 address | 192.168.1.1
#[0-9a-fA-F]{6} | Hex color code | #FF5733
(\w+)\s(\w+) | Two words (capture each) | John Smith
^https?:// | URL protocol prefix | https://

Notes & Error Handling

Topic | Details
0-based results | TStringList results (regex_findall#, regex_split#, regex_groups#) use 0-based indexing
1-based positions | regex_findpos() returns 1-based positions (like other Plan9Basic string functions)
Backreferences | Use $1, $2, etc. in replacement strings to reference capture groups
Group index 0 | Group 0 is always the full match; numbered captures start at 1
Memory | All TStringList results are managed by the garbage collector automatically
Error handling | Invalid patterns return 0 or empty strings — no exceptions raised to BASIC
Performance | For repeated matching, compile complex patterns once and reuse
StrListLib | Results use StrListLib functions: strings_count(), strings_strings$()

Quick Reference

All 28 Registered Signatures

Function | Signature | Description
regex_isvalid(pat$) | regex_isvalid@$ | Pattern valid? (1/0)
regex_error$(pat$) | regex_error$@$ | Error message for invalid pattern
regex_match(pat$, text$) | regex_match@$$ | Match anywhere (1/0)
regex_match(pat$, text$, opts) | regex_match@$$n | Match with options
regex_matchfull(pat$, text$) | regex_matchfull@$$ | Match entire text (1/0)
regex_matchfull(pat$, text$, opts) | regex_matchfull@$$n | Full match with options
regex_find$(pat$, text$) | regex_find$@$$ | First match string
regex_find$(pat$, text$, opts) | regex_find$@$$n | First match with options
regex_findpos(pat$, text$) | regex_findpos@$$ | Position of first match (1-based)
regex_findpos(pat$, text$, opts) | regex_findpos@$$n | Find position with options
regex_findlen(pat$, text$) | regex_findlen@$$ | Length of first match
regex_findlen(pat$, text$, opts) | regex_findlen@$$n | Find length with options
regex_findall#(pat$, text$) | regex_findall#@$$ | All matches (TStringList)
regex_findall#(pat$, text$, opts) | regex_findall#@$$n | Find all with options
regex_count(pat$, text$) | regex_count@$$ | Number of matches
regex_count(pat$, text$, opts) | regex_count@$$n | Count with options
regex_replace$(pat$, text$, repl$) | regex_replace$@$$$ | Replace all matches
regex_replace$(pat$, text$, repl$, opts) | regex_replace$@$$$n | Replace all with options
regex_replacefirst$(pat$, text$, repl$) | regex_replacefirst$@$$$ | Replace first match
regex_replacefirst$(pat$, text$, repl$, opts) | regex_replacefirst$@$$$n | Replace first with options
regex_split#(pat$, text$) | regex_split#@$$ | Split by pattern (TStringList)
regex_split#(pat$, text$, opts) | regex_split#@$$n | Split with options
regex_groups#(pat$, text$) | regex_groups#@$$ | All capture groups (TStringList)
regex_groups#(pat$, text$, opts) | regex_groups#@$$n | Groups with options
regex_group$(pat$, text$, idx) | regex_group$@$$n | Specific capture group
regex_group$(pat$, text$, idx, opts) | regex_group$@$$nn | Group with options
regex_groupcount(pat$, text$) | regex_groupcount@$$ | Number of groups
regex_escape$(text$) | regex_escape$@$ | Escape special regex chars

16 base functions — 28 registered signatures (with option overloads) across 8 categories.