Cython routing added
Some checks failed
Lint Code / lint (push) Failing after 44s
CI/CD Pipeline / lint (push) Successful in 0s
Run Tests / test (3.12) (push) Successful in 3m48s
Run Tests / test (3.13) (push) Successful in 3m7s
CI/CD Pipeline / test (push) Successful in 1s
CI/CD Pipeline / build-and-release (push) Has been skipped
CI/CD Pipeline / notify (push) Successful in 1s

This commit is contained in:
Илья Глазунов 2026-01-31 02:44:50 +03:00
parent fe541778f1
commit eeeccd57da
10 changed files with 1106 additions and 63 deletions

View File

@ -22,6 +22,11 @@ jobs:
with:
fetch-depth: 0
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libpcre2-dev
- name: Setup Python
uses: actions/setup-python@v4
with:
@ -45,6 +50,9 @@ jobs:
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --with dev
- name: Build Cython extensions
run: poetry run python scripts/build_cython.py build_ext --inplace
- name: Build package
run: |
poetry build

View File

@ -17,6 +17,11 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libpcre2-dev
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
@ -40,6 +45,9 @@ jobs:
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --with dev
- name: Build Cython extensions
run: poetry run python scripts/build_cython.py build_ext --inplace
- name: Run tests
run: poetry run pytest tests/ -v

153
benchmarks/bench_routing.py Normal file
View File

@ -0,0 +1,153 @@
#!/usr/bin/env python3
"""
Benchmark script for routing performance comparison.
Compares:
- Pure Python implementation with standard re (_routing_py)
- Cython implementation with PCRE2 JIT (_routing)
Usage:
python benchmarks/bench_routing.py
"""
import re
import time
import statistics
from typing import Callable, Tuple
from pyserve._routing_py import (
FastRouter as PyFastRouter,
FastRouteMatch as PyFastRouteMatch,
)
try:
from pyserve._routing import (
FastRouter as CyFastRouter,
FastRouteMatch as CyFastRouteMatch,
)
CYTHON_AVAILABLE = True
except ImportError:
CYTHON_AVAILABLE = False
print("Cython module not compiled. Run: poetry run python scripts/build_cython.py\n")
def benchmark(func: Callable, iterations: int = 100000, warmup: int = 1000) -> Tuple[float, float]:
    """Time repeated calls of *func* and summarise the per-call latency.

    Args:
        func: Zero-argument callable to measure.
        iterations: Number of timed calls.
        warmup: Number of untimed calls made before measuring.

    Returns:
        ``(mean, stdev)`` of the per-call wall time in nanoseconds. The
        standard deviation is ``0.0`` when only one sample was taken.

    Raises:
        statistics.StatisticsError: If ``iterations`` is less than 1, because
            there are no samples to average.
    """
    # Warmup
    for _ in range(warmup):
        func()

    # Actual benchmark
    clock = time.perf_counter_ns
    samples = []
    for _ in range(iterations):
        started = clock()
        func()
        finished = clock()
        samples.append(finished - started)

    mean = statistics.mean(samples)
    # statistics.stdev() needs at least two data points; a single sample has
    # no spread, so report 0.0 instead of raising.
    spread = statistics.stdev(samples) if len(samples) > 1 else 0.0
    return mean, spread
def format_time(ns: float) -> str:
    """Render a nanosecond duration using ns, µs or ms.

    Values below 1000 are printed in nanoseconds with one decimal, values
    below one million in microseconds, and everything else in milliseconds
    (both with two decimals).
    """
    if ns < 1000:
        return f"{ns:.1f} ns"
    if ns < 1_000_000:
        return f"{ns/1000:.2f} µs"
    return f"{ns/1_000_000:.2f} ms"
def setup_router(router_class):
    """Build a router of type *router_class* loaded with a typical route set.

    The class is instantiated without arguments and ``add_route`` is called
    once per entry, in the order listed below: three exact routes, four regex
    routes, then the default route. The populated router is returned.
    """
    router = router_class()
    route_table = (
        # Exact routes
        ("=/health", {"return": "200 OK"}),
        ("=/api/status", {"return": "200 OK"}),
        ("=/favicon.ico", {"return": "204"}),
        # Regex routes
        ("~^/api/v1/users/(?P<user_id>\\d+)$", {"proxy_pass": "http://users-service"}),
        ("~^/api/v1/posts/(?P<post_id>\\d+)$", {"proxy_pass": "http://posts-service"}),
        ("~\\.(css|js|png|jpg|gif|svg|woff2?)$", {"root": "./static"}),
        ("~^/api/", {"proxy_pass": "http://api-gateway"}),
        # Default route
        ("__default__", {"spa_fallback": True, "root": "./dist"}),
    )
    for route_pattern, route_config in route_table:
        router.add_route(route_pattern, route_config)
    return router
def run_benchmarks():
    """Benchmark route matching for every sample path and print a report.

    Each path is timed against the pure Python router and, when the compiled
    module was imported, against the Cython router as well. With the Cython
    module available a summary (totals and overall speedup) and the JIT
    status of every regex route are printed at the end.
    """
    rule = "=" * 70
    print(rule)
    print("ROUTING BENCHMARK")
    print(rule)
    print()

    # (path, scenario description) pairs covering the different match kinds
    test_cases = [
        ("/health", "Exact match (first)"),
        ("/api/status", "Exact match (middle)"),
        ("/api/v1/users/12345", "Regex match with groups"),
        ("/static/app.js", "Regex match (file extension)"),
        ("/api/v2/other", "Regex match (simple prefix)"),
        ("/some/random/path", "Default route (fallback)"),
        ("/nonexistent", "Default route (fallback)"),
    ]

    iterations = 100000
    print(f"Iterations: {iterations:,}")
    print()

    # One router per implementation, both loaded with the same routes
    py_router = setup_router(PyFastRouter)
    cy_router = setup_router(CyFastRouter) if CYTHON_AVAILABLE else None

    # Per-path mean latencies, kept separately for the summary totals
    py_means = []
    cy_means = []

    for path, description in test_cases:
        print(f"Path: {path}")
        print(f" {description}")

        # Pure Python implementation (standard re)
        py_mean, py_std = benchmark(lambda p=path: py_router.match(p), iterations)
        py_means.append(py_mean)
        print(f" Python (re): {format_time(py_mean):>12} ± {format_time(py_std)}")

        # Cython implementation (PCRE2 JIT)
        if CYTHON_AVAILABLE and cy_router:
            cy_mean, cy_std = benchmark(lambda p=path: cy_router.match(p), iterations)
            cy_means.append(cy_mean)
            speedup = py_mean / cy_mean if cy_mean > 0 else 0
            print(f" Cython (PCRE2): {format_time(cy_mean):>12} ± {format_time(cy_std)} ({speedup:.2f}x faster)")
        print()

    if not CYTHON_AVAILABLE:
        return

    # Summary
    print(rule)
    print("SUMMARY")
    print(rule)
    py_total = sum(py_means)
    cy_total = sum(cy_means)
    print(f" Python (re) total: {format_time(py_total)}")
    print(f" Cython (PCRE2) total: {format_time(cy_total)}")
    print(f" Overall speedup: {py_total / cy_total:.2f}x")

    # JIT compilation status of each regex route
    print()
    print("PCRE2 JIT Status:")
    for route in cy_router.list_routes():  # type: ignore  # cy_router is set whenever CYTHON_AVAILABLE is true
        if route["type"] != "regex":
            continue
        jit = route.get("jit_compiled", False)
        status = "✓ JIT" if jit else "✗ No JIT"
        print(f" {status}: {route['pattern']}")
# Run the benchmark suite only when this file is executed as a script.
if __name__ == "__main__":
run_benchmarks()

6
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
[[package]]
name = "a2wsgi"
@ -1720,5 +1720,5 @@ wsgi = ["a2wsgi"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.12"
content-hash = "653d7b992e2bb133abde2e8b1c44265e948ed90487ab3f2670429510a8aa0683"
python-versions = ">=3.12, <=3.13.7"
content-hash = "411b746f1a577ed635af9fd3e01daf1fa03950d27ef23888fc7cdd0b99762404"

486
pyserve/_routing.pyx Normal file
View File

@ -0,0 +1,486 @@
# cython: language_level=3
# cython: boundscheck=False
# cython: wraparound=False
# cython: cdivision=True
from libc.stdint cimport uint32_t
from libc.stddef cimport size_t
from cpython.bytes cimport PyBytes_AsString, PyBytes_GET_SIZE
cimport cython
from ._routing_pcre2 cimport *
from typing import Optional
# Type aliases for cleaner NULL casts
ctypedef pcre2_compile_context* compile_ctx_ptr
ctypedef pcre2_match_context* match_ctx_ptr
ctypedef pcre2_general_context* general_ctx_ptr
# Buffer size for error messages
DEF ERROR_BUFFER_SIZE = 256
# Maximum capture groups we support
DEF MAX_CAPTURE_GROUPS = 32
cdef class PCRE2Pattern:
    """
    A compiled PCRE2 pattern that owns its native code and match-data blocks.

    Instances are built with ``PCRE2Pattern._create``; a bare instance has no
    compiled code and never matches. One match-data block is allocated per
    pattern and reused by every match call, so the offsets of a previous
    match are overwritten by the next one.
    """
    cdef:
        pcre2_code* _code
        pcre2_match_data* _match_data
        bint _jit_available
        str _pattern_str
        uint32_t _capture_count
        dict _name_to_index  # Named capture groups: name -> group number
        list _index_to_name  # Group number -> name (None for unnamed groups)

    def __cinit__(self):
        self._code = NULL
        self._match_data = NULL
        self._jit_available = <bint>False
        self._capture_count = 0
        self._name_to_index = {}
        self._index_to_name = []

    def __dealloc__(self):
        # Release the match data before the compiled code it was created from.
        if self._match_data is not NULL:
            pcre2_match_data_free(self._match_data)
            self._match_data = NULL
        if self._code is not NULL:
            pcre2_code_free(self._code)
            self._code = NULL

    @staticmethod
    cdef PCRE2Pattern _create(str pattern, bint case_insensitive=<bint>False, bint use_jit=<bint>True):
        # Compile `pattern` (UTF + UCP, optionally caseless), try to JIT it
        # when `use_jit` is set, allocate the reusable match data and record
        # the named groups.
        # Raises ValueError when PCRE2 rejects the pattern and MemoryError
        # when the match data cannot be allocated.
        cdef:
            PCRE2Pattern self = PCRE2Pattern.__new__(PCRE2Pattern)
            bytes pattern_bytes
            const char* pattern_ptr
            Py_ssize_t pattern_len
            uint32_t options = 0
            int errorcode = 0
            PCRE2_SIZE erroroffset = 0
            int jit_result
            uint32_t capture_count = 0

        self._pattern_str = pattern
        self._name_to_index = {}
        self._index_to_name = []

        # pattern_bytes must stay referenced while pattern_ptr is in use.
        pattern_bytes = pattern.encode('utf-8')
        pattern_ptr = PyBytes_AsString(pattern_bytes)
        pattern_len = PyBytes_GET_SIZE(pattern_bytes)

        options = PCRE2_UTF | PCRE2_UCP
        if case_insensitive:
            options |= PCRE2_CASELESS

        self._code = pcre2_compile(
            <PCRE2_SPTR>pattern_ptr,
            <PCRE2_SIZE>pattern_len,
            options,
            &errorcode,
            &erroroffset,
            <compile_ctx_ptr>NULL
        )
        if self._code is NULL:
            error_msg = PCRE2Pattern._get_error_message(errorcode)
            raise ValueError(f"PCRE2 compile error at offset {erroroffset}: {error_msg}")

        if use_jit:
            # A non-zero result just means matching falls back to pcre2_match.
            jit_result = pcre2_jit_compile(self._code, PCRE2_JIT_COMPLETE)
            self._jit_available = <bint>(jit_result == 0)

        pcre2_pattern_info(self._code, PCRE2_INFO_CAPTURECOUNT, <void*>&capture_count)
        self._capture_count = capture_count

        self._match_data = pcre2_match_data_create_from_pattern(self._code, <general_ctx_ptr>NULL)
        if self._match_data is NULL:
            pcre2_code_free(self._code)
            self._code = NULL
            raise MemoryError("Failed to create match data")

        self._extract_named_groups()
        return self

    cdef void _extract_named_groups(self):
        # Fill _name_to_index / _index_to_name from the pattern's name table.
        # Each table entry is read as a two-byte big-endian group number
        # followed by the NUL-terminated group name.
        cdef:
            uint32_t namecount = 0
            uint32_t nameentrysize = 0
            PCRE2_SPTR nametable
            uint32_t i
            int group_num
            bytes name_bytes
            str name

        pcre2_pattern_info(self._code, PCRE2_INFO_NAMECOUNT, <void*>&namecount)
        if namecount == 0:
            return  # void return

        pcre2_pattern_info(self._code, PCRE2_INFO_NAMEENTRYSIZE, <void*>&nameentrysize)
        pcre2_pattern_info(self._code, PCRE2_INFO_NAMETABLE, <void*>&nametable)

        self._index_to_name = [None] * (self._capture_count + 1)
        for i in range(namecount):
            group_num = (<int>nametable[0] << 8) | <int>nametable[1]
            name_bytes = <bytes>(nametable + 2)
            name = name_bytes.decode('utf-8')
            self._name_to_index[name] = group_num
            if <uint32_t>group_num <= self._capture_count:
                self._index_to_name[<Py_ssize_t>group_num] = name
            nametable += nameentrysize

    @staticmethod
    cdef str _get_error_message(int errorcode):
        # Translate a PCRE2 error code into text, with a generic fallback
        # when PCRE2 cannot provide a message.
        cdef:
            PCRE2_UCHAR buffer[ERROR_BUFFER_SIZE]
            int result

        result = pcre2_get_error_message(errorcode, buffer, ERROR_BUFFER_SIZE)
        if result < 0:
            return f"Unknown error {errorcode}"
        return (<bytes>buffer).decode('utf-8')

    cdef int _exec(self, bytes subject_bytes) except? -1:
        # Run one match attempt over the UTF-8 encoded subject, from offset 0
        # with no options, through the JIT entry point when the pattern was
        # JIT-compiled and through pcre2_match otherwise.
        # Returns the raw PCRE2 result: >= 0 on a match, negative otherwise.
        # The caller must keep subject_bytes alive while it reads the ovector.
        cdef:
            const char* subject_ptr
            Py_ssize_t subject_len

        subject_ptr = PyBytes_AsString(subject_bytes)
        subject_len = PyBytes_GET_SIZE(subject_bytes)

        if self._jit_available:
            return pcre2_jit_match(
                self._code,
                <PCRE2_SPTR>subject_ptr,
                <PCRE2_SIZE>subject_len,
                0,  # start offset
                0,  # options
                self._match_data,
                <match_ctx_ptr>NULL
            )
        return pcre2_match(
            self._code,
            <PCRE2_SPTR>subject_ptr,
            <PCRE2_SIZE>subject_len,
            0,  # start offset
            0,  # options
            self._match_data,
            <match_ctx_ptr>NULL
        )

    cdef dict _collect_named_groups(self, bytes subject_bytes):
        # Build {name: text or None} for every named group from the offsets
        # left in the match data by the most recent successful _exec() call
        # on subject_bytes. Returns an empty dict for patterns without names.
        cdef:
            PCRE2_SIZE* ovector
            dict groups = {}
            str name
            int index
            PCRE2_SIZE start, end

        if not self._name_to_index:
            return groups

        ovector = pcre2_get_ovector_pointer(self._match_data)
        for name, index in self._name_to_index.items():
            start = ovector[<Py_ssize_t>(2 * index)]
            end = ovector[<Py_ssize_t>(2 * index + 1)]
            if start != PCRE2_UNSET and end != PCRE2_UNSET:
                groups[name] = subject_bytes[start:end].decode('utf-8')
            else:
                # The group did not take part in the match.
                groups[name] = None
        return groups

    cpdef bint search(self, str subject):
        """
        Search for pattern anywhere in subject.
        Returns True if found, False otherwise (also when nothing is compiled).
        """
        cdef bytes subject_bytes

        if self._code is NULL:
            return <bint>False

        subject_bytes = subject.encode('utf-8')
        return <bint>(self._exec(subject_bytes) >= 0)

    cpdef dict groupdict(self, str subject):
        """
        Match pattern and return dict of named groups.
        Returns empty dict if no match or no named groups.
        Groups that did not participate in the match map to None.
        """
        cdef bytes subject_bytes

        if self._code is NULL or not self._name_to_index:
            return {}

        subject_bytes = subject.encode('utf-8')
        if self._exec(subject_bytes) < 0:
            return {}
        return self._collect_named_groups(subject_bytes)

    cpdef tuple search_with_groups(self, str subject):
        """
        Search for pattern in subject and collect named groups in one pass.
        Returns (True, groups) on a match and (False, {}) otherwise; groups
        is empty when the pattern has no named groups.
        """
        cdef bytes subject_bytes

        if self._code is NULL:
            return (False, {})

        subject_bytes = subject.encode('utf-8')
        if self._exec(subject_bytes) < 0:
            return (False, {})
        return (True, self._collect_named_groups(subject_bytes))

    @property
    def pattern(self) -> str:
        """Pattern source string as passed to _create."""
        return self._pattern_str

    @property
    def jit_compiled(self) -> bool:
        """True when JIT compilation of this pattern succeeded."""
        return <bint>self._jit_available

    @property
    def capture_count(self) -> int:
        """Number of capture groups reported by PCRE2 for this pattern."""
        return self._capture_count
cdef class FastRouteMatch:
    """
    Result of a successful route lookup.
    `config` is the matched route's configuration dict and `params` holds
    the values passed alongside it (an empty dict when none were given).
    """
    cdef:
        public dict config
        public dict params

    def __cinit__(self):
        # Give both attributes a valid dict before __init__ runs.
        self.params = {}
        self.config = {}

    def __init__(self, dict config, params=None):
        if params is None:
            params = {}
        self.config = config
        self.params = params
cdef class FastRouter:
    """
    High-performance router with PCRE2 JIT-compiled patterns.
    Matching order (nginx-like):
    1. Exact routes (prefix "=") - O(1) dict lookup
    2. Regex routes (prefix "~" or "~*") - PCRE2 JIT matching, in the order
       the routes were added
    3. Default route (fallback)
    """
    cdef:
        dict _exact_routes
        list _regex_routes   # (PCRE2Pattern, config) tuples in insertion order
        dict _default_route
        bint _has_default
        int _regex_count     # cached len(_regex_routes) for the match loop

    def __cinit__(self):
        self._exact_routes = {}
        self._regex_routes = []
        self._default_route = {}
        self._has_default = <bint>False
        self._regex_count = 0

    def __init__(self):
        # Mirrors __cinit__; calling __init__ again resets the router.
        self._exact_routes = {}
        self._regex_routes = []
        self._default_route = {}
        self._has_default = <bint>False
        self._regex_count = 0

    def add_route(self, str pattern, dict config):
        """
        Register `config` under `pattern`.
        "=<path>" adds an exact route, "__default__" sets the fallback route,
        and "~<regex>" / "~*<regex>" add a case-sensitive / case-insensitive
        regex route. Patterns with any other prefix are ignored.
        A regex that fails to compile is logged as an error and skipped; no
        exception is raised.
        """
        cdef:
            str exact_path
            str regex_pattern
            bint case_insensitive
            PCRE2Pattern compiled_pattern

        if pattern.startswith("="):
            exact_path = pattern[1:]
            self._exact_routes[exact_path] = config
        elif pattern == "__default__":
            self._default_route = config
            self._has_default = <bint>True
        elif pattern.startswith("~"):
            case_insensitive = <bint>pattern.startswith("~*")
            regex_pattern = pattern[2:] if case_insensitive else pattern[1:]
            try:
                compiled_pattern = PCRE2Pattern._create(regex_pattern, case_insensitive)
                self._regex_routes.append((compiled_pattern, config))
                self._regex_count = len(self._regex_routes)
            except (ValueError, MemoryError) as exc:
                # Still best-effort (the route is skipped), but leave a trace
                # so a mistyped pattern does not vanish silently.
                import logging
                logging.getLogger(__name__).error(
                    "Regex compilation error %s: %s", pattern, exc
                )

    cpdef object match(self, str path):
        """
        Resolve `path` to a FastRouteMatch.
        Exact routes win, then the first regex route that matches (its named
        groups become the match params), then the default route. Returns
        None when nothing applies.
        """
        cdef:
            dict config
            dict params
            int i
            PCRE2Pattern pattern
            tuple route_entry
            bint matched

        if path in self._exact_routes:
            config = self._exact_routes[path]
            return FastRouteMatch(config, {})

        for i in range(self._regex_count):
            route_entry = <tuple>self._regex_routes[i]
            pattern = <PCRE2Pattern>route_entry[0]
            config = <dict>route_entry[1]
            matched, params = pattern.search_with_groups(path)
            if matched:
                return FastRouteMatch(config, params)

        if self._has_default:
            return FastRouteMatch(self._default_route, {})
        return None

    @property
    def exact_routes(self) -> dict:
        """Exact routes as dict (path -> config); the live internal dict."""
        return self._exact_routes

    @property
    def routes(self) -> dict:
        """Return regex routes as dict (pattern_str -> config)."""
        cdef:
            dict result = {}
            PCRE2Pattern pattern

        # Routes sharing the same pattern string collapse to the last one.
        for pattern, config in self._regex_routes:
            result[pattern.pattern] = config
        return result

    @property
    def default_route(self) -> Optional[dict]:
        """Config of the default route, or None when none was registered."""
        return self._default_route if self._has_default else None

    cpdef list list_routes(self):
        """
        Describe every registered route as a dict with "type", "pattern" and
        "config" keys (regex entries also carry "jit_compiled"), listing
        exact routes first, then regex routes, then the default route.
        """
        cdef:
            list result = []
            str path_str
            dict config
            PCRE2Pattern pattern

        for path_str, config in self._exact_routes.items():
            result.append({
                "type": "exact",
                "pattern": f"={path_str}",
                "config": config,
            })
        for pattern, config in self._regex_routes:
            result.append({
                "type": "regex",
                "pattern": pattern.pattern,
                "jit_compiled": pattern.jit_compiled,
                "config": config,
            })
        if self._has_default:
            result.append({
                "type": "default",
                "pattern": "__default__",
                "config": self._default_route,
            })
        return result
def compile_pattern(str pattern, bint case_insensitive=<bint>False) -> PCRE2Pattern:
    """
    Build a PCRE2Pattern from a regular expression, with JIT requested.
    Args:
        pattern: Regular expression source
        case_insensitive: Compile the pattern as caseless when true
    Returns:
        The compiled PCRE2Pattern object
    """
    cdef PCRE2Pattern compiled = PCRE2Pattern._create(pattern, case_insensitive)
    return compiled
def fast_match(router: FastRouter, str path):
    """
    Module-level shortcut for ``router.match(path)``.
    Args:
        router: FastRouter instance to query
        path: URL path to resolve
    Returns:
        FastRouteMatch for the matching route, or None
    """
    result = router.match(path)
    return result

208
pyserve/_routing_pcre2.pxd Normal file
View File

@ -0,0 +1,208 @@
# cython: language_level=3
from libc.stdint cimport uint8_t, uint32_t, int32_t
from libc.stddef cimport size_t
cdef extern from "pcre2.h":
pass
cdef extern from *:
ctypedef struct pcre2_code_8:
pass
ctypedef pcre2_code_8 pcre2_code
ctypedef struct pcre2_match_data_8:
pass
ctypedef pcre2_match_data_8 pcre2_match_data
ctypedef struct pcre2_compile_context_8:
pass
ctypedef pcre2_compile_context_8 pcre2_compile_context
ctypedef struct pcre2_match_context_8:
pass
ctypedef pcre2_match_context_8 pcre2_match_context
ctypedef struct pcre2_general_context_8:
pass
ctypedef pcre2_general_context_8 pcre2_general_context
ctypedef uint8_t PCRE2_UCHAR
ctypedef const uint8_t* PCRE2_SPTR
ctypedef size_t PCRE2_SIZE
uint32_t PCRE2_CASELESS
uint32_t PCRE2_MULTILINE
uint32_t PCRE2_DOTALL
uint32_t PCRE2_UTF
uint32_t PCRE2_UCP
uint32_t PCRE2_NO_UTF_CHECK
uint32_t PCRE2_ANCHORED
uint32_t PCRE2_ENDANCHORED
uint32_t PCRE2_JIT_COMPLETE
uint32_t PCRE2_JIT_PARTIAL_SOFT
uint32_t PCRE2_JIT_PARTIAL_HARD
int PCRE2_ERROR_NOMATCH
int PCRE2_ERROR_PARTIAL
int PCRE2_ERROR_JIT_STACKLIMIT
PCRE2_SIZE PCRE2_UNSET
PCRE2_SIZE PCRE2_ZERO_TERMINATED
pcre2_code* pcre2_compile_8(
PCRE2_SPTR pattern,
PCRE2_SIZE length,
uint32_t options,
int* errorcode,
PCRE2_SIZE* erroroffset,
pcre2_compile_context* ccontext
)
void pcre2_code_free_8(pcre2_code* code)
int pcre2_jit_compile_8(pcre2_code* code, uint32_t options)
pcre2_match_data* pcre2_match_data_create_from_pattern_8(
const pcre2_code* code,
pcre2_general_context* gcontext
)
pcre2_match_data* pcre2_match_data_create_8(
uint32_t ovecsize,
pcre2_general_context* gcontext
)
void pcre2_match_data_free_8(pcre2_match_data* match_data)
int pcre2_match_8(
const pcre2_code* code,
PCRE2_SPTR subject,
PCRE2_SIZE length,
PCRE2_SIZE startoffset,
uint32_t options,
pcre2_match_data* match_data,
pcre2_match_context* mcontext
)
int pcre2_jit_match_8(
const pcre2_code* code,
PCRE2_SPTR subject,
PCRE2_SIZE length,
PCRE2_SIZE startoffset,
uint32_t options,
pcre2_match_data* match_data,
pcre2_match_context* mcontext
)
PCRE2_SIZE* pcre2_get_ovector_pointer_8(pcre2_match_data* match_data)
uint32_t pcre2_get_ovector_count_8(pcre2_match_data* match_data)
int pcre2_pattern_info_8(
const pcre2_code* code,
uint32_t what,
void* where
)
uint32_t PCRE2_INFO_CAPTURECOUNT
uint32_t PCRE2_INFO_NAMECOUNT
uint32_t PCRE2_INFO_NAMETABLE
uint32_t PCRE2_INFO_NAMEENTRYSIZE
uint32_t PCRE2_INFO_JITSIZE
int pcre2_get_error_message_8(
int errorcode,
PCRE2_UCHAR* buffer,
PCRE2_SIZE bufflen
)
int pcre2_substring_copy_byname_8(
pcre2_match_data* match_data,
PCRE2_SPTR name,
PCRE2_UCHAR* buffer,
PCRE2_SIZE* bufflen
)
int pcre2_substring_copy_bynumber_8(
pcre2_match_data* match_data,
uint32_t number,
PCRE2_UCHAR* buffer,
PCRE2_SIZE* bufflen
)
int pcre2_substring_get_byname_8(
pcre2_match_data* match_data,
PCRE2_SPTR name,
PCRE2_UCHAR** bufferptr,
PCRE2_SIZE* bufflen
)
int pcre2_substring_get_bynumber_8(
pcre2_match_data* match_data,
uint32_t number,
PCRE2_UCHAR** bufferptr,
PCRE2_SIZE* bufflen
)
void pcre2_substring_free_8(PCRE2_UCHAR* buffer)
# Width-neutral alias: forwards every argument unchanged to the 8-bit entry
# point pcre2_compile_8 and returns its result. Declared noexcept, so it
# never raises a Python exception.
cdef inline pcre2_code* pcre2_compile(
PCRE2_SPTR pattern,
PCRE2_SIZE length,
uint32_t options,
int* errorcode,
PCRE2_SIZE* erroroffset,
pcre2_compile_context* ccontext
) noexcept:
return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext)
# Width-neutral alias for pcre2_code_free_8; passes `code` through as-is.
cdef inline void pcre2_code_free(pcre2_code* code) noexcept:
pcre2_code_free_8(code)
# Width-neutral alias for pcre2_jit_compile_8; returns its int result.
cdef inline int pcre2_jit_compile(pcre2_code* code, uint32_t options) noexcept:
return pcre2_jit_compile_8(code, options)
# Width-neutral alias for pcre2_match_data_create_from_pattern_8; returns the
# pointer it produces without inspecting it.
cdef inline pcre2_match_data* pcre2_match_data_create_from_pattern(
const pcre2_code* code,
pcre2_general_context* gcontext
) noexcept:
return pcre2_match_data_create_from_pattern_8(code, gcontext)
# Width-neutral alias for pcre2_match_data_free_8.
cdef inline void pcre2_match_data_free(pcre2_match_data* match_data) noexcept:
pcre2_match_data_free_8(match_data)
# Width-neutral alias: forwards all seven arguments unchanged to
# pcre2_match_8 and returns its int result.
cdef inline int pcre2_match(
const pcre2_code* code,
PCRE2_SPTR subject,
PCRE2_SIZE length,
PCRE2_SIZE startoffset,
uint32_t options,
pcre2_match_data* match_data,
pcre2_match_context* mcontext
) noexcept:
return pcre2_match_8(code, subject, length, startoffset, options, match_data, mcontext)
# Width-neutral alias: same parameter list as pcre2_match above, forwarded
# unchanged to pcre2_jit_match_8.
cdef inline int pcre2_jit_match(
const pcre2_code* code,
PCRE2_SPTR subject,
PCRE2_SIZE length,
PCRE2_SIZE startoffset,
uint32_t options,
pcre2_match_data* match_data,
pcre2_match_context* mcontext
) noexcept:
return pcre2_jit_match_8(code, subject, length, startoffset, options, match_data, mcontext)
# Width-neutral alias for pcre2_get_ovector_pointer_8; returns the pointer
# obtained for `match_data`.
cdef inline PCRE2_SIZE* pcre2_get_ovector_pointer(pcre2_match_data* match_data) noexcept:
return pcre2_get_ovector_pointer_8(match_data)
# Width-neutral alias for pcre2_get_ovector_count_8.
cdef inline uint32_t pcre2_get_ovector_count(pcre2_match_data* match_data) noexcept:
return pcre2_get_ovector_count_8(match_data)
# Width-neutral alias for pcre2_pattern_info_8; `where` is passed through
# untouched, so the caller picks the destination matching `what`.
cdef inline int pcre2_pattern_info(const pcre2_code* code, uint32_t what, void* where) noexcept:
return pcre2_pattern_info_8(code, what, where)
# Width-neutral alias for pcre2_get_error_message_8; returns its int result.
cdef inline int pcre2_get_error_message(int errorcode, PCRE2_UCHAR* buffer, PCRE2_SIZE bufflen) noexcept:
return pcre2_get_error_message_8(errorcode, buffer, bufflen)

129
pyserve/_routing_py.py Normal file
View File

@ -0,0 +1,129 @@
"""
Pure Python fallback for _routing when PCRE2/Cython is not available.
This module provides the same interface using the standard library `re` module.
It's slower than the Cython+PCRE2 implementation but works everywhere.
In future we may add pcre2.py library support for better performance in this module.
"""
import re
from typing import Any, Dict, List, Optional, Pattern, Tuple
class FastRouteMatch:
    """Result of a successful route lookup.

    ``config`` is the matched route's configuration and ``params`` the
    parameters supplied with it; ``params`` defaults to a fresh empty dict
    when ``None`` is passed.
    """

    __slots__ = ("config", "params")

    def __init__(self, config: Dict[str, Any], params: Optional[Dict[str, str]] = None):
        if params is None:
            params = {}
        self.config = config
        self.params = params
class FastRouter:
    """
    Router with regex pattern matching.

    Matching order (nginx-like):
    1. Exact routes (prefix "=") - O(1) dict lookup
    2. Regex routes (prefix "~" or "~*") - linear scan in insertion order
    3. Default route (fallback)
    """

    __slots__ = ("_exact_routes", "_regex_routes", "_default_route", "_has_default", "_regex_count")

    def __init__(self) -> None:
        self._exact_routes: Dict[str, Dict[str, Any]] = {}
        self._regex_routes: List[Tuple[Pattern[str], Dict[str, Any]]] = []
        self._default_route: Dict[str, Any] = {}
        self._has_default: bool = False
        self._regex_count: int = 0

    def add_route(self, pattern: str, config: Dict[str, Any]) -> None:
        """Register *config* under *pattern*.

        ``"=<path>"`` adds an exact route, ``"__default__"`` sets the
        fallback route and ``"~<regex>"`` / ``"~*<regex>"`` add a
        case-sensitive / case-insensitive regex route. Patterns with any
        other prefix are ignored. A regex that fails to compile is logged as
        an error and skipped; no exception is raised.
        """
        if pattern.startswith("="):
            self._exact_routes[pattern[1:]] = config
            return

        if pattern == "__default__":
            self._default_route = config
            self._has_default = True
            return

        if not pattern.startswith("~"):
            return

        case_insensitive = pattern.startswith("~*")
        regex_pattern = pattern[2:] if case_insensitive else pattern[1:]
        flags = re.IGNORECASE if case_insensitive else 0
        try:
            compiled_pattern = re.compile(regex_pattern, flags)
        except re.error as exc:
            # Still best-effort (the route is skipped), but leave a trace so a
            # mistyped pattern does not vanish silently.
            import logging

            logging.getLogger(__name__).error(
                "Regex compilation error %s: %s", pattern, exc
            )
            return
        self._regex_routes.append((compiled_pattern, config))
        self._regex_count = len(self._regex_routes)

    def match(self, path: str) -> Optional[FastRouteMatch]:
        """Resolve *path* to a ``FastRouteMatch``.

        Exact routes win, then the first regex route whose pattern is found
        in *path* (its named groups become the match params), then the
        default route. Returns ``None`` when nothing applies.
        """
        if path in self._exact_routes:
            return FastRouteMatch(self._exact_routes[path], {})

        for pattern, config in self._regex_routes:
            match_obj = pattern.search(path)
            if match_obj is not None:
                return FastRouteMatch(config, match_obj.groupdict())

        if self._has_default:
            return FastRouteMatch(self._default_route, {})
        return None

    @property
    def exact_routes(self) -> Dict[str, Dict[str, Any]]:
        """Exact routes as ``{path: config}``; the live internal dict."""
        return self._exact_routes

    @property
    def routes(self) -> Dict[Pattern[str], Dict[str, Any]]:
        """Regex routes as ``{compiled pattern: config}`` (a new dict)."""
        return dict(self._regex_routes)

    @property
    def default_route(self) -> Optional[Dict[str, Any]]:
        """Config of the default route, or ``None`` when none was set."""
        return self._default_route if self._has_default else None

    def list_routes(self) -> List[Dict[str, Any]]:
        """Describe every route as a dict with type, pattern and config.

        Exact routes are listed first, then regex routes, then the default
        route if one was registered.
        """
        result: List[Dict[str, Any]] = [
            {"type": "exact", "pattern": f"={path}", "config": config}
            for path, config in self._exact_routes.items()
        ]
        result.extend(
            {"type": "regex", "pattern": pattern.pattern, "config": config}
            for pattern, config in self._regex_routes
        )
        if self._has_default:
            result.append({
                "type": "default",
                "pattern": "__default__",
                "config": self._default_route,
            })
        return result
def fast_match(router: FastRouter, path: str) -> Optional[FastRouteMatch]:
    """
    Module-level shortcut for ``router.match(path)``.

    Args:
        router: FastRouter instance to query
        path: URL path to resolve

    Returns:
        FastRouteMatch for the matching route, or None
    """
    result = router.match(path)
    return result

View File

@ -1,7 +1,6 @@
import mimetypes
import re
from pathlib import Path
from typing import Any, Dict, Optional, Pattern
from typing import Any, Dict
from urllib.parse import urlparse
import httpx
@ -10,60 +9,19 @@ from starlette.responses import FileResponse, PlainTextResponse, Response
from .logging_utils import get_logger
try:
from pyserve._routing import FastRouteMatch, FastRouter, fast_match # type: ignore
CYTHON_ROUTING_AVAILABLE = True
except ImportError:
from pyserve._routing_py import FastRouteMatch, FastRouter, fast_match
CYTHON_ROUTING_AVAILABLE = False
logger = get_logger(__name__)
class RouteMatch:
    """Matched route: its config plus extracted params.

    A missing or empty ``params`` argument is replaced by a new empty dict.
    """

    def __init__(self, config: Dict[str, Any], params: Optional[Dict[str, str]] = None):
        self.config = config
        self.params = params if params else {}
class Router:
    """Route table with exact, regex and default routes.

    Lookup order: exact path, then the regex routes in insertion order, then
    the default route.
    """

    def __init__(self, static_dir: str = "./static"):
        self.static_dir = Path(static_dir)
        self.routes: Dict[Pattern, Dict[str, Any]] = {}
        self.exact_routes: Dict[str, Dict[str, Any]] = {}
        self.default_route: Optional[Dict[str, Any]] = None

    def add_route(self, pattern: str, config: Dict[str, Any]) -> None:
        """Register *config* under an ``=path``, ``~regex``, ``~*regex`` or
        ``__default__`` pattern; other patterns are ignored and regex
        compilation failures are logged."""
        if pattern.startswith("="):
            exact_path = pattern[1:]
            self.exact_routes[exact_path] = config
            logger.debug(f"Added exact route: {exact_path}")
        elif pattern == "__default__":
            self.default_route = config
            logger.debug("Added default route")
        elif pattern.startswith("~"):
            if pattern.startswith("~*"):
                regex_pattern, flags = pattern[2:], re.IGNORECASE
            else:
                regex_pattern, flags = pattern[1:], 0
            try:
                compiled_pattern = re.compile(regex_pattern, flags)
                self.routes[compiled_pattern] = config
                logger.debug(f"Added regex route: {pattern}")
            except re.error as e:
                logger.error(f"Regex compilation error {pattern}: {e}")

    def match(self, path: str) -> Optional[RouteMatch]:
        """Return the RouteMatch for *path*, or None when no route applies."""
        exact_config = self.exact_routes.get(path)
        if path in self.exact_routes:
            return RouteMatch(exact_config)

        for compiled, config in self.routes.items():
            found = compiled.search(path)
            if found:
                return RouteMatch(config, found.groupdict())

        # A default route whose config is empty counts as "not set" here.
        if self.default_route:
            return RouteMatch(self.default_route)
        return None
# Aliases for backward compatibility
RouteMatch = FastRouteMatch
Router = FastRouter
class RequestHandler:

View File

@ -9,9 +9,86 @@ Or via make:
"""
import os
import subprocess
import sys
from pathlib import Path
def _split_pcre2_flags(cflags, libs):
    """Split compiler and linker flag strings into build lists.

    Returns ``(include_dirs, library_dirs, libraries)``; ``libraries`` always
    starts with ``"pcre2-8"`` and contains no duplicates.
    """
    include_dirs = [flag[2:] for flag in cflags.split() if flag.startswith("-I")]
    library_dirs = []
    libraries = ["pcre2-8"]
    for flag in libs.split():
        if flag.startswith("-L"):
            library_dirs.append(flag[2:])
        elif flag.startswith("-l"):
            lib = flag[2:]
            if lib not in libraries:
                libraries.append(lib)
    return include_dirs, library_dirs, libraries


def _query_pcre2_tool(cflags_cmd, libs_cmd):
    """Run one config tool; return its parsed flags, or None if it is missing or fails."""
    try:
        cflags = subprocess.check_output(
            cflags_cmd, stderr=subprocess.DEVNULL
        ).decode().strip()
        libs = subprocess.check_output(
            libs_cmd, stderr=subprocess.DEVNULL
        ).decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
    return _split_pcre2_flags(cflags, libs)


def get_pcre2_config():
    """Locate the PCRE2 (8-bit) headers and libraries for the build.

    Tries ``pkg-config libpcre2-8`` first, then ``pcre2-config``, and finally
    looks for ``pcre2.h`` under a few common installation prefixes. Each
    attempt starts from empty lists, so a tool that fails half-way cannot
    leak partial results into the next attempt.

    Returns:
        ``(include_dirs, library_dirs, libraries)`` as lists of strings.
        ``libraries`` always contains ``"pcre2-8"``. ``include_dirs`` is
        empty when a tool reports no ``-I`` flags or when nothing was found.

    NOTE(review): build_extensions treats an empty ``include_dirs`` as "PCRE2
    not found", which also triggers when a tool succeeds without emitting
    any ``-I`` flag.
    """
    probes = (
        (["pkg-config", "--cflags", "libpcre2-8"], ["pkg-config", "--libs", "libpcre2-8"]),
        (["pcre2-config", "--cflags"], ["pcre2-config", "--libs8"]),
    )
    for cflags_cmd, libs_cmd in probes:
        config = _query_pcre2_tool(cflags_cmd, libs_cmd)
        if config is not None:
            return config

    # Fallback: try common paths
    include_dirs = []
    library_dirs = []
    common_paths = [
        "/opt/homebrew",  # macOS ARM
        "/usr/local",  # macOS Intel / Linux
        "/usr",  # Linux
    ]
    for base in common_paths:
        include_path = Path(base) / "include"
        if (include_path / "pcre2.h").exists():
            include_dirs.append(str(include_path))
            library_dirs.append(str(Path(base) / "lib"))
            break
    return include_dirs, library_dirs, ["pcre2-8"]
def build_extensions():
try:
from Cython.Build import cythonize
@ -29,6 +106,14 @@ def build_extensions():
print("Install with: pip install setuptools")
return False
pcre2_include, pcre2_libdir, pcre2_libs = get_pcre2_config()
if not pcre2_include:
print("WARNING: PCRE2 not found. Routing module may not compile.")
print("Install PCRE2: brew install pcre2 (macOS) or apt install libpcre2-dev (Linux)")
else:
print(f"Found PCRE2: includes={pcre2_include}, libs={pcre2_libdir}")
extensions = [
Extension(
"pyserve._path_matcher",
@ -36,6 +121,18 @@ def build_extensions():
extra_compile_args=["-O3", "-ffast-math"],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
),
Extension(
"pyserve._routing",
sources=["pyserve/_routing.pyx"],
include_dirs=pcre2_include,
library_dirs=pcre2_libdir,
libraries=pcre2_libs,
extra_compile_args=["-O3", "-ffast-math"],
define_macros=[
("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION"),
("PCRE2_CODE_UNIT_WIDTH", "8"),
],
),
]
ext_modules = cythonize(
@ -59,7 +156,9 @@ def build_extensions():
cmd.run()
print("\nCython extensions built successfully!")
print(" - pyserve/_path_matcher" + (".pyd" if sys.platform == "win32" else ".so"))
ext_suffix = ".pyd" if sys.platform == "win32" else ".so"
print(f" - pyserve/_path_matcher{ext_suffix}")
print(f" - pyserve/_routing{ext_suffix}")
return True

View File

@ -50,16 +50,10 @@ class TestRouter:
# NOTE(review): this hunk shrinks from 16 to 10 lines, and the FastRouter
# classes shown earlier define no `static_dir`; the static_dir assertion
# below is presumably one of the removed lines - confirm against the repo.
def test_router_initialization(self):
"""Test router initializes with correct defaults."""
router = Router()
assert router.static_dir == Path("./static")
assert router.routes == {}
assert router.exact_routes == {}
assert router.default_route is None
# NOTE(review): FastRouter.__init__ as shown earlier takes no `static_dir`
# argument, so this test looks like a removed part of the hunk - confirm
# against the repo.
def test_router_custom_static_dir(self):
"""Test router with custom static directory."""
router = Router(static_dir="/custom/path")
assert router.static_dir == Path("/custom/path")
def test_add_exact_route(self):
"""Test adding exact match route."""
router = Router()