# Demo Pyrex extension -- string handling # Author: John Machin (sjmachin@lexicon.net) # Placed in the public domain 2006-06-19 # Minimally tested :-) # === usually in a .pxi file === cdef extern from "Python.h": void PyMem_Free(void *p) void* PyMem_Malloc(int n) except NULL int PyString_AsStringAndSize(object obj, char **buff, int *length) except -1 int PyString_Check(object obj) object PyString_FromStringAndSize(char *s, int len) # except NULL cdef union ptr_union: # I hate signed char :-) # Greg doesn't :-( unsigned char ** ucptr char ** ptr ctypedef unsigned char UC ctypedef unsigned char *UCP cdef string_in(object obj, unsigned char **buff, int *length): cdef ptr_union pu if not PyString_Check(obj): raise TypeError('arg must be a string') pu.ucptr = buff PyString_AsStringAndSize(obj, pu.ptr, length) buff = pu.ucptr cdef extern from "string.h": unsigned int strlen(char * cs) void * memset(char * s, char c, unsigned int n) int memcmp(void *s, void *t, int n) # === end of includes === # A trivial function that returns 1 if stra == strb, else 0 # If you want True/False, wrap bool() around the call :-) def strequal(stra, strb): cdef UCP pa, pb cdef int lena, lenb, i string_in(stra, &pa, &lena) string_in(strb, &pb, &lenb) if lena != lenb: return 0 for i from 0 <= i < lena: if pa[i] != pb[i]: return 0 return 1 # Sunday's Quick search (cut-down BM search) # borrowed from http://www-igm.univ-mlv.fr/~lecroq/string/node19.html """ void preQsBc(char *x, int m, int qsBc[]) { int i; for (i = 0; i < ASIZE; ++i) qsBc[i] = m + 1; for (i = 0; i < m; ++i) qsBc[x[i]] = m - i; } void QS(char *x, int m, char *y, int n) { int j, qsBc[ASIZE]; /* Preprocessing */ preQsBc(x, m, qsBc); /* Searching */ j = 0; while (j <= n - m) { if (memcmp(x, y + j, m) == 0) OUTPUT(j); j += qsBc[y[j + m]]; /* shift */ } } """ def quick_search(pattern, text): """Return a list of the offsets at which is found in .""" cdef int i, j, m, n cdef int qsBc[256] cdef UCP x, y # converting Python args to C locals string_in(pattern, &x, &m) string_in(text, &y, &n) # Preprocessing for i from 0 <= i < 256: qsBc[i] = m + 1 for i from 0 <= i < m: qsBc[x[i]] = m - i # Searching j = 0 results = [] rappend = results.append # Note judicious use of Python syntax for the results list. # We can't do any better by using the Python/C API, # which in this case is ugly, tedious and error-prone. while j <= n - m: if memcmp(x, y + j, m) == 0: rappend(j) j = j + qsBc[y[j + m]] return results