[Pyrex] Re: Pyrex enhancement request: 'intern' constant strings [LONG]

Pedro Rodriguez pedro_rodriguez at club-internet.fr
Sun May 11 02:11:17 CEST 2003


On Wed, 07 May 2003 05:51:54 +0000, Greg Ewing wrote:

>> Would it be possible for the PYREX compiler to generate code to 
>> 'intern' constant C strings at module init time
> 
> Yes, that's one of the things on my ToDo list. I suspect
> that this is the reason for Pyrex code sometimes
> being slower than equivalent interpreted Python code.
> 

The following code exhibits the problem. Greg already told
me that this was not efficient code even without Pyrex. But just
for the record, I ran a basic benchmark.

# perf.py -------------------------------------------------
# Module-level globals read by f() on every loop iteration. The dict maps
# the single key "A" to the value "A", so the lookup below always succeeds.
STATE = "A"
TRANSITION = { "A" : "A" }

def f(data):
    # Benchmark body: for each element of `data`, fetch the module globals
    # TRANSITION and STATE by name and index the dict with STATE.
    # The loop variable `v` is never used and the result is discarded;
    # the function returns None implicitly.
    for v in data:
        transitions = TRANSITION[STATE]

# test_perf.py --------------------------------------------
from perf import f
import time

# Input for the benchmark: a string of one million space characters, so the
# loop inside f() runs one million times.
gl = " " * 1000000
# Take wall-clock timestamps around a single call to f().
t0 = time.time()
gx = f(gl)  # f() has no return statement, so gx is None
t1 = time.time()
# Elapsed time in seconds (Python 2 print statement).
print t1 - t0


This is what I measured with python 2.2.2 :
- Python 2.2.2 : 0.9 s
- Pyrex 0.7.2  : 4.2 s
- Pyrex 0.7.2 (patched with string interning) : 1.3 s

There is a clear benefit from string interning, but surprisingly Pyrex
still didn't perform as well as Python. 

Investigating a little further on this example showed that the following
generated code could be optimised:
   __pyx_2 = __Pyx_GetName(__pyx_m, "TRANSITION");

by using the module dictionary directly, instead of the general lookup
mechanism, something like:

   /* added at global level */
   static PyObject *__pyx_mdict; 

   /* added at init level, can't fail according to python docs */
   __pyx_mdict = PyModule_GetDict(__pyx_m); 

   /* generated code, careful borrowed ref instead of new */
   __pyx_2 = PyDict_GetItem(mdict, _PYX_INTERN_TRANSITION); 


A direct modification of the C code with this hack gives 0.4 s, which is
10x faster than the unmodified version, and 2x faster than native
Python (considering the level of optimization achieved over years of work
on Python, I think this is not bad at all).


Hereafter, you'll find a 'hackish' patch for string interning (since
Pyrex seems to generate code on the fly, while the whole interning
mechanism requires a post-analysis). The module lookup should be done at
some of the modified places, but requires a little more investigation to
be automated.

-- 
Pedro


# Pyrex/Compiler/Internator.py -----------------------------------------
from Pyrex.Utils import open_new_file

# Skeleton of the C prelude that gets prepended to the generated C file.
# Substitutions, in order: the #define lines, the size of the string table,
# and the statements that fill the table.
#
# The table is `static` so that its symbol stays private to the generated
# file, and the init function is declared `static void ...(void)` instead of
# relying on implicit `int` with no return value.
CODE_TEMPLATE = """
#include "Python.h"

%s

static PyObject *_pyx_intern_strings[%d];

static void _Pyx_intern_init(void)
{
    %s
}
"""

# C macro name used in generated code to refer to the interned string
# for a given Python-level name.
NAME_TEMPLATE = "_PYX_INTERN_%s"

# One "#define <macro> _pyx_intern_strings[<index>]" line per name.
# The doubled %% survives the first substitution as a plain %d.
DEF_TEMPLATE = \
    """#define %s _pyx_intern_strings[%%d]""" % NAME_TEMPLATE

# One table-initialisation statement per name.
INIT_TEMPLATE = \
    """_pyx_intern_strings[%d] = PyString_InternFromString("%s");"""


class Internator:
    """Collect names used by generated code and emit a C string-intern table.

    Code generators call get_intern_string() for each name they reference
    and embed the returned macro name in the C code they emit.  Once the
    whole file has been written, generate_code() prepends the macro
    definitions, the table and its init function to that file.
    """

    def __init__(self):
        # Maps each registered name to its C macro name.
        self.strings = {}

    def get_intern_string(self, name):
        """Register `name` and return the C macro name that refers to it.

        Calling this again with the same name returns the same macro name
        and does not add a second entry.
        """
        return self.strings.setdefault(name, NAME_TEMPLATE % name)

    def _render_code(self):
        """Return the C prelude text for all names registered so far.

        Names are emitted in sorted order so that the same set of names
        always produces the same output.
        """
        names = list(self.strings.keys())
        names.sort()

        defs = []
        inits = []
        index = 0
        for name in names:
            defs.append(DEF_TEMPLATE % (name, index))
            inits.append(INIT_TEMPLATE % (index, name))
            index = index + 1

        # ISO C does not allow zero-length arrays, so always declare at
        # least one slot even when no name was registered.
        table_size = max(len(names), 1)

        return CODE_TEMPLATE % (
            "\n".join(defs), table_size, "\n    ".join(inits))

    def generate_code(self, code):
        """Prepend the intern prelude to the file already written via `code`.

        `code` must provide a file object `code.f` (with a `name`
        attribute) and a `putln()` method.  The current file is closed and
        its contents are read back; the file is then reopened through
        open_new_file() and the prelude is written followed by the
        original contents.  `code.f` is left pointing at the reopened file.
        """
        prelude = self._render_code()

        outfilename = code.f.name
        code.f.close()

        # try/finally so the read handle is released even if read() fails.
        f = open(outfilename, "r")
        try:
            data = f.read()
        finally:
            f.close()

        # NOTE(review): presumably open_new_file() truncates the existing
        # file -- confirm against Pyrex.Utils.
        code.f = open_new_file(outfilename)
        code.putln(prelude)
        code.f.write(data)


# intern.patch -------------------------------------------------------
diff -ur Pyrex-0.7.2/Pyrex/Compiler/ExprNodes.py Pyrex-0.7.2.intern/Pyrex/Compiler/ExprNodes.py
--- Pyrex-0.7.2/Pyrex/Compiler/ExprNodes.py	Thu Apr 17 00:31:29 2003
+++ Pyrex-0.7.2.intern/Pyrex/Compiler/ExprNodes.py	Sat May 10 20:05:09 2003
@@ -707,11 +707,12 @@
         namespace = Naming.builtins_cname
       else: # entry.is_pyglobal
         namespace = entry.namespace_cname
+      interned_name = self.internator.get_intern_string(self.entry.name)
       code.putln(
-        '%s = __Pyx_GetName(%s, "%s"); if (!%s) %s' % (
+        '%s = __Pyx_GetName(%s, %s); if (!%s) %s' % (
         self.result,
         namespace, 
-        self.entry.name,
+        interned_name,
         self.result, 
         code.error_goto(self.pos)))		
 
@@ -720,10 +721,11 @@
       return # There was an error earlier
     if self.entry.is_pyglobal:
       namespace = self.entry.namespace_cname
+      interned_name = self.internator.get_intern_string(self.entry.name)
       code.putln(
-        'if (PyObject_SetAttrString(%s, "%s", %s) < 0) %s' % (
+        'if (PyObject_SetAttr(%s, %s, %s) < 0) %s' % (
           namespace, 
-          self.entry.name, 
+          interned_name, 
           rhs.result, 
           code.error_goto(self.pos)))
       if debug_disposal_code:
@@ -1411,11 +1413,12 @@
   
   def generate_result_code(self, code):
     if self.is_py_attr:
+      interned_name = self.internator.get_intern_string(self.attribute)
       code.putln(
-        '%s = PyObject_GetAttrString(%s, "%s"); if (!%s) %s' % (
+        '%s = PyObject_GetAttr(%s, %s); if (!%s) %s' % (
           self.result,
           self.obj.result,
-          self.attribute,
+          interned_name,
           self.result,
           code.error_goto(self.pos)))
     #elif self.type.is_pyobject:
@@ -1427,10 +1430,11 @@
   def generate_assignment_code(self, rhs, code):
     self.obj.generate_evaluation_code(code)
     if self.is_py_attr:
+      interned_name = self.internator.get_intern_string(self.attribute)
       code.putln(
-        'if (PyObject_SetAttrString(%s, "%s", %s) < 0) %s' % (
+        'if (PyObject_SetAttr(%s, %s, %s) < 0) %s' % (
           self.obj.result,
-          self.attribute,
+          interned_name,
           rhs.result,
           code.error_goto(self.pos)))
       rhs.generate_disposal_code(code)
@@ -2749,11 +2753,12 @@
 
 get_name_utility_code = \
 """
-static PyObject *__Pyx_GetName(PyObject *dict, char *name) {
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) {
   PyObject *result;
-  result = PyObject_GetAttrString(dict, name);
-  if (!result)
-    PyErr_SetString(PyExc_NameError, name);
+  result = PyObject_GetAttr(dict, name);
+  if (!result) {
+    PyErr_SetString(PyExc_NameError, PyString_AsString(name));
+  }
   return result;
 }
 """
diff -ur Pyrex-0.7.2/Pyrex/Compiler/Nodes.py Pyrex-0.7.2.intern/Pyrex/Compiler/Nodes.py
--- Pyrex-0.7.2/Pyrex/Compiler/Nodes.py	Thu Apr 17 00:31:29 2003
+++ Pyrex-0.7.2.intern/Pyrex/Compiler/Nodes.py	Sat May 10 19:48:23 2003
@@ -17,6 +17,8 @@
 
 from DebugFlags import debug_disposal_code
 
+from Internator import Internator
+
 class Node:
   #  pos         (string, int, int)   Source file position
   #  is_name     boolean              Is a NameNode
@@ -25,6 +27,8 @@
   is_name = 0
   is_literal = 0
   
+  internator = Internator()
+  
   def __init__(self, pos, **kw):
     self.pos = pos
     self.__dict__.update(kw)
@@ -137,6 +141,7 @@
     self.generate_const_definitions(env, code)
     self.generate_module_init_func(env, code)
     self.generate_utility_functions(env, code)
+    self.internator.generate_code(code) # XXX
     result.c_file_generated = 1
 
   def generate_module_preamble(self, env, code):
@@ -726,6 +731,7 @@
     code.putln("%s {" % header)
     code.put_var_declarations(env.temp_entries)
     env.generate_library_function_declarations(code)
+    code.putln("_Pyx_intern_init();")
     self.generate_module_creation_code(env, code)
     self.generate_global_init_code(env, code)
     self.generate_type_init_code(env, code)
@@ -809,10 +815,11 @@
           typeobj_cname,
           code.error_goto(entry.pos)))
       if typeobj_cname:
+        interned_name = self.internator.get_intern_string(scope.class_name)
         code.putln(
-          'if (PyObject_SetAttrString(%s, "%s", (PyObject *)&%s) < 0) %s' % (
+          'if (PyObject_SetAttr(%s, %s, (PyObject *)&%s) < 0) %s' % (
             Naming.module_cname,
-            scope.class_name,
+            interned_name,
             typeobj_cname,
             code.error_goto(entry.pos)))
   
@@ -2847,11 +2854,12 @@
   def generate_execution_code(self, code):
     self.module.generate_evaluation_code(code)
     for name, target in self.items:
+      interned_name = self.internator.get_intern_string(name)
       code.putln(
-        '%s = PyObject_GetAttrString(%s, "%s"); if (!%s) %s' % (
+        '%s = PyObject_GetAttr(%s, %s); if (!%s) %s' % (
           self.item.result, 
           self.module.result,
-          name,
+          interned_name,
           self.item.result,
           code.error_goto(self.pos)))
       target.generate_assignment_code(self.item, code)
@@ -2873,7 +2881,7 @@
 static void __Pyx_ReRaise(void); /*proto*/
 static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/
 static PyObject *__Pyx_GetExcValue(void); /*proto*/
-static PyObject *__Pyx_GetName(PyObject *dict, char *name); /*proto*/
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
 static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name); /*proto*/
 static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/
 static int __Pyx_GetStarArgs(PyObject **args, PyObject **kwds,\





More information about the Pyrex mailing list